From 793b883ed12a6ae6e2901ddb5e038b77d6f0c0ac Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 14 Sep 2005 16:06:14 -0700
Subject: [PATCH] sky2: driver update.

Here is revised patch against netdev sky2 branch.
It includes whitespace fixes, all the changes from the previous
review as well as some optimizations and timing fixes to
solve some of the hangs.

The stall problem is better but not perfect. It appears that
under stress the chip can't keep up with the bus
and sends a pause frame, then hangs. This version is for
testing, and hopefully other eyes might see the root
cause of the problem.

I don't want to reinvent the ugly watchdog code in the syskonnect
version of sk98lin.  If you read it you will see, the original
driver writer and the hardware developer obviously didn't
understand each other.

Dual port support is included, but not tested yet. It did
require small change to NAPI since both ports share same
IRQ.
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 include/linux/netdevice.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 7c717907896d..5e90557715ab 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -780,11 +780,15 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 }
 
 /* Schedule rx intr now? */
+static inline int netif_rx_schedule_test(struct net_device *dev)
+{
+	return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
+}
 
+/* Schedule only if device is up */
 static inline int netif_rx_schedule_prep(struct net_device *dev)
 {
-	return netif_running(dev) &&
-		!test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
+	return netif_running(dev) && netif_rx_schedule_test(dev);
 }
 
 /* Add interface to tail of rx poll list. This assumes that _prep has
-- 
cgit v1.2.3


From c2373ee98982a1c842dfb213c398f388d4227e63 Mon Sep 17 00:00:00 2001
From: Mitch Williams <mitch.a.williams@intel.com>
Date: Wed, 9 Nov 2005 10:34:45 -0800
Subject: [PATCH] net: make dev_valid_name public

dev_valid_name() is a useful function.  Make it public.

Signed-off-by: Mitch Williams <mitch.a.williams@intel.com>
Acked-by: Jay Vosburgh <fubar@us.ibm.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/netdevice.h | 1 +
 net/core/dev.c            | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 936f8b76114e..4fceff0e59ec 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -684,6 +684,7 @@ extern int		netif_rx(struct sk_buff *skb);
 extern int		netif_rx_ni(struct sk_buff *skb);
 #define HAVE_NETIF_RECEIVE_SKB 1
 extern int		netif_receive_skb(struct sk_buff *skb);
+extern int		dev_valid_name(const char *name);
 extern int		dev_ioctl(unsigned int cmd, void __user *);
 extern int		dev_ethtool(struct ifreq *);
 extern unsigned		dev_get_flags(const struct net_device *);
diff --git a/net/core/dev.c b/net/core/dev.c
index 0b48e294aafe..94e642ee6e2b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -626,7 +626,7 @@ struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mas
  *	Network device names need to be valid file names to
  *	to allow sysfs to work
  */
-static int dev_valid_name(const char *name)
+int dev_valid_name(const char *name)
 {
 	return !(*name == '\0' 
 		 || !strcmp(name, ".")
@@ -3269,6 +3269,7 @@ EXPORT_SYMBOL(__dev_get_by_index);
 EXPORT_SYMBOL(__dev_get_by_name);
 EXPORT_SYMBOL(__dev_remove_pack);
 EXPORT_SYMBOL(__skb_linearize);
+EXPORT_SYMBOL(dev_valid_name);
 EXPORT_SYMBOL(dev_add_pack);
 EXPORT_SYMBOL(dev_alloc_name);
 EXPORT_SYMBOL(dev_close);
-- 
cgit v1.2.3


From e74ac79956ecb56e71a398c57eb10fab8c58a562 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Mon, 14 Nov 2005 18:16:37 -0500
Subject: [libata] remove two unused fields from struct ata_port

---
 include/linux/libata.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index f2dbb684ce9e..83a83babff84 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -330,8 +330,6 @@ struct ata_port {
 
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
-	unsigned int		bus_state;
-	unsigned int		port_state;
 	unsigned int		pio_mask;
 	unsigned int		mwdma_mask;
 	unsigned int		udma_mask;
-- 
cgit v1.2.3


From 0a1225769763779288d759e904c4f5a660844ce4 Mon Sep 17 00:00:00 2001
From: "shemminger@osdl.org" <shemminger@osdl.org>
Date: Wed, 30 Nov 2005 11:45:17 -0800
Subject: [PATCH] sky2: change netif_rx_schedule_test to __netif_schedule_prep

I didn't like the name netif_rx_schedule_test(), in earlier patches
and changed to __netif_rx_schedule_prep to be more consistent.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/net/sky2.c        | 5 +++--
 include/linux/netdevice.h | 8 ++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index 2253140ff4dc..f56de9894208 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -1961,10 +1961,11 @@ static irqreturn_t sky2_intr(int irq, void *dev_id, struct pt_regs *regs)
 	if (status & Y2_IS_STAT_BMU) {
 		hw->intr_mask &= ~Y2_IS_STAT_BMU;
 		sky2_write32(hw, B0_IMSK, hw->intr_mask);
-		prefetch(&hw->st_le[hw->st_idx]);
 
-		if (netif_rx_schedule_test(dev0))
+		if (likely(__netif_rx_schedule_prep(dev0))) {
+			prefetch(&hw->st_le[hw->st_idx]);
 			__netif_rx_schedule(dev0);
+		}
 	}
 
 	if (status & Y2_IS_IRQ_PHY1)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 07e114d48bbb..7fda03d338d1 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -802,16 +802,16 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	return (1 << debug_value) - 1;
 }
 
-/* Schedule rx intr now? */
-static inline int netif_rx_schedule_test(struct net_device *dev)
+/* Test if receive needs to be scheduled */
+static inline int __netif_rx_schedule_prep(struct net_device *dev)
 {
 	return !test_and_set_bit(__LINK_STATE_RX_SCHED, &dev->state);
 }
 
-/* Schedule only if device is up */
+/* Test if receive needs to be scheduled but only if up */
 static inline int netif_rx_schedule_prep(struct net_device *dev)
 {
-	return netif_running(dev) && netif_rx_schedule_test(dev);
+	return netif_running(dev) && __netif_rx_schedule_prep(dev);
 }
 
 /* Add interface to tail of rx poll list. This assumes that _prep has
-- 
cgit v1.2.3


From 1f12bcc9d1840fd26bf577065214f1ebeb2609ba Mon Sep 17 00:00:00 2001
From: Steven Whitehouse <steve@chygwyn.com>
Date: Mon, 5 Dec 2005 13:42:06 -0800
Subject: [DECNET]: add memory buffer settings

The patch (originally from Steve) simply adds memory buffer settings to
DECnet similar to those in TCP.

Signed-off-by: Patrick Caulfield <patrick@tykepenguin.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h         |  3 +++
 include/net/dn.h               |  4 ++++
 net/decnet/af_decnet.c         | 25 ++++++++++++++++++++++---
 net/decnet/sysctl_net_decnet.c | 33 +++++++++++++++++++++++++++++++++
 4 files changed, 62 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 6bc03c911a83..4be34ef8c2f7 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -670,6 +670,9 @@ enum {
 	NET_DECNET_DST_GC_INTERVAL = 9,
 	NET_DECNET_CONF = 10,
 	NET_DECNET_NO_FC_MAX_CWND = 11,
+	NET_DECNET_MEM = 12,
+	NET_DECNET_RMEM = 13,
+	NET_DECNET_WMEM = 14,
 	NET_DECNET_DEBUG_LEVEL = 255
 };
 
diff --git a/include/net/dn.h b/include/net/dn.h
index c1dbbd222793..a4b6168e1e25 100644
--- a/include/net/dn.h
+++ b/include/net/dn.h
@@ -234,4 +234,8 @@ extern int decnet_di_count;
 extern int decnet_dr_count;
 extern int decnet_no_fc_max_cwnd;
 
+extern int sysctl_decnet_mem[3];
+extern int sysctl_decnet_wmem[3];
+extern int sysctl_decnet_rmem[3];
+
 #endif /* _NET_DN_H */
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index f89e55f814d9..d402e9020c68 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -153,6 +153,7 @@ static struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
+static atomic_t decnet_memory_allocated;
 
 static int __dn_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen, int flags);
 static int __dn_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen, int flags);
@@ -446,10 +447,26 @@ static void dn_destruct(struct sock *sk)
 	dst_release(xchg(&sk->sk_dst_cache, NULL));
 }
 
+static int dn_memory_pressure;
+
+static void dn_enter_memory_pressure(void)
+{
+	if (!dn_memory_pressure) {
+		dn_memory_pressure = 1;
+	}
+}
+
 static struct proto dn_proto = {
-	.name	  = "DECNET",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct dn_sock),
+	.name			= "NSP",
+	.owner			= THIS_MODULE,
+	.enter_memory_pressure	= dn_enter_memory_pressure,
+	.memory_pressure	= &dn_memory_pressure,
+	.memory_allocated	= &decnet_memory_allocated,
+	.sysctl_mem		= sysctl_decnet_mem,
+	.sysctl_wmem		= sysctl_decnet_wmem,
+	.sysctl_rmem		= sysctl_decnet_rmem,
+	.max_header		= DN_MAX_NSP_DATA_HEADER + 64,
+	.obj_size		= sizeof(struct dn_sock),
 };
 
 static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
@@ -470,6 +487,8 @@ static struct sock *dn_alloc_sock(struct socket *sock, gfp_t gfp)
 	sk->sk_family      = PF_DECnet;
 	sk->sk_protocol    = 0;
 	sk->sk_allocation  = gfp;
+	sk->sk_sndbuf	   = sysctl_decnet_wmem[1];
+	sk->sk_rcvbuf	   = sysctl_decnet_rmem[1];
 
 	/* Initialization of DECnet Session Control Port		*/
 	scp = DN_SK(sk);
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 02bca49cb508..0e9d2c571165 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -10,6 +10,7 @@
  *
  * Changes:
  * Steve Whitehouse - C99 changes and default device handling
+ * Steve Whitehouse - Memory buffer settings, like the tcp ones
  *
  */
 #include <linux/config.h>
@@ -37,6 +38,11 @@ int decnet_dr_count = 3;
 int decnet_log_martians = 1;
 int decnet_no_fc_max_cwnd = NSP_MIN_WINDOW;
 
+/* Reasonable defaults, I hope, based on tcp's defaults */
+int sysctl_decnet_mem[3] = { 768 << 3, 1024 << 3, 1536 << 3 };
+int sysctl_decnet_wmem[3] = { 4 * 1024, 16 * 1024, 128 * 1024 };
+int sysctl_decnet_rmem[3] = { 4 * 1024, 87380, 87380 * 2 };
+
 #ifdef CONFIG_SYSCTL
 extern int decnet_dst_gc_interval;
 static int min_decnet_time_wait[] = { 5 };
@@ -428,6 +434,33 @@ static ctl_table dn_table[] = {
 		.extra1 = &min_decnet_no_fc_max_cwnd,
 		.extra2 = &max_decnet_no_fc_max_cwnd
 	},
+       {
+                .ctl_name = NET_DECNET_MEM,
+                .procname = "decnet_mem",
+                .data = &sysctl_decnet_mem,
+                .maxlen = sizeof(sysctl_decnet_mem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = NET_DECNET_RMEM,
+                .procname = "decnet_rmem",
+                .data = &sysctl_decnet_rmem,
+                .maxlen = sizeof(sysctl_decnet_rmem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
+        {
+                .ctl_name = NET_DECNET_WMEM,
+                .procname = "decnet_wmem",
+                .data = &sysctl_decnet_wmem,
+                .maxlen = sizeof(sysctl_decnet_wmem),
+                .mode = 0644,
+                .proc_handler = &proc_dointvec,
+                .strategy = &sysctl_intvec,
+        },
 	{
 		.ctl_name = NET_DECNET_DEBUG_LEVEL,
 		.procname = "debug",
-- 
cgit v1.2.3


From a22e2eb0710798009b8e696ae911aef745089dd6 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Mon, 5 Dec 2005 15:38:02 +0800
Subject: [PATCH] libata: move err_mask to ata_queued_cmd

  - remove err_mask from the parameter list of the complete functions
  - move err_mask to ata_queued_cmd
  - initialize qc->err_mask when needed
  - for each function call to ata_qc_complete(), replace the err_mask parameter with qc->err_mask.

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>

===============
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/ahci.c         | 12 ++++++++----
 drivers/scsi/libata-core.c  | 32 +++++++++++++++++++-------------
 drivers/scsi/libata-scsi.c  | 18 ++++++++++--------
 drivers/scsi/libata.h       |  2 +-
 drivers/scsi/pdc_adma.c     | 11 +++++------
 drivers/scsi/sata_mv.c      |  9 ++++++---
 drivers/scsi/sata_promise.c | 14 ++++++++------
 drivers/scsi/sata_qstor.c   |  7 ++++---
 drivers/scsi/sata_sil24.c   | 15 ++++++++++-----
 drivers/scsi/sata_sx4.c     | 15 ++++++++++-----
 include/linux/libata.h      |  7 +++++--
 11 files changed, 86 insertions(+), 56 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/ahci.c b/drivers/scsi/ahci.c
index cfbdd3f071b6..887eaa2a3ebf 100644
--- a/drivers/scsi/ahci.c
+++ b/drivers/scsi/ahci.c
@@ -643,7 +643,8 @@ static void ahci_eng_timeout(struct ata_port *ap)
 	 	 * not being called from the SCSI EH.
 	 	 */
 		qc->scsidone = scsi_finish_command;
-		ata_qc_complete(qc, AC_ERR_OTHER);
+		qc->err_mask |= AC_ERR_OTHER;
+		ata_qc_complete(qc);
 	}
 
 	spin_unlock_irqrestore(&host_set->lock, flags);
@@ -664,7 +665,8 @@ static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
 	ci = readl(port_mmio + PORT_CMD_ISSUE);
 	if (likely((ci & 0x1) == 0)) {
 		if (qc) {
-			ata_qc_complete(qc, 0);
+			assert(qc->err_mask == 0);
+			ata_qc_complete(qc);
 			qc = NULL;
 		}
 	}
@@ -681,8 +683,10 @@ static inline int ahci_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
 		/* command processing has stopped due to error; restart */
 		ahci_restart_port(ap, status);
 
-		if (qc)
-			ata_qc_complete(qc, err_mask);
+		if (qc) {
+			qc->err_mask |= AC_ERR_OTHER;
+			ata_qc_complete(qc);
+		}
 	}
 
 	return 1;
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 0a959566f964..f56b4daf4189 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1053,9 +1053,9 @@ static int ata_qc_wait_err(struct ata_queued_cmd *qc,
 
 	if (wait_for_completion_timeout(wait, 30 * HZ) < 1) {
 		/* timeout handling */
-		unsigned int err_mask = ac_err_mask(ata_chk_status(qc->ap));
+		qc->err_mask |= ac_err_mask(ata_chk_status(qc->ap));
 
-		if (!err_mask) {
+		if (!qc->err_mask) {
 			printk(KERN_WARNING "ata%u: slow completion (cmd %x)\n",
 			       qc->ap->id, qc->tf.command);
 		} else {
@@ -1064,7 +1064,7 @@ static int ata_qc_wait_err(struct ata_queued_cmd *qc,
 			rc = -EIO;
 		}
 
-		ata_qc_complete(qc, err_mask);
+		ata_qc_complete(qc);
 	}
 
 	return rc;
@@ -1175,6 +1175,7 @@ retry:
 				qc->cursg_ofs = 0;
 				qc->cursect = 0;
 				qc->nsect = 1;
+				qc->err_mask = 0;
 				goto retry;
 			}
 		}
@@ -2777,7 +2778,7 @@ skip_map:
  *	None.  (grabs host lock)
  */
 
-void ata_poll_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask)
+void ata_poll_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
 	unsigned long flags;
@@ -2785,7 +2786,7 @@ void ata_poll_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask)
 	spin_lock_irqsave(&ap->host_set->lock, flags);
 	ap->flags &= ~ATA_FLAG_NOINTR;
 	ata_irq_on(ap);
-	ata_qc_complete(qc, err_mask);
+	ata_qc_complete(qc);
 	spin_unlock_irqrestore(&ap->host_set->lock, flags);
 }
 
@@ -2885,7 +2886,8 @@ static int ata_pio_complete (struct ata_port *ap)
 
 	ap->hsm_task_state = HSM_ST_IDLE;
 
-	ata_poll_qc_complete(qc, 0);
+	assert(qc->err_mask == 0);
+	ata_poll_qc_complete(qc);
 
 	/* another command may start at this point */
 
@@ -3261,7 +3263,8 @@ static void ata_pio_error(struct ata_port *ap)
 
 	ap->hsm_task_state = HSM_ST_IDLE;
 
-	ata_poll_qc_complete(qc, AC_ERR_ATA_BUS);
+	qc->err_mask |= AC_ERR_ATA_BUS;
+	ata_poll_qc_complete(qc);
 }
 
 static void ata_pio_task(void *_data)
@@ -3363,7 +3366,8 @@ static void ata_qc_timeout(struct ata_queued_cmd *qc)
 		       ap->id, qc->tf.command, drv_stat, host_stat);
 
 		/* complete taskfile transaction */
-		ata_qc_complete(qc, ac_err_mask(drv_stat));
+		qc->err_mask |= ac_err_mask(drv_stat);
+		ata_qc_complete(qc);
 		break;
 	}
 
@@ -3462,7 +3466,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 	return qc;
 }
 
-int ata_qc_complete_noop(struct ata_queued_cmd *qc, unsigned int err_mask)
+int ata_qc_complete_noop(struct ata_queued_cmd *qc)
 {
 	return 0;
 }
@@ -3521,7 +3525,7 @@ void ata_qc_free(struct ata_queued_cmd *qc)
  *	spin_lock_irqsave(host_set lock)
  */
 
-void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask)
+void ata_qc_complete(struct ata_queued_cmd *qc)
 {
 	int rc;
 
@@ -3538,7 +3542,7 @@ void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask)
 	qc->flags &= ~ATA_QCFLAG_ACTIVE;
 
 	/* call completion callback */
-	rc = qc->complete_fn(qc, err_mask);
+	rc = qc->complete_fn(qc);
 
 	/* if callback indicates not to complete command (non-zero),
 	 * return immediately
@@ -3976,7 +3980,8 @@ inline unsigned int ata_host_intr (struct ata_port *ap,
 		ap->ops->irq_clear(ap);
 
 		/* complete taskfile transaction */
-		ata_qc_complete(qc, ac_err_mask(status));
+		qc->err_mask |= ac_err_mask(status);
+		ata_qc_complete(qc);
 		break;
 
 	default:
@@ -4111,7 +4116,8 @@ static void atapi_packet_task(void *_data)
 err_out_status:
 	status = ata_chk_status(ap);
 err_out:
-	ata_poll_qc_complete(qc, __ac_err_mask(status));
+	qc->err_mask |= __ac_err_mask(status);
+	ata_poll_qc_complete(qc);
 }
 
 
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index ef763ed9a0e5..2aef41112c43 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1203,12 +1203,11 @@ nothing_to_do:
 	return 1;
 }
 
-static int ata_scsi_qc_complete(struct ata_queued_cmd *qc,
-				unsigned int err_mask)
+static int ata_scsi_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct scsi_cmnd *cmd = qc->scsicmd;
 	u8 *cdb = cmd->cmnd;
- 	int need_sense = (err_mask != 0);
+ 	int need_sense = (qc->err_mask != 0);
 
 	/* For ATA pass thru (SAT) commands, generate a sense block if
 	 * user mandated it or if there's an error.  Note that if we
@@ -1955,9 +1954,9 @@ void ata_scsi_badcmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *), u8
 	done(cmd);
 }
 
-static int atapi_sense_complete(struct ata_queued_cmd *qc,unsigned int err_mask)
+static int atapi_sense_complete(struct ata_queued_cmd *qc)
 {
-	if (err_mask && ((err_mask & AC_ERR_DEV) == 0))
+	if (qc->err_mask && ((qc->err_mask & AC_ERR_DEV) == 0))
 		/* FIXME: not quite right; we don't want the
 		 * translation of taskfile registers into
 		 * a sense descriptors, since that's only
@@ -2015,15 +2014,18 @@ static void atapi_request_sense(struct ata_queued_cmd *qc)
 
 	qc->complete_fn = atapi_sense_complete;
 
-	if (ata_qc_issue(qc))
-		ata_qc_complete(qc, AC_ERR_OTHER);
+	if (ata_qc_issue(qc)) {
+		qc->err_mask |= AC_ERR_OTHER;
+		ata_qc_complete(qc);
+	}
 
 	DPRINTK("EXIT\n");
 }
 
-static int atapi_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask)
+static int atapi_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct scsi_cmnd *cmd = qc->scsicmd;
+	unsigned int err_mask = qc->err_mask;
 
 	VPRINTK("ENTER, err_mask 0x%X\n", err_mask);
 
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
index 8ebaa694d18e..686255df76b8 100644
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -39,7 +39,7 @@ struct ata_scsi_args {
 
 /* libata-core.c */
 extern int atapi_enabled;
-extern int ata_qc_complete_noop(struct ata_queued_cmd *qc, unsigned int err_mask);
+extern int ata_qc_complete_noop(struct ata_queued_cmd *qc);
 extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 				      struct ata_device *dev);
 extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc);
diff --git a/drivers/scsi/pdc_adma.c b/drivers/scsi/pdc_adma.c
index f557f17ca00c..e8df0c9ec1e6 100644
--- a/drivers/scsi/pdc_adma.c
+++ b/drivers/scsi/pdc_adma.c
@@ -464,14 +464,12 @@ static inline unsigned int adma_intr_pkt(struct ata_host_set *host_set)
 			continue;
 		qc = ata_qc_from_tag(ap, ap->active_tag);
 		if (qc && (!(qc->tf.ctl & ATA_NIEN))) {
-			unsigned int err_mask = 0;
-
 			if ((status & (aPERR | aPSD | aUIRQ)))
-				err_mask = AC_ERR_OTHER;
+				qc->err_mask |= AC_ERR_OTHER;
 			else if (pp->pkt[0] != cDONE)
-				err_mask = AC_ERR_OTHER;
+				qc->err_mask |= AC_ERR_OTHER;
 
-			ata_qc_complete(qc, err_mask);
+			ata_qc_complete(qc);
 		}
 	}
 	return handled;
@@ -501,7 +499,8 @@ static inline unsigned int adma_intr_mmio(struct ata_host_set *host_set)
 		
 				/* complete taskfile transaction */
 				pp->state = adma_state_idle;
-				ata_qc_complete(qc, ac_err_mask(status));
+				qc->err_mask |= ac_err_mask(status);
+				ata_qc_complete(qc);
 				handled = 1;
 			}
 		}
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index c94176693d1f..3e7866b51ac6 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -1242,8 +1242,10 @@ static void mv_host_intr(struct ata_host_set *host_set, u32 relevant,
 				VPRINTK("port %u IRQ found for qc, "
 					"ata_status 0x%x\n", port,ata_status);
 				/* mark qc status appropriately */
-				if (!(qc->tf.ctl & ATA_NIEN))
-					ata_qc_complete(qc, err_mask);
+				if (!(qc->tf.ctl & ATA_NIEN)) {
+					qc->err_mask |= err_mask;
+					ata_qc_complete(qc);
+				}
 			}
 		}
 	}
@@ -1864,7 +1866,8 @@ static void mv_eng_timeout(struct ata_port *ap)
 	 	 */
 		spin_lock_irqsave(&ap->host_set->lock, flags);
 		qc->scsidone = scsi_finish_command;
-		ata_qc_complete(qc, AC_ERR_OTHER);
+		qc->err_mask |= AC_ERR_OTHER;
+		ata_qc_complete(qc);
 		spin_unlock_irqrestore(&ap->host_set->lock, flags);
 	}
 }
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index 02089069b0f6..e2e146a14f97 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -401,7 +401,8 @@ static void pdc_eng_timeout(struct ata_port *ap)
 	case ATA_PROT_NODATA:
 		printk(KERN_ERR "ata%u: command timeout\n", ap->id);
 		drv_stat = ata_wait_idle(ap);
-		ata_qc_complete(qc, __ac_err_mask(drv_stat));
+		qc->err_mask |= __ac_err_mask(drv_stat);
+		ata_qc_complete(qc);
 		break;
 
 	default:
@@ -410,7 +411,8 @@ static void pdc_eng_timeout(struct ata_port *ap)
 		printk(KERN_ERR "ata%u: unknown timeout, cmd 0x%x stat 0x%x\n",
 		       ap->id, qc->tf.command, drv_stat);
 
-		ata_qc_complete(qc, ac_err_mask(drv_stat));
+		qc->err_mask |= ac_err_mask(drv_stat);
+		ata_qc_complete(qc);
 		break;
 	}
 
@@ -422,21 +424,21 @@ out:
 static inline unsigned int pdc_host_intr( struct ata_port *ap,
                                           struct ata_queued_cmd *qc)
 {
-	unsigned int handled = 0, err_mask = 0;
+	unsigned int handled = 0;
 	u32 tmp;
 	void __iomem *mmio = (void __iomem *) ap->ioaddr.cmd_addr + PDC_GLOBAL_CTL;
 
 	tmp = readl(mmio);
 	if (tmp & PDC_ERR_MASK) {
-		err_mask = AC_ERR_DEV;
+		qc->err_mask |= AC_ERR_DEV;
 		pdc_reset_port(ap);
 	}
 
 	switch (qc->tf.protocol) {
 	case ATA_PROT_DMA:
 	case ATA_PROT_NODATA:
-		err_mask |= ac_err_mask(ata_wait_idle(ap));
-		ata_qc_complete(qc, err_mask);
+		qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
+		ata_qc_complete(qc);
 		handled = 1;
 		break;
 
diff --git a/drivers/scsi/sata_qstor.c b/drivers/scsi/sata_qstor.c
index 6b9c3ae07cb3..de05e2883f9c 100644
--- a/drivers/scsi/sata_qstor.c
+++ b/drivers/scsi/sata_qstor.c
@@ -409,8 +409,8 @@ static inline unsigned int qs_intr_pkt(struct ata_host_set *host_set)
 					case 3: /* device error */
 						pp->state = qs_state_idle;
 						qs_enter_reg_mode(qc->ap);
-						ata_qc_complete(qc,
-							ac_err_mask(sDST));
+						qc->err_mask |= ac_err_mask(sDST);
+						ata_qc_complete(qc);
 						break;
 					default:
 						break;
@@ -447,7 +447,8 @@ static inline unsigned int qs_intr_mmio(struct ata_host_set *host_set)
 
 				/* complete taskfile transaction */
 				pp->state = qs_state_idle;
-				ata_qc_complete(qc, ac_err_mask(status));
+				qc->err_mask |= ac_err_mask(status);
+				ata_qc_complete(qc);
 				handled = 1;
 			}
 		}
diff --git a/drivers/scsi/sata_sil24.c b/drivers/scsi/sata_sil24.c
index e0d6f194f54f..a0ad3ed2200a 100644
--- a/drivers/scsi/sata_sil24.c
+++ b/drivers/scsi/sata_sil24.c
@@ -654,7 +654,8 @@ static void sil24_eng_timeout(struct ata_port *ap)
 	 */
 	printk(KERN_ERR "ata%u: command timeout\n", ap->id);
 	qc->scsidone = scsi_finish_command;
-	ata_qc_complete(qc, AC_ERR_OTHER);
+	qc->err_mask |= AC_ERR_OTHER;
+	ata_qc_complete(qc);
 
 	sil24_reset_controller(ap);
 }
@@ -711,8 +712,10 @@ static void sil24_error_intr(struct ata_port *ap, u32 slot_stat)
 		sil24_reset_controller(ap);
 	}
 
-	if (qc)
-		ata_qc_complete(qc, err_mask);
+	if (qc) {
+		qc->err_mask |= err_mask;
+		ata_qc_complete(qc);
+	}
 }
 
 static inline void sil24_host_intr(struct ata_port *ap)
@@ -734,8 +737,10 @@ static inline void sil24_host_intr(struct ata_port *ap)
 		 */
 		sil24_update_tf(ap);
 
-		if (qc)
-			ata_qc_complete(qc, ac_err_mask(pp->tf.command));
+		if (qc) {
+			qc->err_mask |= ac_err_mask(pp->tf.command);
+			ata_qc_complete(qc);
+		}
 	} else
 		sil24_error_intr(ap, slot_stat);
 }
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index 7c4b53575510..58da854a7c68 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -718,7 +718,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap,
 			VPRINTK("ata%u: read hdma, 0x%x 0x%x\n", ap->id,
 				readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
 			/* get drive status; clear intr; complete txn */
-			ata_qc_complete(qc, ac_err_mask(ata_wait_idle(ap)));
+			qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
+			ata_qc_complete(qc);
 			pdc20621_pop_hdma(qc);
 		}
 
@@ -756,7 +757,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap,
 			VPRINTK("ata%u: write ata, 0x%x 0x%x\n", ap->id,
 				readl(mmio + 0x104), readl(mmio + PDC_HDMA_CTLSTAT));
 			/* get drive status; clear intr; complete txn */
-			ata_qc_complete(qc, ac_err_mask(ata_wait_idle(ap)));
+			qc->err_mask |= ac_err_mask(ata_wait_idle(ap));
+			ata_qc_complete(qc);
 			pdc20621_pop_hdma(qc);
 		}
 		handled = 1;
@@ -766,7 +768,8 @@ static inline unsigned int pdc20621_host_intr( struct ata_port *ap,
 
 		status = ata_busy_wait(ap, ATA_BUSY | ATA_DRQ, 1000);
 		DPRINTK("BUS_NODATA (drv_stat 0x%X)\n", status);
-		ata_qc_complete(qc, ac_err_mask(status));
+		qc->err_mask |= ac_err_mask(status);
+		ata_qc_complete(qc);
 		handled = 1;
 
 	} else {
@@ -881,7 +884,8 @@ static void pdc_eng_timeout(struct ata_port *ap)
 	case ATA_PROT_DMA:
 	case ATA_PROT_NODATA:
 		printk(KERN_ERR "ata%u: command timeout\n", ap->id);
-		ata_qc_complete(qc, __ac_err_mask(ata_wait_idle(ap)));
+		qc->err_mask |= __ac_err_mask(ata_wait_idle(ap));
+		ata_qc_complete(qc);
 		break;
 
 	default:
@@ -890,7 +894,8 @@ static void pdc_eng_timeout(struct ata_port *ap)
 		printk(KERN_ERR "ata%u: unknown timeout, cmd 0x%x stat 0x%x\n",
 		       ap->id, qc->tf.command, drv_stat);
 
-		ata_qc_complete(qc, ac_err_mask(drv_stat));
+		qc->err_mask |= ac_err_mask(drv_stat);
+		ata_qc_complete(qc);
 		break;
 	}
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 83a83babff84..e18ce039cdfd 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -194,7 +194,7 @@ struct ata_port;
 struct ata_queued_cmd;
 
 /* typedefs */
-typedef int (*ata_qc_cb_t) (struct ata_queued_cmd *qc, unsigned int err_mask);
+typedef int (*ata_qc_cb_t) (struct ata_queued_cmd *qc);
 
 struct ata_ioports {
 	unsigned long		cmd_addr;
@@ -279,6 +279,8 @@ struct ata_queued_cmd {
 	/* DO NOT iterate over __sg manually, use ata_for_each_sg() */
 	struct scatterlist	*__sg;
 
+	unsigned int		err_mask;
+
 	ata_qc_cb_t		complete_fn;
 
 	struct completion	*waiting;
@@ -475,7 +477,7 @@ extern void ata_bmdma_start (struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
 extern u8   ata_bmdma_status(struct ata_port *ap);
 extern void ata_bmdma_irq_clear(struct ata_port *ap);
-extern void ata_qc_complete(struct ata_queued_cmd *qc, unsigned int err_mask);
+extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern void ata_eng_timeout(struct ata_port *ap);
 extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd,
 			      void (*done)(struct scsi_cmnd *));
@@ -667,6 +669,7 @@ static inline void ata_qc_reinit(struct ata_queued_cmd *qc)
 	qc->cursect = qc->cursg = qc->cursg_ofs = 0;
 	qc->nsect = 0;
 	qc->nbytes = qc->curbytes = 0;
+	qc->err_mask = 0;
 
 	ata_tf_init(qc->ap, &qc->tf, qc->dev->devno);
 }
-- 
cgit v1.2.3


From 95235ca2c20ac0b31a8eb39e2d599bcc3e9c9a10 Mon Sep 17 00:00:00 2001
From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
Date: Fri, 2 Dec 2005 10:43:20 -0800
Subject: [CPUFREQ] CPU frequency display in /proc/cpuinfo

What is the value shown in "cpu MHz" of /proc/cpuinfo when CPUs are capable of
changing frequency?

Today the answer is: It depends.
On i386:
SMP kernel - It is always the boot frequency
UP kernel - Scales with the frequency change and shows that was last set.

On x86_64:
There is one single variable cpu_khz that gets written by all the CPUs. So,
the frequency set by last CPU will be seen on /proc/cpuinfo of all the
CPUs in the system. What you see also depends on whether you have constant_tsc
capable CPU or not.

On ia64:
It is always boot time frequency of a particular CPU that gets displayed.

The patch below changes this to:
Show the last known frequency of the particular CPU, when cpufreq is present. If
cpu doesnot support changing of frequency through cpufreq, then boot frequency
will be shown. The patch affects i386, x86_64 and ia64 architectures.

Signed-off-by: Venkatesh Pallipadi<venkatesh.pallipadi@intel.com>
Signed-off-by: Dave Jones <davej@redhat.com>
---
 arch/i386/kernel/cpu/proc.c |  6 +++++-
 arch/ia64/kernel/setup.c    |  8 +++++++-
 arch/x86_64/kernel/setup.c  |  6 +++++-
 drivers/cpufreq/cpufreq.c   | 24 ++++++++++++++++++++++++
 include/linux/cpufreq.h     | 10 ++++++++++
 5 files changed, 51 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index e7921315ae9d..6d91b274589c 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -3,6 +3,7 @@
 #include <linux/string.h>
 #include <asm/semaphore.h>
 #include <linux/seq_file.h>
+#include <linux/cpufreq.h>
 
 /*
  *	Get CPU information for use by the procfs.
@@ -86,8 +87,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "stepping\t: unknown\n");
 
 	if ( cpu_has(c, X86_FEATURE_TSC) ) {
+		unsigned int freq = cpufreq_quick_get(n);
+		if (!freq)
+			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			cpu_khz / 1000, (cpu_khz % 1000));
+			freq / 1000, (freq % 1000));
 	}
 
 	/* Cache size */
diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c
index 5add0bcf87a7..088e5dded8dc 100644
--- a/arch/ia64/kernel/setup.c
+++ b/arch/ia64/kernel/setup.c
@@ -43,6 +43,7 @@
 #include <linux/initrd.h>
 #include <linux/platform.h>
 #include <linux/pm.h>
+#include <linux/cpufreq.h>
 
 #include <asm/ia32.h>
 #include <asm/machvec.h>
@@ -517,6 +518,7 @@ show_cpuinfo (struct seq_file *m, void *v)
 	char family[32], features[128], *cp, sep;
 	struct cpuinfo_ia64 *c = v;
 	unsigned long mask;
+	unsigned int proc_freq;
 	int i;
 
 	mask = c->features;
@@ -549,6 +551,10 @@ show_cpuinfo (struct seq_file *m, void *v)
 		sprintf(cp, " 0x%lx", mask);
 	}
 
+	proc_freq = cpufreq_quick_get(cpunum);
+	if (!proc_freq)
+		proc_freq = c->proc_freq / 1000;
+
 	seq_printf(m,
 		   "processor  : %d\n"
 		   "vendor     : %s\n"
@@ -565,7 +571,7 @@ show_cpuinfo (struct seq_file *m, void *v)
 		   "BogoMIPS   : %lu.%02lu\n",
 		   cpunum, c->vendor, family, c->model, c->revision, c->archrev,
 		   features, c->ppn, c->number,
-		   c->proc_freq / 1000000, c->proc_freq % 1000000,
+		   proc_freq / 1000, proc_freq % 1000,
 		   c->itc_freq / 1000000, c->itc_freq % 1000000,
 		   lpj*HZ/500000, (lpj*HZ/5000) % 100);
 #ifdef CONFIG_SMP
diff --git a/arch/x86_64/kernel/setup.c b/arch/x86_64/kernel/setup.c
index 750e01dcbdf4..64c4534b930c 100644
--- a/arch/x86_64/kernel/setup.c
+++ b/arch/x86_64/kernel/setup.c
@@ -42,6 +42,7 @@
 #include <linux/edd.h>
 #include <linux/mmzone.h>
 #include <linux/kexec.h>
+#include <linux/cpufreq.h>
 
 #include <asm/mtrr.h>
 #include <asm/uaccess.h>
@@ -1256,8 +1257,11 @@ static int show_cpuinfo(struct seq_file *m, void *v)
 		seq_printf(m, "stepping\t: unknown\n");
 	
 	if (cpu_has(c,X86_FEATURE_TSC)) {
+		unsigned int freq = cpufreq_quick_get((unsigned)(c-cpu_data));
+		if (!freq)
+			freq = cpu_khz;
 		seq_printf(m, "cpu MHz\t\t: %u.%03u\n",
-			     cpu_khz / 1000, (cpu_khz % 1000));
+			     freq / 1000, (freq % 1000));
 	}
 
 	/* Cache size */
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 815902c2c856..a9163d02983a 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -822,6 +822,30 @@ static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, unsigne
 }
 
 
+/** 
+ * cpufreq_quick_get - get the CPU frequency (in kHz) frpm policy->cur
+ * @cpu: CPU number
+ *
+ * This is the last known freq, without actually getting it from the driver.
+ * Return value will be same as what is shown in scaling_cur_freq in sysfs.
+ */
+unsigned int cpufreq_quick_get(unsigned int cpu)
+{
+	struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
+	unsigned int ret = 0;
+
+	if (policy) {
+		down(&policy->lock);
+		ret = policy->cur;
+		up(&policy->lock);
+		cpufreq_cpu_put(policy);
+	}
+
+	return (ret);
+}
+EXPORT_SYMBOL(cpufreq_quick_get);
+
+
 /** 
  * cpufreq_get - get the current CPU frequency (in kHz)
  * @cpu: CPU number
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index d068176b7ad7..c31650df9241 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -256,6 +256,16 @@ int cpufreq_update_policy(unsigned int cpu);
 /* query the current CPU frequency (in kHz). If zero, cpufreq couldn't detect it */
 unsigned int cpufreq_get(unsigned int cpu);
 
+/* query the last known CPU freq (in kHz). If zero, cpufreq couldn't detect it */
+#ifdef CONFIG_CPU_FREQ
+unsigned int cpufreq_quick_get(unsigned int cpu);
+#else
+static inline unsigned int cpufreq_quick_get(unsigned int cpu)
+{
+	return 0;
+}
+#endif
+
 
 /*********************************************************************
  *                       CPUFREQ DEFAULT GOVERNOR                    *
-- 
cgit v1.2.3


From 7fc7e2eeecb599ba719c4c4503100fc8cd6a6920 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Sun, 11 Dec 2005 19:57:52 -0800
Subject: Remove (at least temporarily) the "incomplete PFN mapping" support

With the previous commit, we can handle arbitrary shared re-mappings
even without this complexity, and since the only known private mappings
are for strange users of /dev/mem (which never create an incomplete one),
there seems to be no reason to support it.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h |  1 -
 mm/memory.c        | 46 +---------------------------------------------
 2 files changed, 1 insertion(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 29f02d8513f6..e5677f456742 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -163,7 +163,6 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
-#define VM_INCOMPLETE	0x02000000	/* Strange partial PFN mapping marker */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff --git a/mm/memory.c b/mm/memory.c
index e65f8fc8ea67..430a72ed08d5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1227,50 +1227,6 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *
 }
 EXPORT_SYMBOL(vm_insert_page);
 
-/*
- * Somebody does a pfn remapping that doesn't actually work as a vma.
- *
- * Do it as individual pages instead, and warn about it. It's bad form,
- * and very inefficient.
- */
-static int incomplete_pfn_remap(struct vm_area_struct *vma,
-		unsigned long start, unsigned long end,
-		unsigned long pfn, pgprot_t prot)
-{
-	static int warn = 10;
-	struct page *page;
-	int retval;
-
-	if (!(vma->vm_flags & VM_INCOMPLETE)) {
-		if (warn) {
-			warn--;
-			printk("%s does an incomplete pfn remapping", current->comm);
-			dump_stack();
-		}
-	}
-	vma->vm_flags |= VM_INCOMPLETE | VM_IO | VM_RESERVED;
-
-	if (start < vma->vm_start || end > vma->vm_end)
-		return -EINVAL;
-
-	if (!pfn_valid(pfn))
-		return -EINVAL;
-
-	page = pfn_to_page(pfn);
-	if (!PageReserved(page))
-		return -EINVAL;
-
-	retval = 0;
-	while (start < end) {
-		retval = insert_page(vma->vm_mm, start, page, prot);
-		if (retval < 0)
-			break;
-		start += PAGE_SIZE;
-		page++;
-	}
-	return retval;
-}
-
 /*
  * maps a range of physical memory into the requested pages. the old
  * mappings are removed. any references to nonexistent pages results
@@ -1365,7 +1321,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 */
 	if (!(vma->vm_flags & VM_SHARED)) {
 		if (addr != vma->vm_start || end != vma->vm_end)
-			return incomplete_pfn_remap(vma, addr, end, pfn, prot);
+			return -EINVAL;
 		vma->vm_pgoff = pfn;
 	}
 
-- 
cgit v1.2.3


From ab4720ec76b756e1f8705e207a7b392b0453afd6 Mon Sep 17 00:00:00 2001
From: Dipankar Sarma <dipankar@in.ibm.com>
Date: Mon, 12 Dec 2005 00:37:05 -0800
Subject: [PATCH] add rcu_barrier() synchronization point

This introduces a new interface - rcu_barrier() which waits until all
the RCUs queued until this call have been completed.

Reiser4 needs this, because we do more than just freeing memory object
in our RCU callback: we also remove it from the list hanging off
super-block.  This means, that before freeing reiser4-specific portion
of super-block (during umount) we have to wait until all pending RCU
callbacks are executed.

The only change of reiser4 made to the original patch, is exporting of
rcu_barrier().

Cc: Hans Reiser <reiser@namesys.com>
Cc: Vladimir V. Saveliev <vs@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/rcupdate.h |  2 ++
 kernel/rcupdate.c        | 41 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 43 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index cce25591eec2..a471f3bb713e 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -100,6 +100,7 @@ struct rcu_data {
 	struct rcu_head *donelist;
 	struct rcu_head **donetail;
 	int cpu;
+	struct rcu_head barrier;
 };
 
 DECLARE_PER_CPU(struct rcu_data, rcu_data);
@@ -285,6 +286,7 @@ extern void FASTCALL(call_rcu_bh(struct rcu_head *head,
 extern __deprecated_for_modules void synchronize_kernel(void);
 extern void synchronize_rcu(void);
 void synchronize_idle(void);
+extern void rcu_barrier(void);
 
 #endif /* __KERNEL__ */
 #endif /* __LINUX_RCUPDATE_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index c4d159a21e04..f45b91723dc6 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -116,6 +116,10 @@ void fastcall call_rcu(struct rcu_head *head,
 	local_irq_restore(flags);
 }
 
+static atomic_t rcu_barrier_cpu_count;
+static struct semaphore rcu_barrier_sema;
+static struct completion rcu_barrier_completion;
+
 /**
  * call_rcu_bh - Queue an RCU for invocation after a quicker grace period.
  * @head: structure to be used for queueing the RCU updates.
@@ -162,6 +166,42 @@ long rcu_batches_completed(void)
 	return rcu_ctrlblk.completed;
 }
 
+static void rcu_barrier_callback(struct rcu_head *notused)
+{
+	if (atomic_dec_and_test(&rcu_barrier_cpu_count))
+		complete(&rcu_barrier_completion);
+}
+
+/*
+ * Called with preemption disabled, and from cross-cpu IRQ context.
+ */
+static void rcu_barrier_func(void *notused)
+{
+	int cpu = smp_processor_id();
+	struct rcu_data *rdp = &per_cpu(rcu_data, cpu);
+	struct rcu_head *head;
+
+	head = &rdp->barrier;
+	atomic_inc(&rcu_barrier_cpu_count);
+	call_rcu(head, rcu_barrier_callback);
+}
+
+/**
+ * rcu_barrier - Wait until all the in-flight RCUs are complete.
+ */
+void rcu_barrier(void)
+{
+	BUG_ON(in_interrupt());
+	/* Take cpucontrol semaphore to protect against CPU hotplug */
+	down(&rcu_barrier_sema);
+	init_completion(&rcu_barrier_completion);
+	atomic_set(&rcu_barrier_cpu_count, 0);
+	on_each_cpu(rcu_barrier_func, NULL, 0, 1);
+	wait_for_completion(&rcu_barrier_completion);
+	up(&rcu_barrier_sema);
+}
+EXPORT_SYMBOL_GPL(rcu_barrier);
+
 /*
  * Invoke the completed RCU callbacks. They are expected to be in
  * a per-cpu list.
@@ -457,6 +497,7 @@ static struct notifier_block __devinitdata rcu_nb = {
  */
 void __init rcu_init(void)
 {
+	sema_init(&rcu_barrier_sema, 1);
 	rcu_cpu_notify(&rcu_nb, CPU_UP_PREPARE,
 			(void *)(long)smp_processor_id());
 	/* Register notifier for non-boot CPUs */
-- 
cgit v1.2.3


From 64123fd42c7a1e4ebf6acd2399c98caddc7e0c26 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Mon, 12 Dec 2005 00:37:09 -0800
Subject: [PATCH] Add getnstimestamp function

There are several functions that might seem appropriate for a timestamp:

get_cycles()
current_kernel_time()
do_gettimeofday()
<read jiffies/jiffies_64>

Each has problems with combinations of SMP-safety, low resolution, and
monotonicity. This patch adds a new function that returns a monotonic SMP-safe
timestamp with nanosecond resolution where available.

Changes:
	Split timestamp into separate patch
	Moved to kernel/time.c
	Renamed to getnstimestamp
	Fixed unintended-pointer-arithmetic bug

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/time.h |  1 +
 kernel/time.c        | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index bfbe92d0767c..797ccd813bb0 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -95,6 +95,7 @@ struct itimerval;
 extern int do_setitimer(int which, struct itimerval *value, struct itimerval *ovalue);
 extern int do_getitimer(int which, struct itimerval *value);
 extern void getnstimeofday (struct timespec *tv);
+extern void getnstimestamp(struct timespec *ts);
 
 extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 
diff --git a/kernel/time.c b/kernel/time.c
index 245d595a13cb..b94bfa8c03e0 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -561,6 +561,28 @@ void getnstimeofday(struct timespec *tv)
 EXPORT_SYMBOL_GPL(getnstimeofday);
 #endif
 
+void getnstimestamp(struct timespec *ts)
+{
+	unsigned int seq;
+	struct timespec wall2mono;
+
+	/* synchronize with settimeofday() changes */
+	do {
+		seq = read_seqbegin(&xtime_lock);
+		getnstimeofday(ts);
+		wall2mono = wall_to_monotonic;
+	} while(unlikely(read_seqretry(&xtime_lock, seq)));
+
+	/* adjust to monotonicaly-increasing values */
+	ts->tv_sec += wall2mono.tv_sec;
+	ts->tv_nsec += wall2mono.tv_nsec;
+	while (unlikely(ts->tv_nsec >= NSEC_PER_SEC)) {
+		ts->tv_nsec -= NSEC_PER_SEC;
+		ts->tv_sec++;
+	}
+}
+EXPORT_SYMBOL_GPL(getnstimestamp);
+
 #if (BITS_PER_LONG < 64)
 u64 get_jiffies_64(void)
 {
-- 
cgit v1.2.3


From 5650b736ad328f7f3e4120e8790940289b8ac144 Mon Sep 17 00:00:00 2001
From: Matt Helsley <matthltc@us.ibm.com>
Date: Mon, 12 Dec 2005 00:37:10 -0800
Subject: [PATCH] Add timestamp field to process events

This adds a timestamp field to the events sent via the process event
connector.  The timestamp allows listeners to accurately account the
duration(s) between a process' events and offers strong means with which
to determine the order of events with respect to a given task while also
avoiding the addition of per-task data.

This alters the size and layout of the event structure and hence would
break compatibility if process events connector as it stands in 2.6.15-rc2
were released as a mainline kernel.

Signed-off-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/connector/cn_proc.c | 5 +++++
 include/linux/cn_proc.h     | 2 ++
 2 files changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c
index fcdf0fff13a6..969d2b4aaec0 100644
--- a/drivers/connector/cn_proc.c
+++ b/drivers/connector/cn_proc.c
@@ -56,6 +56,7 @@ void proc_fork_connector(struct task_struct *task)
 	msg = (struct cn_msg*)buffer;
 	ev = (struct proc_event*)msg->data;
 	get_seq(&msg->seq, &ev->cpu);
+	getnstimestamp(&ev->timestamp);
 	ev->what = PROC_EVENT_FORK;
 	ev->event_data.fork.parent_pid = task->real_parent->pid;
 	ev->event_data.fork.parent_tgid = task->real_parent->tgid;
@@ -81,6 +82,7 @@ void proc_exec_connector(struct task_struct *task)
 	msg = (struct cn_msg*)buffer;
 	ev = (struct proc_event*)msg->data;
 	get_seq(&msg->seq, &ev->cpu);
+	getnstimestamp(&ev->timestamp);
 	ev->what = PROC_EVENT_EXEC;
 	ev->event_data.exec.process_pid = task->pid;
 	ev->event_data.exec.process_tgid = task->tgid;
@@ -114,6 +116,7 @@ void proc_id_connector(struct task_struct *task, int which_id)
 	} else
 	     	return;
 	get_seq(&msg->seq, &ev->cpu);
+	getnstimestamp(&ev->timestamp);
 
 	memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id));
 	msg->ack = 0; /* not used */
@@ -133,6 +136,7 @@ void proc_exit_connector(struct task_struct *task)
 	msg = (struct cn_msg*)buffer;
 	ev = (struct proc_event*)msg->data;
 	get_seq(&msg->seq, &ev->cpu);
+	getnstimestamp(&ev->timestamp);
 	ev->what = PROC_EVENT_EXIT;
 	ev->event_data.exit.process_pid = task->pid;
 	ev->event_data.exit.process_tgid = task->tgid;
@@ -165,6 +169,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack)
 	msg = (struct cn_msg*)buffer;
 	ev = (struct proc_event*)msg->data;
 	msg->seq = rcvd_seq;
+	getnstimestamp(&ev->timestamp);
 	ev->cpu = -1;
 	ev->what = PROC_EVENT_NONE;
 	ev->event_data.ack.err = err;
diff --git a/include/linux/cn_proc.h b/include/linux/cn_proc.h
index c948f678e04e..1417de935057 100644
--- a/include/linux/cn_proc.h
+++ b/include/linux/cn_proc.h
@@ -26,6 +26,7 @@
 #define CN_PROC_H
 
 #include <linux/types.h>
+#include <linux/time.h>
 #include <linux/connector.h>
 
 /*
@@ -65,6 +66,7 @@ struct proc_event {
 		PROC_EVENT_EXIT = 0x80000000
 	} what;
 	__u32 cpu;
+	struct timespec timestamp;
 	union { /* must be last field of proc_event struct */
 		struct {
 			__u32 err;
-- 
cgit v1.2.3


From b88cb42428f14fabdaf947150c00d65891820635 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Mon, 12 Dec 2005 00:37:11 -0800
Subject: [PATCH] add hlist_replace_rcu()

Add list_replace_rcu: replace old entry by new one.

Signed-off-by: Paul E. McKenney <paulmck@us.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/list.h | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/list.h b/include/linux/list.h
index fbfca73355a3..8e3388284530 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -202,12 +202,15 @@ static inline void list_del_rcu(struct list_head *entry)
  *
  * The old entry will be replaced with the new entry atomically.
  */
-static inline void list_replace_rcu(struct list_head *old, struct list_head *new){
+static inline void list_replace_rcu(struct list_head *old,
+				struct list_head *new)
+{
 	new->next = old->next;
 	new->prev = old->prev;
 	smp_wmb();
 	new->next->prev = new;
 	new->prev->next = new;
+	old->prev = LIST_POISON2;
 }
 
 /**
@@ -578,6 +581,27 @@ static inline void hlist_del_init(struct hlist_node *n)
 	}
 }
 
+/*
+ * hlist_replace_rcu - replace old entry by new one
+ * @old : the element to be replaced
+ * @new : the new element to insert
+ *
+ * The old entry will be replaced with the new entry atomically.
+ */
+static inline void hlist_replace_rcu(struct hlist_node *old,
+					struct hlist_node *new)
+{
+	struct hlist_node *next = old->next;
+
+	new->next = next;
+	new->pprev = old->pprev;
+	smp_wmb();
+	if (next)
+		new->next->pprev = &new->next;
+	*new->pprev = new;
+	old->pprev = LIST_POISON2;
+}
+
 static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
 {
 	struct hlist_node *first = h->first;
-- 
cgit v1.2.3


From 8140a5005bc6f1c9d0fa103460d50d472e6e3426 Mon Sep 17 00:00:00 2001
From: John McCutchan <ttb@tentacle.dhs.org>
Date: Mon, 12 Dec 2005 00:37:14 -0800
Subject: [PATCH] inotify: add two inotify_add_watch flags

The below patch lets userspace have more control over the inodes that
inotify will watch.  It introduces two new flags.

        IN_ONLYDIR -- only watch the inode if it is a directory.
        This is needed to avoid the race that can occur when we want to be
        sure that we are watching a directory.

        IN_DONT_FOLLOW -- don't follow a symlink.  In combination
        with IN_ONLYDIR we can make sure that we don't watch the target of
        symlinks.

The issues the flags fix came up when writing the gnome-vfs inotify
backend.  Default behaviour is unchanged.

Signed-off-by: John McCutchan <ttb@tentacle.dhs.org>
Acked-by: Robert Love <rml@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/inotify.c            | 13 ++++++++++---
 include/linux/inotify.h |  2 ++
 2 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inotify.c b/fs/inotify.c
index bf7ce1d2412b..2fecb7af4a77 100644
--- a/fs/inotify.c
+++ b/fs/inotify.c
@@ -364,11 +364,12 @@ static int inotify_dev_get_wd(struct inotify_device *dev,
 /*
  * find_inode - resolve a user-given path to a specific inode and return a nd
  */
-static int find_inode(const char __user *dirname, struct nameidata *nd)
+static int find_inode(const char __user *dirname, struct nameidata *nd,
+		      unsigned flags)
 {
 	int error;
 
-	error = __user_walk(dirname, LOOKUP_FOLLOW, nd);
+	error = __user_walk(dirname, flags, nd);
 	if (error)
 		return error;
 	/* you can only watch an inode if you have read permissions on it */
@@ -933,6 +934,7 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 	struct file *filp;
 	int ret, fput_needed;
 	int mask_add = 0;
+	unsigned flags = 0;
 
 	filp = fget_light(fd, &fput_needed);
 	if (unlikely(!filp))
@@ -944,7 +946,12 @@ asmlinkage long sys_inotify_add_watch(int fd, const char __user *path, u32 mask)
 		goto fput_and_out;
 	}
 
-	ret = find_inode(path, &nd);
+	if (!(mask & IN_DONT_FOLLOW))
+		flags |= LOOKUP_FOLLOW;
+	if (mask & IN_ONLYDIR)
+		flags |= LOOKUP_DIRECTORY;
+
+	ret = find_inode(path, &nd, flags);
 	if (unlikely(ret))
 		goto fput_and_out;
 
diff --git a/include/linux/inotify.h b/include/linux/inotify.h
index ee5b239092ed..267c88b5f742 100644
--- a/include/linux/inotify.h
+++ b/include/linux/inotify.h
@@ -47,6 +47,8 @@ struct inotify_event {
 #define IN_MOVE			(IN_MOVED_FROM | IN_MOVED_TO) /* moves */
 
 /* special flags */
+#define IN_ONLYDIR		0x01000000	/* only watch the path if it is a directory */
+#define IN_DONT_FOLLOW		0x02000000	/* don't follow a sym link */
 #define IN_MASK_ADD		0x20000000	/* add to the mask of an already existing watch */
 #define IN_ISDIR		0x40000000	/* event occurred against dir */
 #define IN_ONESHOT		0x80000000	/* only send event once */
-- 
cgit v1.2.3


From afd1a0c9ac281eed3b22b293ccd92af7b0d60889 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Mon, 12 Dec 2005 00:37:27 -0800
Subject: [PATCH] V4L/DVB: (3086c) Whitespaces cleanups part 3

Clean up whitespaces at v4l/dvb files

Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/common/saa7146_core.c         |   6 +-
 drivers/media/common/saa7146_fops.c         |  32 +--
 drivers/media/common/saa7146_i2c.c          |  16 +-
 drivers/media/common/saa7146_vbi.c          |   4 +-
 drivers/media/common/saa7146_video.c        |  46 ++--
 drivers/media/dvb/dvb-core/demux.h          |  92 ++++----
 drivers/media/dvb/dvb-core/dmxdev.c         |   2 +-
 drivers/media/dvb/dvb-core/dmxdev.h         |  64 +++---
 drivers/media/dvb/dvb-core/dvb_ca_en50221.c |   2 +-
 drivers/media/dvb/dvb-core/dvb_filter.c     | 332 ++++++++++++++--------------
 drivers/media/dvb/dvb-core/dvb_filter.h     | 102 ++++-----
 drivers/media/dvb/dvb-core/dvb_frontend.c   |   8 +-
 drivers/media/dvb/dvb-core/dvb_frontend.h   |   8 +-
 drivers/media/dvb/dvb-core/dvb_net.c        |  42 ++--
 drivers/media/dvb/dvb-core/dvb_ringbuffer.c | 262 +++++++++++-----------
 drivers/media/dvb/dvb-core/dvb_ringbuffer.h |  20 +-
 drivers/media/dvb/dvb-core/dvbdev.c         | 142 ++++++------
 drivers/media/dvb/dvb-core/dvbdev.h         |   6 +-
 drivers/media/video/bt832.c                 |  14 +-
 drivers/media/video/bttv-driver.c           |  78 +++----
 drivers/media/video/ir-kbd-i2c.c            |   4 +-
 drivers/media/video/msp3400.c               |  12 +-
 drivers/media/video/saa7134/saa6752hs.c     |  16 +-
 drivers/media/video/saa7134/saa7134-alsa.c  |   8 +-
 drivers/media/video/saa7134/saa7134-oss.c   |  86 +++----
 drivers/media/video/tda9887.c               |  14 +-
 drivers/media/video/tvaudio.c               |  14 +-
 drivers/media/video/tveeprom.c              |   2 +-
 include/linux/dvb/audio.h                   |  28 +--
 include/linux/dvb/ca.h                      |  36 +--
 include/linux/dvb/dmx.h                     |  20 +-
 include/linux/dvb/osd.h                     |  58 ++---
 include/linux/dvb/video.h                   |  44 ++--
 include/media/saa7146.h                     |   4 +-
 include/media/saa7146_vv.h                  |  10 +-
 35 files changed, 817 insertions(+), 817 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/common/saa7146_core.c b/drivers/media/common/saa7146_core.c
index 206cc2f61f26..2899d34e5f7e 100644
--- a/drivers/media/common/saa7146_core.c
+++ b/drivers/media/common/saa7146_core.c
@@ -174,8 +174,8 @@ void saa7146_pgtable_free(struct pci_dev *pci, struct saa7146_pgtable *pt)
 
 int saa7146_pgtable_alloc(struct pci_dev *pci, struct saa7146_pgtable *pt)
 {
-        u32          *cpu;
-        dma_addr_t   dma_addr;
+	u32          *cpu;
+	dma_addr_t   dma_addr;
 
 	cpu = pci_alloc_consistent(pci, PAGE_SIZE, &dma_addr);
 	if (NULL == cpu) {
@@ -405,7 +405,7 @@ static int saa7146_init_one(struct pci_dev *pci, const struct pci_device_id *ent
 
 	pci_set_drvdata(pci, dev);
 
-        init_MUTEX(&dev->lock);
+	init_MUTEX(&dev->lock);
 	spin_lock_init(&dev->int_slock);
 	spin_lock_init(&dev->slock);
 
diff --git a/drivers/media/common/saa7146_fops.c b/drivers/media/common/saa7146_fops.c
index 37888989ea2e..09ec964dec5c 100644
--- a/drivers/media/common/saa7146_fops.c
+++ b/drivers/media/common/saa7146_fops.c
@@ -1,6 +1,6 @@
 #include <media/saa7146_vv.h>
 
-#define BOARD_CAN_DO_VBI(dev)   (dev->revision != 0 && dev->vv_data->vbi_minor != -1) 
+#define BOARD_CAN_DO_VBI(dev)   (dev->revision != 0 && dev->vv_data->vbi_minor != -1)
 
 /****************************************************************************/
 /* resource management functions, shamelessly stolen from saa7134 driver */
@@ -102,9 +102,9 @@ void saa7146_buffer_finish(struct saa7146_dev *dev,
 	/* finish current buffer */
 	if (NULL == q->curr) {
 		DEB_D(("aiii. no current buffer\n"));
-		return;	
+		return;
 	}
-			
+
 	q->curr->vb.state = state;
 	do_gettimeofday(&q->curr->vb.ts);
 	wake_up(&q->curr->vb.done);
@@ -143,13 +143,13 @@ void saa7146_buffer_next(struct saa7146_dev *dev,
 			// fixme: fix this for vflip != 0
 
 			saa7146_write(dev, PROT_ADDR1, 0);
-			saa7146_write(dev, MC2, (MASK_02|MASK_18));		
+			saa7146_write(dev, MC2, (MASK_02|MASK_18));
 
 			/* write the address of the rps-program */
 			saa7146_write(dev, RPS_ADDR0, dev->d_rps0.dma_handle);
 			/* turn on rps */
 			saa7146_write(dev, MC1, (MASK_12 | MASK_28));
-				
+
 /*
 			printk("vdma%d.base_even:     0x%08x\n", 1,saa7146_read(dev,BASE_EVEN1));
 			printk("vdma%d.base_odd:      0x%08x\n", 1,saa7146_read(dev,BASE_ODD1));
@@ -246,7 +246,7 @@ static int fops_open(struct inode *inode, struct file *file)
 		goto out;
 	}
 	memset(fh,0,sizeof(*fh));
-	
+
 	file->private_data = fh;
 	fh->dev = dev;
 	fh->type = type;
@@ -275,7 +275,7 @@ out:
 		file->private_data = NULL;
 	}
 	up(&saa7146_devices_lock);
-        return result;
+	return result;
 }
 
 static int fops_release(struct inode *inode, struct file *file)
@@ -405,7 +405,7 @@ static struct file_operations video_fops =
 static void vv_callback(struct saa7146_dev *dev, unsigned long status)
 {
 	u32 isr = status;
-	
+
 	DEB_INT(("dev:%p, isr:0x%08x\n",dev,(u32)status));
 
 	if (0 != (isr & (MASK_27))) {
@@ -454,11 +454,11 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
 	   handle different devices that might need different
 	   configuration data) */
 	dev->ext_vv_data = ext_vv;
-	
+
 	vv->video_minor = -1;
 	vv->vbi_minor = -1;
 
-	vv->d_clipping.cpu_addr = pci_alloc_consistent(dev->pci, SAA7146_CLIPPING_MEM, &vv->d_clipping.dma_handle);	
+	vv->d_clipping.cpu_addr = pci_alloc_consistent(dev->pci, SAA7146_CLIPPING_MEM, &vv->d_clipping.dma_handle);
 	if( NULL == vv->d_clipping.cpu_addr ) {
 		ERR(("out of memory. aborting.\n"));
 		kfree(vv);
@@ -468,7 +468,7 @@ int saa7146_vv_init(struct saa7146_dev* dev, struct saa7146_ext_vv *ext_vv)
 
 	saa7146_video_uops.init(dev,vv);
 	saa7146_vbi_uops.init(dev,vv);
-	
+
 	dev->vv_data = vv;
 	dev->vv_callback = &vv_callback;
 
@@ -480,12 +480,12 @@ int saa7146_vv_release(struct saa7146_dev* dev)
 	struct saa7146_vv *vv = dev->vv_data;
 
 	DEB_EE(("dev:%p\n",dev));
- 
+
 	pci_free_consistent(dev->pci, SAA7146_RPS_MEM, vv->d_clipping.cpu_addr, vv->d_clipping.dma_handle);
- 	kfree(vv);
+	kfree(vv);
 	dev->vv_data = NULL;
 	dev->vv_callback = NULL;
-	
+
 	return 0;
 }
 
@@ -498,7 +498,7 @@ int saa7146_register_device(struct video_device **vid, struct saa7146_dev* dev,
 	DEB_EE(("dev:%p, name:'%s', type:%d\n",dev,name,type));
 
 	// released by vfd->release
- 	vfd = video_device_alloc();
+	vfd = video_device_alloc();
 	if (vfd == NULL)
 		return -ENOMEM;
 
@@ -530,7 +530,7 @@ int saa7146_register_device(struct video_device **vid, struct saa7146_dev* dev,
 int saa7146_unregister_device(struct video_device **vid, struct saa7146_dev* dev)
 {
 	struct saa7146_vv *vv = dev->vv_data;
-	
+
 	DEB_EE(("dev:%p\n",dev));
 
 	if( VFL_TYPE_GRABBER == (*vid)->type ) {
diff --git a/drivers/media/common/saa7146_i2c.c b/drivers/media/common/saa7146_i2c.c
index fec6beab8c28..8aabdd8fb3c5 100644
--- a/drivers/media/common/saa7146_i2c.c
+++ b/drivers/media/common/saa7146_i2c.c
@@ -276,8 +276,8 @@ int saa7146_i2c_transfer(struct saa7146_dev *dev, const struct i2c_msg *msgs, in
 	int i = 0, count = 0;
 	u32* buffer = dev->d_i2c.cpu_addr;
 	int err = 0;
-        int address_err = 0;
-        int short_delay = 0;
+	int address_err = 0;
+	int short_delay = 0;
 
 	if (down_interruptible (&dev->i2c_lock))
 		return -ERESTARTSYS;
@@ -325,7 +325,7 @@ int saa7146_i2c_transfer(struct saa7146_dev *dev, const struct i2c_msg *msgs, in
 					if( 0 != (SAA7146_USE_I2C_IRQ & dev->ext->flags)) {
 						goto out;
 					}
-				        address_err++;
+					address_err++;
 				}
 				DEB_I2C(("error while sending message(s). starting again.\n"));
 				break;
@@ -336,14 +336,14 @@ int saa7146_i2c_transfer(struct saa7146_dev *dev, const struct i2c_msg *msgs, in
 			break;
 		}
 
-	        /* delay a bit before retrying */
-	        msleep(10);
+		/* delay a bit before retrying */
+		msleep(10);
 
 	} while (err != num && retries--);
 
-        /* if every retry had an address error, exit right away */
-        if (address_err == retries) {
-	        goto out;
+	/* if every retry had an address error, exit right away */
+	if (address_err == retries) {
+		goto out;
 	}
 
 	/* if any things had to be read, get the results */
diff --git a/drivers/media/common/saa7146_vbi.c b/drivers/media/common/saa7146_vbi.c
index cb86a97fda1f..063986ec16b5 100644
--- a/drivers/media/common/saa7146_vbi.c
+++ b/drivers/media/common/saa7146_vbi.c
@@ -6,8 +6,8 @@ static int vbi_workaround(struct saa7146_dev *dev)
 {
 	struct saa7146_vv *vv = dev->vv_data;
 
-        u32          *cpu;
-        dma_addr_t   dma_addr;
+	u32          *cpu;
+	dma_addr_t   dma_addr;
 
 	int count = 0;
 	int i;
diff --git a/drivers/media/common/saa7146_video.c b/drivers/media/common/saa7146_video.c
index 8dd4d15ca36d..1d961023b837 100644
--- a/drivers/media/common/saa7146_video.c
+++ b/drivers/media/common/saa7146_video.c
@@ -150,23 +150,23 @@ static int try_win(struct saa7146_dev *dev, struct v4l2_window *win)
 	maxh  = vv->standard->v_max_out;
 
 	if (V4L2_FIELD_ANY == field) {
-                field = (win->w.height > maxh/2)
-                        ? V4L2_FIELD_INTERLACED
-                        : V4L2_FIELD_TOP;
-	        }
-        switch (field) {
-        case V4L2_FIELD_TOP:
-        case V4L2_FIELD_BOTTOM:
-        case V4L2_FIELD_ALTERNATE:
-                maxh = maxh / 2;
-                break;
-        case V4L2_FIELD_INTERLACED:
-                break;
-        default: {
+		field = (win->w.height > maxh/2)
+		        ? V4L2_FIELD_INTERLACED
+		        : V4L2_FIELD_TOP;
+		}
+	switch (field) {
+	case V4L2_FIELD_TOP:
+	case V4L2_FIELD_BOTTOM:
+	case V4L2_FIELD_ALTERNATE:
+		maxh = maxh / 2;
+		break;
+	case V4L2_FIELD_INTERLACED:
+		break;
+	default: {
 		DEB_D(("no known field mode '%d'.\n",field));
-                return -EINVAL;
+		return -EINVAL;
+	}
 	}
-        }
 
 	win->field = field;
 	if (win->w.width > maxw)
@@ -887,7 +887,7 @@ int saa7146_video_do_ioctl(struct inode *inode, struct file *file, unsigned int
 
 		DEB_EE(("VIDIOC_QUERYCAP\n"));
 
-                strcpy(cap->driver, "saa7146 v4l2");
+		strcpy(cap->driver, "saa7146 v4l2");
 		strlcpy(cap->card, dev->ext->name, sizeof(cap->card));
 		sprintf(cap->bus_info,"PCI:%s", pci_name(dev->pci));
 		cap->version = SAA7146_VERSION_CODE;
@@ -1011,19 +1011,19 @@ int saa7146_video_do_ioctl(struct inode *inode, struct file *file, unsigned int
 		err = set_control(fh,arg);
 		return err;
 	}
-        case VIDIOC_G_PARM:
-        {
-                struct v4l2_streamparm *parm = arg;
+	case VIDIOC_G_PARM:
+	{
+		struct v4l2_streamparm *parm = arg;
 		if( parm->type != V4L2_BUF_TYPE_VIDEO_CAPTURE ) {
 			return -EINVAL;
 		}
-                memset(&parm->parm.capture,0,sizeof(struct v4l2_captureparm));
+		memset(&parm->parm.capture,0,sizeof(struct v4l2_captureparm));
 		parm->parm.capture.readbuffers = 1;
 		// fixme: only for PAL!
 		parm->parm.capture.timeperframe.numerator = 1;
 		parm->parm.capture.timeperframe.denominator = 25;
-                return 0;
-        }
+		return 0;
+	}
 	case VIDIOC_G_FMT:
 	{
 		struct v4l2_format *f = arg;
@@ -1383,7 +1383,7 @@ static struct videobuf_queue_ops video_qops = {
 
 static void video_init(struct saa7146_dev *dev, struct saa7146_vv *vv)
 {
-        INIT_LIST_HEAD(&vv->video_q.queue);
+	INIT_LIST_HEAD(&vv->video_q.queue);
 
 	init_timer(&vv->video_q.timeout);
 	vv->video_q.timeout.function = saa7146_buffer_timeout;
diff --git a/drivers/media/dvb/dvb-core/demux.h b/drivers/media/dvb/dvb-core/demux.h
index 7d7b0067f228..9f025825b2d2 100644
--- a/drivers/media/dvb/dvb-core/demux.h
+++ b/drivers/media/dvb/dvb-core/demux.h
@@ -86,25 +86,25 @@ enum dmx_success {
 
 enum dmx_ts_pes
 {  /* also send packets to decoder (if it exists) */
-        DMX_TS_PES_AUDIO0,
+	DMX_TS_PES_AUDIO0,
 	DMX_TS_PES_VIDEO0,
 	DMX_TS_PES_TELETEXT0,
 	DMX_TS_PES_SUBTITLE0,
 	DMX_TS_PES_PCR0,
 
-        DMX_TS_PES_AUDIO1,
+	DMX_TS_PES_AUDIO1,
 	DMX_TS_PES_VIDEO1,
 	DMX_TS_PES_TELETEXT1,
 	DMX_TS_PES_SUBTITLE1,
 	DMX_TS_PES_PCR1,
 
-        DMX_TS_PES_AUDIO2,
+	DMX_TS_PES_AUDIO2,
 	DMX_TS_PES_VIDEO2,
 	DMX_TS_PES_TELETEXT2,
 	DMX_TS_PES_SUBTITLE2,
 	DMX_TS_PES_PCR2,
 
-        DMX_TS_PES_AUDIO3,
+	DMX_TS_PES_AUDIO3,
 	DMX_TS_PES_VIDEO3,
 	DMX_TS_PES_TELETEXT3,
 	DMX_TS_PES_SUBTITLE3,
@@ -121,17 +121,17 @@ enum dmx_ts_pes
 
 
 struct dmx_ts_feed {
-        int is_filtering; /* Set to non-zero when filtering in progress */
-        struct dmx_demux *parent; /* Back-pointer */
-        void *priv; /* Pointer to private data of the API client */
-        int (*set) (struct dmx_ts_feed *feed,
+	int is_filtering; /* Set to non-zero when filtering in progress */
+	struct dmx_demux *parent; /* Back-pointer */
+	void *priv; /* Pointer to private data of the API client */
+	int (*set) (struct dmx_ts_feed *feed,
 		    u16 pid,
 		    int type,
 		    enum dmx_ts_pes pes_type,
 		    size_t circular_buffer_size,
 		    struct timespec timeout);
-        int (*start_filtering) (struct dmx_ts_feed* feed);
-        int (*stop_filtering) (struct dmx_ts_feed* feed);
+	int (*start_filtering) (struct dmx_ts_feed* feed);
+	int (*stop_filtering) (struct dmx_ts_feed* feed);
 };
 
 /*--------------------------------------------------------------------------*/
@@ -139,35 +139,35 @@ struct dmx_ts_feed {
 /*--------------------------------------------------------------------------*/
 
 struct dmx_section_filter {
-        u8 filter_value [DMX_MAX_FILTER_SIZE];
-        u8 filter_mask [DMX_MAX_FILTER_SIZE];
-        u8 filter_mode [DMX_MAX_FILTER_SIZE];
-        struct dmx_section_feed* parent; /* Back-pointer */
-        void* priv; /* Pointer to private data of the API client */
+	u8 filter_value [DMX_MAX_FILTER_SIZE];
+	u8 filter_mask [DMX_MAX_FILTER_SIZE];
+	u8 filter_mode [DMX_MAX_FILTER_SIZE];
+	struct dmx_section_feed* parent; /* Back-pointer */
+	void* priv; /* Pointer to private data of the API client */
 };
 
 struct dmx_section_feed {
-        int is_filtering; /* Set to non-zero when filtering in progress */
-        struct dmx_demux* parent; /* Back-pointer */
-        void* priv; /* Pointer to private data of the API client */
+	int is_filtering; /* Set to non-zero when filtering in progress */
+	struct dmx_demux* parent; /* Back-pointer */
+	void* priv; /* Pointer to private data of the API client */
 
-        int check_crc;
+	int check_crc;
 	u32 crc_val;
 
-        u8 *secbuf;
-        u8 secbuf_base[DMX_MAX_SECFEED_SIZE];
-        u16 secbufp, seclen, tsfeedp;
+	u8 *secbuf;
+	u8 secbuf_base[DMX_MAX_SECFEED_SIZE];
+	u16 secbufp, seclen, tsfeedp;
 
-        int (*set) (struct dmx_section_feed* feed,
+	int (*set) (struct dmx_section_feed* feed,
 		    u16 pid,
 		    size_t circular_buffer_size,
 		    int check_crc);
-        int (*allocate_filter) (struct dmx_section_feed* feed,
+	int (*allocate_filter) (struct dmx_section_feed* feed,
 				struct dmx_section_filter** filter);
-        int (*release_filter) (struct dmx_section_feed* feed,
+	int (*release_filter) (struct dmx_section_feed* feed,
 			       struct dmx_section_filter* filter);
-        int (*start_filtering) (struct dmx_section_feed* feed);
-        int (*stop_filtering) (struct dmx_section_feed* feed);
+	int (*start_filtering) (struct dmx_section_feed* feed);
+	int (*stop_filtering) (struct dmx_section_feed* feed);
 };
 
 /*--------------------------------------------------------------------------*/
@@ -205,10 +205,10 @@ enum dmx_frontend_source {
 };
 
 struct dmx_frontend {
-        struct list_head connectivity_list; /* List of front-ends that can
+	struct list_head connectivity_list; /* List of front-ends that can
 					       be connected to a particular
 					       demux */
-        enum dmx_frontend_source source;
+	enum dmx_frontend_source source;
 };
 
 /*--------------------------------------------------------------------------*/
@@ -240,38 +240,38 @@ struct dmx_frontend {
 #define DMX_FE_ENTRY(list) list_entry(list, struct dmx_frontend, connectivity_list)
 
 struct dmx_demux {
-        u32 capabilities;            /* Bitfield of capability flags */
-        struct dmx_frontend* frontend;    /* Front-end connected to the demux */
-        void* priv;                  /* Pointer to private data of the API client */
-        int (*open) (struct dmx_demux* demux);
-        int (*close) (struct dmx_demux* demux);
-        int (*write) (struct dmx_demux* demux, const char* buf, size_t count);
-        int (*allocate_ts_feed) (struct dmx_demux* demux,
+	u32 capabilities;            /* Bitfield of capability flags */
+	struct dmx_frontend* frontend;    /* Front-end connected to the demux */
+	void* priv;                  /* Pointer to private data of the API client */
+	int (*open) (struct dmx_demux* demux);
+	int (*close) (struct dmx_demux* demux);
+	int (*write) (struct dmx_demux* demux, const char* buf, size_t count);
+	int (*allocate_ts_feed) (struct dmx_demux* demux,
 				 struct dmx_ts_feed** feed,
 				 dmx_ts_cb callback);
-        int (*release_ts_feed) (struct dmx_demux* demux,
+	int (*release_ts_feed) (struct dmx_demux* demux,
 				struct dmx_ts_feed* feed);
-        int (*allocate_section_feed) (struct dmx_demux* demux,
+	int (*allocate_section_feed) (struct dmx_demux* demux,
 				      struct dmx_section_feed** feed,
 				      dmx_section_cb callback);
-        int (*release_section_feed) (struct dmx_demux* demux,
+	int (*release_section_feed) (struct dmx_demux* demux,
 				     struct dmx_section_feed* feed);
-        int (*add_frontend) (struct dmx_demux* demux,
+	int (*add_frontend) (struct dmx_demux* demux,
 			     struct dmx_frontend* frontend);
-        int (*remove_frontend) (struct dmx_demux* demux,
+	int (*remove_frontend) (struct dmx_demux* demux,
 				struct dmx_frontend* frontend);
-        struct list_head* (*get_frontends) (struct dmx_demux* demux);
-        int (*connect_frontend) (struct dmx_demux* demux,
+	struct list_head* (*get_frontends) (struct dmx_demux* demux);
+	int (*connect_frontend) (struct dmx_demux* demux,
 				 struct dmx_frontend* frontend);
-        int (*disconnect_frontend) (struct dmx_demux* demux);
+	int (*disconnect_frontend) (struct dmx_demux* demux);
 
-        int (*get_pes_pids) (struct dmx_demux* demux, u16 *pids);
+	int (*get_pes_pids) (struct dmx_demux* demux, u16 *pids);
 
 	int (*get_caps) (struct dmx_demux* demux, struct dmx_caps *caps);
 
 	int (*set_source) (struct dmx_demux* demux, const dmx_source_t *src);
 
-        int (*get_stc) (struct dmx_demux* demux, unsigned int num,
+	int (*get_stc) (struct dmx_demux* demux, unsigned int num,
 			u64 *stc, unsigned int *base);
 };
 
diff --git a/drivers/media/dvb/dvb-core/dmxdev.c b/drivers/media/dvb/dvb-core/dmxdev.c
index 8028c3a5e287..7b8373ad121b 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.c
+++ b/drivers/media/dvb/dvb-core/dmxdev.c
@@ -947,7 +947,7 @@ static int dvb_demux_do_ioctl(struct inode *inode, struct file *file,
 
 	case DMX_GET_STC:
 		if (!dmxdev->demux->get_stc) {
-		        ret=-EINVAL;
+			ret=-EINVAL;
 			break;
 		}
 		ret = dmxdev->demux->get_stc(dmxdev->demux,
diff --git a/drivers/media/dvb/dvb-core/dmxdev.h b/drivers/media/dvb/dvb-core/dmxdev.h
index 395a9cd75012..fd72920c2199 100644
--- a/drivers/media/dvb/dvb-core/dmxdev.h
+++ b/drivers/media/dvb/dvb-core/dmxdev.h
@@ -53,51 +53,51 @@ enum dmxdev_state {
 };
 
 struct dmxdev_buffer {
-        u8 *data;
-        int size;
-        int pread;
-        int pwrite;
+	u8 *data;
+	int size;
+	int pread;
+	int pwrite;
 	wait_queue_head_t queue;
-        int error;
+	int error;
 };
 
 struct dmxdev_filter {
 	struct dvb_device *dvbdev;
 
-        union {
-	        struct dmx_section_filter *sec;
+	union {
+		struct dmx_section_filter *sec;
 	} filter;
 
-        union {
-                struct dmx_ts_feed *ts;
-                struct dmx_section_feed *sec;
+	union {
+		struct dmx_ts_feed *ts;
+		struct dmx_section_feed *sec;
 	} feed;
 
-        union {
-	        struct dmx_sct_filter_params sec;
-	        struct dmx_pes_filter_params pes;
+	union {
+		struct dmx_sct_filter_params sec;
+		struct dmx_pes_filter_params pes;
 	} params;
 
-        int type;
-        enum dmxdev_state state;
-        struct dmxdev *dev;
-        struct dmxdev_buffer buffer;
+	int type;
+	enum dmxdev_state state;
+	struct dmxdev *dev;
+	struct dmxdev_buffer buffer;
 
 	struct semaphore mutex;
 
-        /* only for sections */
-        struct timer_list timer;
-        int todo;
-        u8 secheader[3];
+	/* only for sections */
+	struct timer_list timer;
+	int todo;
+	u8 secheader[3];
 
-        u16 pid;
+	u16 pid;
 };
 
 
 struct dmxdev_dvr {
-        int state;
-        struct dmxdev *dev;
-        struct dmxdev_buffer buffer;
+	int state;
+	struct dmxdev *dev;
+	struct dmxdev_buffer buffer;
 };
 
 
@@ -105,16 +105,16 @@ struct dmxdev {
 	struct dvb_device *dvbdev;
 	struct dvb_device *dvr_dvbdev;
 
-        struct dmxdev_filter *filter;
-        struct dmxdev_dvr *dvr;
-        struct dmx_demux *demux;
+	struct dmxdev_filter *filter;
+	struct dmxdev_dvr *dvr;
+	struct dmx_demux *demux;
 
-        int filternum;
-        int capabilities;
+	int filternum;
+	int capabilities;
 #define DMXDEV_CAP_DUPLEX 1
-        struct dmx_frontend *dvr_orig_fe;
+	struct dmx_frontend *dvr_orig_fe;
 
-        struct dmxdev_buffer dvr_buffer;
+	struct dmxdev_buffer dvr_buffer;
 #define DVR_BUFFER_SIZE (10*188*1024)
 
 	struct semaphore mutex;
diff --git a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
index cb2e7d6ba283..5956c35d34ac 100644
--- a/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
+++ b/drivers/media/dvb/dvb-core/dvb_ca_en50221.c
@@ -498,7 +498,7 @@ static int dvb_ca_en50221_parse_attributes(struct dvb_ca_private *ca, int slot)
 	/* process the CFTABLE_ENTRY tuples, and any after those */
 	while ((!end_chain) && (address < 0x1000)) {
 		if ((status = dvb_ca_en50221_read_tuple(ca, slot, &address, &tupleType,
-						        &tupleLength, tuple)) < 0)
+							&tupleLength, tuple)) < 0)
 			return status;
 		switch (tupleType) {
 		case 0x1B:	// CISTPL_CFTABLE_ENTRY
diff --git a/drivers/media/dvb/dvb-core/dvb_filter.c b/drivers/media/dvb/dvb-core/dvb_filter.c
index bd5143906084..c49fd0bd7181 100644
--- a/drivers/media/dvb/dvb-core/dvb_filter.c
+++ b/drivers/media/dvb/dvb-core/dvb_filter.c
@@ -72,12 +72,12 @@ static int read_picture_header(u8 *headr, struct mpg_picture *pic, int field, in
 	u8 pct;
 
 	if (pr) printk( "Pic header: ");
-        pic->temporal_reference[field] = (( headr[0] << 2 ) |
+	pic->temporal_reference[field] = (( headr[0] << 2 ) |
 					  (headr[1] & 0x03) )& 0x03ff;
 	if (pr) printk( " temp ref: 0x%04x", pic->temporal_reference[field]);
 
 	pct = ( headr[1] >> 2 ) & 0x07;
-        pic->picture_coding_type[field] = pct;
+	pic->picture_coding_type[field] = pct;
 	if (pr) {
 		switch(pct){
 			case I_FRAME:
@@ -93,17 +93,17 @@ static int read_picture_header(u8 *headr, struct mpg_picture *pic, int field, in
 	}
 
 
-        pic->vinfo.vbv_delay  = (( headr[1] >> 5 ) | ( headr[2] << 3) |
+	pic->vinfo.vbv_delay  = (( headr[1] >> 5 ) | ( headr[2] << 3) |
 				 ( (headr[3] & 0x1F) << 11) ) & 0xffff;
 
 	if (pr) printk( " vbv delay: 0x%04x", pic->vinfo.vbv_delay);
 
-        pic->picture_header_parameter = ( headr[3] & 0xe0 ) |
+	pic->picture_header_parameter = ( headr[3] & 0xe0 ) |
 		((headr[4] & 0x80) >> 3);
 
-        if ( pct == B_FRAME ){
-                pic->picture_header_parameter |= ( headr[4] >> 3 ) & 0x0f;
-        }
+	if ( pct == B_FRAME ){
+		pic->picture_header_parameter |= ( headr[4] >> 3 ) & 0x0f;
+	}
 	if (pr) printk( " pic head param: 0x%x",
 			pic->picture_header_parameter);
 
@@ -124,18 +124,18 @@ static int read_gop_header(u8 *headr, struct mpg_picture *pic, int pr)
 		       ((headr[0]<<4)& 0x30)| ((headr[1]>>4)& 0x0F),
 		       ((headr[1]<<3)& 0x38)| ((headr[2]>>5)& 0x0F));
 
-        if ( ( headr[3] & 0x40 ) != 0 ){
-                pic->closed_gop = 1;
-        } else {
-                pic->closed_gop = 0;
-        }
+	if ( ( headr[3] & 0x40 ) != 0 ){
+		pic->closed_gop = 1;
+	} else {
+		pic->closed_gop = 0;
+	}
 	if (pr) printk("closed: %d", pic->closed_gop);
 
-        if ( ( headr[3] & 0x20 ) != 0 ){
-                pic->broken_link = 1;
-        } else {
-                pic->broken_link = 0;
-        }
+	if ( ( headr[3] & 0x20 ) != 0 ){
+		pic->broken_link = 1;
+	} else {
+		pic->broken_link = 0;
+	}
 	if (pr) printk(" broken: %d\n", pic->broken_link);
 
 	return 0;
@@ -146,7 +146,7 @@ static int read_gop_header(u8 *headr, struct mpg_picture *pic, int pr)
 /* needs 8 byte input */
 static int read_sequence_header(u8 *headr, struct dvb_video_info *vi, int pr)
 {
-        int sw;
+	int sw;
 	int form = -1;
 
 	if (pr) printk("Reading sequence header\n");
@@ -154,9 +154,9 @@ static int read_sequence_header(u8 *headr, struct dvb_video_info *vi, int pr)
 	vi->horizontal_size	= ((headr[1] &0xF0) >> 4) | (headr[0] << 4);
 	vi->vertical_size	= ((headr[1] &0x0F) << 8) | (headr[2]);
 
-        sw = (int)((headr[3]&0xF0) >> 4) ;
+	sw = (int)((headr[3]&0xF0) >> 4) ;
 
-        switch( sw ){
+	switch( sw ){
 	case 1:
 		if (pr)
 			printk("Videostream: ASPECT: 1:1");
@@ -165,84 +165,84 @@ static int read_sequence_header(u8 *headr, struct dvb_video_info *vi, int pr)
 	case 2:
 		if (pr)
 			printk("Videostream: ASPECT: 4:3");
-                vi->aspect_ratio = 133;
+		vi->aspect_ratio = 133;
 		break;
 	case 3:
 		if (pr)
 			printk("Videostream: ASPECT: 16:9");
-                vi->aspect_ratio = 177;
+		vi->aspect_ratio = 177;
 		break;
 	case 4:
 		if (pr)
 			printk("Videostream: ASPECT: 2.21:1");
-                vi->aspect_ratio = 221;
+		vi->aspect_ratio = 221;
 		break;
 
-        case 5 ... 15:
+	case 5 ... 15:
 		if (pr)
 			printk("Videostream: ASPECT: reserved");
-                vi->aspect_ratio = 0;
+		vi->aspect_ratio = 0;
 		break;
 
-        default:
-                vi->aspect_ratio = 0;
-                return -1;
+	default:
+		vi->aspect_ratio = 0;
+		return -1;
 	}
 
 	if (pr)
 		printk("  Size = %dx%d",vi->horizontal_size,vi->vertical_size);
 
-        sw = (int)(headr[3]&0x0F);
+	sw = (int)(headr[3]&0x0F);
 
-        switch ( sw ) {
+	switch ( sw ) {
 	case 1:
 		if (pr)
 			printk("  FRate: 23.976 fps");
-                vi->framerate = 23976;
+		vi->framerate = 23976;
 		form = -1;
 		break;
 	case 2:
 		if (pr)
 			printk("  FRate: 24 fps");
-                vi->framerate = 24000;
+		vi->framerate = 24000;
 		form = -1;
 		break;
 	case 3:
 		if (pr)
 			printk("  FRate: 25 fps");
-                vi->framerate = 25000;
+		vi->framerate = 25000;
 		form = VIDEO_MODE_PAL;
 		break;
 	case 4:
 		if (pr)
 			printk("  FRate: 29.97 fps");
-                vi->framerate = 29970;
+		vi->framerate = 29970;
 		form = VIDEO_MODE_NTSC;
 		break;
 	case 5:
 		if (pr)
 			printk("  FRate: 30 fps");
-                vi->framerate = 30000;
+		vi->framerate = 30000;
 		form = VIDEO_MODE_NTSC;
 		break;
 	case 6:
 		if (pr)
 			printk("  FRate: 50 fps");
-                vi->framerate = 50000;
+		vi->framerate = 50000;
 		form = VIDEO_MODE_PAL;
 		break;
 	case 7:
 		if (pr)
 			printk("  FRate: 60 fps");
-                vi->framerate = 60000;
+		vi->framerate = 60000;
 		form = VIDEO_MODE_NTSC;
 		break;
 	}
 
 	vi->bit_rate = (headr[4] << 10) | (headr[5] << 2) | (headr[6] & 0x03);
 
-        vi->vbv_buffer_size
-                = (( headr[6] & 0xF8) >> 3 ) | (( headr[7] & 0x1F )<< 5);
+	vi->vbv_buffer_size
+		= (( headr[6] & 0xF8) >> 3 ) | (( headr[7] & 0x1F )<< 5);
 
 	if (pr){
 		printk("  BRate: %d Mbit/s",4*(vi->bit_rate)/10000);
@@ -250,7 +250,7 @@ static int read_sequence_header(u8 *headr, struct dvb_video_info *vi, int pr)
 		printk("\n");
 	}
 
-        vi->video_format = form;
+	vi->video_format = form;
 
 	return 0;
 }
@@ -308,7 +308,7 @@ static int get_ainfo(u8 *mbuf, int count, struct dvb_audio_info *ai, int pr)
 	if (!found) return -1;
 
 	if (c+3 >= count) return -1;
-        headr = mbuf+c;
+	headr = mbuf+c;
 
 	ai->layer = (headr[1] & 0x06) >> 1;
 
@@ -368,7 +368,7 @@ int dvb_filter_get_ac3info(u8 *mbuf, int count, struct dvb_audio_info *ai, int p
 	if (c+5 >= count) return -1;
 
 	ai->layer = 0;  // 0 for AC3
-        headr = mbuf+c+2;
+	headr = mbuf+c+2;
 
 	frame = (headr[2]&0x3f);
 	ai->bit_rate = ac3_bitrates[frame >> 1]*1000;
@@ -396,159 +396,159 @@ EXPORT_SYMBOL(dvb_filter_get_ac3info);
 #if 0
 static u8 *skip_pes_header(u8 **bufp)
 {
-        u8 *inbuf = *bufp;
-        u8 *buf = inbuf;
-        u8 *pts = NULL;
-        int skip = 0;
+	u8 *inbuf = *bufp;
+	u8 *buf = inbuf;
+	u8 *pts = NULL;
+	int skip = 0;
 
 	static const int mpeg1_skip_table[16] = {
 		1, 0xffff,      5,     10, 0xffff, 0xffff, 0xffff, 0xffff,
-	        0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
+		0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff
 	};
 
 
-        if ((inbuf[6] & 0xc0) == 0x80){ /* mpeg2 */
-                if (buf[7] & PTS_ONLY)
-                        pts = buf+9;
-                else pts = NULL;
-                buf = inbuf + 9 + inbuf[8];
-        } else {        /* mpeg1 */
-                for (buf = inbuf + 6; *buf == 0xff; buf++)
-                        if (buf == inbuf + 6 + 16) {
-                                break;
-                        }
-                if ((*buf & 0xc0) == 0x40)
-                        buf += 2;
-                skip = mpeg1_skip_table [*buf >> 4];
-                if (skip == 5 || skip == 10) pts = buf;
-                else pts = NULL;
-
-                buf += mpeg1_skip_table [*buf >> 4];
-        }
-
-        *bufp = buf;
-        return pts;
+	if ((inbuf[6] & 0xc0) == 0x80){ /* mpeg2 */
+		if (buf[7] & PTS_ONLY)
+		        pts = buf+9;
+		else pts = NULL;
+		buf = inbuf + 9 + inbuf[8];
+	} else {        /* mpeg1 */
+		for (buf = inbuf + 6; *buf == 0xff; buf++)
+		        if (buf == inbuf + 6 + 16) {
+		                break;
+		        }
+		if ((*buf & 0xc0) == 0x40)
+		        buf += 2;
+		skip = mpeg1_skip_table [*buf >> 4];
+		if (skip == 5 || skip == 10) pts = buf;
+		else pts = NULL;
+
+		buf += mpeg1_skip_table [*buf >> 4];
+	}
+
+	*bufp = buf;
+	return pts;
 }
 #endif
 
 #if 0
 static void initialize_quant_matrix( u32 *matrix )
 {
-        int i;
-
-        matrix[0]  = 0x08101013;
-        matrix[1]  = 0x10131616;
-        matrix[2]  = 0x16161616;
-        matrix[3]  = 0x1a181a1b;
-        matrix[4]  = 0x1b1b1a1a;
-        matrix[5]  = 0x1a1a1b1b;
-        matrix[6]  = 0x1b1d1d1d;
-        matrix[7]  = 0x2222221d;
-        matrix[8]  = 0x1d1d1b1b;
-        matrix[9]  = 0x1d1d2020;
-        matrix[10] = 0x22222526;
-        matrix[11] = 0x25232322;
-        matrix[12] = 0x23262628;
-        matrix[13] = 0x28283030;
-        matrix[14] = 0x2e2e3838;
-        matrix[15] = 0x3a454553;
-
-        for ( i = 16 ; i < 32 ; i++ )
-                matrix[i] = 0x10101010;
+	int i;
+
+	matrix[0]  = 0x08101013;
+	matrix[1]  = 0x10131616;
+	matrix[2]  = 0x16161616;
+	matrix[3]  = 0x1a181a1b;
+	matrix[4]  = 0x1b1b1a1a;
+	matrix[5]  = 0x1a1a1b1b;
+	matrix[6]  = 0x1b1d1d1d;
+	matrix[7]  = 0x2222221d;
+	matrix[8]  = 0x1d1d1b1b;
+	matrix[9]  = 0x1d1d2020;
+	matrix[10] = 0x22222526;
+	matrix[11] = 0x25232322;
+	matrix[12] = 0x23262628;
+	matrix[13] = 0x28283030;
+	matrix[14] = 0x2e2e3838;
+	matrix[15] = 0x3a454553;
+
+	for ( i = 16 ; i < 32 ; i++ )
+		matrix[i] = 0x10101010;
 }
 #endif
 
 #if 0
 static void initialize_mpg_picture(struct mpg_picture *pic)
 {
-        int i;
-
-        /* set MPEG1 */
-        pic->mpeg1_flag = 1;
-        pic->profile_and_level = 0x4A ;        /* MP@LL */
-        pic->progressive_sequence = 1;
-        pic->low_delay = 0;
-
-        pic->sequence_display_extension_flag = 0;
-        for ( i = 0 ; i < 4 ; i++ ){
-                pic->frame_centre_horizontal_offset[i] = 0;
-                pic->frame_centre_vertical_offset[i] = 0;
-        }
-        pic->last_frame_centre_horizontal_offset = 0;
-        pic->last_frame_centre_vertical_offset = 0;
-
-        pic->picture_display_extension_flag[0] = 0;
-        pic->picture_display_extension_flag[1] = 0;
-        pic->sequence_header_flag = 0;
+	int i;
+
+	/* set MPEG1 */
+	pic->mpeg1_flag = 1;
+	pic->profile_and_level = 0x4A ;        /* MP@LL */
+	pic->progressive_sequence = 1;
+	pic->low_delay = 0;
+
+	pic->sequence_display_extension_flag = 0;
+	for ( i = 0 ; i < 4 ; i++ ){
+		pic->frame_centre_horizontal_offset[i] = 0;
+		pic->frame_centre_vertical_offset[i] = 0;
+	}
+	pic->last_frame_centre_horizontal_offset = 0;
+	pic->last_frame_centre_vertical_offset = 0;
+
+	pic->picture_display_extension_flag[0] = 0;
+	pic->picture_display_extension_flag[1] = 0;
+	pic->sequence_header_flag = 0;
 	pic->gop_flag = 0;
-        pic->sequence_end_flag = 0;
+	pic->sequence_end_flag = 0;
 }
 #endif
 
 #if 0
 static void mpg_set_picture_parameter( int32_t field_type, struct mpg_picture *pic )
 {
-        int16_t last_h_offset;
-        int16_t last_v_offset;
-
-        int16_t *p_h_offset;
-        int16_t *p_v_offset;
-
-        if ( pic->mpeg1_flag ){
-                pic->picture_structure[field_type] = VIDEO_FRAME_PICTURE;
-                pic->top_field_first = 0;
-                pic->repeat_first_field = 0;
-                pic->progressive_frame = 1;
-                pic->picture_coding_parameter = 0x000010;
-        }
-
-        /* Reset flag */
-        pic->picture_display_extension_flag[field_type] = 0;
-
-        last_h_offset = pic->last_frame_centre_horizontal_offset;
-        last_v_offset = pic->last_frame_centre_vertical_offset;
-        if ( field_type == FIRST_FIELD ){
-                p_h_offset = pic->frame_centre_horizontal_offset;
-                p_v_offset = pic->frame_centre_vertical_offset;
-                *p_h_offset = last_h_offset;
-                *(p_h_offset + 1) = last_h_offset;
-                *(p_h_offset + 2) = last_h_offset;
-                *p_v_offset = last_v_offset;
-                *(p_v_offset + 1) = last_v_offset;
-                *(p_v_offset + 2) = last_v_offset;
-        } else {
-                pic->frame_centre_horizontal_offset[3] = last_h_offset;
-                pic->frame_centre_vertical_offset[3] = last_v_offset;
-        }
+	int16_t last_h_offset;
+	int16_t last_v_offset;
+
+	int16_t *p_h_offset;
+	int16_t *p_v_offset;
+
+	if ( pic->mpeg1_flag ){
+		pic->picture_structure[field_type] = VIDEO_FRAME_PICTURE;
+		pic->top_field_first = 0;
+		pic->repeat_first_field = 0;
+		pic->progressive_frame = 1;
+		pic->picture_coding_parameter = 0x000010;
+	}
+
+	/* Reset flag */
+	pic->picture_display_extension_flag[field_type] = 0;
+
+	last_h_offset = pic->last_frame_centre_horizontal_offset;
+	last_v_offset = pic->last_frame_centre_vertical_offset;
+	if ( field_type == FIRST_FIELD ){
+		p_h_offset = pic->frame_centre_horizontal_offset;
+		p_v_offset = pic->frame_centre_vertical_offset;
+		*p_h_offset = last_h_offset;
+		*(p_h_offset + 1) = last_h_offset;
+		*(p_h_offset + 2) = last_h_offset;
+		*p_v_offset = last_v_offset;
+		*(p_v_offset + 1) = last_v_offset;
+		*(p_v_offset + 2) = last_v_offset;
+	} else {
+		pic->frame_centre_horizontal_offset[3] = last_h_offset;
+		pic->frame_centre_vertical_offset[3] = last_v_offset;
+	}
 }
 #endif
 
 #if 0
 static void init_mpg_picture( struct mpg_picture *pic, int chan, int32_t field_type)
 {
-        pic->picture_header = 0;
-        pic->sequence_header_data
-                = ( INIT_HORIZONTAL_SIZE << 20 )
-                        | ( INIT_VERTICAL_SIZE << 8 )
-                        | ( INIT_ASPECT_RATIO << 4 )
-                        | ( INIT_FRAME_RATE );
-        pic->mpeg1_flag = 0;
-        pic->vinfo.horizontal_size
-                = INIT_DISP_HORIZONTAL_SIZE;
-        pic->vinfo.vertical_size
-                = INIT_DISP_VERTICAL_SIZE;
-        pic->picture_display_extension_flag[field_type]
-                = 0;
-        pic->pts_flag[field_type] = 0;
-
-        pic->sequence_gop_header = 0;
-        pic->picture_header = 0;
-        pic->sequence_header_flag = 0;
-        pic->gop_flag = 0;
-        pic->sequence_end_flag = 0;
-        pic->sequence_display_extension_flag = 0;
-        pic->last_frame_centre_horizontal_offset = 0;
-        pic->last_frame_centre_vertical_offset = 0;
+	pic->picture_header = 0;
+	pic->sequence_header_data
+		= ( INIT_HORIZONTAL_SIZE << 20 )
+		        | ( INIT_VERTICAL_SIZE << 8 )
+		        | ( INIT_ASPECT_RATIO << 4 )
+		        | ( INIT_FRAME_RATE );
+	pic->mpeg1_flag = 0;
+	pic->vinfo.horizontal_size
+		= INIT_DISP_HORIZONTAL_SIZE;
+	pic->vinfo.vertical_size
+		= INIT_DISP_VERTICAL_SIZE;
+	pic->picture_display_extension_flag[field_type]
+		= 0;
+	pic->pts_flag[field_type] = 0;
+
+	pic->sequence_gop_header = 0;
+	pic->picture_header = 0;
+	pic->sequence_header_flag = 0;
+	pic->gop_flag = 0;
+	pic->sequence_end_flag = 0;
+	pic->sequence_display_extension_flag = 0;
+	pic->last_frame_centre_horizontal_offset = 0;
+	pic->last_frame_centre_vertical_offset = 0;
 	pic->channel = chan;
 }
 #endif
@@ -588,11 +588,11 @@ int dvb_filter_pes2ts(struct dvb_filter_pes2ts *p2ts, unsigned char *pes,
 		buf[1]&=~0x40;
 	}
 	if (!len)
-	        return 0;
+		return 0;
 	buf[3]=0x30|((p2ts->cc++)&0x0f);
 	rest=183-len;
 	if (rest) {
-	        buf[5]=0x00;
+		buf[5]=0x00;
 		if (rest-1)
 			memset(buf+6, 0xff, rest-1);
 	}
diff --git a/drivers/media/dvb/dvb-core/dvb_filter.h b/drivers/media/dvb/dvb-core/dvb_filter.h
index b0848f7836b7..375e3be184b1 100644
--- a/drivers/media/dvb/dvb-core/dvb_filter.h
+++ b/drivers/media/dvb/dvb-core/dvb_filter.h
@@ -29,8 +29,8 @@ typedef int (dvb_filter_pes2ts_cb_t) (void *, unsigned char *);
 
 struct dvb_filter_pes2ts {
 	unsigned char buf[188];
-        unsigned char cc;
-        dvb_filter_pes2ts_cb_t *cb;
+	unsigned char cc;
+	dvb_filter_pes2ts_cb_t *cb;
 	void *priv;
 };
 
@@ -162,7 +162,7 @@ struct dvb_video_info {
 	u32 bit_rate;
 	u32 comp_bit_rate;
 	u32 vbv_buffer_size;
-        s16 vbv_delay;
+	s16 vbv_delay;
 	u32 CSPF;
 	u32 off;
 };
@@ -173,60 +173,60 @@ struct dvb_video_info {
 #define VIDEO_FRAME_PICTURE 0x03
 
 struct mpg_picture {
-        int       channel;
+	int       channel;
 	struct dvb_video_info vinfo;
-        u32      *sequence_gop_header;
-        u32      *picture_header;
-        s32       time_code;
-        int       low_delay;
-        int       closed_gop;
-        int       broken_link;
-        int       sequence_header_flag;
-        int       gop_flag;
-        int       sequence_end_flag;
-
-        u8        profile_and_level;
-        s32       picture_coding_parameter;
-        u32       matrix[32];
-        s8        matrix_change_flag;
-
-        u8        picture_header_parameter;
+	u32      *sequence_gop_header;
+	u32      *picture_header;
+	s32       time_code;
+	int       low_delay;
+	int       closed_gop;
+	int       broken_link;
+	int       sequence_header_flag;
+	int       gop_flag;
+	int       sequence_end_flag;
+
+	u8        profile_and_level;
+	s32       picture_coding_parameter;
+	u32       matrix[32];
+	s8        matrix_change_flag;
+
+	u8        picture_header_parameter;
   /* bit 0 - 2: bwd f code
      bit 3    : fpb vector
      bit 4 - 6: fwd f code
      bit 7    : fpf vector */
 
-        int       mpeg1_flag;
-        int       progressive_sequence;
-        int       sequence_display_extension_flag;
-        u32       sequence_header_data;
-        s16       last_frame_centre_horizontal_offset;
-        s16       last_frame_centre_vertical_offset;
-
-        u32       pts[2]; /* [0] 1st field, [1] 2nd field */
-        int       top_field_first;
-        int       repeat_first_field;
-        int       progressive_frame;
-        int       bank;
-        int       forward_bank;
-        int       backward_bank;
-        int       compress;
-        s16       frame_centre_horizontal_offset[OFF_SIZE];
-                  /* [0-2] 1st field, [3] 2nd field */
-        s16       frame_centre_vertical_offset[OFF_SIZE];
-                  /* [0-2] 1st field, [3] 2nd field */
-        s16       temporal_reference[2];
-                  /* [0] 1st field, [1] 2nd field */
-
-        s8        picture_coding_type[2];
-                  /* [0] 1st field, [1] 2nd field */
-        s8        picture_structure[2];
-                  /* [0] 1st field, [1] 2nd field */
-        s8        picture_display_extension_flag[2];
-                  /* [0] 1st field, [1] 2nd field */
-                  /* picture_display_extenion() 0:no 1:exit*/
-        s8        pts_flag[2];
-                  /* [0] 1st field, [1] 2nd field */
+	int       mpeg1_flag;
+	int       progressive_sequence;
+	int       sequence_display_extension_flag;
+	u32       sequence_header_data;
+	s16       last_frame_centre_horizontal_offset;
+	s16       last_frame_centre_vertical_offset;
+
+	u32       pts[2]; /* [0] 1st field, [1] 2nd field */
+	int       top_field_first;
+	int       repeat_first_field;
+	int       progressive_frame;
+	int       bank;
+	int       forward_bank;
+	int       backward_bank;
+	int       compress;
+	s16       frame_centre_horizontal_offset[OFF_SIZE];
+		  /* [0-2] 1st field, [3] 2nd field */
+	s16       frame_centre_vertical_offset[OFF_SIZE];
+		  /* [0-2] 1st field, [3] 2nd field */
+	s16       temporal_reference[2];
+		  /* [0] 1st field, [1] 2nd field */
+
+	s8        picture_coding_type[2];
+		  /* [0] 1st field, [1] 2nd field */
+	s8        picture_structure[2];
+		  /* [0] 1st field, [1] 2nd field */
+	s8        picture_display_extension_flag[2];
+		  /* [0] 1st field, [1] 2nd field */
+		  /* picture_display_extenion() 0:no 1:exit*/
+	s8        pts_flag[2];
+		  /* [0] 1st field, [1] 2nd field */
 };
 
 struct dvb_audio_info {
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.c b/drivers/media/dvb/dvb-core/dvb_frontend.c
index 6ffa6b216363..95ea5095e07e 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.c
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.c
@@ -372,10 +372,10 @@ static int dvb_frontend_thread(void *data)
 
 	snprintf (name, sizeof(name), "kdvb-fe-%i", fe->dvb->num);
 
-        lock_kernel();
-        daemonize(name);
-        sigfillset(&current->blocked);
-        unlock_kernel();
+	lock_kernel();
+	daemonize(name);
+	sigfillset(&current->blocked);
+	unlock_kernel();
 
 	fepriv->status = 0;
 	dvb_frontend_init(fe);
diff --git a/drivers/media/dvb/dvb-core/dvb_frontend.h b/drivers/media/dvb/dvb-core/dvb_frontend.h
index 348c9b0b988a..1e0840d02f1f 100644
--- a/drivers/media/dvb/dvb-core/dvb_frontend.h
+++ b/drivers/media/dvb/dvb-core/dvb_frontend.h
@@ -41,10 +41,10 @@
 #include "dvbdev.h"
 
 struct dvb_frontend_tune_settings {
-        int min_delay_ms;
-        int step_size;
-        int max_drift;
-        struct dvb_frontend_parameters parameters;
+	int min_delay_ms;
+	int step_size;
+	int max_drift;
+	struct dvb_frontend_parameters parameters;
 };
 
 struct dvb_frontend;
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index df536bd2e103..86bba81e851e 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -713,7 +713,7 @@ static int dvb_net_ts_callback(const u8 *buffer1, size_t buffer1_len,
 	if (buffer1_len > 32768)
 		printk(KERN_WARNING "length > 32k: %zu.\n", buffer1_len);
 	/* printk("TS callback: %u bytes, %u TS cells @ %p.\n",
-	          buffer1_len, buffer1_len / TS_SZ, buffer1); */
+		  buffer1_len, buffer1_len / TS_SZ, buffer1); */
 	dvb_net_ule(dev, buffer1, buffer1_len);
 	return 0;
 }
@@ -721,8 +721,8 @@ static int dvb_net_ts_callback(const u8 *buffer1, size_t buffer1_len,
 
 static void dvb_net_sec(struct net_device *dev, u8 *pkt, int pkt_len)
 {
-        u8 *eth;
-        struct sk_buff *skb;
+	u8 *eth;
+	struct sk_buff *skb;
 	struct net_device_stats *stats = &(((struct dvb_net_priv *) dev->priv)->stats);
 	int snap = 0;
 
@@ -754,7 +754,7 @@ static void dvb_net_sec(struct net_device *dev, u8 *pkt, int pkt_len)
 			return;
 		}
 		snap = 8;
-        }
+	}
 	if (pkt[7]) {
 		/* FIXME: assemble datagram from multiple sections */
 		stats->rx_errors++;
@@ -778,14 +778,14 @@ static void dvb_net_sec(struct net_device *dev, u8 *pkt, int pkt_len)
 	memcpy(eth + 14, pkt + 12 + snap, pkt_len - 12 - 4 - snap);
 
 	/* create ethernet header: */
-        eth[0]=pkt[0x0b];
-        eth[1]=pkt[0x0a];
-        eth[2]=pkt[0x09];
-        eth[3]=pkt[0x08];
-        eth[4]=pkt[0x04];
-        eth[5]=pkt[0x03];
+	eth[0]=pkt[0x0b];
+	eth[1]=pkt[0x0a];
+	eth[2]=pkt[0x09];
+	eth[3]=pkt[0x08];
+	eth[4]=pkt[0x04];
+	eth[5]=pkt[0x03];
 
-        eth[6]=eth[7]=eth[8]=eth[9]=eth[10]=eth[11]=0;
+	eth[6]=eth[7]=eth[8]=eth[9]=eth[10]=eth[11]=0;
 
 	if (snap) {
 		eth[12] = pkt[18];
@@ -807,7 +807,7 @@ static void dvb_net_sec(struct net_device *dev, u8 *pkt, int pkt_len)
 
 	stats->rx_packets++;
 	stats->rx_bytes+=skb->len;
-        netif_rx(skb);
+	netif_rx(skb);
 }
 
 static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len,
@@ -815,7 +815,7 @@ static int dvb_net_sec_callback(const u8 *buffer1, size_t buffer1_len,
 		 struct dmx_section_filter *filter,
 		 enum dmx_success success)
 {
-        struct net_device *dev = filter->priv;
+	struct net_device *dev = filter->priv;
 
 	/**
 	 * we rely on the DVB API definition where exactly one complete
@@ -885,8 +885,8 @@ static int dvb_net_feed_start(struct net_device *dev)
 {
 	int ret = 0, i;
 	struct dvb_net_priv *priv = dev->priv;
-        struct dmx_demux *demux = priv->demux;
-        unsigned char *mac = (unsigned char *) dev->dev_addr;
+	struct dmx_demux *demux = priv->demux;
+	unsigned char *mac = (unsigned char *) dev->dev_addr;
 
 	dprintk("%s: rx_mode %i\n", __FUNCTION__, priv->rx_mode);
 	down(&priv->mutex);
@@ -1129,12 +1129,12 @@ static int dvb_net_stop(struct net_device *dev)
 	struct dvb_net_priv *priv = dev->priv;
 
 	priv->in_use--;
-        return dvb_net_feed_stop(dev);
+	return dvb_net_feed_stop(dev);
 }
 
 static struct net_device_stats * dvb_net_get_stats(struct net_device *dev)
 {
-        return &((struct dvb_net_priv*) dev->priv)->stats;
+	return &((struct dvb_net_priv*) dev->priv)->stats;
 }
 
 static void dvb_net_setup(struct net_device *dev)
@@ -1360,10 +1360,10 @@ static struct file_operations dvb_net_fops = {
 };
 
 static struct dvb_device dvbdev_net = {
-        .priv = NULL,
-        .users = 1,
-        .writers = 1,
-        .fops = &dvb_net_fops,
+	.priv = NULL,
+	.users = 1,
+	.writers = 1,
+	.fops = &dvb_net_fops,
 };
 
 
diff --git a/drivers/media/dvb/dvb-core/dvb_ringbuffer.c b/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
index fb6d94a69d71..283c6e9339a4 100644
--- a/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
+++ b/drivers/media/dvb/dvb-core/dvb_ringbuffer.c
@@ -42,216 +42,216 @@
 
 void dvb_ringbuffer_init(struct dvb_ringbuffer *rbuf, void *data, size_t len)
 {
-        rbuf->pread=rbuf->pwrite=0;
-        rbuf->data=data;
-        rbuf->size=len;
+	rbuf->pread=rbuf->pwrite=0;
+	rbuf->data=data;
+	rbuf->size=len;
 
-        init_waitqueue_head(&rbuf->queue);
+	init_waitqueue_head(&rbuf->queue);
 
-        spin_lock_init(&(rbuf->lock));
+	spin_lock_init(&(rbuf->lock));
 }
 
 
 int dvb_ringbuffer_empty(struct dvb_ringbuffer *rbuf)
 {
-        return (rbuf->pread==rbuf->pwrite);
+	return (rbuf->pread==rbuf->pwrite);
 }
 
 
 ssize_t dvb_ringbuffer_free(struct dvb_ringbuffer *rbuf)
 {
-        ssize_t free;
+	ssize_t free;
 
-        free = rbuf->pread - rbuf->pwrite;
-        if (free <= 0)
-                free += rbuf->size;
-        return free-1;
+	free = rbuf->pread - rbuf->pwrite;
+	if (free <= 0)
+		free += rbuf->size;
+	return free-1;
 }
 
 
 ssize_t dvb_ringbuffer_avail(struct dvb_ringbuffer *rbuf)
 {
-        ssize_t avail;
+	ssize_t avail;
 
-        avail = rbuf->pwrite - rbuf->pread;
-        if (avail < 0)
-                avail += rbuf->size;
-        return avail;
+	avail = rbuf->pwrite - rbuf->pread;
+	if (avail < 0)
+		avail += rbuf->size;
+	return avail;
 }
 
 
 void dvb_ringbuffer_flush(struct dvb_ringbuffer *rbuf)
 {
-        rbuf->pread = rbuf->pwrite;
+	rbuf->pread = rbuf->pwrite;
 }
 
 
 void dvb_ringbuffer_flush_spinlock_wakeup(struct dvb_ringbuffer *rbuf)
 {
-        unsigned long flags;
+	unsigned long flags;
 
-        spin_lock_irqsave(&rbuf->lock, flags);
-        dvb_ringbuffer_flush(rbuf);
-        spin_unlock_irqrestore(&rbuf->lock, flags);
+	spin_lock_irqsave(&rbuf->lock, flags);
+	dvb_ringbuffer_flush(rbuf);
+	spin_unlock_irqrestore(&rbuf->lock, flags);
 
-        wake_up(&rbuf->queue);
+	wake_up(&rbuf->queue);
 }
 
 
 ssize_t dvb_ringbuffer_read(struct dvb_ringbuffer *rbuf, u8 *buf, size_t len, int usermem)
 {
-        size_t todo = len;
-        size_t split;
-
-        split = (rbuf->pread + len > rbuf->size) ? rbuf->size - rbuf->pread : 0;
-        if (split > 0) {
-                if (!usermem)
-                        memcpy(buf, rbuf->data+rbuf->pread, split);
-                else
-                        if (copy_to_user(buf, rbuf->data+rbuf->pread, split))
-                                return -EFAULT;
-                buf += split;
-                todo -= split;
-                rbuf->pread = 0;
-        }
-        if (!usermem)
-                memcpy(buf, rbuf->data+rbuf->pread, todo);
-        else
-                if (copy_to_user(buf, rbuf->data+rbuf->pread, todo))
-                        return -EFAULT;
-
-        rbuf->pread = (rbuf->pread + todo) % rbuf->size;
-
-        return len;
+	size_t todo = len;
+	size_t split;
+
+	split = (rbuf->pread + len > rbuf->size) ? rbuf->size - rbuf->pread : 0;
+	if (split > 0) {
+		if (!usermem)
+		        memcpy(buf, rbuf->data+rbuf->pread, split);
+		else
+		        if (copy_to_user(buf, rbuf->data+rbuf->pread, split))
+		                return -EFAULT;
+		buf += split;
+		todo -= split;
+		rbuf->pread = 0;
+	}
+	if (!usermem)
+		memcpy(buf, rbuf->data+rbuf->pread, todo);
+	else
+		if (copy_to_user(buf, rbuf->data+rbuf->pread, todo))
+		        return -EFAULT;
+
+	rbuf->pread = (rbuf->pread + todo) % rbuf->size;
+
+	return len;
 }
 
 
 ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf, size_t len)
 {
-        size_t todo = len;
-        size_t split;
+	size_t todo = len;
+	size_t split;
 
-        split = (rbuf->pwrite + len > rbuf->size) ? rbuf->size - rbuf->pwrite : 0;
+	split = (rbuf->pwrite + len > rbuf->size) ? rbuf->size - rbuf->pwrite : 0;
 
-        if (split > 0) {
-                memcpy(rbuf->data+rbuf->pwrite, buf, split);
-                buf += split;
-                todo -= split;
-                rbuf->pwrite = 0;
-        }
-        memcpy(rbuf->data+rbuf->pwrite, buf, todo);
-        rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size;
+	if (split > 0) {
+		memcpy(rbuf->data+rbuf->pwrite, buf, split);
+		buf += split;
+		todo -= split;
+		rbuf->pwrite = 0;
+	}
+	memcpy(rbuf->data+rbuf->pwrite, buf, todo);
+	rbuf->pwrite = (rbuf->pwrite + todo) % rbuf->size;
 
-        return len;
+	return len;
 }
 
 ssize_t dvb_ringbuffer_pkt_write(struct dvb_ringbuffer *rbuf, u8* buf, size_t len)
 {
-        int status;
-        ssize_t oldpwrite = rbuf->pwrite;
+	int status;
+	ssize_t oldpwrite = rbuf->pwrite;
 
-        DVB_RINGBUFFER_WRITE_BYTE(rbuf, len >> 8);
-        DVB_RINGBUFFER_WRITE_BYTE(rbuf, len & 0xff);
-        DVB_RINGBUFFER_WRITE_BYTE(rbuf, PKT_READY);
-        status = dvb_ringbuffer_write(rbuf, buf, len);
+	DVB_RINGBUFFER_WRITE_BYTE(rbuf, len >> 8);
+	DVB_RINGBUFFER_WRITE_BYTE(rbuf, len & 0xff);
+	DVB_RINGBUFFER_WRITE_BYTE(rbuf, PKT_READY);
+	status = dvb_ringbuffer_write(rbuf, buf, len);
 
-        if (status < 0) rbuf->pwrite = oldpwrite;
-        return status;
+	if (status < 0) rbuf->pwrite = oldpwrite;
+	return status;
 }
 
 ssize_t dvb_ringbuffer_pkt_read(struct dvb_ringbuffer *rbuf, size_t idx,
-                                int offset, u8* buf, size_t len, int usermem)
+		                int offset, u8* buf, size_t len, int usermem)
 {
-        size_t todo;
-        size_t split;
-        size_t pktlen;
-
-        pktlen = rbuf->data[idx] << 8;
-        pktlen |= rbuf->data[(idx + 1) % rbuf->size];
-        if (offset > pktlen) return -EINVAL;
-        if ((offset + len) > pktlen) len = pktlen - offset;
-
-        idx = (idx + DVB_RINGBUFFER_PKTHDRSIZE + offset) % rbuf->size;
-        todo = len;
-        split = ((idx + len) > rbuf->size) ? rbuf->size - idx : 0;
-        if (split > 0) {
-                if (!usermem)
-                        memcpy(buf, rbuf->data+idx, split);
-                else
-                        if (copy_to_user(buf, rbuf->data+idx, split))
-                                return -EFAULT;
-                buf += split;
-                todo -= split;
-                idx = 0;
-        }
-        if (!usermem)
-                memcpy(buf, rbuf->data+idx, todo);
-        else
-                if (copy_to_user(buf, rbuf->data+idx, todo))
-                        return -EFAULT;
-
-        return len;
+	size_t todo;
+	size_t split;
+	size_t pktlen;
+
+	pktlen = rbuf->data[idx] << 8;
+	pktlen |= rbuf->data[(idx + 1) % rbuf->size];
+	if (offset > pktlen) return -EINVAL;
+	if ((offset + len) > pktlen) len = pktlen - offset;
+
+	idx = (idx + DVB_RINGBUFFER_PKTHDRSIZE + offset) % rbuf->size;
+	todo = len;
+	split = ((idx + len) > rbuf->size) ? rbuf->size - idx : 0;
+	if (split > 0) {
+		if (!usermem)
+		        memcpy(buf, rbuf->data+idx, split);
+		else
+		        if (copy_to_user(buf, rbuf->data+idx, split))
+		                return -EFAULT;
+		buf += split;
+		todo -= split;
+		idx = 0;
+	}
+	if (!usermem)
+		memcpy(buf, rbuf->data+idx, todo);
+	else
+		if (copy_to_user(buf, rbuf->data+idx, todo))
+		        return -EFAULT;
+
+	return len;
 }
 
 void dvb_ringbuffer_pkt_dispose(struct dvb_ringbuffer *rbuf, size_t idx)
 {
-        size_t pktlen;
-
-        rbuf->data[(idx + 2) % rbuf->size] = PKT_DISPOSED;
-
-        // clean up disposed packets
-        while(dvb_ringbuffer_avail(rbuf) > DVB_RINGBUFFER_PKTHDRSIZE) {
-                if (DVB_RINGBUFFER_PEEK(rbuf, 2) == PKT_DISPOSED) {
-                        pktlen = DVB_RINGBUFFER_PEEK(rbuf, 0) << 8;
-                        pktlen |= DVB_RINGBUFFER_PEEK(rbuf, 1);
-                        DVB_RINGBUFFER_SKIP(rbuf, pktlen + DVB_RINGBUFFER_PKTHDRSIZE);
-                } else {
-                        // first packet is not disposed, so we stop cleaning now
-                        break;
-                }
-        }
+	size_t pktlen;
+
+	rbuf->data[(idx + 2) % rbuf->size] = PKT_DISPOSED;
+
+	// clean up disposed packets
+	while(dvb_ringbuffer_avail(rbuf) > DVB_RINGBUFFER_PKTHDRSIZE) {
+		if (DVB_RINGBUFFER_PEEK(rbuf, 2) == PKT_DISPOSED) {
+		        pktlen = DVB_RINGBUFFER_PEEK(rbuf, 0) << 8;
+		        pktlen |= DVB_RINGBUFFER_PEEK(rbuf, 1);
+		        DVB_RINGBUFFER_SKIP(rbuf, pktlen + DVB_RINGBUFFER_PKTHDRSIZE);
+		} else {
+		        // first packet is not disposed, so we stop cleaning now
+		        break;
+		}
+	}
 }
 
 ssize_t dvb_ringbuffer_pkt_next(struct dvb_ringbuffer *rbuf, size_t idx, size_t* pktlen)
 {
-        int consumed;
-        int curpktlen;
-        int curpktstatus;
+	int consumed;
+	int curpktlen;
+	int curpktstatus;
 
-        if (idx == -1) {
+	if (idx == -1) {
 	       idx = rbuf->pread;
 	} else {
-                curpktlen = rbuf->data[idx] << 8;
-                curpktlen |= rbuf->data[(idx + 1) % rbuf->size];
-	        idx = (idx + curpktlen + DVB_RINGBUFFER_PKTHDRSIZE) % rbuf->size;
+		curpktlen = rbuf->data[idx] << 8;
+		curpktlen |= rbuf->data[(idx + 1) % rbuf->size];
+		idx = (idx + curpktlen + DVB_RINGBUFFER_PKTHDRSIZE) % rbuf->size;
 	}
 
-        consumed = (idx - rbuf->pread) % rbuf->size;
+	consumed = (idx - rbuf->pread) % rbuf->size;
 
-        while((dvb_ringbuffer_avail(rbuf) - consumed) > DVB_RINGBUFFER_PKTHDRSIZE) {
+	while((dvb_ringbuffer_avail(rbuf) - consumed) > DVB_RINGBUFFER_PKTHDRSIZE) {
 
-                curpktlen = rbuf->data[idx] << 8;
-                curpktlen |= rbuf->data[(idx + 1) % rbuf->size];
-                curpktstatus = rbuf->data[(idx + 2) % rbuf->size];
+		curpktlen = rbuf->data[idx] << 8;
+		curpktlen |= rbuf->data[(idx + 1) % rbuf->size];
+		curpktstatus = rbuf->data[(idx + 2) % rbuf->size];
 
-                if (curpktstatus == PKT_READY) {
-                        *pktlen = curpktlen;
-                        return idx;
-                }
+		if (curpktstatus == PKT_READY) {
+		        *pktlen = curpktlen;
+		        return idx;
+		}
 
-                consumed += curpktlen + DVB_RINGBUFFER_PKTHDRSIZE;
-                idx = (idx + curpktlen + DVB_RINGBUFFER_PKTHDRSIZE) % rbuf->size;
-        }
+		consumed += curpktlen + DVB_RINGBUFFER_PKTHDRSIZE;
+		idx = (idx + curpktlen + DVB_RINGBUFFER_PKTHDRSIZE) % rbuf->size;
+	}
 
-        // no packets available
-        return -1;
+	// no packets available
+	return -1;
 }
 
 
diff --git a/drivers/media/dvb/dvb-core/dvb_ringbuffer.h b/drivers/media/dvb/dvb-core/dvb_ringbuffer.h
index d18e9c4ba9ea..fa476f662f82 100644
--- a/drivers/media/dvb/dvb-core/dvb_ringbuffer.h
+++ b/drivers/media/dvb/dvb-core/dvb_ringbuffer.h
@@ -31,13 +31,13 @@
 #include <linux/wait.h>
 
 struct dvb_ringbuffer {
-        u8               *data;
-        ssize_t           size;
-        ssize_t           pread;
-        ssize_t           pwrite;
+	u8               *data;
+	ssize_t           size;
+	ssize_t           pread;
+	ssize_t           pwrite;
 
-        wait_queue_head_t queue;
-        spinlock_t        lock;
+	wait_queue_head_t queue;
+	spinlock_t        lock;
 };
 
 #define DVB_RINGBUFFER_PKTHDRSIZE 3
@@ -106,7 +106,7 @@ extern void dvb_ringbuffer_flush_spinlock_wakeup(struct dvb_ringbuffer *rbuf);
 ** returns number of bytes transferred or -EFAULT
 */
 extern ssize_t dvb_ringbuffer_read(struct dvb_ringbuffer *rbuf, u8 *buf,
-                                   size_t len, int usermem);
+		                   size_t len, int usermem);
 
 
 /* write routines & macros */
@@ -121,7 +121,7 @@ extern ssize_t dvb_ringbuffer_read(struct dvb_ringbuffer *rbuf, u8 *buf,
 ** returns number of bytes transferred or -EFAULT
 */
 extern ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf,
-                                    size_t len);
+		                    size_t len);
 
 
 /**
@@ -133,7 +133,7 @@ extern ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf,
  * returns Number of bytes written, or -EFAULT, -ENOMEM, -EVINAL.
  */
 extern ssize_t dvb_ringbuffer_pkt_write(struct dvb_ringbuffer *rbuf, u8* buf,
-                                        size_t len);
+		                        size_t len);
 
 /**
  * Read from a packet in the ringbuffer. Note: unlike dvb_ringbuffer_read(), this
@@ -149,7 +149,7 @@ extern ssize_t dvb_ringbuffer_pkt_write(struct dvb_ringbuffer *rbuf, u8* buf,
  * returns Number of bytes read, or -EFAULT.
  */
 extern ssize_t dvb_ringbuffer_pkt_read(struct dvb_ringbuffer *rbuf, size_t idx,
-                                       int offset, u8* buf, size_t len, int usermem);
+		                       int offset, u8* buf, size_t len, int usermem);
 
 /**
  * Dispose of a packet in the ring buffer.
diff --git a/drivers/media/dvb/dvb-core/dvbdev.c b/drivers/media/dvb/dvb-core/dvbdev.c
index 477b4fa56430..a4aee8665854 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.c
+++ b/drivers/media/dvb/dvb-core/dvbdev.c
@@ -47,7 +47,7 @@ static LIST_HEAD(dvb_adapter_list);
 static DECLARE_MUTEX(dvbdev_register_lock);
 
 static const char * const dnames[] = {
-        "video", "audio", "sec", "frontend", "demux", "dvr", "ca",
+	"video", "audio", "sec", "frontend", "demux", "dvr", "ca",
 	"net", "osd"
 };
 
@@ -90,15 +90,15 @@ static int dvb_device_open(struct inode *inode, struct file *file)
 
 		file->private_data = dvbdev;
 		old_fops = file->f_op;
-                file->f_op = fops_get(dvbdev->fops);
-                if(file->f_op->open)
-                        err = file->f_op->open(inode,file);
-                if (err) {
-                        fops_put(file->f_op);
-                        file->f_op = fops_get(old_fops);
-                }
-                fops_put(old_fops);
-                return err;
+		file->f_op = fops_get(dvbdev->fops);
+		if(file->f_op->open)
+		        err = file->f_op->open(inode,file);
+		if (err) {
+		        fops_put(file->f_op);
+		        file->f_op = fops_get(old_fops);
+		}
+		fops_put(old_fops);
+		return err;
 	}
 	return -ENODEV;
 }
@@ -117,21 +117,21 @@ static struct cdev dvb_device_cdev = {
 
 int dvb_generic_open(struct inode *inode, struct file *file)
 {
-        struct dvb_device *dvbdev = file->private_data;
+	struct dvb_device *dvbdev = file->private_data;
 
-        if (!dvbdev)
-                return -ENODEV;
+	if (!dvbdev)
+		return -ENODEV;
 
 	if (!dvbdev->users)
-                return -EBUSY;
+		return -EBUSY;
 
 	if ((file->f_flags & O_ACCMODE) == O_RDONLY) {
-                if (!dvbdev->readers)
-		        return -EBUSY;
+		if (!dvbdev->readers)
+			return -EBUSY;
 		dvbdev->readers--;
 	} else {
-                if (!dvbdev->writers)
-		        return -EBUSY;
+		if (!dvbdev->writers)
+			return -EBUSY;
 		dvbdev->writers--;
 	}
 
@@ -143,10 +143,10 @@ EXPORT_SYMBOL(dvb_generic_open);
 
 int dvb_generic_release(struct inode *inode, struct file *file)
 {
-        struct dvb_device *dvbdev = file->private_data;
+	struct dvb_device *dvbdev = file->private_data;
 
 	if (!dvbdev)
-                return -ENODEV;
+		return -ENODEV;
 
 	if ((file->f_flags & O_ACCMODE) == O_RDONLY) {
 		dvbdev->readers++;
@@ -163,10 +163,10 @@ EXPORT_SYMBOL(dvb_generic_release);
 int dvb_generic_ioctl(struct inode *inode, struct file *file,
 		      unsigned int cmd, unsigned long arg)
 {
-        struct dvb_device *dvbdev = file->private_data;
+	struct dvb_device *dvbdev = file->private_data;
 
-        if (!dvbdev)
-	        return -ENODEV;
+	if (!dvbdev)
+		return -ENODEV;
 
 	if (!dvbdev->kernel_ioctl)
 		return -EINVAL;
@@ -334,63 +334,63 @@ EXPORT_SYMBOL(dvb_unregister_adapter);
    to the v4l "videodev.o" module, which is unnecessary for some
    cards (ie. the budget dvb-cards don't need the v4l module...) */
 int dvb_usercopy(struct inode *inode, struct file *file,
-	             unsigned int cmd, unsigned long arg,
+		     unsigned int cmd, unsigned long arg,
 		     int (*func)(struct inode *inode, struct file *file,
 		     unsigned int cmd, void *arg))
 {
-        char    sbuf[128];
-        void    *mbuf = NULL;
-        void    *parg = NULL;
-        int     err  = -EINVAL;
-
-        /*  Copy arguments into temp kernel buffer  */
-        switch (_IOC_DIR(cmd)) {
-        case _IOC_NONE:
+	char    sbuf[128];
+	void    *mbuf = NULL;
+	void    *parg = NULL;
+	int     err  = -EINVAL;
+
+	/*  Copy arguments into temp kernel buffer  */
+	switch (_IOC_DIR(cmd)) {
+	case _IOC_NONE:
 		/*
 		 * For this command, the pointer is actually an integer
 		 * argument.
 		 */
 		parg = (void *) arg;
 		break;
-        case _IOC_READ: /* some v4l ioctls are marked wrong ... */
-        case _IOC_WRITE:
-        case (_IOC_WRITE | _IOC_READ):
-                if (_IOC_SIZE(cmd) <= sizeof(sbuf)) {
-                        parg = sbuf;
-                } else {
-                        /* too big to allocate from stack */
-                        mbuf = kmalloc(_IOC_SIZE(cmd),GFP_KERNEL);
-                        if (NULL == mbuf)
-                                return -ENOMEM;
-                        parg = mbuf;
-                }
-
-                err = -EFAULT;
-                if (copy_from_user(parg, (void __user *)arg, _IOC_SIZE(cmd)))
-                        goto out;
-                break;
-        }
-
-        /* call driver */
-        if ((err = func(inode, file, cmd, parg)) == -ENOIOCTLCMD)
-                err = -EINVAL;
-
-        if (err < 0)
-                goto out;
-
-        /*  Copy results into user buffer  */
-        switch (_IOC_DIR(cmd))
-        {
-        case _IOC_READ:
-        case (_IOC_WRITE | _IOC_READ):
-                if (copy_to_user((void __user *)arg, parg, _IOC_SIZE(cmd)))
-                        err = -EFAULT;
-                break;
-        }
+	case _IOC_READ: /* some v4l ioctls are marked wrong ... */
+	case _IOC_WRITE:
+	case (_IOC_WRITE | _IOC_READ):
+		if (_IOC_SIZE(cmd) <= sizeof(sbuf)) {
+		        parg = sbuf;
+		} else {
+		        /* too big to allocate from stack */
+		        mbuf = kmalloc(_IOC_SIZE(cmd),GFP_KERNEL);
+		        if (NULL == mbuf)
+		                return -ENOMEM;
+		        parg = mbuf;
+		}
+
+		err = -EFAULT;
+		if (copy_from_user(parg, (void __user *)arg, _IOC_SIZE(cmd)))
+		        goto out;
+		break;
+	}
+
+	/* call driver */
+	if ((err = func(inode, file, cmd, parg)) == -ENOIOCTLCMD)
+		err = -EINVAL;
+
+	if (err < 0)
+		goto out;
+
+	/*  Copy results into user buffer  */
+	switch (_IOC_DIR(cmd))
+	{
+	case _IOC_READ:
+	case (_IOC_WRITE | _IOC_READ):
+		if (copy_to_user((void __user *)arg, parg, _IOC_SIZE(cmd)))
+		        err = -EFAULT;
+		break;
+	}
 
 out:
-        kfree(mbuf);
-        return err;
+	kfree(mbuf);
+	return err;
 }
 
 static int __init init_dvbdev(void)
@@ -427,10 +427,10 @@ error:
 
 static void __exit exit_dvbdev(void)
 {
-        devfs_remove("dvb");
+	devfs_remove("dvb");
 	class_destroy(dvb_class);
 	cdev_del(&dvb_device_cdev);
-        unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS);
+	unregister_chrdev_region(MKDEV(DVB_MAJOR, 0), MAX_DVB_MINORS);
 }
 
 module_init(init_dvbdev);
diff --git a/drivers/media/dvb/dvb-core/dvbdev.h b/drivers/media/dvb/dvb-core/dvbdev.h
index a251867f30f1..0cc6e4a0e27c 100644
--- a/drivers/media/dvb/dvb-core/dvbdev.h
+++ b/drivers/media/dvb/dvb-core/dvbdev.h
@@ -68,8 +68,8 @@ struct dvb_device {
 	int writers;
 	int users;
 
-        /* don't really need those !? -- FIXME: use video_usercopy  */
-        int (*kernel_ioctl)(struct inode *inode, struct file *file,
+	/* don't really need those !? -- FIXME: use video_usercopy  */
+	int (*kernel_ioctl)(struct inode *inode, struct file *file,
 			    unsigned int cmd, void *arg);
 
 	void *priv;
@@ -97,7 +97,7 @@ we simply define out own dvb_usercopy(), which will hopefully become
 generic_usercopy()  someday... */
 
 extern int dvb_usercopy(struct inode *inode, struct file *file,
-	                    unsigned int cmd, unsigned long arg,
+		            unsigned int cmd, unsigned long arg,
 			    int (*func)(struct inode *inode, struct file *file,
 			    unsigned int cmd, void *arg));
 
diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c
index e4063950ae57..3ca1d768bfd3 100644
--- a/drivers/media/video/bt832.c
+++ b/drivers/media/video/bt832.c
@@ -231,18 +231,18 @@ bt832_command(struct i2c_client *client, unsigned int cmd, void *arg)
 
 static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
-        .name           = "i2c bt832 driver",
-        .id             = -1, /* FIXME */
-        .flags          = I2C_DF_NOTIFY,
-        .attach_adapter = bt832_probe,
-        .detach_client  = bt832_detach,
-        .command        = bt832_command,
+	.name           = "i2c bt832 driver",
+	.id             = -1, /* FIXME */
+	.flags          = I2C_DF_NOTIFY,
+	.attach_adapter = bt832_probe,
+	.detach_client  = bt832_detach,
+	.command        = bt832_command,
 };
 static struct i2c_client client_template =
 {
 	.name       = "bt832",
 	.flags      = I2C_CLIENT_ALLOW_USE,
-        .driver     = &driver,
+	.driver     = &driver,
 };
 
 
diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c
index 3c58a2a68906..24f5afddafbb 100644
--- a/drivers/media/video/bttv-driver.c
+++ b/drivers/media/video/bttv-driver.c
@@ -727,71 +727,71 @@ void free_btres(struct bttv *btv, struct bttv_fh *fh, int bits)
 
 static void set_pll_freq(struct bttv *btv, unsigned int fin, unsigned int fout)
 {
-        unsigned char fl, fh, fi;
+	unsigned char fl, fh, fi;
 
-        /* prevent overflows */
-        fin/=4;
-        fout/=4;
+	/* prevent overflows */
+	fin/=4;
+	fout/=4;
 
-        fout*=12;
-        fi=fout/fin;
+	fout*=12;
+	fi=fout/fin;
 
-        fout=(fout%fin)*256;
-        fh=fout/fin;
+	fout=(fout%fin)*256;
+	fh=fout/fin;
 
-        fout=(fout%fin)*256;
-        fl=fout/fin;
+	fout=(fout%fin)*256;
+	fl=fout/fin;
 
-        btwrite(fl, BT848_PLL_F_LO);
-        btwrite(fh, BT848_PLL_F_HI);
-        btwrite(fi|BT848_PLL_X, BT848_PLL_XCI);
+	btwrite(fl, BT848_PLL_F_LO);
+	btwrite(fh, BT848_PLL_F_HI);
+	btwrite(fi|BT848_PLL_X, BT848_PLL_XCI);
 }
 
 static void set_pll(struct bttv *btv)
 {
-        int i;
+	int i;
 
-        if (!btv->pll.pll_crystal)
-                return;
+	if (!btv->pll.pll_crystal)
+		return;
 
 	if (btv->pll.pll_ofreq == btv->pll.pll_current) {
 		dprintk("bttv%d: PLL: no change required\n",btv->c.nr);
-                return;
-        }
+		return;
+	}
 
-        if (btv->pll.pll_ifreq == btv->pll.pll_ofreq) {
-                /* no PLL needed */
-                if (btv->pll.pll_current == 0)
-                        return;
+	if (btv->pll.pll_ifreq == btv->pll.pll_ofreq) {
+		/* no PLL needed */
+		if (btv->pll.pll_current == 0)
+		        return;
 		bttv_printk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n",
-                           btv->c.nr,btv->pll.pll_ifreq);
-                btwrite(0x00,BT848_TGCTRL);
-                btwrite(0x00,BT848_PLL_XCI);
-                btv->pll.pll_current = 0;
-                return;
-        }
+		           btv->c.nr,btv->pll.pll_ifreq);
+		btwrite(0x00,BT848_TGCTRL);
+		btwrite(0x00,BT848_PLL_XCI);
+		btv->pll.pll_current = 0;
+		return;
+	}
 
 	bttv_printk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr,
-                   btv->pll.pll_ifreq, btv->pll.pll_ofreq);
+		   btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 	set_pll_freq(btv, btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 
-        for (i=0; i<10; i++) {
+	for (i=0; i<10; i++) {
 		/*  Let other people run while the PLL stabilizes */
 		bttv_printk(".");
 		msleep(10);
 
-                if (btread(BT848_DSTATUS) & BT848_DSTATUS_PLOCK) {
+		if (btread(BT848_DSTATUS) & BT848_DSTATUS_PLOCK) {
 			btwrite(0,BT848_DSTATUS);
-                } else {
-                        btwrite(0x08,BT848_TGCTRL);
-                        btv->pll.pll_current = btv->pll.pll_ofreq;
+		} else {
+		        btwrite(0x08,BT848_TGCTRL);
+		        btv->pll.pll_current = btv->pll.pll_ofreq;
 			bttv_printk(" ok\n");
-                        return;
-                }
-        }
-        btv->pll.pll_current = -1;
+		        return;
+		}
+	}
+	btv->pll.pll_current = -1;
 	bttv_printk("failed\n");
-        return;
+	return;
 }
 
 /* used to switch between the bt848's analog/digital video capture modes */
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 124c502ea1f3..ca4027422e76 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -296,7 +296,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	IR_KEYTAB_TYPE *ir_codes = NULL;
 	char *name;
 	int ir_type;
-        struct IR_i2c *ir;
+	struct IR_i2c *ir;
 	struct input_dev *input_dev;
 
 	ir = kzalloc(sizeof(struct IR_i2c), GFP_KERNEL);
@@ -304,7 +304,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	if (!ir || !input_dev) {
 		kfree(ir);
 		input_free_device(input_dev);
-                return -ENOMEM;
+		return -ENOMEM;
 	}
 
 	ir->c = client_template;
diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c
index a23fb0338986..61b99b0d8583 100644
--- a/drivers/media/video/msp3400.c
+++ b/drivers/media/video/msp3400.c
@@ -1559,11 +1559,11 @@ static void msp_wake_thread(struct i2c_client *client);
 static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "msp3400",
-        .id             = I2C_DRIVERID_MSP3400,
-        .flags          = I2C_DF_NOTIFY,
-        .attach_adapter = msp_probe,
-        .detach_client  = msp_detach,
-        .command        = msp_command,
+	.id             = I2C_DRIVERID_MSP3400,
+	.flags          = I2C_DF_NOTIFY,
+	.attach_adapter = msp_probe,
+	.detach_client  = msp_detach,
+	.command        = msp_command,
 	.driver = {
 		.suspend = msp_suspend,
 		.resume  = msp_resume,
@@ -1574,7 +1574,7 @@ static struct i2c_client client_template =
 {
 	.name      = "(unset)",
 	.flags     = I2C_CLIENT_ALLOW_USE,
-        .driver    = &driver,
+	.driver    = &driver,
 };
 
 static int msp_attach(struct i2c_adapter *adap, int addr, int kind)
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index cdd1ed9c8065..cf1e0c2b0351 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -523,7 +523,7 @@ static int saa6752hs_attach(struct i2c_adapter *adap, int addr, int kind)
 	h->standard = 0;
 
 	i2c_set_clientdata(&h->client, h);
-        i2c_attach_client(&h->client);
+	i2c_attach_client(&h->client);
 	return 0;
 }
 
@@ -597,19 +597,19 @@ saa6752hs_command(struct i2c_client *client, unsigned int cmd, void *arg)
 
 static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
-        .name           = "i2c saa6752hs MPEG encoder",
-        .id             = I2C_DRIVERID_SAA6752HS,
-        .flags          = I2C_DF_NOTIFY,
-        .attach_adapter = saa6752hs_probe,
-        .detach_client  = saa6752hs_detach,
-        .command        = saa6752hs_command,
+	.name           = "i2c saa6752hs MPEG encoder",
+	.id             = I2C_DRIVERID_SAA6752HS,
+	.flags          = I2C_DF_NOTIFY,
+	.attach_adapter = saa6752hs_probe,
+	.detach_client  = saa6752hs_detach,
+	.command        = saa6752hs_command,
 };
 
 static struct i2c_client client_template =
 {
 	.name       = "saa6752hs",
 	.flags      = I2C_CLIENT_ALLOW_USE,
-        .driver     = &driver,
+	.driver     = &driver,
 };
 
 static int __init saa6752hs_init_module(void)
diff --git a/drivers/media/video/saa7134/saa7134-alsa.c b/drivers/media/video/saa7134/saa7134-alsa.c
index 263c6e2e3e8e..f2788ebb59a8 100644
--- a/drivers/media/video/saa7134/saa7134-alsa.c
+++ b/drivers/media/video/saa7134/saa7134-alsa.c
@@ -59,7 +59,7 @@ module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for SAA7134 capture interface(s).");
 
 #define dprintk(fmt, arg...)    if (debug) \
-        printk(KERN_DEBUG "%s/alsa: " fmt, dev->name , ## arg)
+	printk(KERN_DEBUG "%s/alsa: " fmt, dev->name , ## arg)
 
 /*
  * Main chip structure
@@ -208,8 +208,8 @@ static void saa7134_irq_alsa_done(struct saa7134_dev *dev,
 
 static irqreturn_t saa7134_alsa_irq(int irq, void *dev_id, struct pt_regs *regs)
 {
-        struct saa7134_dmasound *dmasound = dev_id;
-        struct saa7134_dev *dev = dmasound->priv_data;
+	struct saa7134_dmasound *dmasound = dev_id;
+	struct saa7134_dev *dev = dmasound->priv_data;
 
 	unsigned long report, status;
 	int loop, handled = 0;
@@ -985,7 +985,7 @@ static int saa7134_alsa_init(void)
 	struct saa7134_dev *dev = NULL;
 	struct list_head *list;
 
-        printk(KERN_INFO "saa7134 ALSA driver for DMA sound loaded\n");
+	printk(KERN_INFO "saa7134 ALSA driver for DMA sound loaded\n");
 
 	list_for_each(list,&saa7134_devlist) {
 		dev = list_entry(list, struct saa7134_dev, devlist);
diff --git a/drivers/media/video/saa7134/saa7134-oss.c b/drivers/media/video/saa7134/saa7134-oss.c
index 5a579194e455..1f68b9f3fdec 100644
--- a/drivers/media/video/saa7134/saa7134-oss.c
+++ b/drivers/media/video/saa7134/saa7134-oss.c
@@ -782,36 +782,36 @@ struct file_operations saa7134_mixer_fops = {
 
 static irqreturn_t saa7134_oss_irq(int irq, void *dev_id, struct pt_regs *regs)
 {
-        struct saa7134_dmasound *dmasound = dev_id;
-        struct saa7134_dev *dev = dmasound->priv_data;
-        unsigned long report, status;
-        int loop, handled = 0;
-
-        for (loop = 0; loop < 10; loop++) {
-                report = saa_readl(SAA7134_IRQ_REPORT);
-                status = saa_readl(SAA7134_IRQ_STATUS);
-
-                if (report & SAA7134_IRQ_REPORT_DONE_RA3) {
-                        handled = 1;
-                        saa_writel(SAA7134_IRQ_REPORT,report);
-                        saa7134_irq_oss_done(dev, status);
-                } else {
-                        goto out;
-                }
-        }
-
-        if (loop == 10) {
-                dprintk("error! looping IRQ!");
-        }
+	struct saa7134_dmasound *dmasound = dev_id;
+	struct saa7134_dev *dev = dmasound->priv_data;
+	unsigned long report, status;
+	int loop, handled = 0;
+
+	for (loop = 0; loop < 10; loop++) {
+		report = saa_readl(SAA7134_IRQ_REPORT);
+		status = saa_readl(SAA7134_IRQ_STATUS);
+
+		if (report & SAA7134_IRQ_REPORT_DONE_RA3) {
+		        handled = 1;
+		        saa_writel(SAA7134_IRQ_REPORT,report);
+		        saa7134_irq_oss_done(dev, status);
+		} else {
+		        goto out;
+		}
+	}
+
+	if (loop == 10) {
+		dprintk("error! looping IRQ!");
+	}
 out:
-        return IRQ_RETVAL(handled);
+	return IRQ_RETVAL(handled);
 }
 
 int saa7134_oss_init1(struct saa7134_dev *dev)
 {
 
-        if ((request_irq(dev->pci->irq, saa7134_oss_irq,
-                         SA_SHIRQ | SA_INTERRUPT, dev->name,
+	if ((request_irq(dev->pci->irq, saa7134_oss_irq,
+		         SA_SHIRQ | SA_INTERRUPT, dev->name,
 			(void*) &dev->dmasound)) < 0)
 		return -1;
 
@@ -905,7 +905,7 @@ static int saa7134_dsp_create(struct saa7134_dev *dev)
 
 	err = dev->dmasound.minor_dsp =
 		register_sound_dsp(&saa7134_dsp_fops,
-                			dsp_nr[dev->nr]);
+					dsp_nr[dev->nr]);
 	if (err < 0) {
 		goto fail;
 	}
@@ -923,7 +923,7 @@ static int saa7134_dsp_create(struct saa7134_dev *dev)
 	return 0;
 
 fail:
-        unregister_sound_dsp(dev->dmasound.minor_dsp);
+	unregister_sound_dsp(dev->dmasound.minor_dsp);
 	return 0;
 
 
@@ -956,49 +956,49 @@ static int oss_device_exit(struct saa7134_dev *dev)
 
 static int saa7134_oss_init(void)
 {
-        struct saa7134_dev *dev = NULL;
-        struct list_head *list;
+	struct saa7134_dev *dev = NULL;
+	struct list_head *list;
 
-        printk(KERN_INFO "saa7134 OSS driver for DMA sound loaded\n");
+	printk(KERN_INFO "saa7134 OSS driver for DMA sound loaded\n");
 
-        list_for_each(list,&saa7134_devlist) {
-                dev = list_entry(list, struct saa7134_dev, devlist);
+	list_for_each(list,&saa7134_devlist) {
+		dev = list_entry(list, struct saa7134_dev, devlist);
 		if (dev->dmasound.priv_data == NULL) {
 			oss_device_init(dev);
 		} else {
-                	printk(KERN_ERR "saa7134 OSS: DMA sound is being handled by ALSA, ignoring %s\n",dev->name);
+			printk(KERN_ERR "saa7134 OSS: DMA sound is being handled by ALSA, ignoring %s\n",dev->name);
 			return -EBUSY;
 		}
-        }
+	}
 
-        if (dev == NULL)
-                printk(KERN_INFO "saa7134 OSS: no saa7134 cards found\n");
+	if (dev == NULL)
+		printk(KERN_INFO "saa7134 OSS: no saa7134 cards found\n");
 
 	dmasound_init = oss_device_init;
 	dmasound_exit = oss_device_exit;
 
-        return 0;
+	return 0;
 
 }
 
 static void saa7134_oss_exit(void)
 {
-        struct saa7134_dev *dev = NULL;
-        struct list_head *list;
+	struct saa7134_dev *dev = NULL;
+	struct list_head *list;
 
-        list_for_each(list,&saa7134_devlist) {
-                dev = list_entry(list, struct saa7134_dev, devlist);
+	list_for_each(list,&saa7134_devlist) {
+		dev = list_entry(list, struct saa7134_dev, devlist);
 
 		/* Device isn't registered by OSS, probably ALSA's */
 		if (!dev->dmasound.minor_dsp)
 			continue;
 
 		oss_device_exit(dev);
-        }
+	}
 
-        printk(KERN_INFO "saa7134 OSS driver for DMA sound unloaded\n");
+	printk(KERN_INFO "saa7134 OSS driver for DMA sound unloaded\n");
 
-        return;
+	return;
 }
 
 module_init(saa7134_oss_init);
diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
index 4249127c0a1d..14a2066a61b2 100644
--- a/drivers/media/video/tda9887.c
+++ b/drivers/media/video/tda9887.c
@@ -819,12 +819,12 @@ static int tda9887_resume(struct device * dev)
 
 static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
-        .name           = "i2c tda9887 driver",
-        .id             = -1, /* FIXME */
-        .flags          = I2C_DF_NOTIFY,
-        .attach_adapter = tda9887_probe,
-        .detach_client  = tda9887_detach,
-        .command        = tda9887_command,
+	.name           = "i2c tda9887 driver",
+	.id             = -1, /* FIXME */
+	.flags          = I2C_DF_NOTIFY,
+	.attach_adapter = tda9887_probe,
+	.detach_client  = tda9887_detach,
+	.command        = tda9887_command,
 	.driver = {
 		.suspend = tda9887_suspend,
 		.resume  = tda9887_resume,
@@ -834,7 +834,7 @@ static struct i2c_client client_template =
 {
 	.name      = "tda9887",
 	.flags     = I2C_CLIENT_ALLOW_USE,
-        .driver    = &driver,
+	.driver    = &driver,
 };
 
 static int __init tda9887_init_module(void)
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index c31bf28b73fe..67757fe07d8d 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -1506,18 +1506,18 @@ static int chip_attach(struct i2c_adapter *adap, int addr, int kind)
 		return -EIO;
 	}
 	tvaudio_info("%s found @ 0x%x (%s)\n", desc->name, addr<<1, adap->name);
-        if (desc->flags) {
-                tvaudio_dbg("matches:%s%s%s.\n",
-                        (desc->flags & CHIP_HAS_VOLUME)     ? " volume"      : "",
-                        (desc->flags & CHIP_HAS_BASSTREBLE) ? " bass/treble" : "",
-                        (desc->flags & CHIP_HAS_INPUTSEL)   ? " audiomux"    : "");
-        }
+	if (desc->flags) {
+		tvaudio_dbg("matches:%s%s%s.\n",
+		        (desc->flags & CHIP_HAS_VOLUME)     ? " volume"      : "",
+		        (desc->flags & CHIP_HAS_BASSTREBLE) ? " bass/treble" : "",
+		        (desc->flags & CHIP_HAS_INPUTSEL)   ? " audiomux"    : "");
+	}
 
 	/* fill required data structures */
 	strcpy(chip->c.name,desc->name);
 	chip->type = desc-chiplist;
 	chip->shadow.count = desc->registers+1;
-        chip->prevmode = -1;
+	chip->prevmode = -1;
 	/* register */
 	i2c_attach_client(&chip->c);
 
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c
index d95aecebbda3..5c04989471b6 100644
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -753,7 +753,7 @@ tveeprom_detect_client(struct i2c_adapter *adapter,
 	client->driver = &i2c_driver_tveeprom;
 	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name), "tveeprom");
-        i2c_attach_client(client);
+	i2c_attach_client(client);
 	return 0;
 }
 
diff --git a/include/linux/dvb/audio.h b/include/linux/dvb/audio.h
index cc314443f1c4..2b8797084685 100644
--- a/include/linux/dvb/audio.h
+++ b/include/linux/dvb/audio.h
@@ -32,39 +32,39 @@
 
 
 typedef enum {
-        AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */
+	AUDIO_SOURCE_DEMUX, /* Select the demux as the main source */
 	AUDIO_SOURCE_MEMORY /* Select internal memory as the main source */
 } audio_stream_source_t;
 
 
 typedef enum {
 	AUDIO_STOPPED,      /* Device is stopped */
-        AUDIO_PLAYING,      /* Device is currently playing */
+	AUDIO_PLAYING,      /* Device is currently playing */
 	AUDIO_PAUSED        /* Device is paused */
 } audio_play_state_t;
 
 
 typedef enum {
-        AUDIO_STEREO,
-        AUDIO_MONO_LEFT,
+	AUDIO_STEREO,
+	AUDIO_MONO_LEFT,
 	AUDIO_MONO_RIGHT
 } audio_channel_select_t;
 
 
 typedef struct audio_mixer {
-        unsigned int volume_left;
-        unsigned int volume_right;
+	unsigned int volume_left;
+	unsigned int volume_right;
   // what else do we need? bass, pass-through, ...
 } audio_mixer_t;
 
 
 typedef struct audio_status {
-        int                    AV_sync_state;  /* sync audio and video? */
-        int                    mute_state;     /* audio is muted */
-        audio_play_state_t     play_state;     /* current playback state */
-        audio_stream_source_t  stream_source;  /* current stream source */
-        audio_channel_select_t channel_select; /* currently selected channel */
-        int                    bypass_mode;    /* pass on audio data to */
+	int                    AV_sync_state;  /* sync audio and video? */
+	int                    mute_state;     /* audio is muted */
+	audio_play_state_t     play_state;     /* current playback state */
+	audio_stream_source_t  stream_source;  /* current stream source */
+	audio_channel_select_t channel_select; /* currently selected channel */
+	int                    bypass_mode;    /* pass on audio data to */
 	audio_mixer_t	       mixer_state;    /* current mixer state */
 } audio_status_t;                              /* separate decoder hardware */
 
@@ -74,8 +74,8 @@ struct audio_karaoke{  /* if Vocal1 or Vocal2 are non-zero, they get mixed  */
 	int vocal1;    /* into left and right t at 70% each */
 	int vocal2;    /* if both, Vocal1 and Vocal2 are non-zero, Vocal1 gets*/
 	int melody;    /* mixed into the left channel and */
-                       /* Vocal2 into the right channel at 100% each. */
-                       /* if Melody is non-zero, the melody channel gets mixed*/
+		       /* Vocal2 into the right channel at 100% each. */
+		       /* if Melody is non-zero, the melody channel gets mixed*/
 } audio_karaoke_t;     /* into left and right  */
 
 
diff --git a/include/linux/dvb/ca.h b/include/linux/dvb/ca.h
index 558af0cc7692..c18537f3e449 100644
--- a/include/linux/dvb/ca.h
+++ b/include/linux/dvb/ca.h
@@ -27,16 +27,16 @@
 /* slot interface types and info */
 
 typedef struct ca_slot_info {
-        int num;               /* slot number */
+	int num;               /* slot number */
 
-        int type;              /* CA interface this slot supports */
+	int type;              /* CA interface this slot supports */
 #define CA_CI            1     /* CI high level interface */
 #define CA_CI_LINK       2     /* CI link layer level interface */
 #define CA_CI_PHYS       4     /* CI physical layer level interface */
 #define CA_DESCR         8     /* built-in descrambler */
 #define CA_SC          128     /* simple smart card interface */
 
-        unsigned int flags;
+	unsigned int flags;
 #define CA_CI_MODULE_PRESENT 1 /* module (or card) inserted */
 #define CA_CI_MODULE_READY   2
 } ca_slot_info_t;
@@ -45,37 +45,37 @@ typedef struct ca_slot_info {
 /* descrambler types and info */
 
 typedef struct ca_descr_info {
-        unsigned int num;          /* number of available descramblers (keys) */
-        unsigned int type;         /* type of supported scrambling system */
+	unsigned int num;          /* number of available descramblers (keys) */
+	unsigned int type;         /* type of supported scrambling system */
 #define CA_ECD           1
 #define CA_NDS           2
 #define CA_DSS           4
 } ca_descr_info_t;
 
 typedef struct ca_caps {
-        unsigned int slot_num;     /* total number of CA card and module slots */
-        unsigned int slot_type;    /* OR of all supported types */
-        unsigned int descr_num;    /* total number of descrambler slots (keys) */
-        unsigned int descr_type;   /* OR of all supported types */
+	unsigned int slot_num;     /* total number of CA card and module slots */
+	unsigned int slot_type;    /* OR of all supported types */
+	unsigned int descr_num;    /* total number of descrambler slots (keys) */
+	unsigned int descr_type;   /* OR of all supported types */
 } ca_caps_t;
 
 /* a message to/from a CI-CAM */
 typedef struct ca_msg {
-        unsigned int index;
-        unsigned int type;
-        unsigned int length;
-        unsigned char msg[256];
+	unsigned int index;
+	unsigned int type;
+	unsigned int length;
+	unsigned char msg[256];
 } ca_msg_t;
 
 typedef struct ca_descr {
-        unsigned int index;
-        unsigned int parity;	/* 0 == even, 1 == odd */
-        unsigned char cw[8];
+	unsigned int index;
+	unsigned int parity;	/* 0 == even, 1 == odd */
+	unsigned char cw[8];
 } ca_descr_t;
 
 typedef struct ca_pid {
-        unsigned int pid;
-        int index;		/* -1 == disable*/
+	unsigned int pid;
+	int index;		/* -1 == disable*/
 } ca_pid_t;
 
 #define CA_RESET          _IO('o', 128)
diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h
index ce3f829da82c..263e9e19f772 100644
--- a/include/linux/dvb/dmx.h
+++ b/include/linux/dvb/dmx.h
@@ -1,4 +1,4 @@
-/* 
+/*
  * dmx.h
  *
  * Copyright (C) 2000 Marcus Metzler <marcus@convergence.de>
@@ -38,10 +38,10 @@ typedef enum
 {
 	DMX_OUT_DECODER, /* Streaming directly to decoder. */
 	DMX_OUT_TAP,     /* Output going to a memory buffer */
-	                 /* (to be retrieved via the read command).*/
+		         /* (to be retrieved via the read command).*/
 	DMX_OUT_TS_TAP   /* Output multiplexed into a new TS  */
-	                 /* (to be retrieved by reading from the */
-	                 /* logical DVR device).                 */
+		         /* (to be retrieved by reading from the */
+		         /* logical DVR device).                 */
 } dmx_output_t;
 
 
@@ -54,25 +54,25 @@ typedef enum
 
 typedef enum
 {
-        DMX_PES_AUDIO0,
+	DMX_PES_AUDIO0,
 	DMX_PES_VIDEO0,
 	DMX_PES_TELETEXT0,
 	DMX_PES_SUBTITLE0,
 	DMX_PES_PCR0,
 
-        DMX_PES_AUDIO1,
+	DMX_PES_AUDIO1,
 	DMX_PES_VIDEO1,
 	DMX_PES_TELETEXT1,
 	DMX_PES_SUBTITLE1,
 	DMX_PES_PCR1,
 
-        DMX_PES_AUDIO2,
+	DMX_PES_AUDIO2,
 	DMX_PES_VIDEO2,
 	DMX_PES_TELETEXT2,
 	DMX_PES_SUBTITLE2,
 	DMX_PES_PCR2,
 
-        DMX_PES_AUDIO3,
+	DMX_PES_AUDIO3,
 	DMX_PES_VIDEO3,
 	DMX_PES_TELETEXT3,
 	DMX_PES_SUBTITLE3,
@@ -90,8 +90,8 @@ typedef enum
 
 typedef enum
 {
-        DMX_SCRAMBLING_EV,
-        DMX_FRONTEND_EV
+	DMX_SCRAMBLING_EV,
+	DMX_FRONTEND_EV
 } dmx_event_t;
 
 
diff --git a/include/linux/dvb/osd.h b/include/linux/dvb/osd.h
index 0e1973d54a6b..880e68435832 100644
--- a/include/linux/dvb/osd.h
+++ b/include/linux/dvb/osd.h
@@ -98,43 +98,43 @@ typedef enum {
 } OSD_Command;
 
 typedef struct osd_cmd_s {
-        OSD_Command cmd;
-        int x0;
-        int y0;
-        int x1;
-        int y1;
-        int color;
-        void __user *data;
+	OSD_Command cmd;
+	int x0;
+	int y0;
+	int x1;
+	int y1;
+	int color;
+	void __user *data;
 } osd_cmd_t;
 
 /* OSD_OpenRaw: set 'color' to desired window type */
 typedef enum {
-        OSD_BITMAP1,           /* 1 bit bitmap */
-        OSD_BITMAP2,           /* 2 bit bitmap */
-        OSD_BITMAP4,           /* 4 bit bitmap */
-        OSD_BITMAP8,           /* 8 bit bitmap */
-        OSD_BITMAP1HR,         /* 1 Bit bitmap half resolution */
-        OSD_BITMAP2HR,         /* 2 bit bitmap half resolution */
-        OSD_BITMAP4HR,         /* 4 bit bitmap half resolution */
-        OSD_BITMAP8HR,         /* 8 bit bitmap half resolution */
-        OSD_YCRCB422,          /* 4:2:2 YCRCB Graphic Display */
-        OSD_YCRCB444,          /* 4:4:4 YCRCB Graphic Display */
-        OSD_YCRCB444HR,        /* 4:4:4 YCRCB graphic half resolution */
-        OSD_VIDEOTSIZE,        /* True Size Normal MPEG Video Display */
-        OSD_VIDEOHSIZE,        /* MPEG Video Display Half Resolution */
-        OSD_VIDEOQSIZE,        /* MPEG Video Display Quarter Resolution */
-        OSD_VIDEODSIZE,        /* MPEG Video Display Double Resolution */
-        OSD_VIDEOTHSIZE,       /* True Size MPEG Video Display Half Resolution */
-        OSD_VIDEOTQSIZE,       /* True Size MPEG Video Display Quarter Resolution*/
-        OSD_VIDEOTDSIZE,       /* True Size MPEG Video Display Double Resolution */
-        OSD_VIDEONSIZE,        /* Full Size MPEG Video Display */
-        OSD_CURSOR             /* Cursor */
+	OSD_BITMAP1,           /* 1 bit bitmap */
+	OSD_BITMAP2,           /* 2 bit bitmap */
+	OSD_BITMAP4,           /* 4 bit bitmap */
+	OSD_BITMAP8,           /* 8 bit bitmap */
+	OSD_BITMAP1HR,         /* 1 Bit bitmap half resolution */
+	OSD_BITMAP2HR,         /* 2 bit bitmap half resolution */
+	OSD_BITMAP4HR,         /* 4 bit bitmap half resolution */
+	OSD_BITMAP8HR,         /* 8 bit bitmap half resolution */
+	OSD_YCRCB422,          /* 4:2:2 YCRCB Graphic Display */
+	OSD_YCRCB444,          /* 4:4:4 YCRCB Graphic Display */
+	OSD_YCRCB444HR,        /* 4:4:4 YCRCB graphic half resolution */
+	OSD_VIDEOTSIZE,        /* True Size Normal MPEG Video Display */
+	OSD_VIDEOHSIZE,        /* MPEG Video Display Half Resolution */
+	OSD_VIDEOQSIZE,        /* MPEG Video Display Quarter Resolution */
+	OSD_VIDEODSIZE,        /* MPEG Video Display Double Resolution */
+	OSD_VIDEOTHSIZE,       /* True Size MPEG Video Display Half Resolution */
+	OSD_VIDEOTQSIZE,       /* True Size MPEG Video Display Quarter Resolution*/
+	OSD_VIDEOTDSIZE,       /* True Size MPEG Video Display Double Resolution */
+	OSD_VIDEONSIZE,        /* Full Size MPEG Video Display */
+	OSD_CURSOR             /* Cursor */
 } osd_raw_window_t;
 
 typedef struct osd_cap_s {
-        int  cmd;
+	int  cmd;
 #define OSD_CAP_MEMSIZE         1  /* memory size */
-        long val;
+	long val;
 } osd_cap_t;
 
 
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
index 941045e9ab89..2e15051b234b 100644
--- a/include/linux/dvb/video.h
+++ b/include/linux/dvb/video.h
@@ -36,7 +36,7 @@
 
 typedef enum {
 	VIDEO_FORMAT_4_3,     /* Select 4:3 format */
-        VIDEO_FORMAT_16_9,    /* Select 16:9 format. */
+	VIDEO_FORMAT_16_9,    /* Select 16:9 format. */
 	VIDEO_FORMAT_221_1    /* 2.21:1 */
 } video_format_t;
 
@@ -54,7 +54,7 @@ typedef enum {
 
 
 typedef enum {
-        VIDEO_PAN_SCAN,       /* use pan and scan format */
+	VIDEO_PAN_SCAN,       /* use pan and scan format */
 	VIDEO_LETTER_BOX,     /* use letterbox format */
 	VIDEO_CENTER_CUT_OUT  /* use center cut out format */
 } video_displayformat_t;
@@ -66,7 +66,7 @@ typedef struct {
 } video_size_t;
 
 typedef enum {
-        VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */
+	VIDEO_SOURCE_DEMUX, /* Select the demux as the main source */
 	VIDEO_SOURCE_MEMORY /* If this source is selected, the stream
 			       comes from the user through the write
 			       system call */
@@ -75,35 +75,35 @@ typedef enum {
 
 typedef enum {
 	VIDEO_STOPPED, /* Video is stopped */
-        VIDEO_PLAYING, /* Video is currently playing */
+	VIDEO_PLAYING, /* Video is currently playing */
 	VIDEO_FREEZED  /* Video is freezed */
 } video_play_state_t;
 
 
 struct video_event {
-        int32_t type;
+	int32_t type;
 #define VIDEO_EVENT_SIZE_CHANGED	1
 #define VIDEO_EVENT_FRAME_RATE_CHANGED	2
-        time_t timestamp;
+	time_t timestamp;
 	union {
-	        video_size_t size;
+		video_size_t size;
 		unsigned int frame_rate;	/* in frames per 1000sec */
 	} u;
 };
 
 
 struct video_status {
-        int                   video_blank;   /* blank video on freeze? */
-        video_play_state_t    play_state;    /* current state of playback */
-        video_stream_source_t stream_source; /* current source (demux/memory) */
-        video_format_t        video_format;  /* current aspect ratio of stream*/
-        video_displayformat_t display_format;/* selected cropping mode */
+	int                   video_blank;   /* blank video on freeze? */
+	video_play_state_t    play_state;    /* current state of playback */
+	video_stream_source_t stream_source; /* current source (demux/memory) */
+	video_format_t        video_format;  /* current aspect ratio of stream*/
+	video_displayformat_t display_format;/* selected cropping mode */
 };
 
 
 struct video_still_picture {
-        char __user *iFrame;        /* pointer to a single iframe in memory */
-        int32_t size;
+	char __user *iFrame;        /* pointer to a single iframe in memory */
+	int32_t size;
 };
 
 
@@ -111,19 +111,19 @@ typedef
 struct video_highlight {
 	int     active;      /*    1=show highlight, 0=hide highlight */
 	uint8_t contrast1;   /*    7- 4  Pattern pixel contrast */
-                             /*    3- 0  Background pixel contrast */
+		             /*    3- 0  Background pixel contrast */
 	uint8_t contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-                             /*    3- 0  Emphasis pixel-1 contrast */
+		             /*    3- 0  Emphasis pixel-1 contrast */
 	uint8_t color1;      /*    7- 4  Pattern pixel color */
-                             /*    3- 0  Background pixel color */
+		             /*    3- 0  Background pixel color */
 	uint8_t color2;      /*    7- 4  Emphasis pixel-2 color */
-                             /*    3- 0  Emphasis pixel-1 color */
+		             /*    3- 0  Emphasis pixel-1 color */
 	uint32_t ypos;       /*   23-22  auto action mode */
-                             /*   21-12  start y */
-                             /*    9- 0  end y */
+		             /*   21-12  start y */
+		             /*    9- 0  end y */
 	uint32_t xpos;       /*   23-22  button color number */
-                             /*   21-12  start x */
-                             /*    9- 0  end x */
+		             /*   21-12  start x */
+		             /*    9- 0  end x */
 } video_highlight_t;
 
 
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index 2a897c3a6a9a..101f56c8a641 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -112,7 +112,7 @@ struct saa7146_dev
 
 	/* different device locks */
 	spinlock_t			slock;
-        struct semaphore		lock;
+	struct semaphore		lock;
 
 	unsigned char			__iomem *mem;		/* pointer to mapped IO memory */
 	int				revision;	/* chip revision; needed for bug-workarounds*/
@@ -133,7 +133,7 @@ struct saa7146_dev
 	void (*vv_callback)(struct saa7146_dev *dev, unsigned long status);
 
 	/* i2c-stuff */
-        struct semaphore	i2c_lock;
+	struct semaphore	i2c_lock;
 	u32			i2c_bitrate;
 	struct saa7146_dma	d_i2c;	/* pointer to i2c memory */
 	wait_queue_head_t	i2c_wq;
diff --git a/include/media/saa7146_vv.h b/include/media/saa7146_vv.h
index 64691753721e..16af9299315f 100644
--- a/include/media/saa7146_vv.h
+++ b/include/media/saa7146_vv.h
@@ -113,7 +113,7 @@ struct saa7146_vv
 	/* vbi capture */
 	struct saa7146_dmaqueue		vbi_q;
 	/* vbi workaround interrupt queue */
-        wait_queue_head_t		vbi_wq;
+	wait_queue_head_t		vbi_wq;
 	int				vbi_fieldcount;
 	struct saa7146_fh		*vbi_streaming;
 
@@ -181,10 +181,10 @@ struct saa7146_ext_vv
 };
 
 struct saa7146_use_ops  {
-        void (*init)(struct saa7146_dev *, struct saa7146_vv *);
-        int(*open)(struct saa7146_dev *, struct file *);
-        void (*release)(struct saa7146_dev *, struct file *);
-        void (*irq_done)(struct saa7146_dev *, unsigned long status);
+	void (*init)(struct saa7146_dev *, struct saa7146_vv *);
+	int(*open)(struct saa7146_dev *, struct file *);
+	void (*release)(struct saa7146_dev *, struct file *);
+	void (*irq_done)(struct saa7146_dev *, unsigned long status);
 	ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
 };
 
-- 
cgit v1.2.3


From 674434c691e10015660022fc00b04985a23ef87b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Mon, 12 Dec 2005 00:37:28 -0800
Subject: [PATCH] V4L/DVB: (3086c) Whitespaces cleanups part 4

Clean up whitespaces at v4l/dvb files

Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/dvb/README.dvb-usb              | 50 +++++++++++++--------------
 Documentation/dvb/README.flexcop              |  2 +-
 Documentation/dvb/avermedia.txt               |  2 +-
 Documentation/dvb/cards.txt                   |  8 ++---
 Documentation/dvb/contributors.txt            |  4 +--
 Documentation/dvb/readme.txt                  |  4 +--
 drivers/media/common/Kconfig                  |  6 ++--
 drivers/media/common/Makefile                 |  4 +--
 drivers/media/common/ir-common.c              |  1 +
 drivers/media/video/Kconfig                   | 14 ++++----
 drivers/media/video/bttv-driver.c             | 16 ++++-----
 drivers/media/video/bttvp.h                   |  1 +
 drivers/media/video/cx88/cx88-blackbird.c     |  2 ++
 drivers/media/video/cx88/cx88-input.c         |  2 --
 drivers/media/video/cx88/cx88.h               |  1 -
 drivers/media/video/em28xx/em28xx-core.c      |  7 ++--
 drivers/media/video/em28xx/em28xx-i2c.c       |  4 +--
 drivers/media/video/em28xx/em28xx-video.c     |  2 +-
 drivers/media/video/em28xx/em28xx.h           |  8 ++---
 drivers/media/video/ir-kbd-gpio.c             |  8 ++---
 drivers/media/video/ir-kbd-i2c.c              |  5 +--
 drivers/media/video/msp3400.c                 |  6 +++-
 drivers/media/video/saa6588.c                 |  1 -
 drivers/media/video/saa711x.c                 |  2 +-
 drivers/media/video/saa7134/saa6752hs.c       |  1 +
 drivers/media/video/saa7134/saa7134-alsa.c    | 10 +++++-
 drivers/media/video/saa7134/saa7134-core.c    |  1 +
 drivers/media/video/saa7134/saa7134-empress.c |  1 +
 drivers/media/video/saa7134/saa7134-i2c.c     |  2 +-
 drivers/media/video/saa7134/saa7134-oss.c     | 19 +++++-----
 drivers/media/video/tda9887.c                 |  1 +
 drivers/media/video/tvaudio.c                 | 22 ++++++------
 drivers/media/video/tveeprom.c                |  1 +
 drivers/media/video/tvp5150.c                 |  3 +-
 drivers/media/video/video-buf-dvb.c           |  2 ++
 include/linux/dvb/dmx.h                       |  6 ++--
 include/linux/dvb/video.h                     | 16 ++++-----
 include/media/saa7146.h                       |  2 +-
 38 files changed, 134 insertions(+), 113 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/dvb/README.dvb-usb b/Documentation/dvb/README.dvb-usb
index ac0797ea646c..46b78b7331c2 100644
--- a/Documentation/dvb/README.dvb-usb
+++ b/Documentation/dvb/README.dvb-usb
@@ -50,12 +50,12 @@ http://www.linuxtv.org/wiki/index.php/DVB_USB
 0. History & News:
   2005-06-30 - added support for WideView WT-220U (Thanks to Steve Chang)
   2005-05-30 - added basic isochronous support to the dvb-usb-framework
-               added support for Conexant Hybrid reference design and Nebula DigiTV USB
+	       added support for Conexant Hybrid reference design and Nebula DigiTV USB
   2005-04-17 - all dibusb devices ported to make use of the dvb-usb-framework
   2005-04-02 - re-enabled and improved remote control code.
   2005-03-31 - ported the Yakumo/Hama/Typhoon DVB-T USB2.0 device to dvb-usb.
   2005-03-30 - first commit of the dvb-usb-module based on the dibusb-source. First device is a new driver for the
-               TwinhanDTV Alpha / MagicBox II USB2.0-only DVB-T device.
+	       TwinhanDTV Alpha / MagicBox II USB2.0-only DVB-T device.
 
   (change from dvb-dibusb to dvb-usb)
   2005-03-28 - added support for the AVerMedia AverTV DVB-T USB2.0 device (Thanks to Glen Harris and Jiun-Kuei Jung, AVerMedia)
@@ -64,50 +64,50 @@ http://www.linuxtv.org/wiki/index.php/DVB_USB
   2005-02-02 - added support for the Hauppauge Win-TV Nova-T USB2
   2005-01-31 - distorted streaming is gone for USB1.1 devices
   2005-01-13 - moved the mirrored pid_filter_table back to dvb-dibusb
-             - first almost working version for HanfTek UMT-010
-             - found out, that Yakumo/HAMA/Typhoon are predecessors of the HanfTek UMT-010
+	     - first almost working version for HanfTek UMT-010
+	     - found out, that Yakumo/HAMA/Typhoon are predecessors of the HanfTek UMT-010
   2005-01-10 - refactoring completed, now everything is very delightful
-             - tuner quirks for some weird devices (Artec T1 AN2235 device has sometimes a
-               Panasonic Tuner assembled). Tunerprobing implemented. Thanks a lot to Gunnar Wittich.
+	     - tuner quirks for some weird devices (Artec T1 AN2235 device has sometimes a
+	       Panasonic Tuner assembled). Tunerprobing implemented. Thanks a lot to Gunnar Wittich.
   2004-12-29 - after several days of struggling around bug of no returning URBs fixed.
   2004-12-26 - refactored the dibusb-driver, splitted into separate files
-             - i2c-probing enabled
+	     - i2c-probing enabled
   2004-12-06 - possibility for demod i2c-address probing
-             - new usb IDs (Compro, Artec)
+	     - new usb IDs (Compro, Artec)
   2004-11-23 - merged changes from DiB3000MC_ver2.1
-             - revised the debugging
-             - possibility to deliver the complete TS for USB2.0
+	     - revised the debugging
+	     - possibility to deliver the complete TS for USB2.0
   2004-11-21 - first working version of the dib3000mc/p frontend driver.
   2004-11-12 - added additional remote control keys. Thanks to Uwe Hanke.
   2004-11-07 - added remote control support. Thanks to David Matthews.
   2004-11-05 - added support for a new devices (Grandtec/Avermedia/Artec)
-             - merged my changes (for dib3000mb/dibusb) to the FE_REFACTORING, because it became HEAD
-             - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems
-               better settled there (added xfer_ops-struct)
-             - created a common files for frontends (mc/p/mb)
+	     - merged my changes (for dib3000mb/dibusb) to the FE_REFACTORING, because it became HEAD
+	     - moved transfer control (pid filter, fifo control) from usb driver to frontend, it seems
+	       better settled there (added xfer_ops-struct)
+	     - created a common files for frontends (mc/p/mb)
   2004-09-28 - added support for a new device (Unkown, vendor ID is Hyper-Paltek)
   2004-09-20 - added support for a new device (Compro DVB-U2000), thanks
-               to Amaury Demol for reporting
-             - changed usb TS transfer method (several urbs, stopping transfer
-               before setting a new pid)
+	       to Amaury Demol for reporting
+	     - changed usb TS transfer method (several urbs, stopping transfer
+	       before setting a new pid)
   2004-09-13 - added support for a new device (Artec T1 USB TVBOX), thanks
-               to Christian Motschke for reporting
+	       to Christian Motschke for reporting
   2004-09-05 - released the dibusb device and dib3000mb-frontend driver
 
   (old news for vp7041.c)
   2004-07-15 - found out, by accident, that the device has a TUA6010XS for
-               PLL
+	       PLL
   2004-07-12 - figured out, that the driver should also work with the
-               CTS Portable (Chinese Television System)
+	       CTS Portable (Chinese Television System)
   2004-07-08 - firmware-extraction-2.422-problem solved, driver is now working
-               properly with firmware extracted from 2.422
-             - #if for 2.6.4 (dvb), compile issue
-             - changed firmware handling, see vp7041.txt sec 1.1
+	       properly with firmware extracted from 2.422
+	     - #if for 2.6.4 (dvb), compile issue
+	     - changed firmware handling, see vp7041.txt sec 1.1
   2004-07-02 - some tuner modifications, v0.1, cleanups, first public
   2004-06-28 - now using the dvb_dmx_swfilter_packets, everything
-               runs fine now
+	       runs fine now
   2004-06-27 - able to watch and switching channels (pre-alpha)
-             - no section filtering yet
+	     - no section filtering yet
   2004-06-06 - first TS received, but kernel oops :/
   2004-05-14 - firmware loader is working
   2004-05-11 - start writing the driver
diff --git a/Documentation/dvb/README.flexcop b/Documentation/dvb/README.flexcop
index a50c70f9ca72..5515469de7cf 100644
--- a/Documentation/dvb/README.flexcop
+++ b/Documentation/dvb/README.flexcop
@@ -174,7 +174,7 @@ Debugging
 Everything which is identical in the following table, can be put into a common
 flexcop-module.
 
-                  PCI                  USB
+		  PCI                  USB
 -------------------------------------------------------------------------------
 Different:
 Register access:  accessing IO memory  USB control message
diff --git a/Documentation/dvb/avermedia.txt b/Documentation/dvb/avermedia.txt
index 09020ebd202b..2dc260b2b0a4 100644
--- a/Documentation/dvb/avermedia.txt
+++ b/Documentation/dvb/avermedia.txt
@@ -1,6 +1,6 @@
 
 HOWTO: Get An Avermedia DVB-T working under Linux
-           ______________________________________________
+	   ______________________________________________
 
    Table of Contents
    Assumptions and Introduction
diff --git a/Documentation/dvb/cards.txt b/Documentation/dvb/cards.txt
index 19329cf7b097..9e10092440e1 100644
--- a/Documentation/dvb/cards.txt
+++ b/Documentation/dvb/cards.txt
@@ -16,7 +16,7 @@ Hardware supported by the linuxtv.org DVB drivers
   shielding, and the whole metal box has its own part number.
 
 
-o Frontends drivers: 
+o Frontends drivers:
   - dvb_dummy_fe: for testing...
   DVB-S:
    - ves1x93		: Alps BSRV2 (ves1893 demodulator) and dbox2 (ves1993)
@@ -24,7 +24,7 @@ o Frontends drivers:
    - grundig_29504-491	: Grundig 29504-491 (Philips TDA8083 demodulator), tsa5522 PLL
    - mt312		: Zarlink mt312 or Mitel vp310 demodulator, sl1935 or tsa5059 PLL
    - stv0299		: Alps BSRU6 (tsa5059 PLL), LG TDQB-S00x (tsa5059 PLL),
-   			  LG TDQF-S001F (sl1935 PLL), Philips SU1278 (tua6100 PLL), 
+			  LG TDQF-S001F (sl1935 PLL), Philips SU1278 (tua6100 PLL),
 			  Philips SU1278SH (tsa5059 PLL), Samsung TBMU24112IMB
   DVB-C:
    - ves1820		: various (ves1820 demodulator, sp5659c or spXXXX PLL)
@@ -35,8 +35,8 @@ o Frontends drivers:
    - grundig_29504-401	: Grundig 29504-401 (LSI L64781 demodulator), tsa5060 PLL
    - tda1004x		: Philips tda10045h (td1344 or tdm1316l PLL)
    - nxt6000 		: Alps TDME7 (MITEL SP5659 PLL), Alps TDED4 (TI ALP510 PLL),
-               		  Comtech DVBT-6k07 (SP5730 PLL)
-               		  (NxtWave Communications NXT6000 demodulator)
+			  Comtech DVBT-6k07 (SP5730 PLL)
+			  (NxtWave Communications NXT6000 demodulator)
    - sp887x		: Microtune 7202D
    - dib3000mb	: DiBcom 3000-MB demodulator
   DVB-S/C/T:
diff --git a/Documentation/dvb/contributors.txt b/Documentation/dvb/contributors.txt
index 2cbd2d0f6fdf..4c33cced5f65 100644
--- a/Documentation/dvb/contributors.txt
+++ b/Documentation/dvb/contributors.txt
@@ -15,7 +15,7 @@ Michael Holzt <kju@debian.org>
 
 Diego Picciani <d.picciani@novacomp.it>
   for CyberLogin for Linux which allows logging onto EON
-  (in case you are wondering where CyberLogin is, EON changed its login 
+  (in case you are wondering where CyberLogin is, EON changed its login
   procedure and CyberLogin is no longer used.)
 
 Martin Schaller <martin@smurf.franken.de>
@@ -57,7 +57,7 @@ Augusto Cardoso <augusto@carhil.net>
 Davor Emard <emard@softhome.net>
   for his work on the budget drivers, the demux code,
   the module unloading problems, ...
-  
+
 Hans-Frieder Vogt <hfvogt@arcor.de>
   for his work on calculating and checking the crc's for the
   TechnoTrend/Hauppauge DEC driver firmware
diff --git a/Documentation/dvb/readme.txt b/Documentation/dvb/readme.txt
index 754c98c6ad94..f5c50b22de3b 100644
--- a/Documentation/dvb/readme.txt
+++ b/Documentation/dvb/readme.txt
@@ -20,7 +20,7 @@ http://linuxtv.org/downloads/
 
 What's inside this directory:
 
-"cards.txt" 
+"cards.txt"
 contains a list of supported hardware.
 
 "contributors.txt"
@@ -37,7 +37,7 @@ that require it.
 contains detailed informations about the
 TT DEC2000/DEC3000 USB DVB hardware.
 
-"bt8xx.txt" 
+"bt8xx.txt"
 contains detailed installation instructions for the
 various bt8xx based "budget" DVB cards
 (Nebula, Pinnacle PCTV, Twinhan DST)
diff --git a/drivers/media/common/Kconfig b/drivers/media/common/Kconfig
index caebd0a1c021..6a901a0268e1 100644
--- a/drivers/media/common/Kconfig
+++ b/drivers/media/common/Kconfig
@@ -1,12 +1,12 @@
 config VIDEO_SAA7146
-        tristate
+	tristate
 	select I2C
 
 config VIDEO_SAA7146_VV
-        tristate
+	tristate
 	select VIDEO_BUF
 	select VIDEO_VIDEOBUF
 	select VIDEO_SAA7146
 
 config VIDEO_VIDEOBUF
-        tristate
+	tristate
diff --git a/drivers/media/common/Makefile b/drivers/media/common/Makefile
index 97b4341255ea..bd458cb9b4ea 100644
--- a/drivers/media/common/Makefile
+++ b/drivers/media/common/Makefile
@@ -1,5 +1,5 @@
-saa7146-objs    := saa7146_i2c.o saa7146_core.o 
-saa7146_vv-objs := saa7146_vv_ksyms.o saa7146_fops.o saa7146_video.o saa7146_hlp.o saa7146_vbi.o  
+saa7146-objs    := saa7146_i2c.o saa7146_core.o
+saa7146_vv-objs := saa7146_vv_ksyms.o saa7146_fops.o saa7146_video.o saa7146_hlp.o saa7146_vbi.o
 
 obj-$(CONFIG_VIDEO_SAA7146) += saa7146.o
 obj-$(CONFIG_VIDEO_SAA7146_VV) += saa7146_vv.o
diff --git a/drivers/media/common/ir-common.c b/drivers/media/common/ir-common.c
index 7972c73bc14e..97fa3fc571c4 100644
--- a/drivers/media/common/ir-common.c
+++ b/drivers/media/common/ir-common.c
@@ -313,6 +313,7 @@ void ir_input_init(struct input_dev *dev, struct ir_input_state *ir,
 	if (ir_codes)
 		memcpy(ir->ir_codes, ir_codes, sizeof(ir->ir_codes));
 
+
 	dev->keycode     = ir->ir_codes;
 	dev->keycodesize = sizeof(IR_KEYTAB_TYPE);
 	dev->keycodemax  = IR_KEYTAB_SIZE;
diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index cc4a723e24db..fc87efc5049c 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -188,7 +188,7 @@ config VIDEO_ZORAN
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called zr36067.
- 
+
 config VIDEO_ZORAN_BUZ
 	tristate "Iomega Buz support"
 	depends on VIDEO_ZORAN
@@ -204,8 +204,8 @@ config VIDEO_ZORAN_DC10
 
 config VIDEO_ZORAN_DC30
 	tristate "Pinnacle/Miro DC30(+) support"
- 	depends on VIDEO_ZORAN
- 	help
+	depends on VIDEO_ZORAN
+	help
 	  Support for the Pinnacle/Miro DC30(+) MJPEG capture/playback
 	  card. This also supports really old DC10 cards based on the
 	  zr36050 MJPEG codec and zr36016 VFE.
@@ -260,7 +260,7 @@ config VIDEO_MXB
 	---help---
 	  This is a video4linux driver for the 'Multimedia eXtension Board'
 	  TV card by Siemens-Nixdorf.
-	  
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called mxb.
 
@@ -274,7 +274,7 @@ config VIDEO_DPC
 	  for SAA7146 bases boards, so if you have some unsupported
 	  saa7146 based, analog video card, chances are good that it
 	  will work with this skeleton driver.
-	  
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called dpc7146.
 
@@ -285,7 +285,7 @@ config VIDEO_HEXIUM_ORION
 	---help---
 	  This is a video4linux driver for the Hexium HV-PCI6 and
 	  Orion frame grabber cards by Hexium.
-	  
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called hexium_orion.
 
@@ -297,7 +297,7 @@ config VIDEO_HEXIUM_GEMINI
 	  This is a video4linux driver for the Hexium Gemini frame
 	  grabber card by Hexium. Please note that the Gemini Dual
 	  card is *not* fully supported.
-	  
+
 	  To compile this driver as a module, choose M here: the
 	  module will be called hexium_gemini.
 
diff --git a/drivers/media/video/bttv-driver.c b/drivers/media/video/bttv-driver.c
index 24f5afddafbb..1ddf9ba613ef 100644
--- a/drivers/media/video/bttv-driver.c
+++ b/drivers/media/video/bttv-driver.c
@@ -762,9 +762,9 @@ static void set_pll(struct bttv *btv)
 	if (btv->pll.pll_ifreq == btv->pll.pll_ofreq) {
 		/* no PLL needed */
 		if (btv->pll.pll_current == 0)
-		        return;
+			return;
 		bttv_printk(KERN_INFO "bttv%d: PLL can sleep, using XTAL (%d).\n",
-		           btv->c.nr,btv->pll.pll_ifreq);
+			btv->c.nr,btv->pll.pll_ifreq);
 		btwrite(0x00,BT848_TGCTRL);
 		btwrite(0x00,BT848_PLL_XCI);
 		btv->pll.pll_current = 0;
@@ -772,7 +772,7 @@ static void set_pll(struct bttv *btv)
 	}
 
 	bttv_printk(KERN_INFO "bttv%d: PLL: %d => %d ",btv->c.nr,
-		   btv->pll.pll_ifreq, btv->pll.pll_ofreq);
+		btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 	set_pll_freq(btv, btv->pll.pll_ifreq, btv->pll.pll_ofreq);
 
 	for (i=0; i<10; i++) {
@@ -783,10 +783,10 @@ static void set_pll(struct bttv *btv)
 		if (btread(BT848_DSTATUS) & BT848_DSTATUS_PLOCK) {
 			btwrite(0,BT848_DSTATUS);
 		} else {
-		        btwrite(0x08,BT848_TGCTRL);
-		        btv->pll.pll_current = btv->pll.pll_ofreq;
+			btwrite(0x08,BT848_TGCTRL);
+			btv->pll.pll_current = btv->pll.pll_ofreq;
 			bttv_printk(" ok\n");
-		        return;
+			return;
 		}
 	}
 	btv->pll.pll_current = -1;
@@ -1964,7 +1964,7 @@ static int setup_window(struct bttv_fh *fh, struct bttv *btv,
 	}
 
 	down(&fh->cap.lock);
-	kfree(fh->ov.clips);
+		kfree(fh->ov.clips);
 	fh->ov.clips    = clips;
 	fh->ov.nclips   = n;
 
@@ -2758,7 +2758,7 @@ static int bttv_do_ioctl(struct inode *inode, struct file *file,
 			fh->ov.w.height = fb->fmt.height;
 			btv->init.ov.w.width  = fb->fmt.width;
 			btv->init.ov.w.height = fb->fmt.height;
-			kfree(fh->ov.clips);
+				kfree(fh->ov.clips);
 			fh->ov.clips = NULL;
 			fh->ov.nclips = 0;
 
diff --git a/drivers/media/video/bttvp.h b/drivers/media/video/bttvp.h
index 3aa9c6e4fc33..1e6a5632c3c7 100644
--- a/drivers/media/video/bttvp.h
+++ b/drivers/media/video/bttvp.h
@@ -45,6 +45,7 @@
 #include <media/tveeprom.h>
 #include <media/ir-common.h>
 
+
 #include "bt848.h"
 #include "bttv.h"
 #include "btcx-risc.h"
diff --git a/drivers/media/video/cx88/cx88-blackbird.c b/drivers/media/video/cx88/cx88-blackbird.c
index 4ae3f78cccf2..74e57a53116f 100644
--- a/drivers/media/video/cx88/cx88-blackbird.c
+++ b/drivers/media/video/cx88/cx88-blackbird.c
@@ -616,6 +616,8 @@ static int blackbird_load_firmware(struct cx8802_dev *dev)
 
 	retval = request_firmware(&firmware, BLACKBIRD_FIRM_ENC_FILENAME,
 				  &dev->pci->dev);
+
+
 	if (retval != 0) {
 		dprintk(0, "ERROR: Hotplug firmware request failed (%s).\n",
 			BLACKBIRD_FIRM_ENC_FILENAME);
diff --git a/drivers/media/video/cx88/cx88-input.c b/drivers/media/video/cx88/cx88-input.c
index 38b12ebaa49e..461019dca901 100644
--- a/drivers/media/video/cx88/cx88-input.c
+++ b/drivers/media/video/cx88/cx88-input.c
@@ -453,7 +453,6 @@ int cx88_ir_init(struct cx88_core *core, struct pci_dev *pci)
 		input_dev->id.product = pci->device;
 	}
 	input_dev->cdev.dev = &pci->dev;
-
 	/* record handles to ourself */
 	ir->core = core;
 	core->ir = ir;
@@ -586,7 +585,6 @@ void cx88_ir_irq(struct cx88_core *core)
 MODULE_AUTHOR("Gerd Knorr, Pavel Machek, Chris Pascoe");
 MODULE_DESCRIPTION("input driver for cx88 GPIO-based IR remote controls");
 MODULE_LICENSE("GPL");
-
 /*
  * Local variables:
  * c-basic-offset: 8
diff --git a/drivers/media/video/cx88/cx88.h b/drivers/media/video/cx88/cx88.h
index 27fb080fd7aa..77beafc5c327 100644
--- a/drivers/media/video/cx88/cx88.h
+++ b/drivers/media/video/cx88/cx88.h
@@ -411,7 +411,6 @@ struct cx8802_dev {
 	struct videobuf_dvb        dvb;
 	void*                      fe_handle;
 	int                        (*fe_release)(void *handle);
-
 	/* for switching modulation types */
 	unsigned char              ts_gen_cntrl;
 
diff --git a/drivers/media/video/em28xx/em28xx-core.c b/drivers/media/video/em28xx/em28xx-core.c
index 9f6e5e5355a1..517573221972 100644
--- a/drivers/media/video/em28xx/em28xx-core.c
+++ b/drivers/media/video/em28xx/em28xx-core.c
@@ -39,7 +39,7 @@ MODULE_PARM_DESC(core_debug,"enable debug messages [core]");
 #define em28xx_coredbg(fmt, arg...) do {\
 	if (core_debug) \
 		printk(KERN_INFO "%s %s :"fmt, \
-			 dev->name, __FUNCTION__ , ##arg); } while (0)
+			 dev->name, __FUNCTION__, ##arg); } while (0)
 
 static unsigned int reg_debug;
 module_param(reg_debug,int,0644);
@@ -48,7 +48,7 @@ MODULE_PARM_DESC(reg_debug,"enable debug messages [URB reg]");
 #define em28xx_regdbg(fmt, arg...) do {\
 	if (reg_debug) \
 		printk(KERN_INFO "%s %s :"fmt, \
-			 dev->name, __FUNCTION__ , ##arg); } while (0)
+			 dev->name, __FUNCTION__, ##arg); } while (0)
 
 static unsigned int isoc_debug;
 module_param(isoc_debug,int,0644);
@@ -57,7 +57,7 @@ MODULE_PARM_DESC(isoc_debug,"enable debug messages [isoc transfers]");
 #define em28xx_isocdbg(fmt, arg...) do {\
 	if (isoc_debug) \
 		printk(KERN_INFO "%s %s :"fmt, \
-			 dev->name, __FUNCTION__ , ##arg); } while (0)
+			 dev->name, __FUNCTION__, ##arg); } while (0)
 
 static int alt = EM28XX_PINOUT;
 module_param(alt, int, 0644);
@@ -158,6 +158,7 @@ static void rvfree(void *mem, size_t size)
 	vfree(mem);
 }
 
+
 /*
  * em28xx_request_buffers()
  * allocate a number of buffers
diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c
index b32d9852f34c..29e21ad187cc 100644
--- a/drivers/media/video/em28xx/em28xx-i2c.c
+++ b/drivers/media/video/em28xx/em28xx-i2c.c
@@ -41,10 +41,10 @@ module_param(i2c_debug, int, 0644);
 MODULE_PARM_DESC(i2c_debug, "enable debug messages [i2c]");
 
 #define dprintk1(lvl,fmt, args...) if (i2c_debug>=lvl) do {\
-			printk(fmt , ##args); } while (0)
+			printk(fmt, ##args); } while (0)
 #define dprintk2(lvl,fmt, args...) if (i2c_debug>=lvl) do{ \
 			printk(KERN_DEBUG "%s at %s: " fmt, \
-			dev->name, __FUNCTION__ , ##args); } while (0)
+			dev->name, __FUNCTION__, ##args); } while (0)
 
 /*
  * em2800_i2c_send_max4()
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index abec32c175aa..c08e505e7009 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -45,7 +45,7 @@
 #define em28xx_videodbg(fmt, arg...) do {\
 	if (video_debug) \
 		printk(KERN_INFO "%s %s :"fmt, \
-			 dev->name, __FUNCTION__ , ##arg); } while (0)
+			 dev->name, __FUNCTION__, ##arg); } while (0)
 
 MODULE_AUTHOR(DRIVER_AUTHOR);
 MODULE_DESCRIPTION(DRIVER_DESC);
diff --git a/drivers/media/video/em28xx/em28xx.h b/drivers/media/video/em28xx/em28xx.h
index 5c7a41ce69f3..1e2ee43db394 100644
--- a/drivers/media/video/em28xx/em28xx.h
+++ b/drivers/media/video/em28xx/em28xx.h
@@ -392,18 +392,18 @@ extern const unsigned int em28xx_bcount;
 /* printk macros */
 
 #define em28xx_err(fmt, arg...) do {\
-	printk(KERN_ERR fmt , ##arg); } while (0)
+	printk(KERN_ERR fmt, ##arg); } while (0)
 
 #define em28xx_errdev(fmt, arg...) do {\
 	printk(KERN_ERR "%s: "fmt,\
-			dev->name , ##arg); } while (0)
+			dev->name, ##arg); } while (0)
 
 #define em28xx_info(fmt, arg...) do {\
 	printk(KERN_INFO "%s: "fmt,\
-			dev->name , ##arg); } while (0)
+			dev->name, ##arg); } while (0)
 #define em28xx_warn(fmt, arg...) do {\
 	printk(KERN_WARNING "%s: "fmt,\
-			dev->name , ##arg); } while (0)
+			dev->name, ##arg); } while (0)
 
 inline static int em28xx_audio_source(struct em28xx *dev, int input)
 {
diff --git a/drivers/media/video/ir-kbd-gpio.c b/drivers/media/video/ir-kbd-gpio.c
index 6345e29e4951..de1385e5d05e 100644
--- a/drivers/media/video/ir-kbd-gpio.c
+++ b/drivers/media/video/ir-kbd-gpio.c
@@ -291,13 +291,12 @@ struct IR {
 	u32                     mask_keycode;
 	u32                     mask_keydown;
 	u32                     mask_keyup;
-	u32			polling;
+	u32                     polling;
 	u32                     last_gpio;
 	struct work_struct      work;
 	struct timer_list       timer;
 
 	/* RC5 gpio */
-
 	u32 rc5_gpio;
 	struct timer_list timer_end;	/* timer_end for code completion */
 	struct timer_list timer_keyup;	/* timer_end for key release */
@@ -647,7 +646,7 @@ static int ir_probe(struct device *dev)
 		driver.any_irq = ir_rc5_irq;
 		driver.gpio_irq = NULL;
 		ir->rc5_gpio = 1;
-		 break;
+		break;
 	}
 	if (NULL == ir_codes) {
 		kfree(ir);
@@ -657,7 +656,7 @@ static int ir_probe(struct device *dev)
 
 	if (ir->rc5_gpio) {
 		u32 gpio;
-		/* enable remote irq */
+	    	/* enable remote irq */
 		bttv_gpio_inout(sub->core, (1 << 4), 1 << 4);
 		gpio = bttv_gpio_read(sub->core);
 		bttv_gpio_write(sub->core, gpio & ~(1 << 4));
@@ -726,6 +725,7 @@ static int ir_remove(struct device *dev)
 		del_timer(&ir->timer);
 		flush_scheduled_work();
 	}
+
 	if (ir->rc5_gpio) {
 		u32 gpio;
 
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index ca4027422e76..52c54b15fd4a 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -40,6 +40,7 @@
 #include <linux/i2c.h>
 #include <linux/workqueue.h>
 #include <asm/semaphore.h>
+
 #include <media/ir-common.h>
 #include <media/ir-kbd-i2c.h>
 
@@ -299,7 +300,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	struct IR_i2c *ir;
 	struct input_dev *input_dev;
 
-	ir = kzalloc(sizeof(struct IR_i2c), GFP_KERNEL);
+	ir = kzalloc(sizeof(struct IR_i2c),GFP_KERNEL);
 	input_dev = input_allocate_device();
 	if (!ir || !input_dev) {
 		kfree(ir);
@@ -360,7 +361,7 @@ static int ir_attach(struct i2c_adapter *adap, int addr,
 	/* register i2c device
 	 * At device register, IR codes may be changed to be
 	 * board dependent.
-	*/
+	 */
 	i2c_attach_client(&ir->c);
 
 	/* If IR not supported or disabled, unregisters driver */
diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c
index 61b99b0d8583..d86f8e92e534 100644
--- a/drivers/media/video/msp3400.c
+++ b/drivers/media/video/msp3400.c
@@ -134,7 +134,7 @@ struct msp3400c {
 	int rxsubchans;
 
 	int muted;
-	int left, right;        /* volume */
+	int left, right;	/* volume */
 	int bass, treble;
 
 	/* shadow register set */
@@ -882,6 +882,7 @@ static void watch_stereo(struct i2c_client *client)
 		msp->watch_stereo = 0;
 }
 
+
 static int msp3400c_thread(void *data)
 {
 	struct i2c_client *client = data;
@@ -889,6 +890,7 @@ static int msp3400c_thread(void *data)
 	struct CARRIER_DETECT *cd;
 	int count, max1,max2,val1,val2, val,this;
 
+
 	msp3400_info("msp3400 daemon started\n");
 	for (;;) {
 		msp3400_dbg_mediumvol("msp3400 thread: sleep\n");
@@ -1162,6 +1164,7 @@ static int msp3410d_thread(void *data)
 	int mode,val,i,std;
 
 	msp3400_info("msp3410 daemon started\n");
+
 	for (;;) {
 		msp3400_dbg_mediumvol("msp3410 thread: sleep\n");
 		msp34xx_sleep(msp,-1);
@@ -1384,6 +1387,7 @@ static int msp34xxg_thread(void *data)
 	int val, std, i;
 
 	msp3400_info("msp34xxg daemon started\n");
+
 	msp->source = 1; /* default */
 	for (;;) {
 		msp3400_dbg_mediumvol("msp34xxg thread: sleep\n");
diff --git a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c
index dca3ddfd510f..923322503e8f 100644
--- a/drivers/media/video/saa6588.c
+++ b/drivers/media/video/saa6588.c
@@ -422,7 +422,6 @@ static int saa6588_attach(struct i2c_adapter *adap, int addr, int kind)
 	s->timer.function = saa6588_timer;
 	s->timer.data = (unsigned long)s;
 	schedule_work(&s->work);
-
 	return 0;
 }
 
diff --git a/drivers/media/video/saa711x.c b/drivers/media/video/saa711x.c
index 59e13fdea780..31f7b950b01c 100644
--- a/drivers/media/video/saa711x.c
+++ b/drivers/media/video/saa711x.c
@@ -59,7 +59,7 @@ MODULE_PARM_DESC(debug, " Set the default Debug level.  Default: 0 (Off) - (0-1)
 #define dprintk(num, format, args...) \
 	do { \
 		if (debug >= num) \
-			printk(format , ##args); \
+			printk(format, ##args); \
 	} while (0)
 
 /* ----------------------------------------------------------------------- */
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index cf1e0c2b0351..a61d24f588f7 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -524,6 +524,7 @@ static int saa6752hs_attach(struct i2c_adapter *adap, int addr, int kind)
 
 	i2c_set_clientdata(&h->client, h);
 	i2c_attach_client(&h->client);
+
 	return 0;
 }
 
diff --git a/drivers/media/video/saa7134/saa7134-alsa.c b/drivers/media/video/saa7134/saa7134-alsa.c
index f2788ebb59a8..b24a26b065c2 100644
--- a/drivers/media/video/saa7134/saa7134-alsa.c
+++ b/drivers/media/video/saa7134/saa7134-alsa.c
@@ -51,6 +51,7 @@ MODULE_PARM_DESC(debug,"enable debug messages [alsa]");
 #define MIXER_ADDR_LINE2	2
 #define MIXER_ADDR_LAST		2
 
+
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;	/* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;	/* ID for this card */
 static int enable[SNDRV_CARDS] = {1, [1 ... (SNDRV_CARDS - 1)] = 0};
@@ -59,11 +60,14 @@ module_param_array(index, int, NULL, 0444);
 MODULE_PARM_DESC(index, "Index value for SAA7134 capture interface(s).");
 
 #define dprintk(fmt, arg...)    if (debug) \
-	printk(KERN_DEBUG "%s/alsa: " fmt, dev->name , ## arg)
+	printk(KERN_DEBUG "%s/alsa: " fmt, dev->name, ## arg)
+
+
 
 /*
  * Main chip structure
  */
+
 typedef struct snd_card_saa7134 {
 	snd_card_t *card;
 	spinlock_t mixer_lock;
@@ -1004,6 +1008,7 @@ static int saa7134_alsa_init(void)
 		printk(KERN_INFO "saa7134 ALSA: no saa7134 cards found\n");
 
 	return 0;
+
 }
 
 /*
@@ -1027,3 +1032,6 @@ module_init(saa7134_alsa_init);
 module_exit(saa7134_alsa_exit);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Ricardo Cerqueira");
+
+
+
diff --git a/drivers/media/video/saa7134/saa7134-core.c b/drivers/media/video/saa7134/saa7134-core.c
index 1a093bf176f3..23d8747338ed 100644
--- a/drivers/media/video/saa7134/saa7134-core.c
+++ b/drivers/media/video/saa7134/saa7134-core.c
@@ -71,6 +71,7 @@ static unsigned int radio_nr[] = {[0 ... (SAA7134_MAXBOARDS - 1)] = UNSET };
 static unsigned int tuner[]    = {[0 ... (SAA7134_MAXBOARDS - 1)] = UNSET };
 static unsigned int card[]     = {[0 ... (SAA7134_MAXBOARDS - 1)] = UNSET };
 
+
 module_param_array(video_nr, int, NULL, 0444);
 module_param_array(vbi_nr,   int, NULL, 0444);
 module_param_array(radio_nr, int, NULL, 0444);
diff --git a/drivers/media/video/saa7134/saa7134-empress.c b/drivers/media/video/saa7134/saa7134-empress.c
index e9ec69efb4c9..575f3e835f91 100644
--- a/drivers/media/video/saa7134/saa7134-empress.c
+++ b/drivers/media/video/saa7134/saa7134-empress.c
@@ -36,6 +36,7 @@ MODULE_AUTHOR("Gerd Knorr <kraxel@bytesex.org> [SuSE Labs]");
 MODULE_LICENSE("GPL");
 
 static unsigned int empress_nr[] = {[0 ... (SAA7134_MAXBOARDS - 1)] = UNSET };
+
 module_param_array(empress_nr, int, NULL, 0444);
 MODULE_PARM_DESC(empress_nr,"ts device number");
 
diff --git a/drivers/media/video/saa7134/saa7134-i2c.c b/drivers/media/video/saa7134/saa7134-i2c.c
index 7575043f0874..df9dd36721e0 100644
--- a/drivers/media/video/saa7134/saa7134-i2c.c
+++ b/drivers/media/video/saa7134/saa7134-i2c.c
@@ -333,7 +333,7 @@ static int attach_inform(struct i2c_client *client)
 	struct tuner_setup tun_setup;
 
 	d1printk( "%s i2c attach [addr=0x%x,client=%s]\n",
-		 client->driver->name, client->addr, client->name);
+		client->driver->name, client->addr, client->name);
 
 	/* Am I an i2c remote control? */
 
diff --git a/drivers/media/video/saa7134/saa7134-oss.c b/drivers/media/video/saa7134/saa7134-oss.c
index 1f68b9f3fdec..513a699a6df2 100644
--- a/drivers/media/video/saa7134/saa7134-oss.c
+++ b/drivers/media/video/saa7134/saa7134-oss.c
@@ -792,11 +792,11 @@ static irqreturn_t saa7134_oss_irq(int irq, void *dev_id, struct pt_regs *regs)
 		status = saa_readl(SAA7134_IRQ_STATUS);
 
 		if (report & SAA7134_IRQ_REPORT_DONE_RA3) {
-		        handled = 1;
-		        saa_writel(SAA7134_IRQ_REPORT,report);
-		        saa7134_irq_oss_done(dev, status);
+			handled = 1;
+			saa_writel(SAA7134_IRQ_REPORT,report);
+			saa7134_irq_oss_done(dev, status);
 		} else {
-		        goto out;
+			goto out;
 		}
 	}
 
@@ -811,7 +811,7 @@ int saa7134_oss_init1(struct saa7134_dev *dev)
 {
 
 	if ((request_irq(dev->pci->irq, saa7134_oss_irq,
-		         SA_SHIRQ | SA_INTERRUPT, dev->name,
+			 SA_SHIRQ | SA_INTERRUPT, dev->name,
 			(void*) &dev->dmasound)) < 0)
 		return -1;
 
@@ -905,20 +905,20 @@ static int saa7134_dsp_create(struct saa7134_dev *dev)
 
 	err = dev->dmasound.minor_dsp =
 		register_sound_dsp(&saa7134_dsp_fops,
-					dsp_nr[dev->nr]);
+				   dsp_nr[dev->nr]);
 	if (err < 0) {
 		goto fail;
 	}
 	printk(KERN_INFO "%s: registered device dsp%d\n",
-		dev->name,dev->dmasound.minor_dsp >> 4);
+	       dev->name,dev->dmasound.minor_dsp >> 4);
 
 	err = dev->dmasound.minor_mixer =
 		register_sound_mixer(&saa7134_mixer_fops,
-					mixer_nr[dev->nr]);
+				     mixer_nr[dev->nr]);
 	if (err < 0)
 		goto fail;
 	printk(KERN_INFO "%s: registered device mixer%d\n",
-		dev->name,dev->dmasound.minor_mixer >> 4);
+	       dev->name,dev->dmasound.minor_mixer >> 4);
 
 	return 0;
 
@@ -994,6 +994,7 @@ static void saa7134_oss_exit(void)
 			continue;
 
 		oss_device_exit(dev);
+
 	}
 
 	printk(KERN_INFO "saa7134 OSS driver for DMA sound unloaded\n");
diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
index 14a2066a61b2..2f2414e90e8b 100644
--- a/drivers/media/video/tda9887.c
+++ b/drivers/media/video/tda9887.c
@@ -12,6 +12,7 @@
 #include <media/audiochip.h>
 #include <media/tuner.h>
 
+
 /* Chips:
    TDA9885 (PAL, NTSC)
    TDA9886 (PAL, SECAM, NTSC)
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index 67757fe07d8d..5b20e8177cad 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -188,7 +188,7 @@ static int chip_write(struct CHIPSTATE *chip, int subaddr, int val)
 		buffer[1] = val;
 		if (2 != i2c_master_send(&chip->c,buffer,2)) {
 			tvaudio_warn("%s: I/O error (write reg%d=0x%x)\n",
-						chip->c.name, subaddr, val);
+			chip->c.name, subaddr, val);
 			return -1;
 		}
 	}
@@ -216,7 +216,7 @@ static int chip_read(struct CHIPSTATE *chip)
 		chip->c.name);
 		return -1;
 	}
-	tvaudio_dbg("%s: chip_read: 0x%x\n",chip->c.name,buffer);
+	tvaudio_dbg("%s: chip_read: 0x%x\n",chip->c.name, buffer);
 	return buffer;
 }
 
@@ -235,7 +235,7 @@ static int chip_read2(struct CHIPSTATE *chip, int subaddr)
 		return -1;
 	}
 	tvaudio_dbg("%s: chip_read2: reg%d=0x%x\n",
-			chip->c.name,subaddr,read[0]);
+		chip->c.name, subaddr,read[0]);
 	return read[0];
 }
 
@@ -248,7 +248,7 @@ static int chip_cmd(struct CHIPSTATE *chip, char *name, audiocmd *cmd)
 
 	/* update our shadow register set; print bytes if (debug > 0) */
 	tvaudio_dbg("%s: chip_cmd(%s): reg=%d, data:",
-		chip->c.name,name,cmd->bytes[0]);
+		chip->c.name, name,cmd->bytes[0]);
 	for (i = 1; i < cmd->count; i++) {
 		if (debug)
 			printk(" 0x%x",cmd->bytes[i]);
@@ -322,7 +322,7 @@ static void generic_checkmode(struct CHIPSTATE *chip)
 	int mode = desc->getmode(chip);
 
 	if (mode == chip->prevmode)
-		return;
+	return;
 
 	tvaudio_dbg("%s: thread checkmode\n", chip->c.name);
 	chip->prevmode = mode;
@@ -1508,13 +1508,13 @@ static int chip_attach(struct i2c_adapter *adap, int addr, int kind)
 	tvaudio_info("%s found @ 0x%x (%s)\n", desc->name, addr<<1, adap->name);
 	if (desc->flags) {
 		tvaudio_dbg("matches:%s%s%s.\n",
-		        (desc->flags & CHIP_HAS_VOLUME)     ? " volume"      : "",
-		        (desc->flags & CHIP_HAS_BASSTREBLE) ? " bass/treble" : "",
-		        (desc->flags & CHIP_HAS_INPUTSEL)   ? " audiomux"    : "");
+			(desc->flags & CHIP_HAS_VOLUME)     ? " volume"      : "",
+			(desc->flags & CHIP_HAS_BASSTREBLE) ? " bass/treble" : "",
+			(desc->flags & CHIP_HAS_INPUTSEL)   ? " audiomux"    : "");
 	}
 
 	/* fill required data structures */
-	strcpy(chip->c.name,desc->name);
+	strcpy(chip->c.name, desc->name);
 	chip->type = desc-chiplist;
 	chip->shadow.count = desc->registers+1;
 	chip->prevmode = -1;
@@ -1604,7 +1604,7 @@ static int chip_command(struct i2c_client *client,
 	struct CHIPSTATE *chip = i2c_get_clientdata(client);
 	struct CHIPDESC  *desc = chiplist + chip->type;
 
-	tvaudio_dbg("%s: chip_command 0x%x\n",chip->c.name,cmd);
+	tvaudio_dbg("%s: chip_command 0x%x\n", chip->c.name, cmd);
 
 	switch (cmd) {
 	case AUDC_SET_INPUT:
@@ -1624,7 +1624,7 @@ static int chip_command(struct i2c_client *client,
 
 	/* --- v4l ioctls --- */
 	/* take care: bttv does userspace copying, we'll get a
-					kernel pointer here... */
+	kernel pointer here... */
 	case VIDIOCGAUDIO:
 	{
 		struct video_audio *va = arg;
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c
index 5c04989471b6..cd7cf1bd12b4 100644
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -754,6 +754,7 @@ tveeprom_detect_client(struct i2c_adapter *adapter,
 	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name), "tveeprom");
 	i2c_attach_client(client);
+
 	return 0;
 }
 
diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index 81e6d4494e7d..97431e26d229 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -31,7 +31,7 @@ MODULE_PARM_DESC(debug, "Debug level (0-1)");
 #define dprintk(num, format, args...) \
 	do { \
 		if (debug >= num) \
-			printk(format , ##args); \
+			printk(format, ##args); \
 	} while (0)
 
 /* supported controls */
@@ -770,7 +770,6 @@ static int tvp5150_detect_client(struct i2c_adapter *adapter,
 
 	if (debug > 1)
 		dump_reg(client);
-
 	return 0;
 }
 
diff --git a/drivers/media/video/video-buf-dvb.c b/drivers/media/video/video-buf-dvb.c
index 55f129e964eb..0a4004a4393c 100644
--- a/drivers/media/video/video-buf-dvb.c
+++ b/drivers/media/video/video-buf-dvb.c
@@ -13,6 +13,7 @@
  * (at your option) any later version.
  */
 
+
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/device.h>
@@ -247,3 +248,4 @@ EXPORT_SYMBOL(videobuf_dvb_unregister);
  * compile-command: "make DVB=1"
  * End:
  */
+
diff --git a/include/linux/dvb/dmx.h b/include/linux/dvb/dmx.h
index 263e9e19f772..2787b8a22ff1 100644
--- a/include/linux/dvb/dmx.h
+++ b/include/linux/dvb/dmx.h
@@ -38,10 +38,10 @@ typedef enum
 {
 	DMX_OUT_DECODER, /* Streaming directly to decoder. */
 	DMX_OUT_TAP,     /* Output going to a memory buffer */
-		         /* (to be retrieved via the read command).*/
+			 /* (to be retrieved via the read command).*/
 	DMX_OUT_TS_TAP   /* Output multiplexed into a new TS  */
-		         /* (to be retrieved by reading from the */
-		         /* logical DVR device).                 */
+			 /* (to be retrieved by reading from the */
+			 /* logical DVR device).                 */
 } dmx_output_t;
 
 
diff --git a/include/linux/dvb/video.h b/include/linux/dvb/video.h
index 2e15051b234b..b1999bfeaa56 100644
--- a/include/linux/dvb/video.h
+++ b/include/linux/dvb/video.h
@@ -111,19 +111,19 @@ typedef
 struct video_highlight {
 	int     active;      /*    1=show highlight, 0=hide highlight */
 	uint8_t contrast1;   /*    7- 4  Pattern pixel contrast */
-		             /*    3- 0  Background pixel contrast */
+			     /*    3- 0  Background pixel contrast */
 	uint8_t contrast2;   /*    7- 4  Emphasis pixel-2 contrast */
-		             /*    3- 0  Emphasis pixel-1 contrast */
+			     /*    3- 0  Emphasis pixel-1 contrast */
 	uint8_t color1;      /*    7- 4  Pattern pixel color */
-		             /*    3- 0  Background pixel color */
+			     /*    3- 0  Background pixel color */
 	uint8_t color2;      /*    7- 4  Emphasis pixel-2 color */
-		             /*    3- 0  Emphasis pixel-1 color */
+			     /*    3- 0  Emphasis pixel-1 color */
 	uint32_t ypos;       /*   23-22  auto action mode */
-		             /*   21-12  start y */
-		             /*    9- 0  end y */
+			     /*   21-12  start y */
+			     /*    9- 0  end y */
 	uint32_t xpos;       /*   23-22  button color number */
-		             /*   21-12  start x */
-		             /*    9- 0  end x */
+			     /*   21-12  start x */
+			     /*    9- 0  end x */
 } video_highlight_t;
 
 
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index 101f56c8a641..e5be2b9b846b 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -14,7 +14,7 @@
 #include <linux/vmalloc.h>	/* for vmalloc() */
 #include <linux/mm.h>		/* for vmalloc_to_page() */
 
-#define SAA7146_VERSION_CODE 0x000500   /* 0.5.0 */
+#define SAA7146_VERSION_CODE 0x000500	/* 0.5.0 */
 
 #define saa7146_write(sxy,adr,dat)    writel((dat),(sxy->mem+(adr)))
 #define saa7146_read(sxy,adr)         readl(sxy->mem+(adr))
-- 
cgit v1.2.3


From dd815408106f3c56c3050493dda97f9355aa4971 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Date: Mon, 12 Dec 2005 00:37:31 -0800
Subject: [PATCH] V4L/DVB: (3151) I2C ID renamed to I2C_DRIVERID_INFRARED

I2C ID renamed to I2C_DRIVERID_INFRARED

Acked-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@brturbo.com.br>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/video/ir-kbd-i2c.c | 2 +-
 include/linux/i2c-id.h           | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 52c54b15fd4a..740e543311af 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -279,7 +279,7 @@ static int ir_probe(struct i2c_adapter *adap);
 
 static struct i2c_driver driver = {
 	.name           = "ir remote kbd driver",
-	.id             = I2C_DRIVERID_I2C_IR,
+	.id             = I2C_DRIVERID_INFRARED,
 	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = ir_probe,
 	.detach_client  = ir_detach,
diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index ef3b5632e63a..006c81ef4d50 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -108,7 +108,7 @@
 #define I2C_DRIVERID_SAA7127	72	/* saa7124 video encoder	*/
 #define I2C_DRIVERID_SAA711X	73	/* saa711x video encoders	*/
 #define I2C_DRIVERID_AKITAIOEXP	74	/* IO Expander on Sharp SL-C1000 */
-#define I2C_DRIVERID_I2C_IR	75	/* I2C InfraRed on Video boards */
+#define I2C_DRIVERID_INFRARED	75	/* I2C InfraRed on Video boards */
 
 #define I2C_DRIVERID_EXP0	0xF0	/* experimental use id's	*/
 #define I2C_DRIVERID_EXP1	0xF1
-- 
cgit v1.2.3


From 00d7c05ab168c10f9b520e07400923267bc04419 Mon Sep 17 00:00:00 2001
From: Keshavamurthy Anil S <anil.s.keshavamurthy@intel.com>
Date: Mon, 12 Dec 2005 00:37:33 -0800
Subject: [PATCH] kprobes: no probes on critical path

For Kprobes critical path is the path from debug break exception handler
till the control reaches kprobes exception code.  No probes can be
supported in this path as we will end up in recursion.

This patch prevents this by moving the below function to safe __kprobes
section onto which no probes can be inserted.

Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/kprobes.h | 7 ++++++-
 kernel/sys.c            | 3 ++-
 2 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index e373c4a9de53..434ecfd7cf48 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -37,6 +37,7 @@
 #include <linux/spinlock.h>
 #include <linux/rcupdate.h>
 
+#ifdef CONFIG_KPROBES
 #include <asm/kprobes.h>
 
 /* kprobe_status settings */
@@ -147,7 +148,6 @@ struct kretprobe_instance {
 	struct task_struct *task;
 };
 
-#ifdef CONFIG_KPROBES
 extern spinlock_t kretprobe_lock;
 extern int arch_prepare_kprobe(struct kprobe *p);
 extern void arch_copy_kprobe(struct kprobe *p);
@@ -195,6 +195,11 @@ void add_rp_inst(struct kretprobe_instance *ri);
 void kprobe_flush_task(struct task_struct *tk);
 void recycle_rp_inst(struct kretprobe_instance *ri);
 #else /* CONFIG_KPROBES */
+
+#define __kprobes	/**/
+struct jprobe;
+struct kretprobe;
+
 static inline struct kprobe *kprobe_running(void)
 {
 	return NULL;
diff --git a/kernel/sys.c b/kernel/sys.c
index bce933ebb29f..eecf84526afe 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -32,6 +32,7 @@
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
+#include <linux/kprobes.h>
 
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -168,7 +169,7 @@ EXPORT_SYMBOL(notifier_chain_unregister);
  *	of the last notifier function called.
  */
  
-int notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
+int __kprobes notifier_call_chain(struct notifier_block **n, unsigned long val, void *v)
 {
 	int ret=NOTIFY_DONE;
 	struct notifier_block *nb = *n;
-- 
cgit v1.2.3


From bf8d5c52c3b6b27061e3b7d779057fd9a6cac164 Mon Sep 17 00:00:00 2001
From: Keshavamurthy Anil S <anil.s.keshavamurthy@intel.com>
Date: Mon, 12 Dec 2005 00:37:34 -0800
Subject: [PATCH] kprobes: increment kprobe missed count for multiprobes

When multiple probes are registered at the same address and if due to some
recursion (probe getting triggered within a probe handler), we skip calling
pre_handlers and just increment nmissed field.

The below patch make sure it walks the list for multiple probes case.
Without the below patch we get incorrect results of nmissed count for
multiple probe case.

Signed-off-by: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/i386/kernel/kprobes.c    |  2 +-
 arch/ia64/kernel/kprobes.c    |  2 +-
 arch/powerpc/kernel/kprobes.c |  2 +-
 arch/sparc64/kernel/kprobes.c |  2 +-
 arch/x86_64/kernel/kprobes.c  |  2 +-
 include/linux/kprobes.h       |  1 +
 kernel/kprobes.c              | 13 +++++++++++++
 7 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/kprobes.c b/arch/i386/kernel/kprobes.c
index 32b0c24ab9a6..19edcd526ba4 100644
--- a/arch/i386/kernel/kprobes.c
+++ b/arch/i386/kernel/kprobes.c
@@ -191,7 +191,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 */
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, regs, kcb);
-			p->nmissed++;
+			kprobes_inc_nmissed_count(p);
 			prepare_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c
index 2895d6e6062f..89a70400c4f6 100644
--- a/arch/ia64/kernel/kprobes.c
+++ b/arch/ia64/kernel/kprobes.c
@@ -630,7 +630,7 @@ static int __kprobes pre_kprobes_handler(struct die_args *args)
 			 */
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, kcb);
-			p->nmissed++;
+			kprobes_inc_nmissed_count(p);
 			prepare_ss(p, regs);
 			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 511af54e6230..5368f9c2e6bf 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -177,7 +177,7 @@ static inline int kprobe_handler(struct pt_regs *regs)
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, regs, kcb);
 			kcb->kprobe_saved_msr = regs->msr;
-			p->nmissed++;
+			kprobes_inc_nmissed_count(p);
 			prepare_singlestep(p, regs);
 			kcb->kprobe_status = KPROBE_REENTER;
 			return 1;
diff --git a/arch/sparc64/kernel/kprobes.c b/arch/sparc64/kernel/kprobes.c
index 96bd09b098f4..a97b0f0727ab 100644
--- a/arch/sparc64/kernel/kprobes.c
+++ b/arch/sparc64/kernel/kprobes.c
@@ -138,7 +138,7 @@ static int __kprobes kprobe_handler(struct pt_regs *regs)
 			 */
 			save_previous_kprobe(kcb);
 			set_current_kprobe(p, regs, kcb);
-			p->nmissed++;
+			kprobes_inc_nmissed_count(p);
 			kcb->kprobe_status = KPROBE_REENTER;
 			prepare_singlestep(p, regs, kcb);
 			return 1;
diff --git a/arch/x86_64/kernel/kprobes.c b/arch/x86_64/kernel/kprobes.c
index dddeb678b440..afe11f4fbd1d 100644
--- a/arch/x86_64/kernel/kprobes.c
+++ b/arch/x86_64/kernel/kprobes.c
@@ -329,7 +329,7 @@ int __kprobes kprobe_handler(struct pt_regs *regs)
 				 */
 				save_previous_kprobe(kcb);
 				set_current_kprobe(p, regs, kcb);
-				p->nmissed++;
+				kprobes_inc_nmissed_count(p);
 				prepare_singlestep(p, regs);
 				kcb->kprobe_status = KPROBE_REENTER;
 				return 1;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 434ecfd7cf48..c03f2dc933de 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -158,6 +158,7 @@ extern int arch_init_kprobes(void);
 extern void show_registers(struct pt_regs *regs);
 extern kprobe_opcode_t *get_insn_slot(void);
 extern void free_insn_slot(kprobe_opcode_t *slot);
+extern void kprobes_inc_nmissed_count(struct kprobe *p);
 
 /* Get the kprobe at this addr (if any) - called with preemption disabled */
 struct kprobe *get_kprobe(void *addr);
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index e4f0fc62bd3e..3bb71e63a37e 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -246,6 +246,19 @@ static int __kprobes aggr_break_handler(struct kprobe *p, struct pt_regs *regs)
 	return ret;
 }
 
+/* Walks the list and increments nmissed count for multiprobe case */
+void __kprobes kprobes_inc_nmissed_count(struct kprobe *p)
+{
+	struct kprobe *kp;
+	if (p->pre_handler != aggr_pre_handler) {
+		p->nmissed++;
+	} else {
+		list_for_each_entry_rcu(kp, &p->list, list)
+			kp->nmissed++;
+	}
+	return;
+}
+
 /* Called with kretprobe_lock held */
 struct kretprobe_instance __kprobes *get_free_rp_inst(struct kretprobe *rp)
 {
-- 
cgit v1.2.3


From 68799398cea44b81d1e919f842d8d84d471053d5 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@redhat.com>
Date: Mon, 12 Dec 2005 00:37:35 -0800
Subject: [PATCH] broken cast in parport_pc

Spotted by a Fedora user.  Compiling with DEBUG_PARPORT set fails due to
the broken cast.

Just remove it.

Signed-off-by: Dave Jones <davej@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/parport_pc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h
index 7825c76cbd00..c6f762470879 100644
--- a/include/linux/parport_pc.h
+++ b/include/linux/parport_pc.h
@@ -86,7 +86,7 @@ extern __inline__ void dump_parport_state (char *str, struct parport *p)
 	unsigned char dcr = inb (CONTROL (p));
 	unsigned char dsr = inb (STATUS (p));
 	static char *ecr_modes[] = {"SPP", "PS2", "PPFIFO", "ECP", "xXx", "yYy", "TST", "CFG"};
-	const struct parport_pc_private *priv = (parport_pc_private *)p->physport->private_data;
+	const struct parport_pc_private *priv = p->physport->private_data;
 	int i;
 
 	printk (KERN_DEBUG "*** parport state (%s): ecr=[%s", str, ecr_modes[(ecr & 0xe0) >> 5]);
-- 
cgit v1.2.3


From 4743484718e1d710321f24f8ef7d0124a48291b3 Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 12 Dec 2005 22:17:16 -0800
Subject: [PATCH] fbcon: Add ability to save/restore graphics state

Add hooks to save and restore the graphics state.  These hooks are called in
fbcon_blank() when entering/leaving KD_GRAPHICS mode.  This is needed by
savagefb at least so it can cooperate with savage_dri and by cyblafb.

State save/restoration can be full or partial.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/console/fbcon.c | 5 ++++-
 include/linux/fb.h            | 6 ++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index c024ffd0266d..bd4500a8992e 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2191,11 +2191,14 @@ static int fbcon_blank(struct vc_data *vc, int blank, int mode_switch)
 		ops->graphics = 1;
 
 		if (!blank) {
+			if (info->fbops->fb_save_state)
+				info->fbops->fb_save_state(info);
 			var.activate = FB_ACTIVATE_NOW | FB_ACTIVATE_FORCE;
 			fb_set_var(info, &var);
 			ops->graphics = 0;
 			ops->var = info->var;
-		}
+		} else if (info->fbops->fb_restore_state)
+			info->fbops->fb_restore_state(info);
 	}
 
  	if (!fbcon_is_inactive(vc, info)) {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 04a58f33ec53..55ccaf3706c1 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -617,6 +617,12 @@ struct fb_ops {
 
 	/* perform fb specific mmap */
 	int (*fb_mmap)(struct fb_info *info, struct file *file, struct vm_area_struct *vma);
+
+	/* save current hardware state */
+	void (*fb_save_state)(struct fb_info *info);
+
+	/* restore saved state */
+	void (*fb_restore_state)(struct fb_info *info);
 };
 
 #ifdef CONFIG_FB_TILEBLITTING
-- 
cgit v1.2.3


From 39942fd8ff57c8623451bbfaffe8a184cc8b463a Mon Sep 17 00:00:00 2001
From: Knut Petersen <Knut_Petersen@t-online.de>
Date: Mon, 12 Dec 2005 22:17:19 -0800
Subject: [PATCH] fbdev: fix switch to KD_TEXT, enhanced version

Every framebuffer driver relies on the assumption that the set_par()
function of the driver is called before drawing functions and other
functions dependent on the hardware state are executed.

Whenever you switch from X to a framebuffer console for the very first
time, there is a chance that a broken X system has _not_ set the mode to
KD_GRAPHICS, thus the vt and framebuffer code executes a screen redraw and
several other functions before a set_par() is executed.  This is believed
to be not a bug of linux but a bug of X/xdm.  At least some X releases used
by SuSE and Debian show this behaviour.

There was a 2nd case, but that has been fixed by Antonino Daplas on
10-dec-2005.

This patch allows drivers to set a flag to inform fbcon_switch() that they
prefer a set_par() call on every console switch, working around the
problems caused by the broken X releases.

The flag will be used by the next release of cyblafb and might help other
drivers that assume a hardware state different to the one used by X.

As the default behaviour does not change, this patch should be acceptable
to everybody.

Signed-off-by: Knut Petersen <Knut_Petersen@t-online.de>
Acked-by: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/console/fbcon.c |  3 ++-
 include/linux/fb.h            | 12 ++++++++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/video/console/fbcon.c b/drivers/video/console/fbcon.c
index 8068d2f7efe6..3660e51b2612 100644
--- a/drivers/video/console/fbcon.c
+++ b/drivers/video/console/fbcon.c
@@ -2103,7 +2103,8 @@ static int fbcon_switch(struct vc_data *vc)
 	fb_set_var(info, &var);
 	ops->var = info->var;
 
-	if (old_info != NULL && old_info != info) {
+	if (old_info != NULL && (old_info != info ||
+				 info->flags & FBINFO_MISC_ALWAYS_SETPAR)) {
 		if (info->fbops->fb_set_par)
 			info->fbops->fb_set_par(info);
 		fbcon_del_cursor_timer(old_info);
diff --git a/include/linux/fb.h b/include/linux/fb.h
index 55ccaf3706c1..dab3a4decb44 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -732,6 +732,18 @@ struct fb_tile_ops {
 						  from userspace */
 #define FBINFO_MISC_TILEBLITTING       0x20000 /* use tile blitting */
 
+/* A driver may set this flag to indicate that it does want a set_par to be
+ * called every time when fbcon_switch is executed. The advantage is that with
+ * this flag set you can really be shure that set_par is always called before
+ * any of the functions dependant on the correct hardware state or altering
+ * that state, even if you are using some broken X releases. The disadvantage
+ * is that it introduces unwanted delays to every console switch if set_par
+ * is slow. It is a good idea to try this flag in the drivers initialization
+ * code whenever there is a bug report related to switching between X and the
+ * framebuffer console.
+ */
+#define FBINFO_MISC_ALWAYS_SETPAR   0x40000
+
 struct fb_info {
 	int node;
 	int flags;
-- 
cgit v1.2.3


From be0d9b6c7aeaad1683059c00131cabd4c894c17c Mon Sep 17 00:00:00 2001
From: "Antonino A. Daplas" <adaplas@gmail.com>
Date: Mon, 12 Dec 2005 22:17:21 -0800
Subject: [PATCH] fbdev: Fix incorrect unaligned access in little-endian
 machines

The drawing function cfbfillrect does not work correctly when access is not
unsigned-long aligned.  It manifests as extra lines of pixels that are not
complete drawn.  Reversing the shift operator solves the problem, so I would
presume that this bug would manifest only on little endian machines.  The
function cfbcopyarea may also have this bug.

Aligned access should present no problems.

Signed-off-by: Antonino Daplas <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/video/cfbcopyarea.c |  8 ++++----
 drivers/video/cfbfillrect.c | 16 ++++++++--------
 drivers/video/cfbimgblt.c   | 36 ++++++++++++------------------------
 include/linux/fb.h          | 12 ++++++++++++
 4 files changed, 36 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/video/cfbcopyarea.c b/drivers/video/cfbcopyarea.c
index cdc71572cf35..74415325b016 100644
--- a/drivers/video/cfbcopyarea.c
+++ b/drivers/video/cfbcopyarea.c
@@ -64,8 +64,8 @@ bitcpy(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem *src
 	int const shift = dst_idx-src_idx;
 	int left, right;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (!shift) {
 		// Same alignment for source and dest
@@ -216,8 +216,8 @@ bitcpy_rev(unsigned long __iomem *dst, int dst_idx, const unsigned long __iomem
 
 	shift = dst_idx-src_idx;
 
-	first = ~0UL << (bits - 1 - dst_idx);
-	last = ~(~0UL << (bits - 1 - ((dst_idx-n) % bits)));
+	first = FB_SHIFT_LOW(~0UL, bits - 1 - dst_idx);
+	last = ~(FB_SHIFT_LOW(~0UL, bits - 1 - ((dst_idx-n) % bits)));
 
 	if (!shift) {
 		// Same alignment for source and dest
diff --git a/drivers/video/cfbfillrect.c b/drivers/video/cfbfillrect.c
index 167d9314e6eb..e5ff62e9cfb8 100644
--- a/drivers/video/cfbfillrect.c
+++ b/drivers/video/cfbfillrect.c
@@ -110,8 +110,8 @@ bitfill_aligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat, unsi
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -167,8 +167,8 @@ bitfill_unaligned(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -221,8 +221,8 @@ bitfill_aligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat,
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
@@ -290,8 +290,8 @@ bitfill_unaligned_rev(unsigned long __iomem *dst, int dst_idx, unsigned long pat
 	if (!n)
 		return;
 
-	first = ~0UL >> dst_idx;
-	last = ~(~0UL >> ((dst_idx+n) % bits));
+	first = FB_SHIFT_HIGH(~0UL, dst_idx);
+	last = ~(FB_SHIFT_HIGH(~0UL, (dst_idx+n) % bits));
 
 	if (dst_idx+n <= bits) {
 		// Single word
diff --git a/drivers/video/cfbimgblt.c b/drivers/video/cfbimgblt.c
index 7a01742a82af..910e2338a27e 100644
--- a/drivers/video/cfbimgblt.c
+++ b/drivers/video/cfbimgblt.c
@@ -76,18 +76,6 @@ static u32 cfb_tab32[] = {
 #define FB_WRITEL fb_writel
 #define FB_READL  fb_readl
 
-#if defined (__BIG_ENDIAN)
-#define LEFT_POS(bpp)          (32 - bpp)
-#define SHIFT_HIGH(val, bits)  ((val) >> (bits))
-#define SHIFT_LOW(val, bits)   ((val) << (bits))
-#define BIT_NR(b)              (7 - (b))
-#else
-#define LEFT_POS(bpp)          (0)
-#define SHIFT_HIGH(val, bits)  ((val) << (bits))
-#define SHIFT_LOW(val, bits)   ((val) >> (bits))
-#define BIT_NR(b)              (b)
-#endif
-
 static inline void color_imageblit(const struct fb_image *image, 
 				   struct fb_info *p, u8 __iomem *dst1, 
 				   u32 start_index,
@@ -109,7 +97,7 @@ static inline void color_imageblit(const struct fb_image *image,
 		val = 0;
 		
 		if (start_index) {
-			u32 start_mask = ~(SHIFT_HIGH(~(u32)0, start_index));
+			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0, start_index));
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
@@ -119,20 +107,20 @@ static inline void color_imageblit(const struct fb_image *image,
 				color = palette[*src];
 			else
 				color = *src;
-			color <<= LEFT_POS(bpp);
-			val |= SHIFT_HIGH(color, shift);
+			color <<= FB_LEFT_POS(bpp);
+			val |= FB_SHIFT_HIGH(color, shift);
 			if (shift >= null_bits) {
 				FB_WRITEL(val, dst++);
 	
 				val = (shift == null_bits) ? 0 : 
-					SHIFT_LOW(color, 32 - shift);
+					FB_SHIFT_LOW(color, 32 - shift);
 			}
 			shift += bpp;
 			shift &= (32 - 1);
 			src++;
 		}
 		if (shift) {
-			u32 end_mask = SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
@@ -162,8 +150,8 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 	u32 i, j, l;
 	
 	dst2 = (u32 __iomem *) dst1;
-	fgcolor <<= LEFT_POS(bpp);
-	bgcolor <<= LEFT_POS(bpp);
+	fgcolor <<= FB_LEFT_POS(bpp);
+	bgcolor <<= FB_LEFT_POS(bpp);
 
 	for (i = image->height; i--; ) {
 		shift = val = 0;
@@ -174,21 +162,21 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 
 		/* write leading bits */
 		if (start_index) {
-			u32 start_mask = ~(SHIFT_HIGH(~(u32)0, start_index));
+			u32 start_mask = ~(FB_SHIFT_HIGH(~(u32)0,start_index));
 			val = FB_READL(dst) & start_mask;
 			shift = start_index;
 		}
 
 		while (j--) {
 			l--;
-			color = (*s & 1 << (BIT_NR(l))) ? fgcolor : bgcolor;
-			val |= SHIFT_HIGH(color, shift);
+			color = (*s & 1 << (FB_BIT_NR(l))) ? fgcolor : bgcolor;
+			val |= FB_SHIFT_HIGH(color, shift);
 			
 			/* Did the bitshift spill bits to the next long? */
 			if (shift >= null_bits) {
 				FB_WRITEL(val, dst++);
 				val = (shift == null_bits) ? 0 :
-					 SHIFT_LOW(color,32 - shift);
+					FB_SHIFT_LOW(color,32 - shift);
 			}
 			shift += bpp;
 			shift &= (32 - 1);
@@ -197,7 +185,7 @@ static inline void slow_imageblit(const struct fb_image *image, struct fb_info *
 
 		/* write trailing bits */
  		if (shift) {
-			u32 end_mask = SHIFT_HIGH(~(u32)0, shift);
+			u32 end_mask = FB_SHIFT_HIGH(~(u32)0, shift);
 
 			FB_WRITEL((FB_READL(dst) & end_mask) | val, dst);
 		}
diff --git a/include/linux/fb.h b/include/linux/fb.h
index dab3a4decb44..a973be2cfe61 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -833,6 +833,18 @@ struct fb_info {
 #define fb_writeq(b,addr) (*(volatile u64 *) (addr) = (b))
 #define fb_memset memset
 
+#endif
+
+#if defined (__BIG_ENDIAN)
+#define FB_LEFT_POS(bpp)          (32 - bpp)
+#define FB_SHIFT_HIGH(val, bits)  ((val) >> (bits))
+#define FB_SHIFT_LOW(val, bits)   ((val) << (bits))
+#define FB_BIT_NR(b)              (7 - (b))
+#else
+#define FB_LEFT_POS(bpp)          (0)
+#define FB_SHIFT_HIGH(val, bits)  ((val) << (bits))
+#define FB_SHIFT_LOW(val, bits)   ((val) >> (bits))
+#define FB_BIT_NR(b)              (b)
 #endif
 
     /*
-- 
cgit v1.2.3


From a2a7a662f80d8b7f2295a36de1f9b033ed0b910c Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 13 Dec 2005 14:48:31 +0900
Subject: [PATCH] libata: implement ata_exec_internal()

This patch implements ata_exec_internal() function which performs
libata internal command execution.  Previously, this was done by each
user by manually initializing a qc, issueing it, waiting for its
completion and handling errors.  In addition to obvious code
factoring, using ata_exec_internal() fixes the following bugs.

* qc not freed on issue failure
* ap->qactive clearing could race with the next internal command
* race between timeout handling and irq
* ignoring error condition not represented in tf->status

Also, qc & hardware are not accessed anymore once it's completed,
making internal commands more conformant with general semantics.
ata_exec_internal() also makes it easy to issue internal commands from
multiple threads if that becomes necessary.

This patch only implements ata_exec_internal().  A following patch
will convert all users.

Signed-off-by: Tejun Heo <htejun@gmail.com>

--

Jeff, all patches have been regenerated against upstream branch as of
today.  (575ab52a218e4ff0667a6cbd972c3af443ee8713)

Also, I took out a debug printk from ata_exec_internal (don't know how
that one got left there).  Other than that, all patches are identical
to the previous posting.

Thanks. :-)
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/libata-core.c | 99 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/libata.h     |  2 +
 2 files changed, 101 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index a0060cf31e0d..de80abeab065 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -1046,6 +1046,105 @@ static unsigned int ata_pio_modes(const struct ata_device *adev)
 	return modes;
 }
 
+struct ata_exec_internal_arg {
+	unsigned int err_mask;
+	struct ata_taskfile *tf;
+	struct completion *waiting;
+};
+
+int ata_qc_complete_internal(struct ata_queued_cmd *qc)
+{
+	struct ata_exec_internal_arg *arg = qc->private_data;
+	struct completion *waiting = arg->waiting;
+
+	if (!(qc->err_mask & ~AC_ERR_DEV))
+		qc->ap->ops->tf_read(qc->ap, arg->tf);
+	arg->err_mask = qc->err_mask;
+	arg->waiting = NULL;
+	complete(waiting);
+
+	return 0;
+}
+
+/**
+ *	ata_exec_internal - execute libata internal command
+ *	@ap: Port to which the command is sent
+ *	@dev: Device to which the command is sent
+ *	@tf: Taskfile registers for the command and the result
+ *	@dma_dir: Data tranfer direction of the command
+ *	@buf: Data buffer of the command
+ *	@buflen: Length of data buffer
+ *
+ *	Executes libata internal command with timeout.  @tf contains
+ *	command on entry and result on return.  Timeout and error
+ *	conditions are reported via return value.  No recovery action
+ *	is taken after a command times out.  It's caller's duty to
+ *	clean up after timeout.
+ *
+ *	LOCKING:
+ *	None.  Should be called with kernel context, might sleep.
+ */
+
+static unsigned
+ata_exec_internal(struct ata_port *ap, struct ata_device *dev,
+		  struct ata_taskfile *tf,
+		  int dma_dir, void *buf, unsigned int buflen)
+{
+	u8 command = tf->command;
+	struct ata_queued_cmd *qc;
+	DECLARE_COMPLETION(wait);
+	unsigned long flags;
+	struct ata_exec_internal_arg arg;
+
+	spin_lock_irqsave(&ap->host_set->lock, flags);
+
+	qc = ata_qc_new_init(ap, dev);
+	BUG_ON(qc == NULL);
+
+	qc->tf = *tf;
+	qc->dma_dir = dma_dir;
+	if (dma_dir != DMA_NONE) {
+		ata_sg_init_one(qc, buf, buflen);
+		qc->nsect = buflen / ATA_SECT_SIZE;
+	}
+
+	arg.waiting = &wait;
+	arg.tf = tf;
+	qc->private_data = &arg;
+	qc->complete_fn = ata_qc_complete_internal;
+
+	if (ata_qc_issue(qc))
+		goto issue_fail;
+
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+
+	if (!wait_for_completion_timeout(&wait, ATA_TMOUT_INTERNAL)) {
+		spin_lock_irqsave(&ap->host_set->lock, flags);
+
+		/* We're racing with irq here.  If we lose, the
+		 * following test prevents us from completing the qc
+		 * again.  If completion irq occurs after here but
+		 * before the caller cleans up, it will result in a
+		 * spurious interrupt.  We can live with that.
+		 */
+		if (arg.waiting) {
+			qc->err_mask = AC_ERR_OTHER;
+			ata_qc_complete(qc);
+			printk(KERN_WARNING "ata%u: qc timeout (cmd 0x%x)\n",
+			       ap->id, command);
+		}
+
+		spin_unlock_irqrestore(&ap->host_set->lock, flags);
+	}
+
+	return arg.err_mask;
+
+ issue_fail:
+	ata_qc_free(qc);
+	spin_unlock_irqrestore(&ap->host_set->lock, flags);
+	return AC_ERR_OTHER;
+}
+
 static int ata_qc_wait_err(struct ata_queued_cmd *qc,
 			   struct completion *wait)
 {
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e18ce039cdfd..833e57afd54c 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -135,6 +135,8 @@ enum {
 	ATA_TMOUT_BOOT_QUICK	= 7 * HZ,	/* hueristic */
 	ATA_TMOUT_CDB		= 30 * HZ,
 	ATA_TMOUT_CDB_QUICK	= 5 * HZ,
+	ATA_TMOUT_INTERNAL	= 30 * HZ,
+	ATA_TMOUT_INTERNAL_QUICK = 5 * HZ,
 
 	/* ATA bus states */
 	BUS_UNKNOWN		= 0,
-- 
cgit v1.2.3


From b5632303401c231bf270ef36f1013e52caf4caf9 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Tue, 13 Dec 2005 14:51:25 +0900
Subject: [PATCH] libata: remove unused qc->waiting

There is no user of qc->waiting left after ata_exec_internal()
changes.  Kill the field.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jeff Garzik <jgarzik@pobox.com>
---
 drivers/scsi/libata-core.c | 14 ++------------
 include/linux/libata.h     |  2 --
 2 files changed, 2 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 1c4dbf3e9818..9ea102587914 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -3503,7 +3503,7 @@ struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 static void __ata_qc_complete(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
-	unsigned int tag, do_clear = 0;
+	unsigned int tag;
 
 	qc->flags = 0;
 	tag = qc->tag;
@@ -3511,17 +3511,8 @@ static void __ata_qc_complete(struct ata_queued_cmd *qc)
 		if (tag == ap->active_tag)
 			ap->active_tag = ATA_TAG_POISON;
 		qc->tag = ATA_TAG_POISON;
-		do_clear = 1;
-	}
-
-	if (qc->waiting) {
-		struct completion *waiting = qc->waiting;
-		qc->waiting = NULL;
-		complete(waiting);
-	}
-
-	if (likely(do_clear))
 		clear_bit(tag, &ap->qactive);
+	}
 }
 
 /**
@@ -3537,7 +3528,6 @@ static void __ata_qc_complete(struct ata_queued_cmd *qc)
 void ata_qc_free(struct ata_queued_cmd *qc)
 {
 	assert(qc != NULL);	/* ata_qc_from_tag _might_ return NULL */
-	assert(qc->waiting == NULL);	/* nothing should be waiting */
 
 	__ata_qc_complete(qc);
 }
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 833e57afd54c..46337e71613e 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -285,8 +285,6 @@ struct ata_queued_cmd {
 
 	ata_qc_cb_t		complete_fn;
 
-	struct completion	*waiting;
-
 	void			*private_data;
 };
 
-- 
cgit v1.2.3


From 50630195bbdfe1ca775d94cd68a5f18bc1b717e4 Mon Sep 17 00:00:00 2001
From: Jeff Garzik <jgarzik@pobox.com>
Date: Tue, 13 Dec 2005 02:29:45 -0500
Subject: [libata] mark certain hardware (or drivers) with a no-atapi flag

Some hardware does not support the PACKET command at all.
Other hardware supports ATAPI, but the driver does something nasty such
as calling BUG() when an ATAPI command is issued.

For these such cases, we mark them with a new flag, ATA_FLAG_NO_ATAPI.

Initial version contributed by Ben Collins.
---
 drivers/scsi/libata-scsi.c  |  7 +++++--
 drivers/scsi/sata_mv.c      |  3 ++-
 drivers/scsi/sata_promise.c | 12 ++++++------
 drivers/scsi/sata_sx4.c     |  3 ++-
 include/linux/libata.h      |  1 +
 5 files changed, 16 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index 379e87089764..72ddba98f8fb 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -2173,9 +2173,12 @@ ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
 	if (unlikely(!ata_dev_present(dev)))
 		return NULL;
 
-	if (!atapi_enabled) {
-		if (unlikely(dev->class == ATA_DEV_ATAPI))
+	if (!atapi_enabled || (ap->flags & ATA_FLAG_NO_ATAPI)) {
+		if (unlikely(dev->class == ATA_DEV_ATAPI)) {
+			printk(KERN_WARNING "ata%u(%u): WARNING: ATAPI is %s, device ignored.\n",
+			       ap->id, dev->devno, atapi_enabled ? "not supported with this driver" : "disabled");
 			return NULL;
+		}
 	}
 
 	return dev;
diff --git a/drivers/scsi/sata_mv.c b/drivers/scsi/sata_mv.c
index ab7432a5778e..9321cdf45680 100644
--- a/drivers/scsi/sata_mv.c
+++ b/drivers/scsi/sata_mv.c
@@ -86,7 +86,8 @@ enum {
 	MV_FLAG_DUAL_HC		= (1 << 30),  /* two SATA Host Controllers */
 	MV_FLAG_IRQ_COALESCE	= (1 << 29),  /* IRQ coalescing capability */
 	MV_COMMON_FLAGS		= (ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
-				   ATA_FLAG_SATA_RESET | ATA_FLAG_MMIO),
+				   ATA_FLAG_SATA_RESET | ATA_FLAG_MMIO |
+				   ATA_FLAG_NO_ATAPI),
 	MV_6XXX_FLAGS		= MV_FLAG_IRQ_COALESCE,
 
 	CRQB_FLAG_READ		= (1 << 0),
diff --git a/drivers/scsi/sata_promise.c b/drivers/scsi/sata_promise.c
index 8a8e3e3ef0ed..2691625f9bce 100644
--- a/drivers/scsi/sata_promise.c
+++ b/drivers/scsi/sata_promise.c
@@ -70,6 +70,9 @@ enum {
 	PDC_HAS_PATA		= (1 << 1), /* PDC20375 has PATA */
 
 	PDC_RESET		= (1 << 11), /* HDMA reset */
+
+	PDC_COMMON_FLAGS	= ATA_FLAG_NO_LEGACY | ATA_FLAG_SRST |
+				  ATA_FLAG_MMIO | ATA_FLAG_NO_ATAPI,
 };
 
 
@@ -162,8 +165,7 @@ static struct ata_port_info pdc_port_info[] = {
 	/* board_2037x */
 	{
 		.sht		= &pdc_ata_sht,
-		.host_flags	= ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
-				  ATA_FLAG_SRST | ATA_FLAG_MMIO,
+		.host_flags	= PDC_COMMON_FLAGS | ATA_FLAG_SATA,
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
@@ -173,8 +175,7 @@ static struct ata_port_info pdc_port_info[] = {
 	/* board_20319 */
 	{
 		.sht		= &pdc_ata_sht,
-		.host_flags	= ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
-				  ATA_FLAG_SRST | ATA_FLAG_MMIO,
+		.host_flags	= PDC_COMMON_FLAGS | ATA_FLAG_SATA,
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
@@ -184,8 +185,7 @@ static struct ata_port_info pdc_port_info[] = {
 	/* board_20619 */
 	{
 		.sht		= &pdc_ata_sht,
-		.host_flags	= ATA_FLAG_NO_LEGACY | ATA_FLAG_SRST |
-				  ATA_FLAG_MMIO | ATA_FLAG_SLAVE_POSS,
+		.host_flags	= PDC_COMMON_FLAGS | ATA_FLAG_SLAVE_POSS,
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
diff --git a/drivers/scsi/sata_sx4.c b/drivers/scsi/sata_sx4.c
index dcc3ad9a9d6e..ac7b0d819ebc 100644
--- a/drivers/scsi/sata_sx4.c
+++ b/drivers/scsi/sata_sx4.c
@@ -220,7 +220,8 @@ static struct ata_port_info pdc_port_info[] = {
 	{
 		.sht		= &pdc_sata_sht,
 		.host_flags	= ATA_FLAG_SATA | ATA_FLAG_NO_LEGACY |
-				  ATA_FLAG_SRST | ATA_FLAG_MMIO,
+				  ATA_FLAG_SRST | ATA_FLAG_MMIO |
+				  ATA_FLAG_NO_ATAPI,
 		.pio_mask	= 0x1f, /* pio0-4 */
 		.mwdma_mask	= 0x07, /* mwdma0-2 */
 		.udma_mask	= 0x7f, /* udma0-6 ; FIXME */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index f2dbb684ce9e..41ea7dbc1755 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -122,6 +122,7 @@ enum {
 	ATA_FLAG_NOINTR		= (1 << 9), /* FIXME: Remove this once
 					     * proper HSM is in place. */
 	ATA_FLAG_DEBUGMSG	= (1 << 10),
+	ATA_FLAG_NO_ATAPI	= (1 << 11), /* No ATAPI support */
 
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
-- 
cgit v1.2.3


From 4e06cbd42c41f9e49fcfe5ee45c749eefaae9cf4 Mon Sep 17 00:00:00 2001
From: "Moore, Eric Dean" <Eric.Moore@lsil.com>
Date: Thu, 1 Dec 2005 16:51:02 -0700
Subject: [SCSI] pci_ids.h: add subclass code for SAS Controllers

Signed-off-by: Eric Moore <Eric.Moore@lsil.com>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 include/linux/pci_ids.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1e737e269db9..aa76fc4e38c2 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -15,6 +15,7 @@
 #define PCI_CLASS_STORAGE_FLOPPY	0x0102
 #define PCI_CLASS_STORAGE_IPI		0x0103
 #define PCI_CLASS_STORAGE_RAID		0x0104
+#define PCI_CLASS_STORAGE_SAS		0x0107
 #define PCI_CLASS_STORAGE_OTHER		0x0180
 
 #define PCI_BASE_CLASS_NETWORK		0x02
-- 
cgit v1.2.3


From ceef833bae05e393859f1946a9802fb61f0febdf Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Thu, 15 Dec 2005 02:11:55 +0100
Subject: [PATCH] via82cxxx IDE: Add VT8251 ISA bridge

Some motherboards (such as the Asus P5V800-MX) ship a
PCI_DEVICE_ID_VIA_82C586_1 IDE controller alongside a VT8251 southbridge.

This southbridge is currently unrecognised in the via82cxxx IDE driver,
preventing those users from getting DMA access to disks.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/pci/via82cxxx.c | 1 +
 include/linux/pci_ids.h     | 1 +
 2 files changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ide/pci/via82cxxx.c b/drivers/ide/pci/via82cxxx.c
index 7161ce0ef5aa..86fb1e0286d3 100644
--- a/drivers/ide/pci/via82cxxx.c
+++ b/drivers/ide/pci/via82cxxx.c
@@ -80,6 +80,7 @@ static struct via_isa_bridge {
 	u16 flags;
 } via_isa_bridges[] = {
 	{ "vt6410",	PCI_DEVICE_ID_VIA_6410,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
+	{ "vt8251",	PCI_DEVICE_ID_VIA_8251,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8237",	PCI_DEVICE_ID_VIA_8237,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8235",	PCI_DEVICE_ID_VIA_8235,     0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
 	{ "vt8233a",	PCI_DEVICE_ID_VIA_8233A,    0x00, 0x2f, VIA_UDMA_133 | VIA_BAD_AST },
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 1e737e269db9..4db67b3b05cc 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -1244,6 +1244,7 @@
 #define PCI_DEVICE_ID_VIA_8378_0	0x3205
 #define PCI_DEVICE_ID_VIA_8783_0	0x3208
 #define PCI_DEVICE_ID_VIA_8237		0x3227
+#define PCI_DEVICE_ID_VIA_8251		0x3287
 #define PCI_DEVICE_ID_VIA_3296_0	0x0296
 #define PCI_DEVICE_ID_VIA_8231		0x8231
 #define PCI_DEVICE_ID_VIA_8231_4	0x8235
-- 
cgit v1.2.3


From 65e5f2e3b457b6b20a5c4481312189d141a33d24 Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jordan.crouse@amd.com>
Date: Thu, 15 Dec 2005 02:16:18 +0100
Subject: [PATCH] ide: core modifications for AU1200

bart: slightly modified by me

Signed-off-by: Jordan Crouse <jordan.crouse@amd.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-dma.c                     | 15 ++++++---------
 include/asm-mips/mach-au1x00/au1xxx_ide.h |  5 -----
 include/linux/ide.h                       |  6 ++++++
 3 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-dma.c b/drivers/ide/ide-dma.c
index 1e1531334c25..0523da77425a 100644
--- a/drivers/ide/ide-dma.c
+++ b/drivers/ide/ide-dma.c
@@ -90,11 +90,6 @@
 #include <asm/io.h>
 #include <asm/irq.h>
 
-struct drive_list_entry {
-	const char *id_model;
-	const char *id_firmware;
-};
-
 static const struct drive_list_entry drive_whitelist [] = {
 
 	{ "Micropolis 2112A"	,       "ALL"		},
@@ -139,7 +134,7 @@ static const struct drive_list_entry drive_blacklist [] = {
 };
 
 /**
- *	in_drive_list	-	look for drive in black/white list
+ *	ide_in_drive_list	-	look for drive in black/white list
  *	@id: drive identifier
  *	@drive_table: list to inspect
  *
@@ -147,7 +142,7 @@ static const struct drive_list_entry drive_blacklist [] = {
  *	Returns 1 if the drive is found in the table.
  */
 
-static int in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table)
+int ide_in_drive_list(struct hd_driveid *id, const struct drive_list_entry *drive_table)
 {
 	for ( ; drive_table->id_model ; drive_table++)
 		if ((!strcmp(drive_table->id_model, id->model)) &&
@@ -157,6 +152,8 @@ static int in_drive_list(struct hd_driveid *id, const struct drive_list_entry *d
 	return 0;
 }
 
+EXPORT_SYMBOL_GPL(ide_in_drive_list);
+
 /**
  *	ide_dma_intr	-	IDE DMA interrupt handler
  *	@drive: the drive the interrupt is for
@@ -663,7 +660,7 @@ int __ide_dma_bad_drive (ide_drive_t *drive)
 {
 	struct hd_driveid *id = drive->id;
 
-	int blacklist = in_drive_list(id, drive_blacklist);
+	int blacklist = ide_in_drive_list(id, drive_blacklist);
 	if (blacklist) {
 		printk(KERN_WARNING "%s: Disabling (U)DMA for %s (blacklisted)\n",
 				    drive->name, id->model);
@@ -677,7 +674,7 @@ EXPORT_SYMBOL(__ide_dma_bad_drive);
 int __ide_dma_good_drive (ide_drive_t *drive)
 {
 	struct hd_driveid *id = drive->id;
-	return in_drive_list(id, drive_whitelist);
+	return ide_in_drive_list(id, drive_whitelist);
 }
 
 EXPORT_SYMBOL(__ide_dma_good_drive);
diff --git a/include/asm-mips/mach-au1x00/au1xxx_ide.h b/include/asm-mips/mach-au1x00/au1xxx_ide.h
index 33d275c3b84c..0c3c127e619e 100644
--- a/include/asm-mips/mach-au1x00/au1xxx_ide.h
+++ b/include/asm-mips/mach-au1x00/au1xxx_ide.h
@@ -87,11 +87,6 @@ typedef struct
 } _auide_hwif;
 
 #ifdef CONFIG_BLK_DEV_IDE_AU1XXX_MDMA2_DBDMA
-struct drive_list_entry {
-        const char * id_model;
-        const char * id_firmware;
-};
-
 /* HD white list */
 static const struct drive_list_entry dma_white_list [] = {
 /*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index a39c3c59789d..a6b28dcf8f0d 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -1371,6 +1371,12 @@ void ide_init_sg_cmd(ide_drive_t *, struct request *);
 #define GOOD_DMA_DRIVE		1
 
 #ifdef CONFIG_BLK_DEV_IDEDMA
+struct drive_list_entry {
+	const char *id_model;
+	const char *id_firmware;
+};
+
+int ide_in_drive_list(struct hd_driveid *, const struct drive_list_entry *);
 int __ide_dma_bad_drive(ide_drive_t *);
 int __ide_dma_good_drive(ide_drive_t *);
 int ide_use_dma(ide_drive_t *);
-- 
cgit v1.2.3


From 517bd1d5eac739a7f398058a9524386667fff032 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 15 Dec 2005 02:19:57 +0100
Subject: [PATCH] ide: cleanup ide.h

Remove:
* stale comment
* unused HOST() macro
* unused ata_{error,control}_t types
* unused atapi_select_t type
* ide_init_subdrivers() prototype

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 122 ----------------------------------------------------
 1 file changed, 122 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index a6b28dcf8f0d..613534b77054 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -23,17 +23,6 @@
 #include <asm/io.h>
 #include <asm/semaphore.h>
 
-/*
- * This is the multiple IDE interface driver, as evolved from hd.c.
- * It supports up to four IDE interfaces, on one or more IRQs (usually 14 & 15).
- * There can be up to two drives per interface, as per the ATA-2 spec.
- *
- * Primary i/f:    ide0: major=3;  (hda)         minor=0; (hdb)         minor=64
- * Secondary i/f:  ide1: major=22; (hdc or hd1a) minor=0; (hdd or hd1b) minor=64
- * Tertiary i/f:   ide2: major=33; (hde)         minor=0; (hdf)         minor=64
- * Quaternary i/f: ide3: major=34; (hdg)         minor=0; (hdh)         minor=64
- */
-
 /******************************************************************************
  * IDE driver configuration options (play with these as desired):
  *
@@ -193,11 +182,6 @@ typedef unsigned char	byte;	/* used everywhere */
 #define WAIT_CMD	(10*HZ)	/* 10sec  - maximum wait for an IRQ to happen */
 #define WAIT_MIN_SLEEP	(2*HZ/100)	/* 20msec - minimum sleep time */
 
-#define HOST(hwif,chipset)					\
-{								\
-	return ((hwif)->chipset == chipset) ? 1 : 0;		\
-}
-
 /*
  * Check for an interrupt and acknowledge the interrupt status
  */
@@ -390,45 +374,6 @@ typedef union {
 	} b;
 } ata_nsector_t, ata_data_t, atapi_bcount_t, ata_index_t;
 
-/*
- * ATA-IDE Error Register
- *
- * mark		: Bad address mark
- * tzero	: Couldn't find track 0
- * abrt		: Aborted Command
- * mcr		: Media Change Request
- * id		: ID field not found
- * mce		: Media Change Event
- * ecc		: Uncorrectable ECC error
- * bdd		: dual meaing
- */
-typedef union {
-	unsigned all			:8;
-	struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-		unsigned mark		:1;
-		unsigned tzero		:1;
-		unsigned abrt		:1;
-		unsigned mcr		:1;
-		unsigned id		:1;
-		unsigned mce		:1;
-		unsigned ecc		:1;
-		unsigned bdd		:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-		unsigned bdd		:1;
-		unsigned ecc		:1;
-		unsigned mce		:1;
-		unsigned id		:1;
-		unsigned mcr		:1;
-		unsigned abrt		:1;
-		unsigned tzero		:1;
-		unsigned mark		:1;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	} b;
-} ata_error_t;
-
 /*
  * ATA-IDE Select Register, aka Device-Head
  *
@@ -503,39 +448,6 @@ typedef union {
 	} b;
 } ata_status_t, atapi_status_t;
 
-/*
- * ATA-IDE Control Register
- *
- * bit0		: Should be set to zero
- * nIEN		: device INTRQ to host
- * SRST		: host soft reset bit
- * bit3		: ATA-2 thingy, Should be set to 1
- * reserved456	: Reserved
- * HOB		: 48-bit address ordering, High Ordered Bit
- */
-typedef union {
-	unsigned all			: 8;
-	struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-		unsigned bit0		: 1;
-		unsigned nIEN		: 1;
-		unsigned SRST		: 1;
-		unsigned bit3		: 1;
-		unsigned reserved456	: 3;
-		unsigned HOB		: 1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-		unsigned HOB		: 1;
-		unsigned reserved456	: 3;
-		unsigned bit3		: 1;
-		unsigned SRST		: 1;
-		unsigned nIEN		: 1;
-		unsigned bit0		: 1;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	} b;
-} ata_control_t;
-
 /*
  * ATAPI Feature Register
  *
@@ -617,39 +529,6 @@ typedef union {
 	} b;
 } atapi_error_t;
 
-/*
- * ATAPI floppy Drive Select Register
- *
- * sam_lun	: Logical unit number
- * reserved3	: Reserved
- * drv		: The responding drive will be drive 0 (0) or drive 1 (1)
- * one5		: Should be set to 1
- * reserved6	: Reserved
- * one7		: Should be set to 1
- */
-typedef union {
-	unsigned all			:8;
-	struct {
-#if defined(__LITTLE_ENDIAN_BITFIELD)
-		unsigned sam_lun	:3;
-		unsigned reserved3	:1;
-		unsigned drv		:1;
-		unsigned one5		:1;
-		unsigned reserved6	:1;
-		unsigned one7		:1;
-#elif defined(__BIG_ENDIAN_BITFIELD)
-		unsigned one7		:1;
-		unsigned reserved6	:1;
-		unsigned one5		:1;
-		unsigned drv		:1;
-		unsigned reserved3	:1;
-		unsigned sam_lun	:3;
-#else
-#error "Please fix <asm/byteorder.h>"
-#endif
-	} b;
-} atapi_select_t;
-
 /*
  * Status returned from various ide_ functions
  */
@@ -1298,7 +1177,6 @@ extern int ide_spin_wait_hwgroup(ide_drive_t *);
 extern void ide_timer_expiry(unsigned long);
 extern irqreturn_t ide_intr(int irq, void *dev_id, struct pt_regs *regs);
 extern void do_ide_request(request_queue_t *);
-extern void ide_init_subdrivers(void);
 
 void ide_init_disk(struct gendisk *, ide_drive_t *);
 
-- 
cgit v1.2.3


From 7b4df9ece9b4c4a754bd1f5603cdabff26b987e5 Mon Sep 17 00:00:00 2001
From: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
Date: Thu, 15 Dec 2005 02:20:15 +0100
Subject: [PATCH] ide: cleanup ide_driver_t

Remove unused fields: ioctl, ata[pi]_prebuilder.

Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 include/linux/ide.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ide.h b/include/linux/ide.h
index 613534b77054..7b6a6a58e465 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -980,10 +980,7 @@ typedef struct ide_driver_s {
 	int		(*end_request)(ide_drive_t *, int, int);
 	ide_startstop_t	(*error)(ide_drive_t *, struct request *rq, u8, u8);
 	ide_startstop_t	(*abort)(ide_drive_t *, struct request *rq);
-	int		(*ioctl)(ide_drive_t *, struct inode *, struct file *, unsigned int, unsigned long);
 	ide_proc_entry_t	*proc;
-	void		(*ata_prebuilder)(ide_drive_t *);
-	void		(*atapi_prebuilder)(ide_drive_t *);
 	struct device_driver	gen_driver;
 } ide_driver_t;
 
-- 
cgit v1.2.3


From 6e39b69e7ea9205c5f80aeac3ef999ab8fb1a4cc Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Fri, 11 Nov 2005 05:30:24 -0600
Subject: [SCSI] export blk layer functions needed for blk_execute_rq_nowait

To send async requests we need these two functions exported.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 block/ll_rw_blk.c      | 6 +++++-
 include/linux/blkdev.h | 5 +++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 99c9ca6d5992..c525b5a2b598 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -2306,6 +2306,8 @@ void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 	generic_unplug_device(q);
 }
 
+EXPORT_SYMBOL_GPL(blk_execute_rq_nowait);
+
 /**
  * blk_execute_rq - insert a request into queue for execution
  * @q:		queue to insert the request in
@@ -2444,7 +2446,7 @@ void disk_round_stats(struct gendisk *disk)
 /*
  * queue lock must be held
  */
-static void __blk_put_request(request_queue_t *q, struct request *req)
+void __blk_put_request(request_queue_t *q, struct request *req)
 {
 	struct request_list *rl = req->rl;
 
@@ -2473,6 +2475,8 @@ static void __blk_put_request(request_queue_t *q, struct request *req)
 	}
 }
 
+EXPORT_SYMBOL_GPL(__blk_put_request);
+
 void blk_put_request(struct request *req)
 {
 	unsigned long flags;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a33a31e71bbc..9a68716dcf75 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -558,6 +558,7 @@ extern void blk_unregister_queue(struct gendisk *disk);
 extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
+extern void __blk_put_request(request_queue_t *, struct request *);
 extern void blk_end_sync_rq(struct request *rq);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, gfp_t);
@@ -579,6 +580,10 @@ extern int blk_rq_map_kern(request_queue_t *, struct request *, void *, unsigned
 extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_iovec *, int);
 extern int blk_execute_rq(request_queue_t *, struct gendisk *,
 			  struct request *, int);
+extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
+				  struct request *, int,
+				  void (*done)(struct request *));
+
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
 	return bdev->bd_disk->queue;
-- 
cgit v1.2.3


From 6e68af666f5336254b5715dca591026b7324499a Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Fri, 11 Nov 2005 05:30:27 -0600
Subject: [SCSI] Convert SCSI mid-layer to scsi_execute_async

Add scsi helpers to create really-large-requests and convert
scsi-ml to scsi_execute_async().

Per Jens's previous comments, I placed this function in scsi_lib.c.
I made it follow all the queue's limits - I think I did at least :), so
I removed the warning on the function header.

I think the scsi_execute_* functions should eventually take a request_queue
and be placed some place where the dm-multipath hw_handler can use them
if that failover code is going to stay in the kernel. That conversion
patch will be sent in another mail though.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_error.c  |  47 ++-------
 drivers/scsi/scsi_lib.c    | 230 ++++++++++++++++++++++++++++++++++++++-------
 drivers/scsi/scsi_priv.h   |   1 -
 fs/bio.c                   |  20 ++++
 include/linux/bio.h        |   2 +
 include/scsi/scsi_device.h |   6 ++
 6 files changed, 233 insertions(+), 73 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 18c5d2523014..53ea62d3b53d 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1314,23 +1314,6 @@ int scsi_decide_disposition(struct scsi_cmnd *scmd)
 	}
 }
 
-/**
- * scsi_eh_lock_done - done function for eh door lock request
- * @scmd:	SCSI command block for the door lock request
- *
- * Notes:
- * 	We completed the asynchronous door lock request, and it has either
- * 	locked the door or failed.  We must free the command structures
- * 	associated with this request.
- **/
-static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
-{
-	struct scsi_request *sreq = scmd->sc_request;
-
-	scsi_release_request(sreq);
-}
-
-
 /**
  * scsi_eh_lock_door - Prevent medium removal for the specified device
  * @sdev:	SCSI device to prevent medium removal
@@ -1353,29 +1336,17 @@ static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
  **/
 static void scsi_eh_lock_door(struct scsi_device *sdev)
 {
-	struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
+	unsigned char cmnd[MAX_COMMAND_SIZE];
 
-	if (unlikely(!sreq)) {
-		printk(KERN_ERR "%s: request allocate failed,"
-		       "prevent media removal cmd not sent\n", __FUNCTION__);
-		return;
-	}
+	cmnd[0] = ALLOW_MEDIUM_REMOVAL;
+	cmnd[1] = 0;
+	cmnd[2] = 0;
+	cmnd[3] = 0;
+	cmnd[4] = SCSI_REMOVAL_PREVENT;
+	cmnd[5] = 0;
 
-	sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
-	sreq->sr_cmnd[1] = 0;
-	sreq->sr_cmnd[2] = 0;
-	sreq->sr_cmnd[3] = 0;
-	sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
-	sreq->sr_cmnd[5] = 0;
-	sreq->sr_data_direction = DMA_NONE;
-	sreq->sr_bufflen = 0;
-	sreq->sr_buffer = NULL;
-	sreq->sr_allowed = 5;
-	sreq->sr_done = scsi_eh_lock_done;
-	sreq->sr_timeout_per_command = 10 * HZ;
-	sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
-
-	scsi_insert_special_req(sreq, 1);
+	scsi_execute_async(sdev, cmnd, DMA_NONE, NULL, 0, 0, 10 * HZ,
+			   5, NULL, NULL, GFP_KERNEL);
 }
 
 
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1f2782767ca9..eb0cfbfbcf8f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -63,39 +63,6 @@ static struct scsi_host_sg_pool scsi_sg_pools[] = {
 }; 	
 #undef SP
 
-
-/*
- * Function:    scsi_insert_special_req()
- *
- * Purpose:     Insert pre-formed request into request queue.
- *
- * Arguments:   sreq	- request that is ready to be queued.
- *              at_head	- boolean.  True if we should insert at head
- *                        of queue, false if we should insert at tail.
- *
- * Lock status: Assumed that lock is not held upon entry.
- *
- * Returns:     Nothing
- *
- * Notes:       This function is called from character device and from
- *              ioctl types of functions where the caller knows exactly
- *              what SCSI command needs to be issued.   The idea is that
- *              we merely inject the command into the queue (at the head
- *              for now), and then call the queue request function to actually
- *              process it.
- */
-int scsi_insert_special_req(struct scsi_request *sreq, int at_head)
-{
-	/*
-	 * Because users of this function are apt to reuse requests with no
-	 * modification, we have to sanitise the request flags here
-	 */
-	sreq->sr_request->flags &= ~REQ_DONTPREP;
-	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
-		       	   at_head, sreq);
-	return 0;
-}
-
 static void scsi_run_queue(struct request_queue *q);
 
 /*
@@ -249,8 +216,13 @@ void scsi_do_req(struct scsi_request *sreq, const void *cmnd,
 
 	/*
 	 * head injection *required* here otherwise quiesce won't work
+	 *
+	 * Because users of this function are apt to reuse requests with no
+	 * modification, we have to sanitise the request flags here
 	 */
-	scsi_insert_special_req(sreq, 1);
+	sreq->sr_request->flags &= ~REQ_DONTPREP;
+	blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request,
+		       	   1, sreq);
 }
 EXPORT_SYMBOL(scsi_do_req);
 
@@ -327,6 +299,196 @@ int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 }
 EXPORT_SYMBOL(scsi_execute_req);
 
+struct scsi_io_context {
+	void *data;
+	void (*done)(void *data, char *sense, int result, int resid);
+	char sense[SCSI_SENSE_BUFFERSIZE];
+};
+
+static void scsi_end_async(struct request *req)
+{
+	struct scsi_io_context *sioc = req->end_io_data;
+
+	if (sioc->done)
+		sioc->done(sioc->data, sioc->sense, req->errors, req->data_len);
+
+	kfree(sioc);
+	__blk_put_request(req->q, req);
+}
+
+static int scsi_merge_bio(struct request *rq, struct bio *bio)
+{
+	struct request_queue *q = rq->q;
+
+	bio->bi_flags &= ~(1 << BIO_SEG_VALID);
+	if (rq_data_dir(rq) == WRITE)
+		bio->bi_rw |= (1 << BIO_RW);
+	blk_queue_bounce(q, &bio);
+
+	if (!rq->bio)
+		blk_rq_bio_prep(q, rq, bio);
+	else if (!q->back_merge_fn(q, rq, bio))
+		return -EINVAL;
+	else {
+		rq->biotail->bi_next = bio;
+		rq->biotail = bio;
+		rq->hard_nr_sectors += bio_sectors(bio);
+		rq->nr_sectors = rq->hard_nr_sectors;
+	}
+
+	return 0;
+}
+
+static int scsi_bi_endio(struct bio *bio, unsigned int bytes_done, int error)
+{
+	if (bio->bi_size)
+		return 1;
+
+	bio_put(bio);
+	return 0;
+}
+
+/**
+ * scsi_req_map_sg - map a scatterlist into a request
+ * @rq:		request to fill
+ * @sg:		scatterlist
+ * @nsegs:	number of elements
+ * @bufflen:	len of buffer
+ * @gfp:	memory allocation flags
+ *
+ * scsi_req_map_sg maps a scatterlist into a request so that the
+ * request can be sent to the block layer. We do not trust the scatterlist
+ * sent to use, as some ULDs use that struct to only organize the pages.
+ */
+static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl,
+			   int nsegs, unsigned bufflen, gfp_t gfp)
+{
+	struct request_queue *q = rq->q;
+	int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	unsigned int data_len = 0, len, bytes, off;
+	struct page *page;
+	struct bio *bio = NULL;
+	int i, err, nr_vecs = 0;
+
+	for (i = 0; i < nsegs; i++) {
+		page = sgl[i].page;
+		off = sgl[i].offset;
+		len = sgl[i].length;
+		data_len += len;
+
+		while (len > 0) {
+			bytes = min_t(unsigned int, len, PAGE_SIZE - off);
+
+			if (!bio) {
+				nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
+				nr_pages -= nr_vecs;
+
+				bio = bio_alloc(gfp, nr_vecs);
+				if (!bio) {
+					err = -ENOMEM;
+					goto free_bios;
+				}
+				bio->bi_end_io = scsi_bi_endio;
+			}
+
+			if (bio_add_pc_page(q, bio, page, bytes, off) !=
+			    bytes) {
+				bio_put(bio);
+				err = -EINVAL;
+				goto free_bios;
+			}
+
+			if (bio->bi_vcnt >= nr_vecs) {
+				err = scsi_merge_bio(rq, bio);
+				if (err) {
+					bio_endio(bio, bio->bi_size, 0);
+					goto free_bios;
+				}
+				bio = NULL;
+			}
+
+			page++;
+			len -= bytes;
+			off = 0;
+		}
+	}
+
+	rq->buffer = rq->data = NULL;
+	rq->data_len = data_len;
+	return 0;
+
+free_bios:
+	while ((bio = rq->bio) != NULL) {
+		rq->bio = bio->bi_next;
+		/*
+		 * call endio instead of bio_put incase it was bounced
+		 */
+		bio_endio(bio, bio->bi_size, 0);
+	}
+
+	return err;
+}
+
+/**
+ * scsi_execute_async - insert request
+ * @sdev:	scsi device
+ * @cmd:	scsi command
+ * @data_direction: data direction
+ * @buffer:	data buffer (this can be a kernel buffer or scatterlist)
+ * @bufflen:	len of buffer
+ * @use_sg:	if buffer is a scatterlist this is the number of elements
+ * @timeout:	request timeout in seconds
+ * @retries:	number of times to retry request
+ * @flags:	or into request flags
+ **/
+int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
+		       int data_direction, void *buffer, unsigned bufflen,
+		       int use_sg, int timeout, int retries, void *privdata,
+		       void (*done)(void *, char *, int, int), gfp_t gfp)
+{
+	struct request *req;
+	struct scsi_io_context *sioc;
+	int err = 0;
+	int write = (data_direction == DMA_TO_DEVICE);
+
+	sioc = kzalloc(sizeof(*sioc), gfp);
+	if (!sioc)
+		return DRIVER_ERROR << 24;
+
+	req = blk_get_request(sdev->request_queue, write, gfp);
+	if (!req)
+		goto free_sense;
+
+	if (use_sg)
+		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
+	else if (bufflen)
+		err = blk_rq_map_kern(req->q, req, buffer, bufflen, gfp);
+
+	if (err)
+		goto free_req;
+
+	req->cmd_len = COMMAND_SIZE(cmd[0]);
+	memcpy(req->cmd, cmd, req->cmd_len);
+	req->sense = sioc->sense;
+	req->sense_len = 0;
+	req->timeout = timeout;
+	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
+	req->end_io_data = sioc;
+
+	sioc->data = privdata;
+	sioc->done = done;
+
+	blk_execute_rq_nowait(req->q, NULL, req, 1, scsi_end_async);
+	return 0;
+
+free_req:
+	blk_put_request(req);
+free_sense:
+	kfree(sioc);
+	return DRIVER_ERROR << 24;
+}
+EXPORT_SYMBOL_GPL(scsi_execute_async);
+
 /*
  * Function:    scsi_init_cmd_errh()
  *
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index a8d121c8fbcd..f04e7e11f57a 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -40,7 +40,6 @@ extern void scsi_exit_hosts(void);
 extern int scsi_dispatch_cmd(struct scsi_cmnd *cmd);
 extern int scsi_setup_command_freelist(struct Scsi_Host *shost);
 extern void scsi_destroy_command_freelist(struct Scsi_Host *shost);
-extern int scsi_insert_special_req(struct scsi_request *sreq, int);
 extern void scsi_init_cmd_from_req(struct scsi_cmnd *cmd,
 		struct scsi_request *sreq);
 extern void __scsi_release_request(struct scsi_request *sreq);
diff --git a/fs/bio.c b/fs/bio.c
index 460554b07ff9..4d21ee3873ec 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -385,6 +385,25 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 	return len;
 }
 
+/**
+ *	bio_add_pc_page	-	attempt to add page to bio
+ *	@bio: destination bio
+ *	@page: page to add
+ *	@len: vec entry length
+ *	@offset: vec entry offset
+ *
+ *	Attempt to add a page to the bio_vec maplist. This can fail for a
+ *	number of reasons, such as the bio being full or target block
+ *	device limitations. The target block device must allow bio's
+ *      smaller than PAGE_SIZE, so it is always possible to add a single
+ *      page to an empty bio. This should only be used by REQ_PC bios.
+ */
+int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page,
+		    unsigned int len, unsigned int offset)
+{
+	return __bio_add_page(q, bio, page, len, offset);
+}
+
 /**
  *	bio_add_page	-	attempt to add page to bio
  *	@bio: destination bio
@@ -1228,6 +1247,7 @@ EXPORT_SYMBOL(bio_clone);
 EXPORT_SYMBOL(bio_phys_segments);
 EXPORT_SYMBOL(bio_hw_segments);
 EXPORT_SYMBOL(bio_add_page);
+EXPORT_SYMBOL(bio_add_pc_page);
 EXPORT_SYMBOL(bio_get_nr_vecs);
 EXPORT_SYMBOL(bio_map_user);
 EXPORT_SYMBOL(bio_unmap_user);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 685fd3720df5..b60ffe32cd21 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -292,6 +292,8 @@ extern struct bio *bio_clone(struct bio *, gfp_t);
 extern void bio_init(struct bio *);
 
 extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int);
+extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *,
+			   unsigned int, unsigned int);
 extern int bio_get_nr_vecs(struct block_device *);
 extern struct bio *bio_map_user(struct request_queue *, struct block_device *,
 				unsigned long, unsigned int, int);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 063e32fe036c..e94ca4d36035 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -274,6 +274,12 @@ extern int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 extern int scsi_execute_req(struct scsi_device *sdev, const unsigned char *cmd,
 			    int data_direction, void *buffer, unsigned bufflen,
 			    struct scsi_sense_hdr *, int timeout, int retries);
+extern int scsi_execute_async(struct scsi_device *sdev,
+			      const unsigned char *cmd, int data_direction,
+			      void *buffer, unsigned bufflen, int use_sg,
+			      int timeout, int retries, void *privdata,
+			      void (*done)(void *, char *, int, int),
+			      gfp_t gfp);
 
 static inline unsigned int sdev_channel(struct scsi_device *sdev)
 {
-- 
cgit v1.2.3


From 17e01f216b611fc46956dcd9063aec4de75991e3 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Fri, 11 Nov 2005 05:31:37 -0600
Subject: [SCSI] add retries field to request for REQ_BLOCK_PC use

For tape we need to control the retries. This patch adds a retries
counter on the request for REQ_BLOCK_PC commands originating from
scsi_execute* to use. REQ_BLOCK_PC commands comming from the block
layer SG_IO path continue to use the retires set in the ULD init_command.
(scsi_execute* does not set the gendisk so we do not execute
the init_command in that path).

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 drivers/scsi/scsi_lib.c | 4 +++-
 include/linux/blkdev.h  | 1 +
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index eb0cfbfbcf8f..365843a1561f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -259,6 +259,7 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 	memcpy(req->cmd, cmd, req->cmd_len);
 	req->sense = sense;
 	req->sense_len = 0;
+	req->retries = retries;
 	req->timeout = timeout;
 	req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET;
 
@@ -472,6 +473,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense = sioc->sense;
 	req->sense_len = 0;
 	req->timeout = timeout;
+	req->retries = retries;
 	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
 	req->end_io_data = sioc;
 
@@ -1393,7 +1395,7 @@ static int scsi_prep_fn(struct request_queue *q, struct request *req)
 				cmd->sc_data_direction = DMA_NONE;
 			
 			cmd->transfersize = req->data_len;
-			cmd->allowed = 3;
+			cmd->allowed = req->retries;
 			cmd->timeout_per_command = req->timeout;
 			cmd->done = scsi_generic_done;
 		}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9a68716dcf75..509e9a03a328 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -184,6 +184,7 @@ struct request {
 	void *sense;
 
 	unsigned int timeout;
+	int retries;
 
 	/*
 	 * For Power Management requests
-- 
cgit v1.2.3


From defd94b75409b983f94548ea2f52ff5787ddb848 Mon Sep 17 00:00:00 2001
From: Mike Christie <michaelc@cs.wisc.edu>
Date: Mon, 5 Dec 2005 02:37:06 -0600
Subject: [SCSI] seperate max_sectors from max_hw_sectors

- export __blk_put_request and blk_execute_rq_nowait
needed for async REQ_BLOCK_PC requests
- seperate max_hw_sectors and max_sectors for block/scsi_ioctl.c and
SG_IO bio.c helpers per Jens's last comments. Since block/scsi_ioctl.c SG_IO was
already testing against max_sectors and SCSI-ml was setting max_sectors and
max_hw_sectors to the same value this does not change any scsi SG_IO behavior. It only
prepares ll_rw_blk.c, scsi_ioctl.c and bio.c for when SCSI-ml begins to set
a valid max_hw_sectors for all LLDs. Today if a LLD does not set it
SCSI-ml sets it to a safe default and some LLDs set it to a artificial low
value to overcome memory and feedback issues.

Note: Since we now cap max_sectors to BLK_DEF_MAX_SECTORS, which is 1024,
drivers that used to call blk_queue_max_sectors with a large value of
max_sectors will now see the fs requests capped to BLK_DEF_MAX_SECTORS.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>
---
 block/ll_rw_blk.c       | 34 ++++++++++++++++++++++++++--------
 block/scsi_ioctl.c      |  2 +-
 drivers/md/dm-table.c   |  2 +-
 drivers/scsi/scsi_lib.c |  2 +-
 fs/bio.c                | 20 +++++++++++---------
 include/linux/blkdev.h  |  3 ++-
 6 files changed, 42 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index c525b5a2b598..d4beb9a89ee0 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -239,7 +239,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
 	q->backing_dev_info.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
 	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = BDI_CAP_MAP_COPY;
-	blk_queue_max_sectors(q, MAX_SECTORS);
+	blk_queue_max_sectors(q, SAFE_MAX_SECTORS);
 	blk_queue_hardsect_size(q, 512);
 	blk_queue_dma_alignment(q, 511);
 	blk_queue_congestion_threshold(q);
@@ -555,7 +555,12 @@ void blk_queue_max_sectors(request_queue_t *q, unsigned short max_sectors)
 		printk("%s: set to minimum %d\n", __FUNCTION__, max_sectors);
 	}
 
-	q->max_sectors = q->max_hw_sectors = max_sectors;
+	if (BLK_DEF_MAX_SECTORS > max_sectors)
+		q->max_hw_sectors = q->max_sectors = max_sectors;
+ 	else {
+		q->max_sectors = BLK_DEF_MAX_SECTORS;
+		q->max_hw_sectors = max_sectors;
+	}
 }
 
 EXPORT_SYMBOL(blk_queue_max_sectors);
@@ -657,8 +662,8 @@ EXPORT_SYMBOL(blk_queue_hardsect_size);
 void blk_queue_stack_limits(request_queue_t *t, request_queue_t *b)
 {
 	/* zero is "infinity" */
-	t->max_sectors = t->max_hw_sectors =
-		min_not_zero(t->max_sectors,b->max_sectors);
+	t->max_sectors = min_not_zero(t->max_sectors,b->max_sectors);
+	t->max_hw_sectors = min_not_zero(t->max_hw_sectors,b->max_hw_sectors);
 
 	t->max_phys_segments = min(t->max_phys_segments,b->max_phys_segments);
 	t->max_hw_segments = min(t->max_hw_segments,b->max_hw_segments);
@@ -1293,9 +1298,15 @@ static inline int ll_new_hw_segment(request_queue_t *q,
 static int ll_back_merge_fn(request_queue_t *q, struct request *req, 
 			    struct bio *bio)
 {
+	unsigned short max_sectors;
 	int len;
 
-	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
+	if (unlikely(blk_pc_request(req)))
+		max_sectors = q->max_hw_sectors;
+	else
+		max_sectors = q->max_sectors;
+
+	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
 		req->flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -1325,9 +1336,16 @@ static int ll_back_merge_fn(request_queue_t *q, struct request *req,
 static int ll_front_merge_fn(request_queue_t *q, struct request *req, 
 			     struct bio *bio)
 {
+	unsigned short max_sectors;
 	int len;
 
-	if (req->nr_sectors + bio_sectors(bio) > q->max_sectors) {
+	if (unlikely(blk_pc_request(req)))
+		max_sectors = q->max_hw_sectors;
+	else
+		max_sectors = q->max_sectors;
+
+
+	if (req->nr_sectors + bio_sectors(bio) > max_sectors) {
 		req->flags |= REQ_NOMERGE;
 		if (req == q->last_merge)
 			q->last_merge = NULL;
@@ -2144,7 +2162,7 @@ int blk_rq_map_user(request_queue_t *q, struct request *rq, void __user *ubuf,
 	struct bio *bio;
 	int reading;
 
-	if (len > (q->max_sectors << 9))
+	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
 	if (!len || !ubuf)
 		return -EINVAL;
@@ -2259,7 +2277,7 @@ int blk_rq_map_kern(request_queue_t *q, struct request *rq, void *kbuf,
 {
 	struct bio *bio;
 
-	if (len > (q->max_sectors << 9))
+	if (len > (q->max_hw_sectors << 9))
 		return -EINVAL;
 	if (!len || !kbuf)
 		return -EINVAL;
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 382dea7b224c..4e390dfd3157 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -233,7 +233,7 @@ static int sg_io(struct file *file, request_queue_t *q,
 	if (verify_command(file, cmd))
 		return -EPERM;
 
-	if (hdr->dxfer_len > (q->max_sectors << 9))
+	if (hdr->dxfer_len > (q->max_hw_sectors << 9))
 		return -EIO;
 
 	if (hdr->dxfer_len)
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a6d3baa46f61..a6f2dc66c3db 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -638,7 +638,7 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 static void check_for_valid_limits(struct io_restrictions *rs)
 {
 	if (!rs->max_sectors)
-		rs->max_sectors = MAX_SECTORS;
+		rs->max_sectors = SAFE_MAX_SECTORS;
 	if (!rs->max_phys_segments)
 		rs->max_phys_segments = MAX_PHYS_SEGMENTS;
 	if (!rs->max_hw_segments)
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 54a72f197487..14ad2a785a34 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -462,6 +462,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req = blk_get_request(sdev->request_queue, write, gfp);
 	if (!req)
 		goto free_sense;
+	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
 
 	if (use_sg)
 		err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp);
@@ -477,7 +478,6 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd,
 	req->sense_len = 0;
 	req->timeout = timeout;
 	req->retries = retries;
-	req->flags |= REQ_BLOCK_PC | REQ_QUIET;
 	req->end_io_data = sioc;
 
 	sioc->data = privdata;
diff --git a/fs/bio.c b/fs/bio.c
index 4d21ee3873ec..38d3e8023a07 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -313,7 +313,8 @@ int bio_get_nr_vecs(struct block_device *bdev)
 }
 
 static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
-			  *page, unsigned int len, unsigned int offset)
+			  *page, unsigned int len, unsigned int offset,
+			  unsigned short max_sectors)
 {
 	int retried_segments = 0;
 	struct bio_vec *bvec;
@@ -327,7 +328,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 	if (bio->bi_vcnt >= bio->bi_max_vecs)
 		return 0;
 
-	if (((bio->bi_size + len) >> 9) > q->max_sectors)
+	if (((bio->bi_size + len) >> 9) > max_sectors)
 		return 0;
 
 	/*
@@ -401,7 +402,7 @@ static int __bio_add_page(request_queue_t *q, struct bio *bio, struct page
 int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page,
 		    unsigned int len, unsigned int offset)
 {
-	return __bio_add_page(q, bio, page, len, offset);
+	return __bio_add_page(q, bio, page, len, offset, q->max_hw_sectors);
 }
 
 /**
@@ -420,8 +421,8 @@ int bio_add_pc_page(request_queue_t *q, struct bio *bio, struct page *page,
 int bio_add_page(struct bio *bio, struct page *page, unsigned int len,
 		 unsigned int offset)
 {
-	return __bio_add_page(bdev_get_queue(bio->bi_bdev), bio, page,
-			      len, offset);
+	struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+	return __bio_add_page(q, bio, page, len, offset, q->max_sectors);
 }
 
 struct bio_map_data {
@@ -533,7 +534,7 @@ struct bio *bio_copy_user(request_queue_t *q, unsigned long uaddr,
 			break;
 		}
 
-		if (__bio_add_page(q, bio, page, bytes, 0) < bytes) {
+		if (bio_add_pc_page(q, bio, page, bytes, 0) < bytes) {
 			ret = -EINVAL;
 			break;
 		}
@@ -647,7 +648,8 @@ static struct bio *__bio_map_user_iov(request_queue_t *q,
 			/*
 			 * sorry...
 			 */
-			if (__bio_add_page(q, bio, pages[j], bytes, offset) < bytes)
+			if (bio_add_pc_page(q, bio, pages[j], bytes, offset) <
+					    bytes)
 				break;
 
 			len -= bytes;
@@ -820,8 +822,8 @@ static struct bio *__bio_map_kern(request_queue_t *q, void *data,
 		if (bytes > len)
 			bytes = len;
 
-		if (__bio_add_page(q, bio, virt_to_page(data), bytes,
-				   offset) < bytes)
+		if (bio_add_pc_page(q, bio, virt_to_page(data), bytes,
+				    offset) < bytes)
 			break;
 
 		data += bytes;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 509e9a03a328..a18500d196e1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -702,7 +702,8 @@ extern int blkdev_issue_flush(struct block_device *, sector_t *);
 
 #define MAX_PHYS_SEGMENTS 128
 #define MAX_HW_SEGMENTS 128
-#define MAX_SECTORS 255
+#define SAFE_MAX_SECTORS 255
+#define BLK_DEF_MAX_SECTORS 1024
 
 #define MAX_SEGMENT_SIZE	65536
 
-- 
cgit v1.2.3


From 4d7672b46244abffea1953e55688c0ea143dd617 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@g5.osdl.org>
Date: Fri, 16 Dec 2005 10:21:23 -0800
Subject: Make sure we copy pages inserted with "vm_insert_page()" on fork

The logic that decides that a fork() might be able to avoid copying a VM
area when it can be re-created by page faults didn't know about the new
vm_insert_page() case.

Also make some things a bit more anal wrt VM_PFNMAP.

Pointed out by Hugh Dickins <hugh@veritas.com>

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 1 +
 mm/memory.c        | 3 ++-
 mm/mmap.c          | 2 +-
 mm/mremap.c        | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index e5677f456742..a06a84d347fb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -163,6 +163,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 
 #ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
 #define VM_STACK_DEFAULT_FLAGS VM_DATA_DEFAULT_FLAGS
diff --git a/mm/memory.c b/mm/memory.c
index d22f78c8a381..d8dde07a3656 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -574,7 +574,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP))) {
+	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -1228,6 +1228,7 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr, struct page *
 		return -EFAULT;
 	if (!page_count(page))
 		return -EINVAL;
+	vma->vm_flags |= VM_INSERTPAGE;
 	return insert_page(vma->vm_mm, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
diff --git a/mm/mmap.c b/mm/mmap.c
index 11ca5927d5ff..64ba4dbcb7de 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -611,7 +611,7 @@ again:			remove_next = 1 + (end > next->vm_end);
  * If the vma has a ->close operation then the driver probably needs to release
  * per-vma resources, so we don't attempt to merge those.
  */
-#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED)
+#define VM_SPECIAL (VM_IO | VM_DONTCOPY | VM_DONTEXPAND | VM_RESERVED | VM_PFNMAP)
 
 static inline int is_mergeable_vma(struct vm_area_struct *vma,
 			struct file *file, unsigned long vm_flags)
diff --git a/mm/mremap.c b/mm/mremap.c
index b535438c363c..ddaeee9a0b69 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -323,7 +323,7 @@ unsigned long do_mremap(unsigned long addr,
 	/* We can't remap across vm area boundaries */
 	if (old_len > vma->vm_end - addr)
 		goto out;
-	if (vma->vm_flags & VM_DONTEXPAND) {
+	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
 		if (new_len > old_len)
 			goto out;
 	}
-- 
cgit v1.2.3


From dc86e88c2bb8a7603ee175fbb6a9e92cf3293dd8 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Mon, 12 Dec 2005 09:34:32 -0800
Subject: [IA64] Add __read_mostly support for IA64

sparc64, i386 and x86_64 have support for a special data section dedicated
to rarely updated data that is frequently read. The section was created to
avoid false sharing of those rarely read data with frequently written kernel
data.

This patch creates such a data section for ia64 and will group rarely written
data into this section.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
---
 arch/ia64/kernel/vmlinux.lds.S | 3 +++
 include/linux/cache.h          | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 30d8564e9603..73af6267d2ef 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -177,6 +177,9 @@ SECTIONS
 	}
   . = ALIGN(PAGE_SIZE);		/* make sure the gate page doesn't expose kernel data */
 
+  .data.read_mostly : AT(ADDR(.data.read_mostly) - LOAD_OFFSET)
+        { *(.data.read_mostly) }
+
   .data.cacheline_aligned : AT(ADDR(.data.cacheline_aligned) - LOAD_OFFSET)
         { *(.data.cacheline_aligned) }
 
diff --git a/include/linux/cache.h b/include/linux/cache.h
index f6b5a46c5f82..0b7ecf3af78a 100644
--- a/include/linux/cache.h
+++ b/include/linux/cache.h
@@ -13,7 +13,7 @@
 #define SMP_CACHE_BYTES L1_CACHE_BYTES
 #endif
 
-#if defined(CONFIG_X86) || defined(CONFIG_SPARC64)
+#if defined(CONFIG_X86) || defined(CONFIG_SPARC64) || defined(CONFIG_IA64)
 #define __read_mostly __attribute__((__section__(".data.read_mostly")))
 #else
 #define __read_mostly
-- 
cgit v1.2.3


From 37b1cc3910f7976369fc0ed55068a686e92555e6 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Fri, 16 Dec 2005 11:17:29 +0900
Subject: [PATCH] mtd onenand driver: check correct manufacturer

This (and the three subsequent patches) is working well on OMAP H4 with
2.6.15-rc4 kernel and passes the LTP fs test.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/mtd/onenand/onenand_base.c | 16 ++++++++++------
 include/linux/mtd/onenand.h        |  1 -
 2 files changed, 10 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index f67d5d6eb9a6..33d6f5c2e053 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1346,7 +1346,6 @@ static void onenand_print_device_info(int device)
 
 static const struct onenand_manufacturers onenand_manuf_ids[] = {
         {ONENAND_MFR_SAMSUNG, "Samsung"},
-        {ONENAND_MFR_UNKNOWN, "Unknown"}
 };
 
 /**
@@ -1357,17 +1356,22 @@ static const struct onenand_manufacturers onenand_manuf_ids[] = {
  */
 static int onenand_check_maf(int manuf)
 {
+	int size = ARRAY_SIZE(onenand_manuf_ids);
+	char *name;
         int i;
 
-        for (i = 0; onenand_manuf_ids[i].id; i++) {
+	for (i = 0; i < size; i++)
                 if (manuf == onenand_manuf_ids[i].id)
                         break;
-        }
 
-        printk(KERN_DEBUG "OneNAND Manufacturer: %s (0x%0x)\n",
-                onenand_manuf_ids[i].name, manuf);
+	if (i < size)
+		name = onenand_manuf_ids[i].name;
+	else
+		name = "Unknown";
+
+	printk(KERN_DEBUG "OneNAND Manufacturer: %s (0x%0x)\n", name, manuf);
 
-        return (i != ONENAND_MFR_UNKNOWN);
+	return (i == size);
 }
 
 /**
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index f1fd4215686a..53423d3b43bf 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -140,7 +140,6 @@ struct onenand_chip {
  * OneNAND Flash Manufacturer ID Codes
  */
 #define ONENAND_MFR_SAMSUNG	0xec
-#define ONENAND_MFR_UNKNOWN	0x00
 
 /**
  * struct nand_manufacturers - NAND Flash Manufacturer ID Structure
-- 
cgit v1.2.3


From 532a37cf8d05dd1aa5631be836036204b0d2b4a1 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Fri, 16 Dec 2005 11:17:29 +0900
Subject: [PATCH] mtd onenand driver: reduce stack usage

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/mtd/onenand/onenand_base.c | 27 ++++++++++++++++++++++++---
 drivers/mtd/onenand/onenand_bbt.c  |  4 ++--
 include/linux/mtd/onenand.h        |  4 ++--
 3 files changed, 28 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index d57afbaaedc4..a53a73fc2a5a 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -940,7 +940,7 @@ static int onenand_writev_ecc(struct mtd_info *mtd, const struct kvec *vecs,
 	u_char *eccbuf, struct nand_oobinfo *oobsel)
 {
 	struct onenand_chip *this = mtd->priv;
-	unsigned char buffer[MAX_ONENAND_PAGESIZE], *pbuf;
+	unsigned char *pbuf;
 	size_t total_len, len;
 	int i, written = 0;
 	int ret = 0;
@@ -975,7 +975,7 @@ static int onenand_writev_ecc(struct mtd_info *mtd, const struct kvec *vecs,
 	/* Loop until all keve's data has been written */
 	len = 0;
 	while (count) {
-		pbuf = buffer;
+		pbuf = this->page_buf;
 		/*
 		 * If the given tuple is >= pagesize then
 		 * write it out from the iov
@@ -995,7 +995,7 @@ static int onenand_writev_ecc(struct mtd_info *mtd, const struct kvec *vecs,
 			int cnt = 0, thislen;
 			while (cnt < mtd->oobblock) {
 				thislen = min_t(int, mtd->oobblock - cnt, vecs->iov_len - len);
-				memcpy(buffer + cnt, vecs->iov_base + len, thislen);
+				memcpy(this->page_buf + cnt, vecs->iov_base + len, thislen);
 				cnt += thislen;
 				len += thislen;
 
@@ -1519,6 +1519,18 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 		this->read_bufferram = onenand_sync_read_bufferram;
 	}
 
+	/* Allocate buffers, if necessary */
+	if (!this->page_buf) {
+		size_t len;
+		len = mtd->oobblock + mtd->oobsize;
+		this->page_buf = kmalloc(len, GFP_KERNEL);
+		if (!this->page_buf) {
+			printk(KERN_ERR "onenand_scan(): Can't allocate page_buf\n");
+			return -ENOMEM;
+		}
+		this->options |= ONENAND_PAGEBUF_ALLOC;
+	}
+
 	this->state = FL_READY;
 	init_waitqueue_head(&this->wq);
 	spin_lock_init(&this->chip_lock);
@@ -1580,12 +1592,21 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
  */
 void onenand_release(struct mtd_info *mtd)
 {
+	struct onenand_chip *this = mtd->priv;
+
 #ifdef CONFIG_MTD_PARTITIONS
 	/* Deregister partitions */
 	del_mtd_partitions (mtd);
 #endif
 	/* Deregister the device */
 	del_mtd_device (mtd);
+
+	/* Free bad block table memory, if allocated */
+	if (this->bbm)
+		kfree(this->bbm);
+	/* Buffer allocated by onenand_scan */
+	if (this->options & ONENAND_PAGEBUF_ALLOC)
+		kfree(this->page_buf);
 }
 
 EXPORT_SYMBOL_GPL(onenand_scan);
diff --git a/drivers/mtd/onenand/onenand_bbt.c b/drivers/mtd/onenand/onenand_bbt.c
index f40190f499e1..4510d3361eaa 100644
--- a/drivers/mtd/onenand/onenand_bbt.c
+++ b/drivers/mtd/onenand/onenand_bbt.c
@@ -118,10 +118,10 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf, struct nand_bbt_descr
  */
 static inline int onenand_memory_bbt (struct mtd_info *mtd, struct nand_bbt_descr *bd)
 {
-	unsigned char data_buf[MAX_ONENAND_PAGESIZE];
+	struct onenand_chip *this = mtd->priv;
 
         bd->options &= ~NAND_BBT_SCANEMPTY;
-        return create_bbt(mtd, data_buf, bd, -1);
+	return create_bbt(mtd, this->page_buf, bd, -1);
 }
 
 /**
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 53423d3b43bf..7419b5fab133 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -17,7 +17,6 @@
 #include <linux/mtd/bbm.h>
 
 #define MAX_BUFFERRAM		2
-#define MAX_ONENAND_PAGESIZE	(2048 + 64)
 
 /* Scan and identify a OneNAND device */
 extern int onenand_scan(struct mtd_info *mtd, int max_chips);
@@ -110,6 +109,7 @@ struct onenand_chip {
 	spinlock_t		chip_lock;
 	wait_queue_head_t	wq;
 	onenand_state_t		state;
+	unsigned char		*page_buf;
 
 	struct nand_oobinfo	*autooob;
 
@@ -134,7 +134,7 @@ struct onenand_chip {
  * Options bits
  */
 #define ONENAND_CONT_LOCK		(0x0001)
-
+#define ONENAND_PAGEBUF_ALLOC		(0x1000)
 
 /*
  * OneNAND Flash Manufacturer ID Codes
-- 
cgit v1.2.3


From 6b80ebedbee87c5b2213fc3635bf0bd7450bce30 Mon Sep 17 00:00:00 2001
From: Kristian Slavov <kristian.slavov@nomadiclab.com>
Date: Mon, 19 Dec 2005 13:54:44 -0800
Subject: [RTNETLINK]: Fix RTNLGRP definitions in rtnetlink.h

I reported a problem and gave hints to the solution, but nobody seemed
to react. So I prepared a patch against 2.6.14.4.

Tested on 2.6.14.4 with "ip monitor addr" and with the program
attached, while adding and removing IPv6 address. Both programs didn't
receive any messages.  Tested 2.6.14.4 + this patch, and both programs
received add and remove messages.

Signed-off-by: Kristian Slavov <kristian.slavov@nomadiclab.com>
Acked-by: Jamal Hadi salim <hadi@cyberus.ca>
ACKed-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rtnetlink.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index c231e9a08f0b..d50482ba27fe 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -866,6 +866,7 @@ enum rtnetlink_groups {
 #define	RTNLGRP_IPV4_MROUTE	RTNLGRP_IPV4_MROUTE
 	RTNLGRP_IPV4_ROUTE,
 #define RTNLGRP_IPV4_ROUTE	RTNLGRP_IPV4_ROUTE
+	RTNLGRP_NOP1,
 	RTNLGRP_IPV6_IFADDR,
 #define RTNLGRP_IPV6_IFADDR	RTNLGRP_IPV6_IFADDR
 	RTNLGRP_IPV6_MROUTE,
@@ -876,8 +877,11 @@ enum rtnetlink_groups {
 #define RTNLGRP_IPV6_IFINFO	RTNLGRP_IPV6_IFINFO
 	RTNLGRP_DECnet_IFADDR,
 #define RTNLGRP_DECnet_IFADDR	RTNLGRP_DECnet_IFADDR
+	RTNLGRP_NOP2,
 	RTNLGRP_DECnet_ROUTE,
 #define RTNLGRP_DECnet_ROUTE	RTNLGRP_DECnet_ROUTE
+	RTNLGRP_NOP3,
+	RTNLGRP_NOP4,
 	RTNLGRP_IPV6_PREFIX,
 #define RTNLGRP_IPV6_PREFIX	RTNLGRP_IPV6_PREFIX
 	__RTNLGRP_MAX
-- 
cgit v1.2.3


From 29884df0d89c1df0dec3449405bc41569bb44800 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 13 Dec 2005 16:13:54 -0500
Subject: NFS: Fix another O_DIRECT race

 Ensure we call unmap_mapping_range() and sync dirty pages to disk before
 doing an NFS direct write.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c        | 24 ++++++------------------
 fs/nfs/file.c          | 23 ++++-------------------
 fs/nfs/inode.c         | 28 +++++++++++++++++++++++-----
 include/linux/nfs_fs.h |  1 +
 4 files changed, 34 insertions(+), 42 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b497c71384e8..079228817603 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -678,15 +678,9 @@ nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t
 	if (!count)
 		goto out;
 
-	if (mapping->nrpages) {
-		retval = filemap_fdatawrite(mapping);
-		if (retval == 0)
-			retval = nfs_wb_all(inode);
-		if (retval == 0)
-			retval = filemap_fdatawait(mapping);
-		if (retval)
-			goto out;
-	}
+	retval = nfs_sync_mapping(mapping);
+	if (retval)
+		goto out;
 
 	retval = nfs_direct_read(inode, ctx, &iov, pos, 1);
 	if (retval > 0)
@@ -764,15 +758,9 @@ nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count,
 	if (!count)
 		goto out;
 
-	if (mapping->nrpages) {
-		retval = filemap_fdatawrite(mapping);
-		if (retval == 0)
-			retval = nfs_wb_all(inode);
-		if (retval == 0)
-			retval = filemap_fdatawait(mapping);
-		if (retval)
-			goto out;
-	}
+	retval = nfs_sync_mapping(mapping);
+	if (retval)
+		goto out;
 
 	retval = nfs_direct_write(inode, ctx, &iov, pos, 1);
 	if (mapping->nrpages)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 57d3e77d97ee..eb5cd4c3bbfd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -433,11 +433,7 @@ static int do_unlk(struct file *filp, int cmd, struct file_lock *fl)
 	 * Flush all pending writes before doing anything
 	 * with locks..
 	 */
-	filemap_fdatawrite(filp->f_mapping);
-	down(&inode->i_sem);
-	nfs_wb_all(inode);
-	up(&inode->i_sem);
-	filemap_fdatawait(filp->f_mapping);
+	nfs_sync_mapping(filp->f_mapping);
 
 	/* NOTE: special case
 	 * 	If we're signalled while cleaning up locks on process exit, we
@@ -465,15 +461,8 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 	 * Flush all pending writes before doing anything
 	 * with locks..
 	 */
-	status = filemap_fdatawrite(filp->f_mapping);
-	if (status == 0) {
-		down(&inode->i_sem);
-		status = nfs_wb_all(inode);
-		up(&inode->i_sem);
-		if (status == 0)
-			status = filemap_fdatawait(filp->f_mapping);
-	}
-	if (status < 0)
+	status = nfs_sync_mapping(filp->f_mapping);
+	if (status != 0)
 		goto out;
 
 	lock_kernel();
@@ -497,11 +486,7 @@ static int do_setlk(struct file *filp, int cmd, struct file_lock *fl)
 	 * Make sure we clear the cache whenever we try to get the lock.
 	 * This makes locking act as a cache coherency point.
 	 */
-	filemap_fdatawrite(filp->f_mapping);
-	down(&inode->i_sem);
-	nfs_wb_all(inode);	/* we may have slept */
-	up(&inode->i_sem);
-	filemap_fdatawait(filp->f_mapping);
+	nfs_sync_mapping(filp->f_mapping);
 	nfs_zap_caches(inode);
 out:
 	rpc_clnt_sigunmask(NFS_CLIENT(inode), &oldset);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index afd75d0463fd..432f41cd75e6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -640,6 +640,27 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
 	return 0;
 }
 
+/**
+ * nfs_sync_mapping - helper to flush all mmapped dirty data to disk
+ */
+int nfs_sync_mapping(struct address_space *mapping)
+{
+	int ret;
+
+	if (mapping->nrpages == 0)
+		return 0;
+	unmap_mapping_range(mapping, 0, 0, 0);
+	ret = filemap_fdatawrite(mapping);
+	if (ret != 0)
+		goto out;
+	ret = filemap_fdatawait(mapping);
+	if (ret != 0)
+		goto out;
+	ret = nfs_wb_all(mapping->host);
+out:
+	return ret;
+}
+
 /*
  * Invalidate the local caches
  */
@@ -1179,11 +1200,8 @@ void nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
 	struct nfs_inode *nfsi = NFS_I(inode);
 
 	if (nfsi->cache_validity & NFS_INO_INVALID_DATA) {
-		if (S_ISREG(inode->i_mode)) {
-			if (filemap_fdatawrite(mapping) == 0)
-				filemap_fdatawait(mapping);
-			nfs_wb_all(inode);
-		}
+		if (S_ISREG(inode->i_mode))
+			nfs_sync_mapping(mapping);
 		invalidate_inode_pages2(mapping);
 
 		spin_lock(&inode->i_lock);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 12787a9b0259..2516adeccecf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -291,6 +291,7 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long
 /*
  * linux/fs/nfs/inode.c
  */
+extern int nfs_sync_mapping(struct address_space *mapping);
 extern void nfs_zap_caches(struct inode *);
 extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
 				struct nfs_fattr *);
-- 
cgit v1.2.3


From fd30fc3256824f03c2ff9317269d66f72f7042ca Mon Sep 17 00:00:00 2001
From: Tom Zanussi <zanussi@us.ibm.com>
Date: Tue, 20 Dec 2005 13:10:22 -0600
Subject: [PATCH] relayfs: remove warning printk() in relay_switch_subbuf()

There's currently a diagnostic printk in relay_switch_subbuf() meant as
a warning if you accidentally try to log an event larger than the
sub-buffer size.

The problem is if this happens while logging from somewhere it's not
safe to be doing printks, such as in the scheduler, you can end up with
a deadlock.  This patch removes the warning from relay_switch_subbuf()
and instead prints some diagnostic info when the channel is closed.

Thanks to Mathieu Desnoyers for pointing out the problem and
suggesting a fix.

Signed-off-by: Tom Zanussi <zanussi@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/relayfs/relay.c         | 8 ++++++--
 include/linux/relayfs_fs.h | 5 +++--
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/relayfs/relay.c b/fs/relayfs/relay.c
index 16446a15c96d..2a6f7f12b7f9 100644
--- a/fs/relayfs/relay.c
+++ b/fs/relayfs/relay.c
@@ -333,8 +333,7 @@ size_t relay_switch_subbuf(struct rchan_buf *buf, size_t length)
 	return length;
 
 toobig:
-	printk(KERN_WARNING "relayfs: event too large (%Zd)\n", length);
-	WARN_ON(1);
+	buf->chan->last_toobig = length;
 	return 0;
 }
 
@@ -399,6 +398,11 @@ void relay_close(struct rchan *chan)
 		relay_close_buf(chan->buf[i]);
 	}
 
+	if (chan->last_toobig)
+		printk(KERN_WARNING "relayfs: one or more items not logged "
+		       "[item size (%Zd) > sub-buffer size (%Zd)]\n",
+		       chan->last_toobig, chan->subbuf_size);
+
 	kref_put(&chan->kref, relay_destroy_channel);
 }
 
diff --git a/include/linux/relayfs_fs.h b/include/linux/relayfs_fs.h
index cfafc3e76bc2..fb7e80737325 100644
--- a/include/linux/relayfs_fs.h
+++ b/include/linux/relayfs_fs.h
@@ -20,9 +20,9 @@
 #include <linux/kref.h>
 
 /*
- * Tracks changes to rchan_buf struct
+ * Tracks changes to rchan/rchan_buf structs
  */
-#define RELAYFS_CHANNEL_VERSION		5
+#define RELAYFS_CHANNEL_VERSION		6
 
 /*
  * Per-cpu relay channel buffer
@@ -60,6 +60,7 @@ struct rchan
 	struct rchan_callbacks *cb;	/* client callbacks */
 	struct kref kref;		/* channel refcount */
 	void *private_data;		/* for user-defined data */
+	size_t last_toobig;		/* tried to log event > subbuf size */
 	struct rchan_buf *buf[NR_CPUS]; /* per-cpu channel buffers */
 };
 
-- 
cgit v1.2.3


From 58c4fb86eabcbc385d954843a635b7f4327be6b0 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 21 Dec 2005 22:56:42 +0900
Subject: [IPV6]: Flag RTF_ANYCAST for anycast routes.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/linux/ipv6_route.h |  1 +
 net/ipv6/route.c           | 13 +++++++++----
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6_route.h b/include/linux/ipv6_route.h
index e2f935038013..d7c41d1d706a 100644
--- a/include/linux/ipv6_route.h
+++ b/include/linux/ipv6_route.h
@@ -18,6 +18,7 @@
 					   fallback, no routers on link */
 #define RTF_ADDRCONF	0x00040000	/* addrconf route - RA		*/
 #define RTF_PREFIX_RT	0x00080000	/* A prefix only route - RA	*/
+#define RTF_ANYCAST	0x00100000	/* Anycast			*/
 
 #define RTF_NONEXTHOP	0x00200000	/* route with no nexthop	*/
 #define RTF_EXPIRES	0x00400000
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 7c68bfbee361..66140f13d119 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -413,11 +413,14 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
 	rt = ip6_rt_copy(ort);
 
 	if (rt) {
-		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
-
-		if (!(rt->rt6i_flags&RTF_GATEWAY))
+		if (!(rt->rt6i_flags&RTF_GATEWAY)) {
+			if (rt->rt6i_dst.plen != 128 &&
+			    ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
+				rt->rt6i_flags |= RTF_ANYCAST;
 			ipv6_addr_copy(&rt->rt6i_gateway, daddr);
+		}
 
+		ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 		rt->rt6i_dst.plen = 128;
 		rt->rt6i_flags |= RTF_CACHE;
 		rt->u.dst.flags |= DST_HOST;
@@ -1413,7 +1416,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 	rt->u.dst.obsolete = -1;
 
 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
-	if (!anycast)
+	if (anycast)
+		rt->rt6i_flags |= RTF_ANYCAST;
+	else
 		rt->rt6i_flags |= RTF_LOCAL;
 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 	if (rt->rt6i_nexthop == NULL) {
-- 
cgit v1.2.3


From 23f9b317e0ba4fbc5fc9524275d0105fa87e2027 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Wed, 21 Dec 2005 02:27:50 +0100
Subject: [PATCH] include/linux/irq.h: #include <linux/smp.h>

Jan's crosscompile page [1] shows, that one regression in 2.6.15-rc is
that the v850 defconfig does no longer compile.

The compile error is:

<--  snip  -->

...
  CC      arch/v850/kernel/setup.o
In file included from /usr/src/ctest/rc/kernel/arch/v850/kernel/setup.c:17:
/usr/src/ctest/rc/kernel/include/linux/irq.h:13:43: asm/smp.h: No such file or directory
make[2]: *** [arch/v850/kernel/setup.o] Error 1

<--  snip  -->

The #include <asm/smp.h> in irq.h was intruduced in 2.6.15-rc.

Since include/linux/irq.h needs code from asm/smp.h only in the
CONFIG_SMP=y case and linux/smp.h #include's asm/smp.h only in the
CONFIG_SMP=y case, I'm suggesting this patch to #include <linux/smp.h>
in irq.h.

I've tested the compilation with both CONFIG_SMP=y and CONFIG_SMP=n
on i386.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index c516382fbec2..f04ba20712a2 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -10,7 +10,7 @@
  */
 
 #include <linux/config.h>
-#include <asm/smp.h>		/* cpu_online_map */
+#include <linux/smp.h>
 
 #if !defined(CONFIG_ARCH_S390)
 
-- 
cgit v1.2.3


From d6f029130fb83b36fb709a187275b0494035d689 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nico@cam.org>
Date: Wed, 21 Dec 2005 12:26:25 -0500
Subject: [PATCH] fix race with preempt_enable()

Currently a simple

	void foo(void) { preempt_enable(); }

produces the following code on ARM:

foo:
	bic	r3, sp, #8128
	bic	r3, r3, #63
	ldr	r2, [r3, #4]
	ldr	r1, [r3, #0]
	sub	r2, r2, #1
	tst	r1, #4
	str	r2, [r3, #4]
	blne	preempt_schedule
	mov	pc, lr

The problem is that the TIF_NEED_RESCHED flag is loaded _before_ the
preemption count is stored back, hence any interrupt coming within that
3 instruction window causing TIF_NEED_RESCHED to be set won't be
seen and scheduling won't happen as it should.

Nothing currently prevents gcc from performing that reordering.  There
is already a barrier() before the decrement of the preemption count, but
another one is needed between this and the TIF_NEED_RESCHED flag test
for proper code ordering.

Signed-off-by: Nicolas Pitre <nico@cam.org>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/preempt.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index d9a2f5254a51..5769d14d1e6a 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -48,6 +48,7 @@ do { \
 #define preempt_enable() \
 do { \
 	preempt_enable_no_resched(); \
+	barrier(); \
 	preempt_check_resched(); \
 } while (0)
 
-- 
cgit v1.2.3


From 01e33b5a2a153eec74dd87522e264948030b88c1 Mon Sep 17 00:00:00 2001
From: Kurt Huwig <k.huwig@iku-ag.de>
Date: Sun, 25 Dec 2005 00:13:08 +0100
Subject: [PATCH] n_r3964: fixed usage of HZ; removed bad include

Fix n_r3964 timeouts (hardcoded for 100Hz)

Also the include of <asm/termios.h> in 'n_r3964.h' is unnecessary and
prevents using the header file in any application that has to include
<termios.h> due to duplicate definition of 'struct termio'.

Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/n_r3964.h | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h
index 2352bcd31a06..db4f3776978a 100644
--- a/include/linux/n_r3964.h
+++ b/include/linux/n_r3964.h
@@ -13,6 +13,10 @@
  * L. Haag
  *
  * $Log: r3964.h,v $
+ * Revision 1.4  2005/12/21 19:54:24  Kurt Huwig <kurt huwig de>
+ * Fixed HZ usage on 2.6 kernels
+ * Removed unnecessary include
+ *
  * Revision 1.3  2001/03/18 13:02:24  dwmw2
  * Fix timer usage, use spinlocks properly.
  *
@@ -45,9 +49,11 @@
 #define __LINUX_N_R3964_H__
 
 /* line disciplines for r3964 protocol */
-#include <asm/termios.h>
 
 #ifdef __KERNEL__
+
+#include <linux/param.h>
+
 /*
  * Common ascii handshake characters:
  */
@@ -58,14 +64,14 @@
 #define NAK 0x15
 
 /*
- * Timeouts (msecs/10 msecs per timer interrupt):
+ * Timeouts (from milliseconds to jiffies)
  */
 
-#define R3964_TO_QVZ 550/10
-#define R3964_TO_ZVZ 220/10
-#define R3964_TO_NO_BUF 400/10
-#define R3964_NO_TX_ROOM 100/10
-#define R3964_TO_RX_PANIC 4000/10
+#define R3964_TO_QVZ ((550)*HZ/1000)
+#define R3964_TO_ZVZ ((220)*HZ/1000)
+#define R3964_TO_NO_BUF ((400)*HZ/1000)
+#define R3964_NO_TX_ROOM ((100)*HZ/1000)
+#define R3964_TO_RX_PANIC ((4000)*HZ/1000)
 #define R3964_MAX_RETRIES 5
 
 #endif
-- 
cgit v1.2.3


From f83b5e323f57d6e1f35a839d663e91cebe985e54 Mon Sep 17 00:00:00 2001
From: Ustyugov Roman <dr_unique@ymg.ru>
Date: Fri, 23 Sep 2005 08:42:11 +0400
Subject: kbuild: set correct KBUILD_MODNAME when using well known kernel
 symbols as module names

This patch fixes a problem when we use well known kernel symbols as module
names.

For example, if module source name is current.c, idle_stack.c or etc.,
we have a bad KBUILD_MODNAME value.
For example, KBUILD_MODNAME will be "get_current()" instead of "current", or
"(init_thread_union.stack)" instead of "idle_task".

The trick is to define a stringify macro on the commandline - named
KBUILD_STR for namespace reasons - and then to stringify the module
name.

There are a few uses of KBUILD_MODNAME throughout the tree but the usage
is for debug and will not be harmed by this change so left untouched for now.

While at it KBUILD_BASENAME was changed too. Any spinlock usage in the
unix module would have created wrong section names without it.
Usage in spinlock.h fixed so it no longer stringify KBUILD_BASENAME.

Original patch from Ustyogov Roman - all bugs introduced by me.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 include/linux/spinlock.h | 3 +--
 scripts/Makefile.lib     | 8 +++++---
 scripts/mod/modpost.c    | 3 +--
 3 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h
index 0e9682c9def5..799be6747944 100644
--- a/include/linux/spinlock.h
+++ b/include/linux/spinlock.h
@@ -59,8 +59,7 @@
 /*
  * Must define these before including other files, inline functions need them
  */
-#define LOCK_SECTION_NAME                       \
-        ".text.lock." __stringify(KBUILD_BASENAME)
+#define LOCK_SECTION_NAME ".text.lock."KBUILD_BASENAME
 
 #define LOCK_SECTION_START(extra)               \
         ".subsection 1\n\t"                     \
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 0f81dcfd6909..550798f57da5 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -81,8 +81,10 @@ obj-dirs	:= $(addprefix $(obj)/,$(obj-dirs))
 # Note: It's possible that one object gets potentially linked into more
 #       than one module. In that case KBUILD_MODNAME will be set to foo_bar,
 #       where foo and bar are the name of the modules.
-basename_flags = -DKBUILD_BASENAME=$(subst $(comma),_,$(subst -,_,$(*F)))
-modname_flags  = $(if $(filter 1,$(words $(modname))),-DKBUILD_MODNAME=$(subst $(comma),_,$(subst -,_,$(modname))))
+name-fix = $(subst $(comma),_,$(subst -,_,$1))
+basename_flags = -D"KBUILD_BASENAME=KBUILD_STR($(call name-fix,$(*F)))"
+modname_flags  = $(if $(filter 1,$(words $(modname))),\
+                 -D"KBUILD_MODNAME=KBUILD_STR($(call name-fix,$(modname)))")
 
 _c_flags       = $(CFLAGS) $(EXTRA_CFLAGS) $(CFLAGS_$(*F).o)
 _a_flags       = $(AFLAGS) $(EXTRA_AFLAGS) $(AFLAGS_$(*F).o)
@@ -113,7 +115,7 @@ endif
 
 c_flags        = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(CPPFLAGS) \
 		 $(__c_flags) $(modkern_cflags) \
-		 $(basename_flags) $(modname_flags)
+		 -D"KBUILD_STR(s)=\#s" $(basename_flags) $(modname_flags)
 
 a_flags        = -Wp,-MD,$(depfile) $(NOSTDINC_FLAGS) $(CPPFLAGS) \
 		 $(__a_flags) $(modkern_aflags)
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 8ce5a6318684..f70ff13d4818 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -539,10 +539,9 @@ add_header(struct buffer *b, struct module *mod)
 	buf_printf(b, "\n");
 	buf_printf(b, "MODULE_INFO(vermagic, VERMAGIC_STRING);\n");
 	buf_printf(b, "\n");
-	buf_printf(b, "#undef unix\n"); /* We have a module called "unix" */
 	buf_printf(b, "struct module __this_module\n");
 	buf_printf(b, "__attribute__((section(\".gnu.linkonce.this_module\"))) = {\n");
-	buf_printf(b, " .name = __stringify(KBUILD_MODNAME),\n");
+	buf_printf(b, " .name = KBUILD_MODNAME,\n");
 	if (mod->has_init)
 		buf_printf(b, " .init = init_module,\n");
 	if (mod->has_cleanup)
-- 
cgit v1.2.3


From 9b4ffa48ae855c8657a36014c5b0243ff69f4722 Mon Sep 17 00:00:00 2001
From: Jaya Kumar <jayakumar.alsa@gmail.com>
Date: Thu, 17 Nov 2005 10:12:23 +0100
Subject: [ALSA] Add support for the CS5535 Audio device

Add support for the CS5535 Audio device.  I've fixed up some errors as per
Takashi's advice from the thread:

http://lkml.org/lkml/2005/9/15/119

 From: Alan Cox <alan@lxorguk.ukuu.org.uk>

        cs5535 is a 32bit x86 only device using weird CPU features

Signed-off-by: Jaya Kumar <jayakumar.alsa@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 CREDITS                                 |   1 +
 MAINTAINERS                             |   5 +
 include/linux/pci_ids.h                 |   4 +
 sound/pci/Kconfig                       |  13 +
 sound/pci/Makefile                      |   1 +
 sound/pci/cs5535audio/Makefile          |   8 +
 sound/pci/cs5535audio/cs5535audio.c     | 410 ++++++++++++++++++++++++++++++
 sound/pci/cs5535audio/cs5535audio.h     | 123 +++++++++
 sound/pci/cs5535audio/cs5535audio_pcm.c | 430 ++++++++++++++++++++++++++++++++
 9 files changed, 995 insertions(+)
 create mode 100644 sound/pci/cs5535audio/Makefile
 create mode 100644 sound/pci/cs5535audio/cs5535audio.c
 create mode 100644 sound/pci/cs5535audio/cs5535audio.h
 create mode 100644 sound/pci/cs5535audio/cs5535audio_pcm.c

(limited to 'include/linux')

diff --git a/CREDITS b/CREDITS
index 1b4f8694fa48..521f00d1b549 100644
--- a/CREDITS
+++ b/CREDITS
@@ -1883,6 +1883,7 @@ N: Jaya Kumar
 E: jayalk@intworks.biz
 W: http://www.intworks.biz
 D: Arc monochrome LCD framebuffer driver, x86 reboot fixups
+D: pirq addr, CS5535 alsa audio driver
 S: Gurgaon, India
 S: Kuala Lumpur, Malaysia
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 6af683025ae0..93f97b3afac5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -650,6 +650,11 @@ L:	linux-crypto@vger.kernel.org
 T:	git kernel.org:/pub/scm/linux/kernel/git/herbert/crypto-2.6.git
 S:	Maintained
 
+CS5535 Audio ALSA driver
+P:	Jaya Kumar
+M:	jayakumar.alsa@gmail.com
+S:	Maintained
+
 CYBERPRO FB DRIVER
 P:	Russell King
 M:	rmk@arm.linux.org.uk
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4db67b3b05cc..9093f118f99d 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -376,6 +376,10 @@
 #define PCI_DEVICE_ID_NS_87560_USB	0x0012
 #define PCI_DEVICE_ID_NS_83815		0x0020
 #define PCI_DEVICE_ID_NS_83820		0x0022
+#define PCI_DEVICE_ID_NS_CS5535_IDE	0x002d
+#define PCI_DEVICE_ID_NS_CS5535_AUDIO	0x002e
+#define PCI_DEVICE_ID_NS_CS5535_USB	0x002f
+#define PCI_DEVICE_ID_NS_CS5535_VIDEO	0x0030
 #define PCI_DEVICE_ID_NS_SATURN		0x0035
 #define PCI_DEVICE_ID_NS_SCx200_BRIDGE	0x0500
 #define PCI_DEVICE_ID_NS_SCx200_SMI	0x0501
diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
index 0fb16cf335ea..920305c7402f 100644
--- a/sound/pci/Kconfig
+++ b/sound/pci/Kconfig
@@ -359,6 +359,19 @@ config SND_ENS1370
 	  To compile this driver as a module, choose M here: the module
 	  will be called snd-ens1370.
 
+config SND_CS5535AUDIO
+	tristate "CS5535 Audio"
+	depends on SND && X86 && !X86_64
+	select SND_PCM
+	select SND_AC97_CODEC
+	help
+	  Say Y here to include support for audio on CS5535 chips. It is
+	  referred to as NS CS5535 IO or AMD CS5535 IO companion in
+	  various literature.
+
+	  To compile this driver as a module, choose M here: the module
+	  will be called snd-cs5535audio.
+
 config SND_ENS1371
 	tristate "(Creative) Ensoniq AudioPCI 1371/1373"
 	depends on SND
diff --git a/sound/pci/Makefile b/sound/pci/Makefile
index 42fabfcfc2a9..82a9c734f84d 100644
--- a/sound/pci/Makefile
+++ b/sound/pci/Makefile
@@ -54,6 +54,7 @@ obj-$(CONFIG_SND) += \
 	au88x0/ \
 	ca0106/ \
 	cs46xx/ \
+	cs5535audio/ \
 	emu10k1/ \
 	hda/ \
 	ice1712/ \
diff --git a/sound/pci/cs5535audio/Makefile b/sound/pci/cs5535audio/Makefile
new file mode 100644
index 000000000000..08d8ee6547d3
--- /dev/null
+++ b/sound/pci/cs5535audio/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for cs5535audio
+#
+
+snd-cs5535audio-objs := cs5535audio.o cs5535audio_pcm.o
+
+# Toplevel Module Dependency
+obj-$(CONFIG_SND_CS5535AUDIO) += snd-cs5535audio.o
diff --git a/sound/pci/cs5535audio/cs5535audio.c b/sound/pci/cs5535audio/cs5535audio.c
new file mode 100644
index 000000000000..920c857fc223
--- /dev/null
+++ b/sound/pci/cs5535audio/cs5535audio.c
@@ -0,0 +1,410 @@
+/*
+ * Driver for audio on multifunction CS5535 companion device
+ * Copyright (C) Jaya Kumar
+ *
+ * Based on Jaroslav Kysela and Takashi Iwai's examples.
+ * This work was sponsored by CIS(M) Sdn Bhd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/moduleparam.h>
+#include <asm/io.h>
+#include <sound/driver.h>
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/pcm.h>
+#include <sound/rawmidi.h>
+#include <sound/ac97_codec.h>
+#include <sound/initval.h>
+#include <sound/asoundef.h>
+#include "cs5535audio.h"
+
+#define DRIVER_NAME "cs5535audio"
+
+
+static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
+static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
+static int enable[SNDRV_CARDS] = SNDRV_DEFAULT_ENABLE_PNP;
+
+static struct pci_device_id snd_cs5535audio_ids[] = {
+	{ PCI_VENDOR_ID_NS, PCI_DEVICE_ID_NS_CS5535_AUDIO, PCI_ANY_ID,
+		PCI_ANY_ID, 0, 0, 0, },
+	{}
+};
+
+MODULE_DEVICE_TABLE(pci, snd_cs5535audio_ids);
+
+static void wait_till_cmd_acked(cs5535audio_t *cs5535au, unsigned long timeout)
+{
+	unsigned long tmp;
+	do {
+		tmp = cs_readl(cs5535au, ACC_CODEC_CNTL);
+		if (!(tmp & CMD_NEW))
+			break;
+		msleep(10);
+	} while (--timeout);
+	if (!timeout)
+		snd_printk(KERN_ERR "Failure writing to cs5535 codec\n");
+}
+
+static unsigned short snd_cs5535audio_codec_read(cs5535audio_t *cs5535au,
+							unsigned short reg)
+{
+	unsigned long regdata;
+	unsigned long timeout;
+	unsigned long val;
+
+	regdata = ((unsigned long) reg) << 24;
+	regdata |= ACC_CODEC_CNTL_RD_CMD;
+	regdata |= CMD_NEW;
+
+	cs_writel(cs5535au, ACC_CODEC_CNTL, regdata);
+	wait_till_cmd_acked(cs5535au, 500);
+
+	timeout = 50;
+	do {
+		val = cs_readl(cs5535au, ACC_CODEC_STATUS);
+		if (	(val & STS_NEW) &&
+			((unsigned long) reg == ((0xFF000000 & val)>>24)) )
+			break;
+		msleep(10);
+	} while (--timeout);
+	if (!timeout)
+		snd_printk(KERN_ERR "Failure reading cs5535 codec\n");
+
+	return ((unsigned short) val);
+}
+
+static void snd_cs5535audio_codec_write(cs5535audio_t *cs5535au,
+				   unsigned short reg, unsigned short val)
+{
+	unsigned long regdata;
+
+	regdata = ((unsigned long) reg) << 24;
+	regdata |= (unsigned long) val;
+	regdata &= CMD_MASK;
+	regdata |= CMD_NEW;
+	regdata &= ACC_CODEC_CNTL_WR_CMD;
+
+	cs_writel(cs5535au, ACC_CODEC_CNTL, regdata);
+	wait_till_cmd_acked(cs5535au, 50);
+}
+
+static void snd_cs5535audio_ac97_codec_write(ac97_t *ac97,
+				   unsigned short reg, unsigned short val)
+{
+	cs5535audio_t *cs5535au = ac97->private_data;
+	snd_cs5535audio_codec_write(cs5535au, reg, val);
+}
+
+static unsigned short snd_cs5535audio_ac97_codec_read(ac97_t *ac97,
+					    unsigned short reg)
+{
+	cs5535audio_t *cs5535au = ac97->private_data;
+	return snd_cs5535audio_codec_read(cs5535au, reg);
+}
+
+static void snd_cs5535audio_mixer_free_ac97(ac97_t *ac97)
+{
+	cs5535audio_t *cs5535audio = ac97->private_data;
+	cs5535audio->ac97 = NULL;
+}
+
+static int snd_cs5535audio_mixer(cs5535audio_t *cs5535au)
+{
+	snd_card_t *card = cs5535au->card;
+	ac97_bus_t *pbus;
+	ac97_template_t ac97;
+	int err;
+	static ac97_bus_ops_t ops = {
+		.write = snd_cs5535audio_ac97_codec_write,
+		.read = snd_cs5535audio_ac97_codec_read,
+	};
+
+	if ((err = snd_ac97_bus(card, 0, &ops, NULL, &pbus)) < 0)
+		return err;
+
+	memset(&ac97, 0, sizeof(ac97));
+	ac97.scaps = AC97_SCAP_AUDIO|AC97_SCAP_SKIP_MODEM;
+	ac97.private_data = cs5535au;
+	ac97.pci = cs5535au->pci;
+	ac97.private_free = snd_cs5535audio_mixer_free_ac97;
+
+	if ((err = snd_ac97_mixer(pbus, &ac97, &cs5535au->ac97)) < 0) {
+		snd_printk("mixer failed\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static void process_bm0_irq(cs5535audio_t *cs5535au)
+{
+	u8 bm_stat;
+	spin_lock(&cs5535au->reg_lock);
+	bm_stat = cs_readb(cs5535au, ACC_BM0_STATUS);
+	spin_unlock(&cs5535au->reg_lock);
+	if (bm_stat & EOP) {
+		cs5535audio_dma_t *dma;
+		dma = cs5535au->playback_substream->runtime->private_data;
+		snd_pcm_period_elapsed(cs5535au->playback_substream);
+	} else {
+		snd_printk(KERN_ERR "unexpected bm0 irq src, bm_stat=%x\n",
+					bm_stat);
+	}
+}
+
+static void process_bm1_irq(cs5535audio_t *cs5535au)
+{
+	u8 bm_stat;
+	spin_lock(&cs5535au->reg_lock);
+	bm_stat = cs_readb(cs5535au, ACC_BM1_STATUS);
+	spin_unlock(&cs5535au->reg_lock);
+	if (bm_stat & EOP) {
+		cs5535audio_dma_t *dma;
+		dma = cs5535au->capture_substream->runtime->private_data;
+		snd_pcm_period_elapsed(cs5535au->capture_substream);
+	}
+}
+
+static irqreturn_t snd_cs5535audio_interrupt(int irq, void *dev_id,
+						struct pt_regs *regs)
+{
+	u16 acc_irq_stat;
+	u8 bm_stat;
+	unsigned char count;
+	cs5535audio_t *cs5535au = dev_id;
+
+	if (cs5535au == NULL)
+		return IRQ_NONE;
+
+	acc_irq_stat = cs_readw(cs5535au, ACC_IRQ_STATUS);
+
+	if (!acc_irq_stat)
+		return IRQ_NONE;
+	for (count=0; count < 10; count++) {
+		if (acc_irq_stat & (1<<count)) {
+			switch (count) {
+			case IRQ_STS:
+				cs_readl(cs5535au, ACC_GPIO_STATUS);
+				break;
+			case WU_IRQ_STS:
+				cs_readl(cs5535au, ACC_GPIO_STATUS);
+				break;
+			case BM0_IRQ_STS:
+				process_bm0_irq(cs5535au);
+				break;
+			case BM1_IRQ_STS:
+				process_bm1_irq(cs5535au);
+				break;
+			case BM2_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM2_STATUS);
+				break;
+			case BM3_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM3_STATUS);
+				break;
+			case BM4_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM4_STATUS);
+				break;
+			case BM5_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM5_STATUS);
+				break;
+			case BM6_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM6_STATUS);
+				break;
+			case BM7_IRQ_STS:
+				bm_stat = cs_readb(cs5535au, ACC_BM7_STATUS);
+				break;
+			default:
+				snd_printk(KERN_ERR "Unexpected irq src\n");
+				break;
+			}
+		}
+	}
+	return IRQ_HANDLED;
+}
+
+static int snd_cs5535audio_free(cs5535audio_t *cs5535au)
+{
+	synchronize_irq(cs5535au->irq);
+	pci_set_power_state(cs5535au->pci, 3);
+
+	if (cs5535au->irq >= 0)
+		free_irq(cs5535au->irq, cs5535au);
+
+	pci_release_regions(cs5535au->pci);
+	pci_disable_device(cs5535au->pci);
+	kfree(cs5535au);
+	return 0;
+}
+
+static int snd_cs5535audio_dev_free(snd_device_t *device)
+{
+	cs5535audio_t *cs5535au = device->device_data;
+	return snd_cs5535audio_free(cs5535au);
+}
+
+static int __devinit snd_cs5535audio_create(snd_card_t *card,
+				     struct pci_dev *pci,
+				     cs5535audio_t **rcs5535au)
+{
+	cs5535audio_t *cs5535au;
+
+	int err;
+	static snd_device_ops_t ops = {
+		.dev_free =	snd_cs5535audio_dev_free,
+	};
+
+	*rcs5535au = NULL;
+	if ((err = pci_enable_device(pci)) < 0)
+		return err;
+
+	if (pci_set_dma_mask(pci, DMA_32BIT_MASK) < 0 ||
+		pci_set_consistent_dma_mask(pci, DMA_32BIT_MASK) < 0) {
+		printk(KERN_WARNING "unable to get 32bit dma\n");
+		err = -ENXIO;
+		goto pcifail;
+	}
+
+	cs5535au = kzalloc(sizeof(*cs5535au), GFP_KERNEL);
+	if (cs5535au == NULL) {
+		err = -ENOMEM;
+		goto pcifail;
+	}
+
+	spin_lock_init(&cs5535au->reg_lock);
+	cs5535au->card = card;
+	cs5535au->pci = pci;
+	cs5535au->irq = -1;
+
+	if ((err = pci_request_regions(pci, "CS5535 Audio")) < 0) {
+		kfree(cs5535au);
+		goto pcifail;
+	}
+
+	cs5535au->port = pci_resource_start(pci, 0);
+
+	if (request_irq(pci->irq, snd_cs5535audio_interrupt,
+		SA_INTERRUPT|SA_SHIRQ, "CS5535 Audio", cs5535au)) {
+		snd_printk("unable to grab IRQ %d\n", pci->irq);
+		err = -EBUSY;
+		goto sndfail;
+	}
+
+	cs5535au->irq = pci->irq;
+	pci_set_master(pci);
+
+	if ((err = snd_device_new(card, SNDRV_DEV_LOWLEVEL,
+					cs5535au, &ops)) < 0)
+		goto sndfail;
+
+	snd_card_set_dev(card, &pci->dev);
+
+	*rcs5535au = cs5535au;
+	return 0;
+
+sndfail: /* leave the device alive, just kill the snd */
+	snd_cs5535audio_free(cs5535au);
+	return err;
+
+pcifail:
+	pci_disable_device(pci);
+	return err;
+}
+
+static int __devinit snd_cs5535audio_probe(struct pci_dev *pci,
+					const struct pci_device_id *pci_id)
+{
+	static int dev;
+	snd_card_t *card;
+	cs5535audio_t *cs5535au;
+	int err;
+
+	if (dev >= SNDRV_CARDS)
+		return -ENODEV;
+	if (!enable[dev]) {
+		dev++;
+		return -ENOENT;
+	}
+
+	card = snd_card_new(index[dev], id[dev], THIS_MODULE, 0);
+	if (card == NULL)
+		return -ENOMEM;
+
+	if ((err = snd_cs5535audio_create(card, pci, &cs5535au)) < 0)
+		goto probefail_out;
+
+	if ((err = snd_cs5535audio_mixer(cs5535au)) < 0)
+		goto probefail_out;
+
+	if ((err = snd_cs5535audio_pcm(cs5535au)) < 0)
+		goto probefail_out;
+
+	strcpy(card->driver, DRIVER_NAME);
+
+	strcpy(card->shortname, "CS5535 Audio");
+	sprintf(card->longname, "%s %s at 0x%lx, irq %i",
+		card->shortname, card->driver,
+		cs5535au->port, cs5535au->irq);
+
+	if ((err = snd_card_register(card)) < 0)
+		goto probefail_out;
+
+	pci_set_drvdata(pci, card);
+	dev++;
+	return 0;
+
+probefail_out:
+	snd_card_free(card);
+	return err;
+}
+
+static void __devexit snd_cs5535audio_remove(struct pci_dev *pci)
+{
+	snd_card_free(pci_get_drvdata(pci));
+	pci_set_drvdata(pci, NULL);
+}
+
+static struct pci_driver driver = {
+	.name = DRIVER_NAME,
+	.id_table = snd_cs5535audio_ids,
+	.probe = snd_cs5535audio_probe,
+	.remove = __devexit_p(snd_cs5535audio_remove),
+};
+
+static int __init alsa_card_cs5535audio_init(void)
+{
+	return pci_module_init(&driver);
+}
+
+static void __exit alsa_card_cs5535audio_exit(void)
+{
+	pci_unregister_driver(&driver);
+}
+
+module_init(alsa_card_cs5535audio_init)
+module_exit(alsa_card_cs5535audio_exit)
+
+MODULE_AUTHOR("Jaya Kumar");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("CS5535 Audio");
+MODULE_SUPPORTED_DEVICE("CS5535 Audio");
diff --git a/sound/pci/cs5535audio/cs5535audio.h b/sound/pci/cs5535audio/cs5535audio.h
new file mode 100644
index 000000000000..e28177fb0991
--- /dev/null
+++ b/sound/pci/cs5535audio/cs5535audio.h
@@ -0,0 +1,123 @@
+#ifndef __SOUND_CS5535AUDIO_H
+#define __SOUND_CS5535AUDIO_H
+
+#define cs_writel(cs5535au, reg, val) outl(val, (int) cs5535au->port + reg)
+#define cs_writeb(cs5535au, reg, val) outb(val, (int) cs5535au->port + reg)
+#define cs_readl(cs5535au, reg)	inl((unsigned short) (cs5535au->port + reg))
+#define cs_readw(cs5535au, reg)	inw((unsigned short) (cs5535au->port + reg))
+#define cs_readb(cs5535au, reg)	inb((unsigned short) (cs5535au->port + reg))
+
+#define CS5535AUDIO_MAX_DESCRIPTORS	128
+
+/* acc_codec bar0 reg addrs */
+#define ACC_GPIO_STATUS			0x00
+#define ACC_CODEC_STATUS		0x08
+#define ACC_CODEC_CNTL			0x0C
+#define ACC_IRQ_STATUS			0x12
+#define ACC_BM0_CMD			0x20
+#define ACC_BM1_CMD			0x28
+#define ACC_BM2_CMD			0x30
+#define ACC_BM3_CMD			0x38
+#define ACC_BM4_CMD			0x40
+#define ACC_BM5_CMD			0x48
+#define ACC_BM6_CMD			0x50
+#define ACC_BM7_CMD			0x58
+#define ACC_BM0_PRD			0x24
+#define ACC_BM1_PRD			0x2C
+#define ACC_BM2_PRD			0x34
+#define ACC_BM3_PRD			0x3C
+#define ACC_BM4_PRD			0x44
+#define ACC_BM5_PRD			0x4C
+#define ACC_BM6_PRD			0x54
+#define ACC_BM7_PRD			0x5C
+#define ACC_BM0_STATUS			0x21
+#define ACC_BM1_STATUS			0x29
+#define ACC_BM2_STATUS			0x31
+#define ACC_BM3_STATUS			0x39
+#define ACC_BM4_STATUS			0x41
+#define ACC_BM5_STATUS			0x49
+#define ACC_BM6_STATUS			0x51
+#define ACC_BM7_STATUS			0x59
+#define ACC_BM0_PNTR			0x60
+#define ACC_BM1_PNTR			0x64
+#define ACC_BM2_PNTR			0x68
+#define ACC_BM3_PNTR			0x6C
+#define ACC_BM4_PNTR			0x70
+#define ACC_BM5_PNTR			0x74
+#define ACC_BM6_PNTR			0x78
+#define ACC_BM7_PNTR			0x7C
+/* acc_codec bar0 reg bits */
+/* ACC_IRQ_STATUS */
+#define IRQ_STS 			0
+#define WU_IRQ_STS 			1
+#define BM0_IRQ_STS 			2
+#define BM1_IRQ_STS 			3
+#define BM2_IRQ_STS 			4
+#define BM3_IRQ_STS 			5
+#define BM4_IRQ_STS 			6
+#define BM5_IRQ_STS		 	7
+#define BM6_IRQ_STS 			8
+#define BM7_IRQ_STS 			9
+/* ACC_BMX_STATUS */
+#define EOP				(1<<0)
+#define BM_EOP_ERR			(1<<1)
+/* ACC_BMX_CTL */
+#define BM_CTL_EN			0x00000001
+#define BM_CTL_PAUSE			0x00000011
+#define BM_CTL_DIS			0x00000000
+#define BM_CTL_BYTE_ORD_LE		0x00000000
+#define BM_CTL_BYTE_ORD_BE		0x00000100
+/* cs5535 specific ac97 codec register defines */
+#define CMD_MASK			0xFF00FFFF
+#define CMD_NEW				0x00010000
+#define STS_NEW				0x00020000
+#define PRM_RDY_STS			0x00800000
+#define ACC_CODEC_CNTL_WR_CMD		(~0x80000000)
+#define ACC_CODEC_CNTL_RD_CMD		0x80000000
+#define PRD_JMP				0x2000
+#define PRD_EOP				0x4000
+#define PRD_EOT				0x8000
+
+typedef struct _snd_cs5535audio cs5535audio_t;
+typedef struct snd_cs5535audio_dma cs5535audio_dma_t;
+typedef struct snd_cs5535audio_dma_ops cs5535audio_dma_ops_t;
+
+enum { CS5535AUDIO_DMA_PLAYBACK, CS5535AUDIO_DMA_CAPTURE, NUM_CS5535AUDIO_DMAS };
+struct snd_cs5535audio_dma_ops {
+	int type;
+	void (*enable_dma)(cs5535audio_t *cs5535au);
+	void (*disable_dma)(cs5535audio_t *cs5535au);
+	void (*pause_dma)(cs5535audio_t *cs5535au);
+	void (*setup_prd)(cs5535audio_t *cs5535au, u32 prd_addr);
+	u32 (*read_dma_pntr)(cs5535audio_t *cs5535au);
+};
+
+typedef struct cs5535audio_dma_desc {
+	u32 addr;
+	u16 size;
+	u16 ctlreserved;
+} cs5535audio_dma_desc_t;
+
+struct snd_cs5535audio_dma {
+	const cs5535audio_dma_ops_t *ops;
+	struct snd_dma_buffer desc_buf;
+	snd_pcm_substream_t *substream;
+	unsigned int buf_addr, buf_bytes;
+	unsigned int period_bytes, periods;
+};
+
+struct _snd_cs5535audio {
+	snd_card_t *card;
+	ac97_t *ac97;
+	int irq;
+	struct pci_dev *pci;
+	unsigned long port;
+	spinlock_t reg_lock;
+	snd_pcm_substream_t *playback_substream;
+	snd_pcm_substream_t *capture_substream;
+	cs5535audio_dma_t dmas[NUM_CS5535AUDIO_DMAS];
+};
+
+int __devinit snd_cs5535audio_pcm(cs5535audio_t *cs5535audio);
+#endif /* __SOUND_CS5535AUDIO_H */
+
diff --git a/sound/pci/cs5535audio/cs5535audio_pcm.c b/sound/pci/cs5535audio/cs5535audio_pcm.c
new file mode 100644
index 000000000000..5802ed9d57be
--- /dev/null
+++ b/sound/pci/cs5535audio/cs5535audio_pcm.c
@@ -0,0 +1,430 @@
+/*
+ * Driver for audio on multifunction CS5535 companion device
+ * Copyright (C) Jaya Kumar
+ *
+ * Based on Jaroslav Kysela and Takashi Iwai's examples.
+ * This work was sponsored by CIS(M) Sdn Bhd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+ *
+ * todo: add be fmt support, spdif, pm
+ */
+
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <sound/driver.h>
+#include <sound/core.h>
+#include <sound/control.h>
+#include <sound/initval.h>
+#include <sound/asoundef.h>
+#include <sound/pcm.h>
+#include <sound/pcm_params.h>
+#include <sound/ac97_codec.h>
+#include "cs5535audio.h"
+
+static snd_pcm_hardware_t snd_cs5535audio_playback =
+{
+	.info =			(
+				SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_INTERLEAVED |
+		 		SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		 		SNDRV_PCM_INFO_MMAP_VALID |
+		 		SNDRV_PCM_INFO_PAUSE |
+				SNDRV_PCM_INFO_SYNC_START
+				),
+	.formats =		(
+				SNDRV_PCM_FMTBIT_S16_LE
+				),
+	.rates =		(
+				SNDRV_PCM_RATE_CONTINUOUS |
+				SNDRV_PCM_RATE_8000_48000
+				),
+	.rate_min =		4000,
+	.rate_max =		48000,
+	.channels_min =		2,
+	.channels_max =		2,
+	.buffer_bytes_max =	(128*1024),
+	.period_bytes_min =	64,
+	.period_bytes_max =	(64*1024 - 16),
+	.periods_min =		1,
+	.periods_max =		CS5535AUDIO_MAX_DESCRIPTORS,
+	.fifo_size =		0,
+};
+
+static snd_pcm_hardware_t snd_cs5535audio_capture =
+{
+	.info =			(
+				SNDRV_PCM_INFO_MMAP |
+				SNDRV_PCM_INFO_INTERLEAVED |
+		 		SNDRV_PCM_INFO_BLOCK_TRANSFER |
+		 		SNDRV_PCM_INFO_MMAP_VALID |
+				SNDRV_PCM_INFO_SYNC_START
+				),
+	.formats =		(
+				SNDRV_PCM_FMTBIT_S16_LE
+				),
+	.rates =		(
+				SNDRV_PCM_RATE_CONTINUOUS |
+				SNDRV_PCM_RATE_8000_48000
+				),
+	.rate_min =		4000,
+	.rate_max =		48000,
+	.channels_min =		2,
+	.channels_max =		2,
+	.buffer_bytes_max =	(128*1024),
+	.period_bytes_min =	64,
+	.period_bytes_max =	(64*1024 - 16),
+	.periods_min =		1,
+	.periods_max =		CS5535AUDIO_MAX_DESCRIPTORS,
+	.fifo_size =		0,
+};
+
+static int snd_cs5535audio_playback_open(snd_pcm_substream_t *substream)
+{
+	int err;
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	snd_pcm_runtime_t *runtime = substream->runtime;
+
+	runtime->hw = snd_cs5535audio_playback;
+	cs5535au->playback_substream = substream;
+	runtime->private_data = &(cs5535au->dmas[CS5535AUDIO_DMA_PLAYBACK]);
+	snd_pcm_set_sync(substream);
+	if ((err = snd_pcm_hw_constraint_integer(runtime,
+				SNDRV_PCM_HW_PARAM_PERIODS)) < 0)
+		return err;
+
+	return 0;
+}
+
+static int snd_cs5535audio_playback_close(snd_pcm_substream_t *substream)
+{
+	return 0;
+}
+
+#define CS5535AUDIO_DESC_LIST_SIZE \
+	PAGE_ALIGN(CS5535AUDIO_MAX_DESCRIPTORS * sizeof(cs5535audio_dma_desc_t))
+
+static int cs5535audio_build_dma_packets(cs5535audio_t *cs5535au,
+					cs5535audio_dma_t *dma,
+					snd_pcm_substream_t *substream,
+					unsigned int periods,
+					unsigned int period_bytes)
+{
+	unsigned int i;
+	u32 addr, desc_addr, jmpprd_addr;
+	cs5535audio_dma_desc_t *lastdesc;
+
+	if (periods > CS5535AUDIO_MAX_DESCRIPTORS)
+		return -ENOMEM;
+
+	if (dma->desc_buf.area == NULL) {
+		if (snd_dma_alloc_pages(SNDRV_DMA_TYPE_DEV,
+					snd_dma_pci_data(cs5535au->pci),
+					CS5535AUDIO_DESC_LIST_SIZE+1,
+					&dma->desc_buf) < 0)
+			return -ENOMEM;
+		dma->period_bytes = dma->periods = 0;
+	}
+
+	if (dma->periods == periods && dma->period_bytes == period_bytes)
+		return 0;
+
+	/* the u32 cast is okay because in snd*create we succesfully told
+   	   pci alloc that we're only 32 bit capable so the uppper will be 0 */
+	addr = (u32) substream->runtime->dma_addr;
+	desc_addr = (u32) dma->desc_buf.addr;
+	for (i = 0; i < periods; i++) {
+		cs5535audio_dma_desc_t *desc =
+			&((cs5535audio_dma_desc_t *) dma->desc_buf.area)[i];
+		desc->addr = cpu_to_le32(addr);
+		desc->size = period_bytes;
+		desc->ctlreserved = PRD_EOP;
+		desc_addr += sizeof(cs5535audio_dma_desc_t);
+		addr += period_bytes;
+	}
+	/* we reserved one dummy descriptor at the end to do the PRD jump */
+	lastdesc = &((cs5535audio_dma_desc_t *) dma->desc_buf.area)[periods];
+	lastdesc->addr = cpu_to_le32((u32) dma->desc_buf.addr);
+	lastdesc->size = 0;
+	lastdesc->ctlreserved = PRD_JMP;
+	jmpprd_addr = cpu_to_le32(lastdesc->addr +
+				(sizeof(cs5535audio_dma_desc_t)*periods));
+
+	dma->period_bytes = period_bytes;
+	dma->periods = periods;
+	spin_lock_irq(&cs5535au->reg_lock);
+	dma->ops->disable_dma(cs5535au);
+	dma->ops->setup_prd(cs5535au, jmpprd_addr);
+	spin_unlock_irq(&cs5535au->reg_lock);
+	return 0;
+}
+
+static void cs5535audio_playback_enable_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM0_CMD, BM_CTL_EN);
+}
+
+static void cs5535audio_playback_disable_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM0_CMD, 0);
+}
+
+static void cs5535audio_playback_pause_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM0_CMD, BM_CTL_PAUSE);
+}
+
+static void cs5535audio_playback_setup_prd(cs5535audio_t *cs5535au,
+						u32 prd_addr)
+{
+	cs_writel(cs5535au, ACC_BM0_PRD, prd_addr);
+}
+
+static u32 cs5535audio_playback_read_dma_pntr(cs5535audio_t *cs5535au)
+{
+	return cs_readl(cs5535au, ACC_BM0_PNTR);
+}
+
+static void cs5535audio_capture_enable_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM1_CMD, BM_CTL_EN);
+}
+
+static void cs5535audio_capture_disable_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM1_CMD, 0);
+}
+
+static void cs5535audio_capture_pause_dma(cs5535audio_t *cs5535au)
+{
+	cs_writeb(cs5535au, ACC_BM1_CMD, BM_CTL_PAUSE);
+}
+
+static void cs5535audio_capture_setup_prd(cs5535audio_t *cs5535au,
+						u32 prd_addr)
+{
+	cs_writel(cs5535au, ACC_BM1_PRD, prd_addr);
+}
+
+static u32 cs5535audio_capture_read_dma_pntr(cs5535audio_t *cs5535au)
+{
+	return cs_readl(cs5535au, ACC_BM1_PNTR);
+}
+
+static void cs5535audio_clear_dma_packets(cs5535audio_t *cs5535au,
+					cs5535audio_dma_t *dma,
+					snd_pcm_substream_t *substream)
+{
+	snd_dma_free_pages(&dma->desc_buf);
+	dma->desc_buf.area = NULL;
+}
+
+static int snd_cs5535audio_hw_params(snd_pcm_substream_t *substream,
+				 snd_pcm_hw_params_t *hw_params)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	cs5535audio_dma_t *dma = substream->runtime->private_data;
+	int err;
+
+	err = snd_pcm_lib_malloc_pages(substream,
+					params_buffer_bytes(hw_params));
+	if (err < 0)
+		return err;
+	dma->buf_addr = substream->runtime->dma_addr;
+	dma->buf_bytes = params_buffer_bytes(hw_params);
+
+	err = cs5535audio_build_dma_packets(cs5535au, dma, substream,
+				       params_periods(hw_params),
+				       params_period_bytes(hw_params));
+	return err;
+}
+
+static int snd_cs5535audio_hw_free(snd_pcm_substream_t *substream)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	cs5535audio_dma_t *dma = substream->runtime->private_data;
+
+	cs5535audio_clear_dma_packets(cs5535au, dma, substream);
+	return snd_pcm_lib_free_pages(substream);
+}
+
+static int snd_cs5535audio_playback_prepare(snd_pcm_substream_t *substream)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	return snd_ac97_set_rate(cs5535au->ac97, AC97_PCM_FRONT_DAC_RATE,
+					substream->runtime->rate);
+}
+
+static int snd_cs5535audio_trigger(snd_pcm_substream_t *substream, int cmd)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	cs5535audio_dma_t *dma = substream->runtime->private_data;
+
+	switch (cmd) {
+		case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
+			spin_lock_irq(&cs5535au->reg_lock);
+			dma->ops->pause_dma(cs5535au);
+			spin_unlock_irq(&cs5535au->reg_lock);
+			break;
+		case SNDRV_PCM_TRIGGER_PAUSE_RELEASE:
+			spin_lock_irq(&cs5535au->reg_lock);
+			dma->ops->enable_dma(cs5535au);
+			spin_unlock_irq(&cs5535au->reg_lock);
+			break;
+		case SNDRV_PCM_TRIGGER_START:
+			spin_lock_irq(&cs5535au->reg_lock);
+			dma->ops->enable_dma(cs5535au);
+			spin_unlock_irq(&cs5535au->reg_lock);
+			break;
+		case SNDRV_PCM_TRIGGER_STOP:
+			spin_lock_irq(&cs5535au->reg_lock);
+			dma->ops->disable_dma(cs5535au);
+			spin_unlock_irq(&cs5535au->reg_lock);
+			break;
+		default:
+			snd_printk(KERN_ERR "unhandled trigger\n");
+			return -EINVAL;
+			break;
+	}
+	return 0;
+}
+
+static snd_pcm_uframes_t snd_cs5535audio_pcm_pointer(snd_pcm_substream_t
+							*substream)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	u32 curdma;
+	cs5535audio_dma_t *dma;
+
+	dma = substream->runtime->private_data;
+	curdma = dma->ops->read_dma_pntr(cs5535au);
+	if (curdma < dma->buf_addr) {
+		snd_printk(KERN_ERR "curdma=%x < %x bufaddr.\n",
+					curdma, dma->buf_addr);
+		return 0;
+	}
+	curdma -= dma->buf_addr;
+	if (curdma >= dma->buf_bytes) {
+		snd_printk(KERN_ERR "diff=%x >= %x buf_bytes.\n",
+					curdma, dma->buf_bytes);
+		return 0;
+	}
+	return bytes_to_frames(substream->runtime, curdma);
+}
+
+static int snd_cs5535audio_capture_open(snd_pcm_substream_t *substream)
+{
+	int err;
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	snd_pcm_runtime_t *runtime = substream->runtime;
+
+	runtime->hw = snd_cs5535audio_capture;
+	cs5535au->capture_substream = substream;
+	runtime->private_data = &(cs5535au->dmas[CS5535AUDIO_DMA_CAPTURE]);
+	snd_pcm_set_sync(substream);
+	if ((err = snd_pcm_hw_constraint_integer(runtime,
+					 SNDRV_PCM_HW_PARAM_PERIODS)) < 0)
+		return err;
+	return 0;
+}
+
+static int snd_cs5535audio_capture_close(snd_pcm_substream_t *substream)
+{
+	return 0;
+}
+
+static int snd_cs5535audio_capture_prepare(snd_pcm_substream_t *substream)
+{
+	cs5535audio_t *cs5535au = snd_pcm_substream_chip(substream);
+	return snd_ac97_set_rate(cs5535au->ac97, AC97_PCM_LR_ADC_RATE,
+					substream->runtime->rate);
+}
+
+static snd_pcm_ops_t snd_cs5535audio_playback_ops = {
+	.open =		snd_cs5535audio_playback_open,
+	.close =	snd_cs5535audio_playback_close,
+	.ioctl =	snd_pcm_lib_ioctl,
+	.hw_params =	snd_cs5535audio_hw_params,
+	.hw_free =	snd_cs5535audio_hw_free,
+	.prepare =	snd_cs5535audio_playback_prepare,
+	.trigger =	snd_cs5535audio_trigger,
+	.pointer =	snd_cs5535audio_pcm_pointer,
+};
+
+static snd_pcm_ops_t snd_cs5535audio_capture_ops = {
+	.open =		snd_cs5535audio_capture_open,
+	.close =	snd_cs5535audio_capture_close,
+	.ioctl =	snd_pcm_lib_ioctl,
+	.hw_params =	snd_cs5535audio_hw_params,
+	.hw_free =	snd_cs5535audio_hw_free,
+	.prepare =	snd_cs5535audio_capture_prepare,
+	.trigger =	snd_cs5535audio_trigger,
+	.pointer =	snd_cs5535audio_pcm_pointer,
+};
+
+static void snd_cs5535audio_pcm_free(snd_pcm_t *pcm)
+{
+	snd_pcm_lib_preallocate_free_for_all(pcm);
+}
+
+static cs5535audio_dma_ops_t snd_cs5535audio_playback_dma_ops = {
+        .type = CS5535AUDIO_DMA_PLAYBACK,
+        .enable_dma = cs5535audio_playback_enable_dma,
+        .disable_dma = cs5535audio_playback_disable_dma,
+        .setup_prd = cs5535audio_playback_setup_prd,
+        .pause_dma = cs5535audio_playback_pause_dma,
+        .read_dma_pntr = cs5535audio_playback_read_dma_pntr,
+};
+
+static cs5535audio_dma_ops_t snd_cs5535audio_capture_dma_ops = {
+        .type = CS5535AUDIO_DMA_CAPTURE,
+        .enable_dma = cs5535audio_capture_enable_dma,
+        .disable_dma = cs5535audio_capture_disable_dma,
+        .setup_prd = cs5535audio_capture_setup_prd,
+        .pause_dma = cs5535audio_capture_pause_dma,
+        .read_dma_pntr = cs5535audio_capture_read_dma_pntr,
+};
+
+int __devinit snd_cs5535audio_pcm(cs5535audio_t *cs5535au)
+{
+	snd_pcm_t *pcm;
+	int err;
+
+	err = snd_pcm_new(cs5535au->card, "CS5535 Audio", 0, 1, 1, &pcm);
+	if (err < 0)
+		return err;
+
+	cs5535au->dmas[CS5535AUDIO_DMA_PLAYBACK].ops =
+					&snd_cs5535audio_playback_dma_ops;
+	cs5535au->dmas[CS5535AUDIO_DMA_CAPTURE].ops =
+					&snd_cs5535audio_capture_dma_ops;
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_PLAYBACK,
+					&snd_cs5535audio_playback_ops);
+	snd_pcm_set_ops(pcm, SNDRV_PCM_STREAM_CAPTURE,
+					&snd_cs5535audio_capture_ops);
+
+	pcm->private_data = cs5535au;
+	pcm->private_free = snd_cs5535audio_pcm_free;
+	pcm->info_flags = 0;
+	strcpy(pcm->name, "CS5535 Audio");
+
+	snd_pcm_lib_preallocate_pages_for_all(pcm, SNDRV_DMA_TYPE_DEV,
+					snd_dma_pci_data(cs5535au->pci),
+					64*1024, 128*1024);
+
+	return 0;
+}
+
-- 
cgit v1.2.3


From 4c98cfef2efa6b6662ac28c4f0069964bbd9fdf9 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Tue, 29 Nov 2005 09:09:32 +0100
Subject: [ALSA] PATCH] Add PM support to PnP drivers

Add suspend/resume callback to pnp_driver and pnp_card_driver.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/pnp/card.c   | 25 +++++++++++++++++++++++++
 drivers/pnp/driver.c | 20 ++++++++++++++++++++
 include/linux/pnp.h  |  5 +++++
 3 files changed, 50 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/pnp/card.c b/drivers/pnp/card.c
index bd7c966ea2d7..0ecbe4edbec1 100644
--- a/drivers/pnp/card.c
+++ b/drivers/pnp/card.c
@@ -69,6 +69,7 @@ static int card_probe(struct pnp_card * card, struct pnp_card_driver * drv)
 			return 0;
 		clink->card = card;
 		clink->driver = drv;
+		clink->pm_state = PMSG_ON;
 		if (drv->probe) {
 			if (drv->probe(clink, id)>=0)
 				return 1;
@@ -333,6 +334,28 @@ void pnp_release_card_device(struct pnp_dev * dev)
 	up_write(&dev->dev.bus->subsys.rwsem);
 }
 
+/*
+ * suspend/resume callbacks
+ */
+static int card_suspend(struct pnp_dev *dev, pm_message_t state)
+{
+	struct pnp_card_link *link = dev->card_link;
+	if (link->pm_state.event == state.event)
+		return 0;
+	link->pm_state = state;
+	return link->driver->suspend(link, state);
+}
+
+static int card_resume(struct pnp_dev *dev)
+{
+	struct pnp_card_link *link = dev->card_link;
+	if (link->pm_state.event == PM_EVENT_ON)
+		return 0;
+	link->pm_state = PMSG_ON;
+	link->driver->resume(link);
+	return 0;
+}
+
 /**
  * pnp_register_card_driver - registers a PnP card driver with the PnP Layer
  * @drv: pointer to the driver to register
@@ -348,6 +371,8 @@ int pnp_register_card_driver(struct pnp_card_driver * drv)
 	drv->link.flags = drv->flags;
 	drv->link.probe = NULL;
 	drv->link.remove = &card_remove_first;
+	drv->link.suspend = drv->suspend ? card_suspend : NULL;
+	drv->link.resume = drv->resume ? card_resume : NULL;
 
 	spin_lock(&pnp_lock);
 	list_add_tail(&drv->global_list, &pnp_card_drivers);
diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index d3ccce706ab4..ea2cb9a8b21d 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -146,10 +146,30 @@ static int pnp_bus_match(struct device *dev, struct device_driver *drv)
 	return 1;
 }
 
+static int pnp_bus_suspend(struct device *dev, pm_message_t state)
+{
+	struct pnp_dev * pnp_dev = to_pnp_dev(dev);
+	struct pnp_driver * pnp_drv = pnp_dev->driver;
+
+	if (pnp_drv && pnp_drv->suspend)
+		return pnp_drv->suspend(pnp_dev, state);
+	return 0;
+}
+
+static void pnp_bus_resume(struct device *dev)
+{
+	struct pnp_dev * pnp_dev = to_pnp_dev(dev);
+	struct pnp_driver * pnp_drv = pnp_dev->driver;
+
+	if (pnp_drv && pnp_drv->resume)
+		pnp_drv->resume(pnp_dev);
+}
 
 struct bus_type pnp_bus_type = {
 	.name	= "pnp",
 	.match	= pnp_bus_match,
+	.suspend = pnp_bus_suspend,
+	.resume = pnp_bus_resume,
 };
 
 
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 584d57cb393a..472319fcf631 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -162,6 +162,7 @@ struct pnp_card_link {
 	struct pnp_card * card;
 	struct pnp_card_driver * driver;
 	void * driver_data;
+	pm_message_t pm_state;
 };
 
 static inline void *pnp_get_card_drvdata (struct pnp_card_link *pcard)
@@ -294,6 +295,8 @@ struct pnp_driver {
 	unsigned int flags;
 	int  (*probe)  (struct pnp_dev *dev, const struct pnp_device_id *dev_id);
 	void (*remove) (struct pnp_dev *dev);
+	int  (*suspend) (struct pnp_dev *dev, pm_message_t state);
+	int  (*resume) (struct pnp_dev *dev);
 	struct device_driver driver;
 };
 
@@ -306,6 +309,8 @@ struct pnp_card_driver {
 	unsigned int flags;
 	int  (*probe)  (struct pnp_card_link *card, const struct pnp_card_device_id *card_id);
 	void (*remove) (struct pnp_card_link *card);
+	int  (*suspend) (struct pnp_card_link *card, pm_message_t state);
+	int  (*resume) (struct pnp_card_link *card);
 	struct pnp_driver link;
 };
 
-- 
cgit v1.2.3


From 68094e3251a664ee1389fcf179497237cbf78331 Mon Sep 17 00:00:00 2001
From: Pierre Ossman <drzeus-list@drzeus.cx>
Date: Tue, 29 Nov 2005 09:09:32 +0100
Subject: [ALSA] [PATCH] alsa: Improved PnP suspend support

Also use the PnP functions to start/stop the devices during the suspend so
that drivers will not have to duplicate this code.

Cc: Adam Belay <ambx1@neo.rr.com>
Cc: Jaroslav Kysela <perex@suse.cz>
Cc: Takashi Iwai <tiwai@suse.de>

Signed-off-by: Pierre Ossman <drzeus@drzeus.cx>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 drivers/pnp/driver.c  | 37 ++++++++++++++++++++----
 drivers/pnp/manager.c | 78 ++++++++++++++++++++++++++++++++++++++-------------
 include/linux/pnp.h   |  4 +++
 3 files changed, 95 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pnp/driver.c b/drivers/pnp/driver.c
index ea2cb9a8b21d..15fb758a9e52 100644
--- a/drivers/pnp/driver.c
+++ b/drivers/pnp/driver.c
@@ -150,19 +150,46 @@ static int pnp_bus_suspend(struct device *dev, pm_message_t state)
 {
 	struct pnp_dev * pnp_dev = to_pnp_dev(dev);
 	struct pnp_driver * pnp_drv = pnp_dev->driver;
+	int error;
+
+	if (!pnp_drv)
+		return 0;
+
+	if (pnp_drv->suspend) {
+		error = pnp_drv->suspend(pnp_dev, state);
+		if (error)
+			return error;
+	}
+
+	if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE) &&
+	    pnp_can_disable(pnp_dev)) {
+	    	error = pnp_stop_dev(pnp_dev);
+	    	if (error)
+	    		return error;
+	}
 
-	if (pnp_drv && pnp_drv->suspend)
-		return pnp_drv->suspend(pnp_dev, state);
 	return 0;
 }
 
-static void pnp_bus_resume(struct device *dev)
+static int pnp_bus_resume(struct device *dev)
 {
 	struct pnp_dev * pnp_dev = to_pnp_dev(dev);
 	struct pnp_driver * pnp_drv = pnp_dev->driver;
+	int error;
+
+	if (!pnp_drv)
+		return 0;
+
+	if (!(pnp_drv->flags & PNP_DRIVER_RES_DO_NOT_CHANGE)) {
+		error = pnp_start_dev(pnp_dev);
+		if (error)
+			return error;
+	}
 
-	if (pnp_drv && pnp_drv->resume)
-		pnp_drv->resume(pnp_dev);
+	if (pnp_drv->resume)
+		return pnp_drv->resume(pnp_dev);
+
+	return 0;
 }
 
 struct bus_type pnp_bus_type = {
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c
index 261668618b2d..c4256aa32bcb 100644
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -469,6 +469,53 @@ int pnp_auto_config_dev(struct pnp_dev *dev)
 	return -EBUSY;
 }
 
+/**
+ * pnp_start_dev - low-level start of the PnP device
+ * @dev: pointer to the desired device
+ *
+ * assumes that resources have alread been allocated
+ */
+
+int pnp_start_dev(struct pnp_dev *dev)
+{
+	if (!pnp_can_write(dev)) {
+		pnp_info("Device %s does not supported activation.", dev->dev.bus_id);
+		return -EINVAL;
+	}
+
+	if (dev->protocol->set(dev, &dev->res)<0) {
+		pnp_err("Failed to activate device %s.", dev->dev.bus_id);
+		return -EIO;
+	}
+
+	pnp_info("Device %s activated.", dev->dev.bus_id);
+
+	return 0;
+}
+
+/**
+ * pnp_stop_dev - low-level disable of the PnP device
+ * @dev: pointer to the desired device
+ *
+ * does not free resources
+ */
+
+int pnp_stop_dev(struct pnp_dev *dev)
+{
+	if (!pnp_can_disable(dev)) {
+		pnp_info("Device %s does not supported disabling.", dev->dev.bus_id);
+		return -EINVAL;
+	}
+	if (dev->protocol->disable(dev)<0) {
+		pnp_err("Failed to disable device %s.", dev->dev.bus_id);
+		return -EIO;
+	}
+
+	pnp_info("Device %s disabled.", dev->dev.bus_id);
+
+	return 0;
+}
+
 /**
  * pnp_activate_dev - activates a PnP device for use
  * @dev: pointer to the desired device
@@ -477,6 +524,8 @@ int pnp_auto_config_dev(struct pnp_dev *dev)
  */
 int pnp_activate_dev(struct pnp_dev *dev)
 {
+	int error;
+
 	if (!dev)
 		return -EINVAL;
 	if (dev->active) {
@@ -487,18 +536,11 @@ int pnp_activate_dev(struct pnp_dev *dev)
 	if (pnp_auto_config_dev(dev))
 		return -EBUSY;
 
-	if (!pnp_can_write(dev)) {
-		pnp_info("Device %s does not supported activation.", dev->dev.bus_id);
-		return -EINVAL;
-	}
-
-	if (dev->protocol->set(dev, &dev->res)<0) {
-		pnp_err("Failed to activate device %s.", dev->dev.bus_id);
-		return -EIO;
-	}
+	error = pnp_start_dev(dev);
+	if (error)
+		return error;
 
 	dev->active = 1;
-	pnp_info("Device %s activated.", dev->dev.bus_id);
 
 	return 1;
 }
@@ -511,23 +553,19 @@ int pnp_activate_dev(struct pnp_dev *dev)
  */
 int pnp_disable_dev(struct pnp_dev *dev)
 {
+	int error;
+
         if (!dev)
                 return -EINVAL;
 	if (!dev->active) {
 		return 0; /* the device is already disabled */
 	}
 
-	if (!pnp_can_disable(dev)) {
-		pnp_info("Device %s does not supported disabling.", dev->dev.bus_id);
-		return -EINVAL;
-	}
-	if (dev->protocol->disable(dev)<0) {
-		pnp_err("Failed to disable device %s.", dev->dev.bus_id);
-		return -EIO;
-	}
+	error = pnp_stop_dev(dev);
+	if (error)
+		return error;
 
 	dev->active = 0;
-	pnp_info("Device %s disabled.", dev->dev.bus_id);
 
 	/* release the resources so that other devices can use them */
 	down(&pnp_res_mutex);
@@ -558,6 +596,8 @@ EXPORT_SYMBOL(pnp_manual_config_dev);
 #if 0
 EXPORT_SYMBOL(pnp_auto_config_dev);
 #endif
+EXPORT_SYMBOL(pnp_start_dev);
+EXPORT_SYMBOL(pnp_stop_dev);
 EXPORT_SYMBOL(pnp_activate_dev);
 EXPORT_SYMBOL(pnp_disable_dev);
 EXPORT_SYMBOL(pnp_resource_change);
diff --git a/include/linux/pnp.h b/include/linux/pnp.h
index 472319fcf631..93b0959eb40f 100644
--- a/include/linux/pnp.h
+++ b/include/linux/pnp.h
@@ -385,6 +385,8 @@ void pnp_init_resource_table(struct pnp_resource_table *table);
 int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode);
 int pnp_auto_config_dev(struct pnp_dev *dev);
 int pnp_validate_config(struct pnp_dev *dev);
+int pnp_start_dev(struct pnp_dev *dev);
+int pnp_stop_dev(struct pnp_dev *dev);
 int pnp_activate_dev(struct pnp_dev *dev);
 int pnp_disable_dev(struct pnp_dev *dev);
 void pnp_resource_change(struct resource *resource, unsigned long start, unsigned long size);
@@ -428,6 +430,8 @@ static inline void pnp_init_resource_table(struct pnp_resource_table *table) { }
 static inline int pnp_manual_config_dev(struct pnp_dev *dev, struct pnp_resource_table *res, int mode) { return -ENODEV; }
 static inline int pnp_auto_config_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_validate_config(struct pnp_dev *dev) { return -ENODEV; }
+static inline int pnp_start_dev(struct pnp_dev *dev) { return -ENODEV; }
+static inline int pnp_stop_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_activate_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline int pnp_disable_dev(struct pnp_dev *dev) { return -ENODEV; }
 static inline void pnp_resource_change(struct resource *resource, unsigned long start, unsigned long size) { }
-- 
cgit v1.2.3


From 4d399cae3f5ec1f59b9e88084aae09c4f00760c9 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 3 Jan 2006 13:19:13 +0100
Subject: remove pointers to the defunct UDF mailing list

This patch removes pointers to the defunct UDF mailing list.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
---
 MAINTAINERS               | 1 -
 fs/udf/balloc.c           | 5 -----
 fs/udf/crc.c              | 5 -----
 fs/udf/dir.c              | 5 -----
 fs/udf/directory.c        | 5 -----
 fs/udf/file.c             | 5 -----
 fs/udf/fsync.c            | 5 -----
 fs/udf/ialloc.c           | 5 -----
 fs/udf/inode.c            | 5 -----
 fs/udf/lowlevel.c         | 5 -----
 fs/udf/misc.c             | 5 -----
 fs/udf/namei.c            | 5 -----
 fs/udf/partition.c        | 5 -----
 fs/udf/super.c            | 5 -----
 fs/udf/symlink.c          | 5 -----
 fs/udf/truncate.c         | 5 -----
 fs/udf/unicode.c          | 5 -----
 include/linux/udf_fs.h    | 5 -----
 include/linux/udf_fs_i.h  | 5 -----
 include/linux/udf_fs_sb.h | 5 -----
 20 files changed, 96 deletions(-)

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6af683025ae0..bbe5f04f59ec 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2587,7 +2587,6 @@ S:	Maintained
 UDF FILESYSTEM
 P:	Ben Fennema
 M:	bfennema@falcon.csc.calpoly.edu
-L:	linux_udf@hpesjro.fc.hp.com
 W:	http://linux-udf.sourceforge.net
 S:	Maintained
 
diff --git a/fs/udf/balloc.c b/fs/udf/balloc.c
index b9ded26b10a9..6598a5037ac8 100644
--- a/fs/udf/balloc.c
+++ b/fs/udf/balloc.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Block allocation handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/crc.c b/fs/udf/crc.c
index d95c6e38a455..1b82a4adc2f7 100644
--- a/fs/udf/crc.c
+++ b/fs/udf/crc.c
@@ -14,11 +14,6 @@
  *
  *	AT&T gives permission for the free use of the CRC source code.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 82440b731142..f5222527fe39 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *  Directory handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/directory.c b/fs/udf/directory.c
index 9a61ecc5451b..fe751a2a0e47 100644
--- a/fs/udf/directory.c
+++ b/fs/udf/directory.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Directory related functions
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 01f520c71dc1..8a388289040d 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *  File handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *  E-mail regarding any portion of the Linux UDF file system should be
- *  directed to the development team mailing list (run by majordomo):
- *    linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *  This file is distributed under the terms of the GNU General Public
  *  License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/fsync.c b/fs/udf/fsync.c
index 2dde6b888c2b..5887d78cde43 100644
--- a/fs/udf/fsync.c
+++ b/fs/udf/fsync.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *  Fsync handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *  E-mail regarding any portion of the Linux UDF file system should be
- *  directed to the development team mailing list (run by majordomo):
- *      linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *  This file is distributed under the terms of the GNU General Public
  *  License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/ialloc.c b/fs/udf/ialloc.c
index a7e5d40f1ebc..c9b707b470ca 100644
--- a/fs/udf/ialloc.c
+++ b/fs/udf/ialloc.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Inode allocation handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index b83890beaaac..4014f17d382e 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *  Inode handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *  E-mail regarding any portion of the Linux UDF file system should be
- *  directed to the development team mailing list (run by majordomo):
- *    linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *  This file is distributed under the terms of the GNU General Public
  *  License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/lowlevel.c b/fs/udf/lowlevel.c
index 2da5087dfe05..084216107667 100644
--- a/fs/udf/lowlevel.c
+++ b/fs/udf/lowlevel.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *  Low Level Device Routines for the UDF filesystem
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/misc.c b/fs/udf/misc.c
index fd321f9ace83..cc8ca3254db1 100644
--- a/fs/udf/misc.c
+++ b/fs/udf/misc.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Miscellaneous routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index ac191ed7df0a..ca732e79c48b 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *      Inode name handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *      E-mail regarding any portion of the Linux UDF file system should be
- *      directed to the development team mailing list (run by majordomo):
- *              linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *      This file is distributed under the terms of the GNU General Public
  *      License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/partition.c b/fs/udf/partition.c
index 4d36f264be0d..dabf2b841db8 100644
--- a/fs/udf/partition.c
+++ b/fs/udf/partition.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *      Partition handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *      E-mail regarding any portion of the Linux UDF file system should be
- *      directed to the development team mailing list (run by majordomo):
- *              linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *      This file is distributed under the terms of the GNU General Public
  *      License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/super.c b/fs/udf/super.c
index 15bd4f24c5b7..4a6f49adc609 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -14,11 +14,6 @@
  *    http://www.ecma.ch/
  *    http://www.iso.org/
  *
- * CONTACTS
- *  E-mail regarding any portion of the Linux UDF file system should be
- *  directed to the development team mailing list (run by majordomo):
- *	  linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *  This file is distributed under the terms of the GNU General Public
  *  License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c
index 43f3051ef756..674bb40edc83 100644
--- a/fs/udf/symlink.c
+++ b/fs/udf/symlink.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Symlink handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/truncate.c b/fs/udf/truncate.c
index 7dc8a5572ca1..e1b0e8cfecb4 100644
--- a/fs/udf/truncate.c
+++ b/fs/udf/truncate.c
@@ -4,11 +4,6 @@
  * PURPOSE
  *	Truncate handling routines for the OSTA-UDF(tm) filesystem.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index 5a80efd8debc..706c92e1dcc9 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -11,11 +11,6 @@
  *	UTF-8 is explained in the IETF RFC XXXX.
  *		ftp://ftp.internic.net/rfc/rfcxxxx.txt
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team's mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/include/linux/udf_fs.h b/include/linux/udf_fs.h
index 46e2bb945353..36c684e1b110 100644
--- a/include/linux/udf_fs.h
+++ b/include/linux/udf_fs.h
@@ -13,11 +13,6 @@
  *    http://www.osta.org/ *    http://www.ecma.ch/
  *    http://www.iso.org/
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/include/linux/udf_fs_i.h b/include/linux/udf_fs_i.h
index 62b15a4214e6..1e7508420fcf 100644
--- a/include/linux/udf_fs_i.h
+++ b/include/linux/udf_fs_i.h
@@ -3,11 +3,6 @@
  *
  * This file is intended for the Linux kernel/module. 
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
diff --git a/include/linux/udf_fs_sb.h b/include/linux/udf_fs_sb.h
index 1966a6dbb4b6..b15ff2e99c91 100644
--- a/include/linux/udf_fs_sb.h
+++ b/include/linux/udf_fs_sb.h
@@ -3,11 +3,6 @@
  * 
  * This include file is for the Linux kernel/module.
  *
- * CONTACTS
- *	E-mail regarding any portion of the Linux UDF file system should be
- *	directed to the development team mailing list (run by majordomo):
- *		linux_udf@hpesjro.fc.hp.com
- *
  * COPYRIGHT
  *	This file is distributed under the terms of the GNU General Public
  *	License (GPL). Copies of the GPL can be obtained from:
-- 
cgit v1.2.3


From 7063fbf2261194f72ee75afca67b3b38b554b5fa Mon Sep 17 00:00:00 2001
From: Joel Becker <joel.becker@oracle.com>
Date: Thu, 15 Dec 2005 14:29:43 -0800
Subject: [PATCH] configfs: User-driven configuration filesystem

Configfs, a file system for userspace-driven kernel object configuration.
The OCFS2 stack makes extensive use of this for propagation of cluster
configuration information into kernel.

Signed-off-by: Joel Becker <joel.becker@oracle.com>
---
 Documentation/filesystems/00-INDEX                 |    2 +
 Documentation/filesystems/configfs/configfs.txt    |  434 ++++++++
 .../filesystems/configfs/configfs_example.c        |  474 +++++++++
 MAINTAINERS                                        |    5 +
 fs/Kconfig                                         |   14 +
 fs/Makefile                                        |    1 +
 fs/configfs/Makefile                               |    7 +
 fs/configfs/configfs_internal.h                    |  142 +++
 fs/configfs/dir.c                                  | 1102 ++++++++++++++++++++
 fs/configfs/file.c                                 |  360 +++++++
 fs/configfs/inode.c                                |  162 +++
 fs/configfs/item.c                                 |  227 ++++
 fs/configfs/mount.c                                |  159 +++
 fs/configfs/symlink.c                              |  281 +++++
 include/linux/configfs.h                           |  205 ++++
 15 files changed, 3575 insertions(+)
 create mode 100644 Documentation/filesystems/configfs/configfs.txt
 create mode 100644 Documentation/filesystems/configfs/configfs_example.c
 create mode 100644 fs/configfs/Makefile
 create mode 100644 fs/configfs/configfs_internal.h
 create mode 100644 fs/configfs/dir.c
 create mode 100644 fs/configfs/file.c
 create mode 100644 fs/configfs/inode.c
 create mode 100644 fs/configfs/item.c
 create mode 100644 fs/configfs/mount.c
 create mode 100644 fs/configfs/symlink.c
 create mode 100644 include/linux/configfs.h

(limited to 'include/linux')

diff --git a/Documentation/filesystems/00-INDEX b/Documentation/filesystems/00-INDEX
index bcfbab899b37..628f8a7adb85 100644
--- a/Documentation/filesystems/00-INDEX
+++ b/Documentation/filesystems/00-INDEX
@@ -12,6 +12,8 @@ cifs.txt
 	- description of the CIFS filesystem
 coda.txt
 	- description of the CODA filesystem.
+configfs/
+	- directory containing configfs documentation and example code.
 cramfs.txt
 	- info on the cram filesystem for small storage (ROMs etc)
 devfs/
diff --git a/Documentation/filesystems/configfs/configfs.txt b/Documentation/filesystems/configfs/configfs.txt
new file mode 100644
index 000000000000..c4ff96b7c4e0
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs.txt
@@ -0,0 +1,434 @@
+
+configfs - Userspace-driven kernel object configuation.
+
+Joel Becker <joel.becker@oracle.com>
+
+Updated: 31 March 2005
+
+Copyright (c) 2005 Oracle Corporation,
+	Joel Becker <joel.becker@oracle.com>
+
+
+[What is configfs?]
+
+configfs is a ram-based filesystem that provides the converse of
+sysfs's functionality.  Where sysfs is a filesystem-based view of
+kernel objects, configfs is a filesystem-based manager of kernel
+objects, or config_items.
+
+With sysfs, an object is created in kernel (for example, when a device
+is discovered) and it is registered with sysfs.  Its attributes then
+appear in sysfs, allowing userspace to read the attributes via
+readdir(3)/read(2).  It may allow some attributes to be modified via
+write(2).  The important point is that the object is created and
+destroyed in kernel, the kernel controls the lifecycle of the sysfs
+representation, and sysfs is merely a window on all this.
+
+A configfs config_item is created via an explicit userspace operation:
+mkdir(2).  It is destroyed via rmdir(2).  The attributes appear at
+mkdir(2) time, and can be read or modified via read(2) and write(2).
+As with sysfs, readdir(3) queries the list of items and/or attributes.
+symlink(2) can be used to group items together.  Unlike sysfs, the
+lifetime of the representation is completely driven by userspace.  The
+kernel modules backing the items must respond to this.
+
+Both sysfs and configfs can and should exist together on the same
+system.  One is not a replacement for the other.
+
+[Using configfs]
+
+configfs can be compiled as a module or into the kernel.  You can access
+it by doing
+
+	mount -t configfs none /config
+
+The configfs tree will be empty unless client modules are also loaded.
+These are modules that register their item types with configfs as
+subsystems.  Once a client subsystem is loaded, it will appear as a
+subdirectory (or more than one) under /config.  Like sysfs, the
+configfs tree is always there, whether mounted on /config or not.
+
+An item is created via mkdir(2).  The item's attributes will also
+appear at this time.  readdir(3) can determine what the attributes are,
+read(2) can query their default values, and write(2) can store new
+values.  Like sysfs, attributes should be ASCII text files, preferably
+with only one value per file.  The same efficiency caveats from sysfs
+apply.  Don't mix more than one attribute in one attribute file.
+
+Like sysfs, configfs expects write(2) to store the entire buffer at
+once.  When writing to configfs attributes, userspace processes should
+first read the entire file, modify the portions they wish to change, and
+then write the entire buffer back.  Attribute files have a maximum size
+of one page (PAGE_SIZE, 4096 on i386).
+
+When an item needs to be destroyed, remove it with rmdir(2).  An
+item cannot be destroyed if any other item has a link to it (via
+symlink(2)).  Links can be removed via unlink(2).
+
+[Configuring FakeNBD: an Example]
+
+Imagine there's a Network Block Device (NBD) driver that allows you to
+access remote block devices.  Call it FakeNBD.  FakeNBD uses configfs
+for its configuration.  Obviously, there will be a nice program that
+sysadmins use to configure FakeNBD, but somehow that program has to tell
+the driver about it.  Here's where configfs comes in.
+
+When the FakeNBD driver is loaded, it registers itself with configfs.
+readdir(3) sees this just fine:
+
+	# ls /config
+	fakenbd
+
+A fakenbd connection can be created with mkdir(2).  The name is
+arbitrary, but likely the tool will make some use of the name.  Perhaps
+it is a uuid or a disk name:
+
+	# mkdir /config/fakenbd/disk1
+	# ls /config/fakenbd/disk1
+	target device rw
+
+The target attribute contains the IP address of the server FakeNBD will
+connect to.  The device attribute is the device on the server.
+Predictably, the rw attribute determines whether the connection is
+read-only or read-write.
+
+	# echo 10.0.0.1 > /config/fakenbd/disk1/target
+	# echo /dev/sda1 > /config/fakenbd/disk1/device
+	# echo 1 > /config/fakenbd/disk1/rw
+
+That's it.  That's all there is.  Now the device is configured, via the
+shell no less.
+
+[Coding With configfs]
+
+Every object in configfs is a config_item.  A config_item reflects an
+object in the subsystem.  It has attributes that match values on that
+object.  configfs handles the filesystem representation of that object
+and its attributes, allowing the subsystem to ignore all but the
+basic show/store interaction.
+
+Items are created and destroyed inside a config_group.  A group is a
+collection of items that share the same attributes and operations.
+Items are created by mkdir(2) and removed by rmdir(2), but configfs
+handles that.  The group has a set of operations to perform these tasks
+
+A subsystem is the top level of a client module.  During initialization,
+the client module registers the subsystem with configfs, the subsystem
+appears as a directory at the top of the configfs filesystem.  A
+subsystem is also a config_group, and can do everything a config_group
+can.
+
+[struct config_item]
+
+	struct config_item {
+		char                    *ci_name;
+		char                    ci_namebuf[UOBJ_NAME_LEN];
+		struct kref             ci_kref;
+		struct list_head        ci_entry;
+		struct config_item      *ci_parent;
+		struct config_group     *ci_group;
+		struct config_item_type *ci_type;
+		struct dentry           *ci_dentry;
+	};
+
+	void config_item_init(struct config_item *);
+	void config_item_init_type_name(struct config_item *,
+					const char *name,
+					struct config_item_type *type);
+	struct config_item *config_item_get(struct config_item *);
+	void config_item_put(struct config_item *);
+
+Generally, struct config_item is embedded in a container structure, a
+structure that actually represents what the subsystem is doing.  The
+config_item portion of that structure is how the object interacts with
+configfs.
+
+Whether statically defined in a source file or created by a parent
+config_group, a config_item must have one of the _init() functions
+called on it.  This initializes the reference count and sets up the
+appropriate fields.
+
+All users of a config_item should have a reference on it via
+config_item_get(), and drop the reference when they are done via
+config_item_put().
+
+By itself, a config_item cannot do much more than appear in configfs.
+Usually a subsystem wants the item to display and/or store attributes,
+among other things.  For that, it needs a type.
+
+[struct config_item_type]
+
+	struct configfs_item_operations {
+		void (*release)(struct config_item *);
+		ssize_t (*show_attribute)(struct config_item *,
+					  struct configfs_attribute *,
+					  char *);
+		ssize_t (*store_attribute)(struct config_item *,
+					   struct configfs_attribute *,
+					   const char *, size_t);
+		int (*allow_link)(struct config_item *src,
+				  struct config_item *target);
+		int (*drop_link)(struct config_item *src,
+				 struct config_item *target);
+	};
+
+	struct config_item_type {
+		struct module                           *ct_owner;
+		struct configfs_item_operations         *ct_item_ops;
+		struct configfs_group_operations        *ct_group_ops;
+		struct configfs_attribute               **ct_attrs;
+	};
+
+The most basic function of a config_item_type is to define what
+operations can be performed on a config_item.  All items that have been
+allocated dynamically will need to provide the ct_item_ops->release()
+method.  This method is called when the config_item's reference count
+reaches zero.  Items that wish to display an attribute need to provide
+the ct_item_ops->show_attribute() method.  Similarly, storing a new
+attribute value uses the store_attribute() method.
+
+[struct configfs_attribute]
+
+	struct configfs_attribute {
+		char                    *ca_name;
+		struct module           *ca_owner;
+		mode_t                  ca_mode;
+	};
+
+When a config_item wants an attribute to appear as a file in the item's
+configfs directory, it must define a configfs_attribute describing it.
+It then adds the attribute to the NULL-terminated array
+config_item_type->ct_attrs.  When the item appears in configfs, the
+attribute file will appear with the configfs_attribute->ca_name
+filename.  configfs_attribute->ca_mode specifies the file permissions.
+
+If an attribute is readable and the config_item provides a
+ct_item_ops->show_attribute() method, that method will be called
+whenever userspace asks for a read(2) on the attribute.  The converse
+will happen for write(2).
+
+[struct config_group]
+
+A config_item cannot live in a vaccum.  The only way one can be created
+is via mkdir(2) on a config_group.  This will trigger creation of a
+child item.
+
+	struct config_group {
+		struct config_item		cg_item;
+		struct list_head		cg_children;
+		struct configfs_subsystem 	*cg_subsys;
+		struct config_group		**default_groups;
+	};
+
+	void config_group_init(struct config_group *group);
+	void config_group_init_type_name(struct config_group *group,
+					 const char *name,
+					 struct config_item_type *type);
+
+
+The config_group structure contains a config_item.  Properly configuring
+that item means that a group can behave as an item in its own right.
+However, it can do more: it can create child items or groups.  This is
+accomplished via the group operations specified on the group's
+config_item_type.
+
+	struct configfs_group_operations {
+		struct config_item *(*make_item)(struct config_group *group,
+						 const char *name);
+		struct config_group *(*make_group)(struct config_group *group,
+						   const char *name);
+		int (*commit_item)(struct config_item *item);
+		void (*drop_item)(struct config_group *group,
+				  struct config_item *item);
+	};
+
+A group creates child items by providing the
+ct_group_ops->make_item() method.  If provided, this method is called from mkdir(2) in the group's directory.  The subsystem allocates a new
+config_item (or more likely, its container structure), initializes it,
+and returns it to configfs.  Configfs will then populate the filesystem
+tree to reflect the new item.
+
+If the subsystem wants the child to be a group itself, the subsystem
+provides ct_group_ops->make_group().  Everything else behaves the same,
+using the group _init() functions on the group.
+
+Finally, when userspace calls rmdir(2) on the item or group,
+ct_group_ops->drop_item() is called.  As a config_group is also a
+config_item, it is not necessary for a seperate drop_group() method.
+The subsystem must config_item_put() the reference that was initialized
+upon item allocation.  If a subsystem has no work to do, it may omit
+the ct_group_ops->drop_item() method, and configfs will call
+config_item_put() on the item on behalf of the subsystem.
+
+IMPORTANT: drop_item() is void, and as such cannot fail.  When rmdir(2)
+is called, configfs WILL remove the item from the filesystem tree
+(assuming that it has no children to keep it busy).  The subsystem is
+responsible for responding to this.  If the subsystem has references to
+the item in other threads, the memory is safe.  It may take some time
+for the item to actually disappear from the subsystem's usage.  But it
+is gone from configfs.
+
+A config_group cannot be removed while it still has child items.  This
+is implemented in the configfs rmdir(2) code.  ->drop_item() will not be
+called, as the item has not been dropped.  rmdir(2) will fail, as the
+directory is not empty.
+
+[struct configfs_subsystem]
+
+A subsystem must register itself, ususally at module_init time.  This
+tells configfs to make the subsystem appear in the file tree.
+
+	struct configfs_subsystem {
+		struct config_group	su_group;
+		struct semaphore	su_sem;
+	};
+
+	int configfs_register_subsystem(struct configfs_subsystem *subsys);
+	void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+
+	A subsystem consists of a toplevel config_group and a semaphore.
+The group is where child config_items are created.  For a subsystem,
+this group is usually defined statically.  Before calling
+configfs_register_subsystem(), the subsystem must have initialized the
+group via the usual group _init() functions, and it must also have
+initialized the semaphore.
+	When the register call returns, the subsystem is live, and it
+will be visible via configfs.  At that point, mkdir(2) can be called and
+the subsystem must be ready for it.
+
+[An Example]
+
+The best example of these basic concepts is the simple_children
+subsystem/group and the simple_child item in configfs_example.c  It
+shows a trivial object displaying and storing an attribute, and a simple
+group creating and destroying these children.
+
+[Hierarchy Navigation and the Subsystem Semaphore]
+
+There is an extra bonus that configfs provides.  The config_groups and
+config_items are arranged in a hierarchy due to the fact that they
+appear in a filesystem.  A subsystem is NEVER to touch the filesystem
+parts, but the subsystem might be interested in this hierarchy.  For
+this reason, the hierarchy is mirrored via the config_group->cg_children
+and config_item->ci_parent structure members.
+
+A subsystem can navigate the cg_children list and the ci_parent pointer
+to see the tree created by the subsystem.  This can race with configfs'
+management of the hierarchy, so configfs uses the subsystem semaphore to
+protect modifications.  Whenever a subsystem wants to navigate the
+hierarchy, it must do so under the protection of the subsystem
+semaphore.
+
+A subsystem will be prevented from acquiring the semaphore while a newly
+allocated item has not been linked into this hierarchy.   Similarly, it
+will not be able to acquire the semaphore while a dropping item has not
+yet been unlinked.  This means that an item's ci_parent pointer will
+never be NULL while the item is in configfs, and that an item will only
+be in its parent's cg_children list for the same duration.  This allows
+a subsystem to trust ci_parent and cg_children while they hold the
+semaphore.
+
+[Item Aggregation Via symlink(2)]
+
+configfs provides a simple group via the group->item parent/child
+relationship.  Often, however, a larger environment requires aggregation
+outside of the parent/child connection.  This is implemented via
+symlink(2).
+
+A config_item may provide the ct_item_ops->allow_link() and
+ct_item_ops->drop_link() methods.  If the ->allow_link() method exists,
+symlink(2) may be called with the config_item as the source of the link.
+These links are only allowed between configfs config_items.  Any
+symlink(2) attempt outside the configfs filesystem will be denied.
+
+When symlink(2) is called, the source config_item's ->allow_link()
+method is called with itself and a target item.  If the source item
+allows linking to target item, it returns 0.  A source item may wish to
+reject a link if it only wants links to a certain type of object (say,
+in its own subsystem).
+
+When unlink(2) is called on the symbolic link, the source item is
+notified via the ->drop_link() method.  Like the ->drop_item() method,
+this is a void function and cannot return failure.  The subsystem is
+responsible for responding to the change.
+
+A config_item cannot be removed while it links to any other item, nor
+can it be removed while an item links to it.  Dangling symlinks are not
+allowed in configfs.
+
+[Automatically Created Subgroups]
+
+A new config_group may want to have two types of child config_items.
+While this could be codified by magic names in ->make_item(), it is much
+more explicit to have a method whereby userspace sees this divergence.
+
+Rather than have a group where some items behave differently than
+others, configfs provides a method whereby one or many subgroups are
+automatically created inside the parent at its creation.  Thus,
+mkdir("parent) results in "parent", "parent/subgroup1", up through
+"parent/subgroupN".  Items of type 1 can now be created in
+"parent/subgroup1", and items of type N can be created in
+"parent/subgroupN".
+
+These automatic subgroups, or default groups, do not preclude other
+children of the parent group.  If ct_group_ops->make_group() exists,
+other child groups can be created on the parent group directly.
+
+A configfs subsystem specifies default groups by filling in the
+NULL-terminated array default_groups on the config_group structure.
+Each group in that array is populated in the configfs tree at the same
+time as the parent group.  Similarly, they are removed at the same time
+as the parent.  No extra notification is provided.  When a ->drop_item()
+method call notifies the subsystem the parent group is going away, it
+also means every default group child associated with that parent group.
+
+As a consequence of this, default_groups cannot be removed directly via
+rmdir(2).  They also are not considered when rmdir(2) on the parent
+group is checking for children.
+
+[Committable Items]
+
+NOTE: Committable items are currently unimplemented.
+
+Some config_items cannot have a valid initial state.  That is, no
+default values can be specified for the item's attributes such that the
+item can do its work.  Userspace must configure one or more attributes,
+after which the subsystem can start whatever entity this item
+represents.
+
+Consider the FakeNBD device from above.  Without a target address *and*
+a target device, the subsystem has no idea what block device to import.
+The simple example assumes that the subsystem merely waits until all the
+appropriate attributes are configured, and then connects.  This will,
+indeed, work, but now every attribute store must check if the attributes
+are initialized.  Every attribute store must fire off the connection if
+that condition is met.
+
+Far better would be an explicit action notifying the subsystem that the
+config_item is ready to go.  More importantly, an explicit action allows
+the subsystem to provide feedback as to whether the attibutes are
+initialized in a way that makes sense.  configfs provides this as
+committable items.
+
+configfs still uses only normal filesystem operations.  An item is
+committed via rename(2).  The item is moved from a directory where it
+can be modified to a directory where it cannot.
+
+Any group that provides the ct_group_ops->commit_item() method has
+committable items.  When this group appears in configfs, mkdir(2) will
+not work directly in the group.  Instead, the group will have two
+subdirectories: "live" and "pending".  The "live" directory does not
+support mkdir(2) or rmdir(2) either.  It only allows rename(2).  The
+"pending" directory does allow mkdir(2) and rmdir(2).  An item is
+created in the "pending" directory.  Its attributes can be modified at
+will.  Userspace commits the item by renaming it into the "live"
+directory.  At this point, the subsystem recieves the ->commit_item()
+callback.  If all required attributes are filled to satisfaction, the
+method returns zero and the item is moved to the "live" directory.
+
+As rmdir(2) does not work in the "live" directory, an item must be
+shutdown, or "uncommitted".  Again, this is done via rename(2), this
+time from the "live" directory back to the "pending" one.  The subsystem
+is notified by the ct_group_ops->uncommit_object() method.
+
+
diff --git a/Documentation/filesystems/configfs/configfs_example.c b/Documentation/filesystems/configfs/configfs_example.c
new file mode 100644
index 000000000000..f3c6e4946f98
--- /dev/null
+++ b/Documentation/filesystems/configfs/configfs_example.c
@@ -0,0 +1,474 @@
+/*
+ * vim: noexpandtab ts=8 sts=0 sw=8:
+ *
+ * configfs_example.c - This file is a demonstration module containing
+ *      a number of configfs subsystems.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+
+/*
+ * 01-childless
+ *
+ * This first example is a childless subsystem.  It cannot create
+ * any config_items.  It just has attributes.
+ *
+ * Note that we are enclosing the configfs_subsystem inside a container.
+ * This is not necessary if a subsystem has no attributes directly
+ * on the subsystem.  See the next example, 02-simple-children, for
+ * such a subsystem.
+ */
+
+struct childless {
+	struct configfs_subsystem subsys;
+	int showme;
+	int storeme;
+};
+
+struct childless_attribute {
+	struct configfs_attribute attr;
+	ssize_t (*show)(struct childless *, char *);
+	ssize_t (*store)(struct childless *, const char *, size_t);
+};
+
+static inline struct childless *to_childless(struct config_item *item)
+{
+	return item ? container_of(to_configfs_subsystem(to_config_group(item)), struct childless, subsys) : NULL;
+}
+
+static ssize_t childless_showme_read(struct childless *childless,
+				     char *page)
+{
+	ssize_t pos;
+
+	pos = sprintf(page, "%d\n", childless->showme);
+	childless->showme++;
+
+	return pos;
+}
+
+static ssize_t childless_storeme_read(struct childless *childless,
+				      char *page)
+{
+	return sprintf(page, "%d\n", childless->storeme);
+}
+
+static ssize_t childless_storeme_write(struct childless *childless,
+				       const char *page,
+				       size_t count)
+{
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	childless->storeme = tmp;
+
+	return count;
+}
+
+static ssize_t childless_description_read(struct childless *childless,
+					  char *page)
+{
+	return sprintf(page,
+"[01-childless]\n"
+"\n"
+"The childless subsystem is the simplest possible subsystem in\n"
+"configfs.  It does not support the creation of child config_items.\n"
+"It only has a few attributes.  In fact, it isn't much different\n"
+"than a directory in /proc.\n");
+}
+
+static struct childless_attribute childless_attr_showme = {
+	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "showme", .ca_mode = S_IRUGO },
+	.show	= childless_showme_read,
+};
+static struct childless_attribute childless_attr_storeme = {
+	.attr	= { .ca_owner = THIS_MODULE, .ca_name = "storeme", .ca_mode = S_IRUGO | S_IWUSR },
+	.show	= childless_storeme_read,
+	.store	= childless_storeme_write,
+};
+static struct childless_attribute childless_attr_description = {
+	.attr = { .ca_owner = THIS_MODULE, .ca_name = "description", .ca_mode = S_IRUGO },
+	.show = childless_description_read,
+};
+
+static struct configfs_attribute *childless_attrs[] = {
+	&childless_attr_showme.attr,
+	&childless_attr_storeme.attr,
+	&childless_attr_description.attr,
+	NULL,
+};
+
+static ssize_t childless_attr_show(struct config_item *item,
+				   struct configfs_attribute *attr,
+				   char *page)
+{
+	struct childless *childless = to_childless(item);
+	struct childless_attribute *childless_attr =
+		container_of(attr, struct childless_attribute, attr);
+	ssize_t ret = 0;
+
+	if (childless_attr->show)
+		ret = childless_attr->show(childless, page);
+	return ret;
+}
+
+static ssize_t childless_attr_store(struct config_item *item,
+				    struct configfs_attribute *attr,
+				    const char *page, size_t count)
+{
+	struct childless *childless = to_childless(item);
+	struct childless_attribute *childless_attr =
+		container_of(attr, struct childless_attribute, attr);
+	ssize_t ret = -EINVAL;
+
+	if (childless_attr->store)
+		ret = childless_attr->store(childless, page, count);
+	return ret;
+}
+
+static struct configfs_item_operations childless_item_ops = {
+	.show_attribute		= childless_attr_show,
+	.store_attribute	= childless_attr_store,
+};
+
+static struct config_item_type childless_type = {
+	.ct_item_ops	= &childless_item_ops,
+	.ct_attrs	= childless_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+static struct childless childless_subsys = {
+	.subsys = {
+		.su_group = {
+			.cg_item = {
+				.ci_namebuf = "01-childless",
+				.ci_type = &childless_type,
+			},
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 02-simple-children
+ *
+ * This example merely has a simple one-attribute child.  Note that
+ * there is no extra attribute structure, as the child's attribute is
+ * known from the get-go.  Also, there is no container for the
+ * subsystem, as it has no attributes of its own.
+ */
+
+struct simple_child {
+	struct config_item item;
+	int storeme;
+};
+
+static inline struct simple_child *to_simple_child(struct config_item *item)
+{
+	return item ? container_of(item, struct simple_child, item) : NULL;
+}
+
+static struct configfs_attribute simple_child_attr_storeme = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "storeme",
+	.ca_mode = S_IRUGO | S_IWUSR,
+};
+
+static struct configfs_attribute *simple_child_attrs[] = {
+	&simple_child_attr_storeme,
+	NULL,
+};
+
+static ssize_t simple_child_attr_show(struct config_item *item,
+				      struct configfs_attribute *attr,
+				      char *page)
+{
+	ssize_t count;
+	struct simple_child *simple_child = to_simple_child(item);
+
+	count = sprintf(page, "%d\n", simple_child->storeme);
+
+	return count;
+}
+
+static ssize_t simple_child_attr_store(struct config_item *item,
+				       struct configfs_attribute *attr,
+				       const char *page, size_t count)
+{
+	struct simple_child *simple_child = to_simple_child(item);
+	unsigned long tmp;
+	char *p = (char *) page;
+
+	tmp = simple_strtoul(p, &p, 10);
+	if (!p || (*p && (*p != '\n')))
+		return -EINVAL;
+
+	if (tmp > INT_MAX)
+		return -ERANGE;
+
+	simple_child->storeme = tmp;
+
+	return count;
+}
+
+static void simple_child_release(struct config_item *item)
+{
+	kfree(to_simple_child(item));
+}
+
+static struct configfs_item_operations simple_child_item_ops = {
+	.release		= simple_child_release,
+	.show_attribute		= simple_child_attr_show,
+	.store_attribute	= simple_child_attr_store,
+};
+
+static struct config_item_type simple_child_type = {
+	.ct_item_ops	= &simple_child_item_ops,
+	.ct_attrs	= simple_child_attrs,
+	.ct_owner	= THIS_MODULE,
+};
+
+
+static struct config_item *simple_children_make_item(struct config_group *group, const char *name)
+{
+	struct simple_child *simple_child;
+
+	simple_child = kmalloc(sizeof(struct simple_child), GFP_KERNEL);
+	if (!simple_child)
+		return NULL;
+
+	memset(simple_child, 0, sizeof(struct simple_child));
+
+	config_item_init_type_name(&simple_child->item, name,
+				   &simple_child_type);
+
+	simple_child->storeme = 0;
+
+	return &simple_child->item;
+}
+
+static struct configfs_attribute simple_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *simple_children_attrs[] = {
+	&simple_children_attr_description,
+	NULL,
+};
+
+static ssize_t simple_children_attr_show(struct config_item *item,
+			   		 struct configfs_attribute *attr,
+			   		 char *page)
+{
+	return sprintf(page,
+"[02-simple-children]\n"
+"\n"
+"This subsystem allows the creation of child config_items.  These\n"
+"items have only one attribute that is readable and writeable.\n");
+}
+
+static struct configfs_item_operations simple_children_item_ops = {
+	.show_attribute	= simple_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations simple_children_group_ops = {
+	.make_item	= simple_children_make_item,
+};
+
+static struct config_item_type simple_children_type = {
+	.ct_item_ops	= &simple_children_item_ops,
+	.ct_group_ops	= &simple_children_group_ops,
+	.ct_attrs	= simple_children_attrs,
+};
+
+static struct configfs_subsystem simple_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "02-simple-children",
+			.ci_type = &simple_children_type,
+		},
+	},
+};
+
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * 03-group-children
+ *
+ * This example reuses the simple_children group from above.  However,
+ * the simple_children group is not the subsystem itself, it is a
+ * child of the subsystem.  Creation of a group in the subsystem creates
+ * a new simple_children group.  That group can then have simple_child
+ * children of its own.
+ */
+
+struct simple_children {
+	struct config_group group;
+};
+
+static struct config_group *group_children_make_group(struct config_group *group, const char *name)
+{
+	struct simple_children *simple_children;
+
+	simple_children = kmalloc(sizeof(struct simple_children),
+				  GFP_KERNEL);
+	if (!simple_children)
+		return NULL;
+
+	memset(simple_children, 0, sizeof(struct simple_children));
+
+	config_group_init_type_name(&simple_children->group, name,
+				    &simple_children_type);
+
+	return &simple_children->group;
+}
+
+static struct configfs_attribute group_children_attr_description = {
+	.ca_owner = THIS_MODULE,
+	.ca_name = "description",
+	.ca_mode = S_IRUGO,
+};
+
+static struct configfs_attribute *group_children_attrs[] = {
+	&group_children_attr_description,
+	NULL,
+};
+
+static ssize_t group_children_attr_show(struct config_item *item,
+			   		struct configfs_attribute *attr,
+			   		char *page)
+{
+	return sprintf(page,
+"[03-group-children]\n"
+"\n"
+"This subsystem allows the creation of child config_groups.  These\n"
+"groups are like the subsystem simple-children.\n");
+}
+
+static struct configfs_item_operations group_children_item_ops = {
+	.show_attribute	= group_children_attr_show,
+};
+
+/*
+ * Note that, since no extra work is required on ->drop_item(),
+ * no ->drop_item() is provided.
+ */
+static struct configfs_group_operations group_children_group_ops = {
+	.make_group	= group_children_make_group,
+};
+
+static struct config_item_type group_children_type = {
+	.ct_item_ops	= &group_children_item_ops,
+	.ct_group_ops	= &group_children_group_ops,
+	.ct_attrs	= group_children_attrs,
+};
+
+static struct configfs_subsystem group_children_subsys = {
+	.su_group = {
+		.cg_item = {
+			.ci_namebuf = "03-group-children",
+			.ci_type = &group_children_type,
+		},
+	},
+};
+
+/* ----------------------------------------------------------------- */
+
+/*
+ * We're now done with our subsystem definitions.
+ * For convenience in this module, here's a list of them all.  It
+ * allows the init function to easily register them.  Most modules
+ * will only have one subsystem, and will only call register_subsystem
+ * on it directly.
+ */
+static struct configfs_subsystem *example_subsys[] = {
+	&childless_subsys.subsys,
+	&simple_children_subsys,
+	&group_children_subsys,
+	NULL,
+};
+
+static int __init configfs_example_init(void)
+{
+	int ret;
+	int i;
+	struct configfs_subsystem *subsys;
+
+	for (i = 0; example_subsys[i]; i++) {
+		subsys = example_subsys[i];
+
+		config_group_init(&subsys->su_group);
+		init_MUTEX(&subsys->su_sem);
+		ret = configfs_register_subsystem(subsys);
+		if (ret) {
+			printk(KERN_ERR "Error %d while registering subsystem %s\n",
+			       ret,
+			       subsys->su_group.cg_item.ci_namebuf);
+			goto out_unregister;
+		}
+	}
+
+	return 0;
+
+out_unregister:
+	for (; i >= 0; i--) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+
+	return ret;
+}
+
+static void __exit configfs_example_exit(void)
+{
+	int i;
+
+	for (i = 0; example_subsys[i]; i++) {
+		configfs_unregister_subsystem(example_subsys[i]);
+	}
+}
+
+module_init(configfs_example_init);
+module_exit(configfs_example_exit);
+MODULE_LICENSE("GPL");
diff --git a/MAINTAINERS b/MAINTAINERS
index 6af683025ae0..86ee06f43794 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -554,6 +554,11 @@ W:	http://us1.samba.org/samba/Linux_CIFS_client.html
 T:	git kernel.org:/pub/scm/linux/kernel/git/sfrench/cifs-2.6.git
 S:	Supported	
 
+CONFIGFS
+P:	Joel Becker
+M:	Joel Becker <joel.becker@oracle.com>
+S:	Supported
+
 CIRRUS LOGIC GENERIC FBDEV DRIVER
 P:	Jeff Garzik
 M:	jgarzik@pobox.com
diff --git a/fs/Kconfig b/fs/Kconfig
index d5255e627b5f..ba1dbe2b2202 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -841,6 +841,20 @@ config RELAYFS_FS
 
 	  If unsure, say N.
 
+config CONFIGFS_FS
+	tristate "Userspace-driven configuration filesystem (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  configfs is a ram-based filesystem that provides the converse
+	  of sysfs's functionality. Where sysfs is a filesystem-based
+	  view of kernel objects, configfs is a filesystem-based manager
+	  of kernel objects, or config_items.
+
+	  Both sysfs and configfs can and should exist together on the
+	  same system. One is not a replacement for the other.
+
+	  If unsure, say N.
+
 endmenu
 
 menu "Miscellaneous filesystems"
diff --git a/fs/Makefile b/fs/Makefile
index 4c2655759078..ff3d48a744f5 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -101,3 +101,4 @@ obj-$(CONFIG_BEFS_FS)		+= befs/
 obj-$(CONFIG_HOSTFS)		+= hostfs/
 obj-$(CONFIG_HPPFS)		+= hppfs/
 obj-$(CONFIG_DEBUG_FS)		+= debugfs/
+obj-$(CONFIG_CONFIGFS_FS)	+= configfs/
diff --git a/fs/configfs/Makefile b/fs/configfs/Makefile
new file mode 100644
index 000000000000..00ffb278e98c
--- /dev/null
+++ b/fs/configfs/Makefile
@@ -0,0 +1,7 @@
+#
+# Makefile for the configfs virtual filesystem
+#
+
+obj-$(CONFIG_CONFIGFS_FS)	+= configfs.o
+
+configfs-objs	:= inode.o file.o dir.o symlink.o mount.o item.o
diff --git a/fs/configfs/configfs_internal.h b/fs/configfs/configfs_internal.h
new file mode 100644
index 000000000000..8899d9c5f6bf
--- /dev/null
+++ b/fs/configfs/configfs_internal.h
@@ -0,0 +1,142 @@
+/* -*- mode: c; c-basic-offset:8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * configfs_internal.h - Internal stuff for configfs
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+
+struct configfs_dirent {
+	atomic_t		s_count;
+	struct list_head	s_sibling;
+	struct list_head	s_children;
+	struct list_head	s_links;
+	void 			* s_element;
+	int			s_type;
+	umode_t			s_mode;
+	struct dentry		* s_dentry;
+};
+
+#define CONFIGFS_ROOT		0x0001
+#define CONFIGFS_DIR		0x0002
+#define CONFIGFS_ITEM_ATTR 	0x0004
+#define CONFIGFS_ITEM_LINK 	0x0020
+#define CONFIGFS_USET_DIR	0x0040
+#define CONFIGFS_USET_DEFAULT	0x0080
+#define CONFIGFS_USET_DROPPING	0x0100
+#define CONFIGFS_NOT_PINNED	(CONFIGFS_ITEM_ATTR)
+
+extern struct vfsmount * configfs_mount;
+
+extern int configfs_is_root(struct config_item *item);
+
+extern struct inode * configfs_new_inode(mode_t mode);
+extern int configfs_create(struct dentry *, int mode, int (*init)(struct inode *));
+
+extern int configfs_create_file(struct config_item *, const struct configfs_attribute *);
+extern int configfs_make_dirent(struct configfs_dirent *,
+				struct dentry *, void *, umode_t, int);
+
+extern int configfs_add_file(struct dentry *, const struct configfs_attribute *, int);
+extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
+
+extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
+extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
+
+extern int configfs_pin_fs(void);
+extern void configfs_release_fs(void);
+
+extern struct rw_semaphore configfs_rename_sem;
+extern struct super_block * configfs_sb;
+extern struct file_operations configfs_dir_operations;
+extern struct file_operations configfs_file_operations;
+extern struct file_operations bin_fops;
+extern struct inode_operations configfs_dir_inode_operations;
+extern struct inode_operations configfs_symlink_inode_operations;
+
+extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
+			    const char *symname);
+extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
+
+struct configfs_symlink {
+	struct list_head sl_list;
+	struct config_item *sl_target;
+};
+
+extern int configfs_create_link(struct configfs_symlink *sl,
+				struct dentry *parent,
+				struct dentry *dentry);
+
+static inline struct config_item * to_item(struct dentry * dentry)
+{
+	struct configfs_dirent * sd = dentry->d_fsdata;
+	return ((struct config_item *) sd->s_element);
+}
+
+static inline struct configfs_attribute * to_attr(struct dentry * dentry)
+{
+	struct configfs_dirent * sd = dentry->d_fsdata;
+	return ((struct configfs_attribute *) sd->s_element);
+}
+
+static inline struct config_item *configfs_get_config_item(struct dentry *dentry)
+{
+	struct config_item * item = NULL;
+
+	spin_lock(&dcache_lock);
+	if (!d_unhashed(dentry)) {
+		struct configfs_dirent * sd = dentry->d_fsdata;
+		if (sd->s_type & CONFIGFS_ITEM_LINK) {
+			struct configfs_symlink * sl = sd->s_element;
+			item = config_item_get(sl->sl_target);
+		} else
+			item = config_item_get(sd->s_element);
+	}
+	spin_unlock(&dcache_lock);
+
+	return item;
+}
+
+static inline void release_configfs_dirent(struct configfs_dirent * sd)
+{
+	if (!(sd->s_type & CONFIGFS_ROOT))
+		kfree(sd);
+}
+
+static inline struct configfs_dirent * configfs_get(struct configfs_dirent * sd)
+{
+	if (sd) {
+		WARN_ON(!atomic_read(&sd->s_count));
+		atomic_inc(&sd->s_count);
+	}
+	return sd;
+}
+
+static inline void configfs_put(struct configfs_dirent * sd)
+{
+	WARN_ON(!atomic_read(&sd->s_count));
+	if (atomic_dec_and_test(&sd->s_count))
+		release_configfs_dirent(sd);
+}
+
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
new file mode 100644
index 000000000000..e48b539243a1
--- /dev/null
+++ b/fs/configfs/dir.c
@@ -0,0 +1,1102 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * dir.c - Operations for configfs directories.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#undef DEBUG
+
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+#include "configfs_internal.h"
+
+DECLARE_RWSEM(configfs_rename_sem);
+
+static void configfs_d_iput(struct dentry * dentry,
+			    struct inode * inode)
+{
+	struct configfs_dirent * sd = dentry->d_fsdata;
+
+	if (sd) {
+		BUG_ON(sd->s_dentry != dentry);
+		sd->s_dentry = NULL;
+		configfs_put(sd);
+	}
+	iput(inode);
+}
+
+/*
+ * We _must_ delete our dentries on last dput, as the chain-to-parent
+ * behavior is required to clear the parents of default_groups.
+ */
+static int configfs_d_delete(struct dentry *dentry)
+{
+	return 1;
+}
+
+static struct dentry_operations configfs_dentry_ops = {
+	.d_iput		= configfs_d_iput,
+	/* simple_delete_dentry() isn't exported */
+	.d_delete	= configfs_d_delete,
+};
+
+/*
+ * Allocates a new configfs_dirent and links it to the parent configfs_dirent
+ */
+static struct configfs_dirent *configfs_new_dirent(struct configfs_dirent * parent_sd,
+						void * element)
+{
+	struct configfs_dirent * sd;
+
+	sd = kmalloc(sizeof(*sd), GFP_KERNEL);
+	if (!sd)
+		return NULL;
+
+	memset(sd, 0, sizeof(*sd));
+	atomic_set(&sd->s_count, 1);
+	INIT_LIST_HEAD(&sd->s_links);
+	INIT_LIST_HEAD(&sd->s_children);
+	list_add(&sd->s_sibling, &parent_sd->s_children);
+	sd->s_element = element;
+
+	return sd;
+}
+
+int configfs_make_dirent(struct configfs_dirent * parent_sd,
+			 struct dentry * dentry, void * element,
+			 umode_t mode, int type)
+{
+	struct configfs_dirent * sd;
+
+	sd = configfs_new_dirent(parent_sd, element);
+	if (!sd)
+		return -ENOMEM;
+
+	sd->s_mode = mode;
+	sd->s_type = type;
+	sd->s_dentry = dentry;
+	if (dentry) {
+		dentry->d_fsdata = configfs_get(sd);
+		dentry->d_op = &configfs_dentry_ops;
+	}
+
+	return 0;
+}
+
+static int init_dir(struct inode * inode)
+{
+	inode->i_op = &configfs_dir_inode_operations;
+	inode->i_fop = &configfs_dir_operations;
+
+	/* directory inodes start off with i_nlink == 2 (for "." entry) */
+	inode->i_nlink++;
+	return 0;
+}
+
+static int init_file(struct inode * inode)
+{
+	inode->i_size = PAGE_SIZE;
+	inode->i_fop = &configfs_file_operations;
+	return 0;
+}
+
+static int init_symlink(struct inode * inode)
+{
+	inode->i_op = &configfs_symlink_inode_operations;
+	return 0;
+}
+
+static int create_dir(struct config_item * k, struct dentry * p,
+		      struct dentry * d)
+{
+	int error;
+	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
+
+	error = configfs_create(d, mode, init_dir);
+	if (!error) {
+		error = configfs_make_dirent(p->d_fsdata, d, k, mode,
+					   CONFIGFS_DIR);
+		if (!error) {
+			p->d_inode->i_nlink++;
+			(d)->d_op = &configfs_dentry_ops;
+		}
+	}
+	return error;
+}
+
+
+/**
+ *	configfs_create_dir - create a directory for an config_item.
+ *	@item:		config_itemwe're creating directory for.
+ *	@dentry:	config_item's dentry.
+ */
+
+static int configfs_create_dir(struct config_item * item, struct dentry *dentry)
+{
+	struct dentry * parent;
+	int error = 0;
+
+	BUG_ON(!item);
+
+	if (item->ci_parent)
+		parent = item->ci_parent->ci_dentry;
+	else if (configfs_mount && configfs_mount->mnt_sb)
+		parent = configfs_mount->mnt_sb->s_root;
+	else
+		return -EFAULT;
+
+	error = create_dir(item,parent,dentry);
+	if (!error)
+		item->ci_dentry = dentry;
+	return error;
+}
+
+int configfs_create_link(struct configfs_symlink *sl,
+			 struct dentry *parent,
+			 struct dentry *dentry)
+{
+	int err = 0;
+	umode_t mode = S_IFLNK | S_IRWXUGO;
+
+	err = configfs_create(dentry, mode, init_symlink);
+	if (!err) {
+		err = configfs_make_dirent(parent->d_fsdata, dentry, sl,
+					 mode, CONFIGFS_ITEM_LINK);
+		if (!err)
+			dentry->d_op = &configfs_dentry_ops;
+	}
+	return err;
+}
+
+static void remove_dir(struct dentry * d)
+{
+	struct dentry * parent = dget(d->d_parent);
+	struct configfs_dirent * sd;
+
+	sd = d->d_fsdata;
+ 	list_del_init(&sd->s_sibling);
+	configfs_put(sd);
+	if (d->d_inode)
+		simple_rmdir(parent->d_inode,d);
+
+	pr_debug(" o %s removing done (%d)\n",d->d_name.name,
+		 atomic_read(&d->d_count));
+
+	dput(parent);
+}
+
+/**
+ * configfs_remove_dir - remove an config_item's directory.
+ * @item:	config_item we're removing.
+ *
+ * The only thing special about this is that we remove any files in
+ * the directory before we remove the directory, and we've inlined
+ * what used to be configfs_rmdir() below, instead of calling separately.
+ */
+
+static void configfs_remove_dir(struct config_item * item)
+{
+	struct dentry * dentry = dget(item->ci_dentry);
+
+	if (!dentry)
+		return;
+
+	remove_dir(dentry);
+	/**
+	 * Drop reference from dget() on entrance.
+	 */
+	dput(dentry);
+}
+
+
+/* attaches attribute's configfs_dirent to the dentry corresponding to the
+ * attribute file
+ */
+static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * dentry)
+{
+	struct configfs_attribute * attr = sd->s_element;
+	int error;
+
+	error = configfs_create(dentry, (attr->ca_mode & S_IALLUGO) | S_IFREG, init_file);
+	if (error)
+		return error;
+
+	dentry->d_op = &configfs_dentry_ops;
+	dentry->d_fsdata = configfs_get(sd);
+	sd->s_dentry = dentry;
+	d_rehash(dentry);
+
+	return 0;
+}
+
+static struct dentry * configfs_lookup(struct inode *dir,
+				       struct dentry *dentry,
+				       struct nameidata *nd)
+{
+	struct configfs_dirent * parent_sd = dentry->d_parent->d_fsdata;
+	struct configfs_dirent * sd;
+	int found = 0;
+	int err = 0;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_type & CONFIGFS_NOT_PINNED) {
+			const unsigned char * name = configfs_get_name(sd);
+
+			if (strcmp(name, dentry->d_name.name))
+				continue;
+
+			found = 1;
+			err = configfs_attach_attr(sd, dentry);
+			break;
+		}
+	}
+
+	if (!found) {
+		/*
+		 * If it doesn't exist and it isn't a NOT_PINNED item,
+		 * it must be negative.
+		 */
+		return simple_lookup(dir, dentry, nd);
+	}
+
+	return ERR_PTR(err);
+}
+
+/*
+ * Only subdirectories count here.  Files (CONFIGFS_NOT_PINNED) are
+ * attributes and are removed by rmdir().  We recurse, taking i_sem
+ * on all children that are candidates for default detach.  If the
+ * result is clean, then configfs_detach_group() will handle dropping
+ * i_sem.  If there is an error, the caller will clean up the i_sem
+ * holders via configfs_detach_rollback().
+ */
+static int configfs_detach_prep(struct dentry *dentry)
+{
+	struct configfs_dirent *parent_sd = dentry->d_fsdata;
+	struct configfs_dirent *sd;
+	int ret;
+
+	ret = -EBUSY;
+	if (!list_empty(&parent_sd->s_links))
+		goto out;
+
+	ret = 0;
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_type & CONFIGFS_NOT_PINNED)
+			continue;
+		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+			down(&sd->s_dentry->d_inode->i_sem);
+			/* Mark that we've taken i_sem */
+			sd->s_type |= CONFIGFS_USET_DROPPING;
+
+			ret = configfs_detach_prep(sd->s_dentry);
+			if (!ret)
+			       	continue;
+		} else
+			ret = -ENOTEMPTY;
+
+		break;
+	}
+
+out:
+	return ret;
+}
+
+/*
+ * Walk the tree, dropping i_sem wherever CONFIGFS_USET_DROPPING is
+ * set.
+ */
+static void configfs_detach_rollback(struct dentry *dentry)
+{
+	struct configfs_dirent *parent_sd = dentry->d_fsdata;
+	struct configfs_dirent *sd;
+
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (sd->s_type & CONFIGFS_USET_DEFAULT) {
+			configfs_detach_rollback(sd->s_dentry);
+
+			if (sd->s_type & CONFIGFS_USET_DROPPING) {
+				sd->s_type &= ~CONFIGFS_USET_DROPPING;
+				up(&sd->s_dentry->d_inode->i_sem);
+			}
+		}
+	}
+}
+
+static void detach_attrs(struct config_item * item)
+{
+	struct dentry * dentry = dget(item->ci_dentry);
+	struct configfs_dirent * parent_sd;
+	struct configfs_dirent * sd, * tmp;
+
+	if (!dentry)
+		return;
+
+	pr_debug("configfs %s: dropping attrs for  dir\n",
+		 dentry->d_name.name);
+
+	parent_sd = dentry->d_fsdata;
+	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+		if (!sd->s_element || !(sd->s_type & CONFIGFS_NOT_PINNED))
+			continue;
+		list_del_init(&sd->s_sibling);
+		configfs_drop_dentry(sd, dentry);
+		configfs_put(sd);
+	}
+
+	/**
+	 * Drop reference from dget() on entrance.
+	 */
+	dput(dentry);
+}
+
+static int populate_attrs(struct config_item *item)
+{
+	struct config_item_type *t = item->ci_type;
+	struct configfs_attribute *attr;
+	int error = 0;
+	int i;
+
+	if (!t)
+		return -EINVAL;
+	if (t->ct_attrs) {
+		for (i = 0; (attr = t->ct_attrs[i]) != NULL; i++) {
+			if ((error = configfs_create_file(item, attr)))
+				break;
+		}
+	}
+
+	if (error)
+		detach_attrs(item);
+
+	return error;
+}
+
+static int configfs_attach_group(struct config_item *parent_item,
+				 struct config_item *item,
+				 struct dentry *dentry);
+static void configfs_detach_group(struct config_item *item);
+
+static void detach_groups(struct config_group *group)
+{
+	struct dentry * dentry = dget(group->cg_item.ci_dentry);
+	struct dentry *child;
+	struct configfs_dirent *parent_sd;
+	struct configfs_dirent *sd, *tmp;
+
+	if (!dentry)
+		return;
+
+	parent_sd = dentry->d_fsdata;
+	list_for_each_entry_safe(sd, tmp, &parent_sd->s_children, s_sibling) {
+		if (!sd->s_element ||
+		    !(sd->s_type & CONFIGFS_USET_DEFAULT))
+			continue;
+
+		child = sd->s_dentry;
+
+		configfs_detach_group(sd->s_element);
+		child->d_inode->i_flags |= S_DEAD;
+
+		/*
+		 * From rmdir/unregister, a configfs_detach_prep() pass
+		 * has taken our i_sem for us.  Drop it.
+		 * From mkdir/register cleanup, there is no sem held.
+		 */
+		if (sd->s_type & CONFIGFS_USET_DROPPING)
+			up(&child->d_inode->i_sem);
+
+		d_delete(child);
+		dput(child);
+	}
+
+	/**
+	 * Drop reference from dget() on entrance.
+	 */
+	dput(dentry);
+}
+
+/*
+ * This fakes mkdir(2) on a default_groups[] entry.  It
+ * creates a dentry, attachs it, and then does fixup
+ * on the sd->s_type.
+ *
+ * We could, perhaps, tweak our parent's ->mkdir for a minute and
+ * try using vfs_mkdir.  Just a thought.
+ */
+static int create_default_group(struct config_group *parent_group,
+				struct config_group *group)
+{
+	int ret;
+	struct qstr name;
+	struct configfs_dirent *sd;
+	/* We trust the caller holds a reference to parent */
+	struct dentry *child, *parent = parent_group->cg_item.ci_dentry;
+
+	if (!group->cg_item.ci_name)
+		group->cg_item.ci_name = group->cg_item.ci_namebuf;
+	name.name = group->cg_item.ci_name;
+	name.len = strlen(name.name);
+	name.hash = full_name_hash(name.name, name.len);
+
+	ret = -ENOMEM;
+	child = d_alloc(parent, &name);
+	if (child) {
+		d_add(child, NULL);
+
+		ret = configfs_attach_group(&parent_group->cg_item,
+					    &group->cg_item, child);
+		if (!ret) {
+			sd = child->d_fsdata;
+			sd->s_type |= CONFIGFS_USET_DEFAULT;
+		} else {
+			d_delete(child);
+			dput(child);
+		}
+	}
+
+	return ret;
+}
+
+static int populate_groups(struct config_group *group)
+{
+	struct config_group *new_group;
+	struct dentry *dentry = group->cg_item.ci_dentry;
+	int ret = 0;
+	int i;
+
+	if (group && group->default_groups) {
+		/* FYI, we're faking mkdir here
+		 * I'm not sure we need this semaphore, as we're called
+		 * from our parent's mkdir.  That holds our parent's
+		 * i_sem, so afaik lookup cannot continue through our
+		 * parent to find us, let alone mess with our tree.
+		 * That said, taking our i_sem is closer to mkdir
+		 * emulation, and shouldn't hurt. */
+		down(&dentry->d_inode->i_sem);
+
+		for (i = 0; group->default_groups[i]; i++) {
+			new_group = group->default_groups[i];
+
+			ret = create_default_group(group, new_group);
+			if (ret)
+				break;
+		}
+
+		up(&dentry->d_inode->i_sem);
+	}
+
+	if (ret)
+		detach_groups(group);
+
+	return ret;
+}
+
+/*
+ * All of link_obj/unlink_obj/link_group/unlink_group require that
+ * subsys->su_sem is held.
+ */
+
+static void unlink_obj(struct config_item *item)
+{
+	struct config_group *group;
+
+	group = item->ci_group;
+	if (group) {
+		list_del_init(&item->ci_entry);
+
+		item->ci_group = NULL;
+		item->ci_parent = NULL;
+		config_item_put(item);
+
+		config_group_put(group);
+	}
+}
+
+static void link_obj(struct config_item *parent_item, struct config_item *item)
+{
+	/* Parent seems redundant with group, but it makes certain
+	 * traversals much nicer. */
+	item->ci_parent = parent_item;
+	item->ci_group = config_group_get(to_config_group(parent_item));
+	list_add_tail(&item->ci_entry, &item->ci_group->cg_children);
+
+	config_item_get(item);
+}
+
+static void unlink_group(struct config_group *group)
+{
+	int i;
+	struct config_group *new_group;
+
+	if (group->default_groups) {
+		for (i = 0; group->default_groups[i]; i++) {
+			new_group = group->default_groups[i];
+			unlink_group(new_group);
+		}
+	}
+
+	group->cg_subsys = NULL;
+	unlink_obj(&group->cg_item);
+}
+
+static void link_group(struct config_group *parent_group, struct config_group *group)
+{
+	int i;
+	struct config_group *new_group;
+	struct configfs_subsystem *subsys = NULL; /* gcc is a turd */
+
+	link_obj(&parent_group->cg_item, &group->cg_item);
+
+	if (parent_group->cg_subsys)
+		subsys = parent_group->cg_subsys;
+	else if (configfs_is_root(&parent_group->cg_item))
+		subsys = to_configfs_subsystem(group);
+	else
+		BUG();
+	group->cg_subsys = subsys;
+
+	if (group->default_groups) {
+		for (i = 0; group->default_groups[i]; i++) {
+			new_group = group->default_groups[i];
+			link_group(group, new_group);
+		}
+	}
+}
+
+/*
+ * The goal is that configfs_attach_item() (and
+ * configfs_attach_group()) can be called from either the VFS or this
+ * module.  That is, they assume that the items have been created,
+ * the dentry allocated, and the dcache is all ready to go.
+ *
+ * If they fail, they must clean up after themselves as if they
+ * had never been called.  The caller (VFS or local function) will
+ * handle cleaning up the dcache bits.
+ *
+ * configfs_detach_group() and configfs_detach_item() behave similarly on
+ * the way out.  They assume that the proper semaphores are held, they
+ * clean up the configfs items, and they expect their callers will
+ * handle the dcache bits.
+ */
+static int configfs_attach_item(struct config_item *parent_item,
+				struct config_item *item,
+				struct dentry *dentry)
+{
+	int ret;
+
+	ret = configfs_create_dir(item, dentry);
+	if (!ret) {
+		ret = populate_attrs(item);
+		if (ret) {
+			configfs_remove_dir(item);
+			d_delete(dentry);
+		}
+	}
+
+	return ret;
+}
+
+static void configfs_detach_item(struct config_item *item)
+{
+	detach_attrs(item);
+	configfs_remove_dir(item);
+}
+
+static int configfs_attach_group(struct config_item *parent_item,
+				 struct config_item *item,
+				 struct dentry *dentry)
+{
+	int ret;
+	struct configfs_dirent *sd;
+
+	ret = configfs_attach_item(parent_item, item, dentry);
+	if (!ret) {
+		sd = dentry->d_fsdata;
+		sd->s_type |= CONFIGFS_USET_DIR;
+
+		ret = populate_groups(to_config_group(item));
+		if (ret) {
+			configfs_detach_item(item);
+			d_delete(dentry);
+		}
+	}
+
+	return ret;
+}
+
+static void configfs_detach_group(struct config_item *item)
+{
+	detach_groups(to_config_group(item));
+	configfs_detach_item(item);
+}
+
+/*
+ * Drop the initial reference from make_item()/make_group()
+ * This function assumes that reference is held on item
+ * and that item holds a valid reference to the parent.  Also, it
+ * assumes the caller has validated ci_type.
+ */
+static void client_drop_item(struct config_item *parent_item,
+			     struct config_item *item)
+{
+	struct config_item_type *type;
+
+	type = parent_item->ci_type;
+	BUG_ON(!type);
+
+	if (type->ct_group_ops && type->ct_group_ops->drop_item)
+		type->ct_group_ops->drop_item(to_config_group(parent_item),
+						item);
+	else
+		config_item_put(item);
+}
+
+
+static int configfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+{
+	int ret;
+	struct config_group *group;
+	struct config_item *item;
+	struct config_item *parent_item;
+	struct configfs_subsystem *subsys;
+	struct configfs_dirent *sd;
+	struct config_item_type *type;
+	struct module *owner;
+	char *name;
+
+	if (dentry->d_parent == configfs_sb->s_root)
+		return -EPERM;
+
+	sd = dentry->d_parent->d_fsdata;
+	if (!(sd->s_type & CONFIGFS_USET_DIR))
+		return -EPERM;
+
+	parent_item = configfs_get_config_item(dentry->d_parent);
+	type = parent_item->ci_type;
+	subsys = to_config_group(parent_item)->cg_subsys;
+	BUG_ON(!subsys);
+
+	if (!type || !type->ct_group_ops ||
+	    (!type->ct_group_ops->make_group &&
+	     !type->ct_group_ops->make_item)) {
+		config_item_put(parent_item);
+		return -EPERM;  /* What lack-of-mkdir returns */
+	}
+
+	name = kmalloc(dentry->d_name.len + 1, GFP_KERNEL);
+	if (!name) {
+		config_item_put(parent_item);
+		return -ENOMEM;
+	}
+	snprintf(name, dentry->d_name.len + 1, "%s", dentry->d_name.name);
+
+	down(&subsys->su_sem);
+	group = NULL;
+	item = NULL;
+	if (type->ct_group_ops->make_group) {
+		group = type->ct_group_ops->make_group(to_config_group(parent_item), name);
+		if (group) {
+			link_group(to_config_group(parent_item), group);
+			item = &group->cg_item;
+		}
+	} else {
+		item = type->ct_group_ops->make_item(to_config_group(parent_item), name);
+		if (item)
+			link_obj(parent_item, item);
+	}
+	up(&subsys->su_sem);
+
+	kfree(name);
+	if (!item) {
+		config_item_put(parent_item);
+		return -ENOMEM;
+	}
+
+	ret = -EINVAL;
+	type = item->ci_type;
+	if (type) {
+		owner = type->ct_owner;
+		if (try_module_get(owner)) {
+			if (group) {
+				ret = configfs_attach_group(parent_item,
+							    item,
+							    dentry);
+			} else {
+				ret = configfs_attach_item(parent_item,
+							   item,
+							   dentry);
+			}
+
+			if (ret) {
+				down(&subsys->su_sem);
+				if (group)
+					unlink_group(group);
+				else
+					unlink_obj(item);
+				client_drop_item(parent_item, item);
+				up(&subsys->su_sem);
+
+				config_item_put(parent_item);
+				module_put(owner);
+			}
+		}
+	}
+
+	return ret;
+}
+
+static int configfs_rmdir(struct inode *dir, struct dentry *dentry)
+{
+	struct config_item *parent_item;
+	struct config_item *item;
+	struct configfs_subsystem *subsys;
+	struct configfs_dirent *sd;
+	struct module *owner = NULL;
+	int ret;
+
+	if (dentry->d_parent == configfs_sb->s_root)
+		return -EPERM;
+
+	sd = dentry->d_fsdata;
+	if (sd->s_type & CONFIGFS_USET_DEFAULT)
+		return -EPERM;
+
+	parent_item = configfs_get_config_item(dentry->d_parent);
+	subsys = to_config_group(parent_item)->cg_subsys;
+	BUG_ON(!subsys);
+
+	if (!parent_item->ci_type) {
+		config_item_put(parent_item);
+		return -EINVAL;
+	}
+
+	ret = configfs_detach_prep(dentry);
+	if (ret) {
+		configfs_detach_rollback(dentry);
+		config_item_put(parent_item);
+		return ret;
+	}
+
+	item = configfs_get_config_item(dentry);
+
+	/* Drop reference from above, item already holds one. */
+	config_item_put(parent_item);
+
+	if (item->ci_type)
+		owner = item->ci_type->ct_owner;
+
+	if (sd->s_type & CONFIGFS_USET_DIR) {
+		configfs_detach_group(item);
+
+		down(&subsys->su_sem);
+		unlink_group(to_config_group(item));
+	} else {
+		configfs_detach_item(item);
+
+		down(&subsys->su_sem);
+		unlink_obj(item);
+	}
+
+	client_drop_item(parent_item, item);
+	up(&subsys->su_sem);
+
+	/* Drop our reference from above */
+	config_item_put(item);
+
+	module_put(owner);
+
+	return 0;
+}
+
+struct inode_operations configfs_dir_inode_operations = {
+	.mkdir		= configfs_mkdir,
+	.rmdir		= configfs_rmdir,
+	.symlink	= configfs_symlink,
+	.unlink		= configfs_unlink,
+	.lookup		= configfs_lookup,
+};
+
+#if 0
+int configfs_rename_dir(struct config_item * item, const char *new_name)
+{
+	int error = 0;
+	struct dentry * new_dentry, * parent;
+
+	if (!strcmp(config_item_name(item), new_name))
+		return -EINVAL;
+
+	if (!item->parent)
+		return -EINVAL;
+
+	down_write(&configfs_rename_sem);
+	parent = item->parent->dentry;
+
+	down(&parent->d_inode->i_sem);
+
+	new_dentry = lookup_one_len(new_name, parent, strlen(new_name));
+	if (!IS_ERR(new_dentry)) {
+  		if (!new_dentry->d_inode) {
+			error = config_item_set_name(item, "%s", new_name);
+			if (!error) {
+				d_add(new_dentry, NULL);
+				d_move(item->dentry, new_dentry);
+			}
+			else
+				d_delete(new_dentry);
+		} else
+			error = -EEXIST;
+		dput(new_dentry);
+	}
+	up(&parent->d_inode->i_sem);
+	up_write(&configfs_rename_sem);
+
+	return error;
+}
+#endif
+
+static int configfs_dir_open(struct inode *inode, struct file *file)
+{
+	struct dentry * dentry = file->f_dentry;
+	struct configfs_dirent * parent_sd = dentry->d_fsdata;
+
+	down(&dentry->d_inode->i_sem);
+	file->private_data = configfs_new_dirent(parent_sd, NULL);
+	up(&dentry->d_inode->i_sem);
+
+	return file->private_data ? 0 : -ENOMEM;
+
+}
+
+static int configfs_dir_close(struct inode *inode, struct file *file)
+{
+	struct dentry * dentry = file->f_dentry;
+	struct configfs_dirent * cursor = file->private_data;
+
+	down(&dentry->d_inode->i_sem);
+	list_del_init(&cursor->s_sibling);
+	up(&dentry->d_inode->i_sem);
+
+	release_configfs_dirent(cursor);
+
+	return 0;
+}
+
+/* Relationship between s_mode and the DT_xxx types */
+static inline unsigned char dt_type(struct configfs_dirent *sd)
+{
+	return (sd->s_mode >> 12) & 15;
+}
+
+static int configfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
+{
+	struct dentry *dentry = filp->f_dentry;
+	struct configfs_dirent * parent_sd = dentry->d_fsdata;
+	struct configfs_dirent *cursor = filp->private_data;
+	struct list_head *p, *q = &cursor->s_sibling;
+	ino_t ino;
+	int i = filp->f_pos;
+
+	switch (i) {
+		case 0:
+			ino = dentry->d_inode->i_ino;
+			if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
+				break;
+			filp->f_pos++;
+			i++;
+			/* fallthrough */
+		case 1:
+			ino = parent_ino(dentry);
+			if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
+				break;
+			filp->f_pos++;
+			i++;
+			/* fallthrough */
+		default:
+			if (filp->f_pos == 2) {
+				list_del(q);
+				list_add(q, &parent_sd->s_children);
+			}
+			for (p=q->next; p!= &parent_sd->s_children; p=p->next) {
+				struct configfs_dirent *next;
+				const char * name;
+				int len;
+
+				next = list_entry(p, struct configfs_dirent,
+						   s_sibling);
+				if (!next->s_element)
+					continue;
+
+				name = configfs_get_name(next);
+				len = strlen(name);
+				if (next->s_dentry)
+					ino = next->s_dentry->d_inode->i_ino;
+				else
+					ino = iunique(configfs_sb, 2);
+
+				if (filldir(dirent, name, len, filp->f_pos, ino,
+						 dt_type(next)) < 0)
+					return 0;
+
+				list_del(q);
+				list_add(q, p);
+				p = q;
+				filp->f_pos++;
+			}
+	}
+	return 0;
+}
+
+static loff_t configfs_dir_lseek(struct file * file, loff_t offset, int origin)
+{
+	struct dentry * dentry = file->f_dentry;
+
+	down(&dentry->d_inode->i_sem);
+	switch (origin) {
+		case 1:
+			offset += file->f_pos;
+		case 0:
+			if (offset >= 0)
+				break;
+		default:
+			up(&file->f_dentry->d_inode->i_sem);
+			return -EINVAL;
+	}
+	if (offset != file->f_pos) {
+		file->f_pos = offset;
+		if (file->f_pos >= 2) {
+			struct configfs_dirent *sd = dentry->d_fsdata;
+			struct configfs_dirent *cursor = file->private_data;
+			struct list_head *p;
+			loff_t n = file->f_pos - 2;
+
+			list_del(&cursor->s_sibling);
+			p = sd->s_children.next;
+			while (n && p != &sd->s_children) {
+				struct configfs_dirent *next;
+				next = list_entry(p, struct configfs_dirent,
+						   s_sibling);
+				if (next->s_element)
+					n--;
+				p = p->next;
+			}
+			list_add_tail(&cursor->s_sibling, p);
+		}
+	}
+	up(&dentry->d_inode->i_sem);
+	return offset;
+}
+
+struct file_operations configfs_dir_operations = {
+	.open		= configfs_dir_open,
+	.release	= configfs_dir_close,
+	.llseek		= configfs_dir_lseek,
+	.read		= generic_read_dir,
+	.readdir	= configfs_readdir,
+};
+
+int configfs_register_subsystem(struct configfs_subsystem *subsys)
+{
+	int err;
+	struct config_group *group = &subsys->su_group;
+	struct qstr name;
+	struct dentry *dentry;
+	struct configfs_dirent *sd;
+
+	err = configfs_pin_fs();
+	if (err)
+		return err;
+
+	if (!group->cg_item.ci_name)
+		group->cg_item.ci_name = group->cg_item.ci_namebuf;
+
+	sd = configfs_sb->s_root->d_fsdata;
+	link_group(to_config_group(sd->s_element), group);
+
+	down(&configfs_sb->s_root->d_inode->i_sem);
+
+	name.name = group->cg_item.ci_name;
+	name.len = strlen(name.name);
+	name.hash = full_name_hash(name.name, name.len);
+
+	err = -ENOMEM;
+	dentry = d_alloc(configfs_sb->s_root, &name);
+	if (!dentry)
+		goto out_release;
+
+	d_add(dentry, NULL);
+
+	err = configfs_attach_group(sd->s_element, &group->cg_item,
+				    dentry);
+	if (!err)
+		dentry = NULL;
+	else
+		d_delete(dentry);
+
+	up(&configfs_sb->s_root->d_inode->i_sem);
+
+	if (dentry) {
+	    dput(dentry);
+out_release:
+	    unlink_group(group);
+	    configfs_release_fs();
+	}
+
+	return err;
+}
+
+void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
+{
+	struct config_group *group = &subsys->su_group;
+	struct dentry *dentry = group->cg_item.ci_dentry;
+
+	if (dentry->d_parent != configfs_sb->s_root) {
+		printk(KERN_ERR "configfs: Tried to unregister non-subsystem!\n");
+		return;
+	}
+
+	down(&configfs_sb->s_root->d_inode->i_sem);
+	down(&dentry->d_inode->i_sem);
+	if (configfs_detach_prep(dentry)) {
+		printk(KERN_ERR "configfs: Tried to unregister non-empty subsystem!\n");
+	}
+	configfs_detach_group(&group->cg_item);
+	dentry->d_inode->i_flags |= S_DEAD;
+	up(&dentry->d_inode->i_sem);
+
+	d_delete(dentry);
+
+	up(&configfs_sb->s_root->d_inode->i_sem);
+
+	dput(dentry);
+
+	unlink_group(group);
+	configfs_release_fs();
+}
+
+EXPORT_SYMBOL(configfs_register_subsystem);
+EXPORT_SYMBOL(configfs_unregister_subsystem);
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
new file mode 100644
index 000000000000..af1ffc9a15c0
--- /dev/null
+++ b/fs/configfs/file.c
@@ -0,0 +1,360 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * file.c - operations for regular (text) files.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/dnotify.h>
+#include <linux/slab.h>
+#include <asm/uaccess.h>
+#include <asm/semaphore.h>
+
+#include <linux/configfs.h>
+#include "configfs_internal.h"
+
+
+struct configfs_buffer {
+	size_t			count;
+	loff_t			pos;
+	char			* page;
+	struct configfs_item_operations	* ops;
+	struct semaphore	sem;
+	int			needs_read_fill;
+};
+
+
+/**
+ *	fill_read_buffer - allocate and fill buffer from item.
+ *	@dentry:	dentry pointer.
+ *	@buffer:	data buffer for file.
+ *
+ *	Allocate @buffer->page, if it hasn't been already, then call the
+ *	config_item's show() method to fill the buffer with this attribute's
+ *	data.
+ *	This is called only once, on the file's first read.
+ */
+static int fill_read_buffer(struct dentry * dentry, struct configfs_buffer * buffer)
+{
+	struct configfs_attribute * attr = to_attr(dentry);
+	struct config_item * item = to_item(dentry->d_parent);
+	struct configfs_item_operations * ops = buffer->ops;
+	int ret = 0;
+	ssize_t count;
+
+	if (!buffer->page)
+		buffer->page = (char *) get_zeroed_page(GFP_KERNEL);
+	if (!buffer->page)
+		return -ENOMEM;
+
+	count = ops->show_attribute(item,attr,buffer->page);
+	buffer->needs_read_fill = 0;
+	BUG_ON(count > (ssize_t)PAGE_SIZE);
+	if (count >= 0)
+		buffer->count = count;
+	else
+		ret = count;
+	return ret;
+}
+
+
+/**
+ *	flush_read_buffer - push buffer to userspace.
+ *	@buffer:	data buffer for file.
+ *	@userbuf:	user-passed buffer.
+ *	@count:		number of bytes requested.
+ *	@ppos:		file position.
+ *
+ *	Copy the buffer we filled in fill_read_buffer() to userspace.
+ *	This is done at the reader's leisure, copying and advancing
+ *	the amount they specify each time.
+ *	This may be called continuously until the buffer is empty.
+ */
+static int flush_read_buffer(struct configfs_buffer * buffer, char __user * buf,
+			     size_t count, loff_t * ppos)
+{
+	int error;
+
+	if (*ppos > buffer->count)
+		return 0;
+
+	if (count > (buffer->count - *ppos))
+		count = buffer->count - *ppos;
+
+	error = copy_to_user(buf,buffer->page + *ppos,count);
+	if (!error)
+		*ppos += count;
+	return error ? -EFAULT : count;
+}
+
+/**
+ *	configfs_read_file - read an attribute.
+ *	@file:	file pointer.
+ *	@buf:	buffer to fill.
+ *	@count:	number of bytes to read.
+ *	@ppos:	starting offset in file.
+ *
+ *	Userspace wants to read an attribute file. The attribute descriptor
+ *	is in the file's ->d_fsdata. The target item is in the directory's
+ *	->d_fsdata.
+ *
+ *	We call fill_read_buffer() to allocate and fill the buffer from the
+ *	item's show() method exactly once (if the read is happening from
+ *	the beginning of the file). That should fill the entire buffer with
+ *	all the data the item has to offer for that attribute.
+ *	We then call flush_read_buffer() to copy the buffer to userspace
+ *	in the increments specified.
+ */
+
+static ssize_t
+configfs_read_file(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+	struct configfs_buffer * buffer = file->private_data;
+	ssize_t retval = 0;
+
+	down(&buffer->sem);
+	if (buffer->needs_read_fill) {
+		if ((retval = fill_read_buffer(file->f_dentry,buffer)))
+			goto out;
+	}
+	pr_debug("%s: count = %d, ppos = %lld, buf = %s\n",
+		 __FUNCTION__,count,*ppos,buffer->page);
+	retval = flush_read_buffer(buffer,buf,count,ppos);
+out:
+	up(&buffer->sem);
+	return retval;
+}
+
+
+/**
+ *	fill_write_buffer - copy buffer from userspace.
+ *	@buffer:	data buffer for file.
+ *	@userbuf:	data from user.
+ *	@count:		number of bytes in @userbuf.
+ *
+ *	Allocate @buffer->page if it hasn't been already, then
+ *	copy the user-supplied buffer into it.
+ */
+
+static int
+fill_write_buffer(struct configfs_buffer * buffer, const char __user * buf, size_t count)
+{
+	int error;
+
+	if (!buffer->page)
+		buffer->page = (char *)get_zeroed_page(GFP_KERNEL);
+	if (!buffer->page)
+		return -ENOMEM;
+
+	if (count > PAGE_SIZE)
+		count = PAGE_SIZE;
+	error = copy_from_user(buffer->page,buf,count);
+	buffer->needs_read_fill = 1;
+	return error ? -EFAULT : count;
+}
+
+
+/**
+ *	flush_write_buffer - push buffer to config_item.
+ *	@file:		file pointer.
+ *	@buffer:	data buffer for file.
+ *
+ *	Get the correct pointers for the config_item and the attribute we're
+ *	dealing with, then call the store() method for the attribute,
+ *	passing the buffer that we acquired in fill_write_buffer().
+ */
+
+static int
+flush_write_buffer(struct dentry * dentry, struct configfs_buffer * buffer, size_t count)
+{
+	struct configfs_attribute * attr = to_attr(dentry);
+	struct config_item * item = to_item(dentry->d_parent);
+	struct configfs_item_operations * ops = buffer->ops;
+
+	return ops->store_attribute(item,attr,buffer->page,count);
+}
+
+
+/**
+ *	configfs_write_file - write an attribute.
+ *	@file:	file pointer
+ *	@buf:	data to write
+ *	@count:	number of bytes
+ *	@ppos:	starting offset
+ *
+ *	Similar to configfs_read_file(), though working in the opposite direction.
+ *	We allocate and fill the data from the user in fill_write_buffer(),
+ *	then push it to the config_item in flush_write_buffer().
+ *	There is no easy way for us to know if userspace is only doing a partial
+ *	write, so we don't support them. We expect the entire buffer to come
+ *	on the first write.
+ *	Hint: if you're writing a value, first read the file, modify only the
+ *	the value you're changing, then write entire buffer back.
+ */
+
+static ssize_t
+configfs_write_file(struct file *file, const char __user *buf, size_t count, loff_t *ppos)
+{
+	struct configfs_buffer * buffer = file->private_data;
+
+	down(&buffer->sem);
+	count = fill_write_buffer(buffer,buf,count);
+	if (count > 0)
+		count = flush_write_buffer(file->f_dentry,buffer,count);
+	if (count > 0)
+		*ppos += count;
+	up(&buffer->sem);
+	return count;
+}
+
+static int check_perm(struct inode * inode, struct file * file)
+{
+	struct config_item *item = configfs_get_config_item(file->f_dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(file->f_dentry);
+	struct configfs_buffer * buffer;
+	struct configfs_item_operations * ops = NULL;
+	int error = 0;
+
+	if (!item || !attr)
+		goto Einval;
+
+	/* Grab the module reference for this attribute if we have one */
+	if (!try_module_get(attr->ca_owner)) {
+		error = -ENODEV;
+		goto Done;
+	}
+
+	if (item->ci_type)
+		ops = item->ci_type->ct_item_ops;
+	else
+		goto Eaccess;
+
+	/* File needs write support.
+	 * The inode's perms must say it's ok,
+	 * and we must have a store method.
+	 */
+	if (file->f_mode & FMODE_WRITE) {
+
+		if (!(inode->i_mode & S_IWUGO) || !ops->store_attribute)
+			goto Eaccess;
+
+	}
+
+	/* File needs read support.
+	 * The inode's perms must say it's ok, and we there
+	 * must be a show method for it.
+	 */
+	if (file->f_mode & FMODE_READ) {
+		if (!(inode->i_mode & S_IRUGO) || !ops->show_attribute)
+			goto Eaccess;
+	}
+
+	/* No error? Great, allocate a buffer for the file, and store it
+	 * it in file->private_data for easy access.
+	 */
+	buffer = kmalloc(sizeof(struct configfs_buffer),GFP_KERNEL);
+	if (buffer) {
+		memset(buffer,0,sizeof(struct configfs_buffer));
+		init_MUTEX(&buffer->sem);
+		buffer->needs_read_fill = 1;
+		buffer->ops = ops;
+		file->private_data = buffer;
+	} else
+		error = -ENOMEM;
+	goto Done;
+
+ Einval:
+	error = -EINVAL;
+	goto Done;
+ Eaccess:
+	error = -EACCES;
+	module_put(attr->ca_owner);
+ Done:
+	if (error && item)
+		config_item_put(item);
+	return error;
+}
+
+static int configfs_open_file(struct inode * inode, struct file * filp)
+{
+	return check_perm(inode,filp);
+}
+
+static int configfs_release(struct inode * inode, struct file * filp)
+{
+	struct config_item * item = to_item(filp->f_dentry->d_parent);
+	struct configfs_attribute * attr = to_attr(filp->f_dentry);
+	struct module * owner = attr->ca_owner;
+	struct configfs_buffer * buffer = filp->private_data;
+
+	if (item)
+		config_item_put(item);
+	/* After this point, attr should not be accessed. */
+	module_put(owner);
+
+	if (buffer) {
+		if (buffer->page)
+			free_page((unsigned long)buffer->page);
+		kfree(buffer);
+	}
+	return 0;
+}
+
+struct file_operations configfs_file_operations = {
+	.read		= configfs_read_file,
+	.write		= configfs_write_file,
+	.llseek		= generic_file_llseek,
+	.open		= configfs_open_file,
+	.release	= configfs_release,
+};
+
+
+int configfs_add_file(struct dentry * dir, const struct configfs_attribute * attr, int type)
+{
+	struct configfs_dirent * parent_sd = dir->d_fsdata;
+	umode_t mode = (attr->ca_mode & S_IALLUGO) | S_IFREG;
+	int error = 0;
+
+	down(&dir->d_inode->i_sem);
+	error = configfs_make_dirent(parent_sd, NULL, (void *) attr, mode, type);
+	up(&dir->d_inode->i_sem);
+
+	return error;
+}
+
+
+/**
+ *	configfs_create_file - create an attribute file for an item.
+ *	@item:	item we're creating for.
+ *	@attr:	atrribute descriptor.
+ */
+
+int configfs_create_file(struct config_item * item, const struct configfs_attribute * attr)
+{
+	BUG_ON(!item || !item->ci_dentry || !attr);
+
+	return configfs_add_file(item->ci_dentry, attr,
+				 CONFIGFS_ITEM_ATTR);
+}
+
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
new file mode 100644
index 000000000000..6b274c6d428f
--- /dev/null
+++ b/fs/configfs/inode.c
@@ -0,0 +1,162 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * inode.c - basic inode and dentry operations.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ *
+ * Please see Documentation/filesystems/configfs.txt for more information.
+ */
+
+#undef DEBUG
+
+#include <linux/pagemap.h>
+#include <linux/namei.h>
+#include <linux/backing-dev.h>
+
+#include <linux/configfs.h>
+#include "configfs_internal.h"
+
+extern struct super_block * configfs_sb;
+
+static struct address_space_operations configfs_aops = {
+	.readpage	= simple_readpage,
+	.prepare_write	= simple_prepare_write,
+	.commit_write	= simple_commit_write
+};
+
+static struct backing_dev_info configfs_backing_dev_info = {
+	.ra_pages	= 0,	/* No readahead */
+	.capabilities	= BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+struct inode * configfs_new_inode(mode_t mode)
+{
+	struct inode * inode = new_inode(configfs_sb);
+	if (inode) {
+		inode->i_mode = mode;
+		inode->i_uid = 0;
+		inode->i_gid = 0;
+		inode->i_blksize = PAGE_CACHE_SIZE;
+		inode->i_blocks = 0;
+		inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+		inode->i_mapping->a_ops = &configfs_aops;
+		inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
+	}
+	return inode;
+}
+
+int configfs_create(struct dentry * dentry, int mode, int (*init)(struct inode *))
+{
+	int error = 0;
+	struct inode * inode = NULL;
+	if (dentry) {
+		if (!dentry->d_inode) {
+			if ((inode = configfs_new_inode(mode))) {
+				if (dentry->d_parent && dentry->d_parent->d_inode) {
+					struct inode *p_inode = dentry->d_parent->d_inode;
+					p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+				}
+				goto Proceed;
+			}
+			else
+				error = -ENOMEM;
+		} else
+			error = -EEXIST;
+	} else
+		error = -ENOENT;
+	goto Done;
+
+ Proceed:
+	if (init)
+		error = init(inode);
+	if (!error) {
+		d_instantiate(dentry, inode);
+		if (S_ISDIR(mode) || S_ISLNK(mode))
+			dget(dentry);  /* pin link and directory dentries in core */
+	} else
+		iput(inode);
+ Done:
+	return error;
+}
+
+/*
+ * Get the name for corresponding element represented by the given configfs_dirent
+ */
+const unsigned char * configfs_get_name(struct configfs_dirent *sd)
+{
+	struct attribute * attr;
+
+	if (!sd || !sd->s_element)
+		BUG();
+
+	/* These always have a dentry, so use that */
+	if (sd->s_type & (CONFIGFS_DIR | CONFIGFS_ITEM_LINK))
+		return sd->s_dentry->d_name.name;
+
+	if (sd->s_type & CONFIGFS_ITEM_ATTR) {
+		attr = sd->s_element;
+		return attr->name;
+	}
+	return NULL;
+}
+
+
+/*
+ * Unhashes the dentry corresponding to given configfs_dirent
+ * Called with parent inode's i_sem held.
+ */
+void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
+{
+	struct dentry * dentry = sd->s_dentry;
+
+	if (dentry) {
+		spin_lock(&dcache_lock);
+		if (!(d_unhashed(dentry) && dentry->d_inode)) {
+			dget_locked(dentry);
+			__d_drop(dentry);
+			spin_unlock(&dcache_lock);
+			simple_unlink(parent->d_inode, dentry);
+		} else
+			spin_unlock(&dcache_lock);
+	}
+}
+
+void configfs_hash_and_remove(struct dentry * dir, const char * name)
+{
+	struct configfs_dirent * sd;
+	struct configfs_dirent * parent_sd = dir->d_fsdata;
+
+	down(&dir->d_inode->i_sem);
+	list_for_each_entry(sd, &parent_sd->s_children, s_sibling) {
+		if (!sd->s_element)
+			continue;
+		if (!strcmp(configfs_get_name(sd), name)) {
+			list_del_init(&sd->s_sibling);
+			configfs_drop_dentry(sd, dir);
+			configfs_put(sd);
+			break;
+		}
+	}
+	up(&dir->d_inode->i_sem);
+}
+
+
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
new file mode 100644
index 000000000000..e07485ac50ad
--- /dev/null
+++ b/fs/configfs/item.c
@@ -0,0 +1,227 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * item.c - library routines for handling generic config items
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on kobject:
+ * 	kobject is Copyright (c) 2002-2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ *
+ * Please see the file Documentation/filesystems/configfs.txt for
+ * critical information about using the config_item interface.
+ */
+
+#include <linux/string.h>
+#include <linux/module.h>
+#include <linux/stat.h>
+#include <linux/slab.h>
+
+#include <linux/configfs.h>
+
+
+static inline struct config_item * to_item(struct list_head * entry)
+{
+	return container_of(entry,struct config_item,ci_entry);
+}
+
+/* Evil kernel */
+static void config_item_release(struct kref *kref);
+
+/**
+ *	config_item_init - initialize item.
+ *	@item:	item in question.
+ */
+void config_item_init(struct config_item * item)
+{
+	kref_init(&item->ci_kref);
+	INIT_LIST_HEAD(&item->ci_entry);
+}
+
+/**
+ *	config_item_set_name - Set the name of an item
+ *	@item:	item.
+ *	@name:	name.
+ *
+ *	If strlen(name) >= CONFIGFS_ITEM_NAME_LEN, then use a
+ *	dynamically allocated string that @item->ci_name points to.
+ *	Otherwise, use the static @item->ci_namebuf array.
+ */
+
+int config_item_set_name(struct config_item * item, const char * fmt, ...)
+{
+	int error = 0;
+	int limit = CONFIGFS_ITEM_NAME_LEN;
+	int need;
+	va_list args;
+	char * name;
+
+	/*
+	 * First, try the static array
+	 */
+	va_start(args,fmt);
+	need = vsnprintf(item->ci_namebuf,limit,fmt,args);
+	va_end(args);
+	if (need < limit)
+		name = item->ci_namebuf;
+	else {
+		/*
+		 * Need more space? Allocate it and try again
+		 */
+		limit = need + 1;
+		name = kmalloc(limit,GFP_KERNEL);
+		if (!name) {
+			error = -ENOMEM;
+			goto Done;
+		}
+		va_start(args,fmt);
+		need = vsnprintf(name,limit,fmt,args);
+		va_end(args);
+
+		/* Still? Give up. */
+		if (need >= limit) {
+			kfree(name);
+			error = -EFAULT;
+			goto Done;
+		}
+	}
+
+	/* Free the old name, if necessary. */
+	if (item->ci_name && item->ci_name != item->ci_namebuf)
+		kfree(item->ci_name);
+
+	/* Now, set the new name */
+	item->ci_name = name;
+ Done:
+	return error;
+}
+
+EXPORT_SYMBOL(config_item_set_name);
+
+void config_item_init_type_name(struct config_item *item,
+				const char *name,
+				struct config_item_type *type)
+{
+	config_item_set_name(item, name);
+	item->ci_type = type;
+	config_item_init(item);
+}
+EXPORT_SYMBOL(config_item_init_type_name);
+
+void config_group_init_type_name(struct config_group *group, const char *name,
+			 struct config_item_type *type)
+{
+	config_item_set_name(&group->cg_item, name);
+	group->cg_item.ci_type = type;
+	config_group_init(group);
+}
+EXPORT_SYMBOL(config_group_init_type_name);
+
+struct config_item * config_item_get(struct config_item * item)
+{
+	if (item)
+		kref_get(&item->ci_kref);
+	return item;
+}
+
+/**
+ *	config_item_cleanup - free config_item resources.
+ *	@item:	item.
+ */
+
+void config_item_cleanup(struct config_item * item)
+{
+	struct config_item_type * t = item->ci_type;
+	struct config_group * s = item->ci_group;
+	struct config_item * parent = item->ci_parent;
+
+	pr_debug("config_item %s: cleaning up\n",config_item_name(item));
+	if (item->ci_name != item->ci_namebuf)
+		kfree(item->ci_name);
+	item->ci_name = NULL;
+	if (t && t->ct_item_ops && t->ct_item_ops->release)
+		t->ct_item_ops->release(item);
+	if (s)
+		config_group_put(s);
+	if (parent)
+		config_item_put(parent);
+}
+
+static void config_item_release(struct kref *kref)
+{
+	config_item_cleanup(container_of(kref, struct config_item, ci_kref));
+}
+
+/**
+ *	config_item_put - decrement refcount for item.
+ *	@item:	item.
+ *
+ *	Decrement the refcount, and if 0, call config_item_cleanup().
+ */
+void config_item_put(struct config_item * item)
+{
+	if (item)
+		kref_put(&item->ci_kref, config_item_release);
+}
+
+
+/**
+ *	config_group_init - initialize a group for use
+ *	@k:	group
+ */
+
+void config_group_init(struct config_group *group)
+{
+	config_item_init(&group->cg_item);
+	INIT_LIST_HEAD(&group->cg_children);
+}
+
+
+/**
+ *	config_group_find_obj - search for item in group.
+ *	@group:	group we're looking in.
+ *	@name:	item's name.
+ *
+ *	Lock group via @group->cg_subsys, and iterate over @group->cg_list,
+ *	looking for a matching config_item. If matching item is found
+ *	take a reference and return the item.
+ */
+
+struct config_item * config_group_find_obj(struct config_group * group, const char * name)
+{
+	struct list_head * entry;
+	struct config_item * ret = NULL;
+
+        /* XXX LOCKING! */
+	list_for_each(entry,&group->cg_children) {
+		struct config_item * item = to_item(entry);
+		if (config_item_name(item) &&
+                    !strcmp(config_item_name(item), name)) {
+			ret = config_item_get(item);
+			break;
+		}
+	}
+	return ret;
+}
+
+
+EXPORT_SYMBOL(config_item_init);
+EXPORT_SYMBOL(config_group_init);
+EXPORT_SYMBOL(config_item_get);
+EXPORT_SYMBOL(config_item_put);
+
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
new file mode 100644
index 000000000000..1a2f6f6a4d91
--- /dev/null
+++ b/fs/configfs/mount.c
@@ -0,0 +1,159 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * mount.c - operations for initializing and mounting configfs.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/mount.h>
+#include <linux/pagemap.h>
+#include <linux/init.h>
+
+#include <linux/configfs.h>
+#include "configfs_internal.h"
+
+/* Random magic number */
+#define CONFIGFS_MAGIC 0x62656570
+
+struct vfsmount * configfs_mount = NULL;
+struct super_block * configfs_sb = NULL;
+static int configfs_mnt_count = 0;
+
+static struct super_operations configfs_ops = {
+	.statfs		= simple_statfs,
+	.drop_inode	= generic_delete_inode,
+};
+
+static struct config_group configfs_root_group = {
+	.cg_item = {
+		.ci_namebuf	= "root",
+		.ci_name	= configfs_root_group.cg_item.ci_namebuf,
+	},
+};
+
+int configfs_is_root(struct config_item *item)
+{
+	return item == &configfs_root_group.cg_item;
+}
+
+static struct configfs_dirent configfs_root = {
+	.s_sibling	= LIST_HEAD_INIT(configfs_root.s_sibling),
+	.s_children	= LIST_HEAD_INIT(configfs_root.s_children),
+	.s_element	= &configfs_root_group.cg_item,
+	.s_type		= CONFIGFS_ROOT,
+};
+
+static int configfs_fill_super(struct super_block *sb, void *data, int silent)
+{
+	struct inode *inode;
+	struct dentry *root;
+
+	sb->s_blocksize = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic = CONFIGFS_MAGIC;
+	sb->s_op = &configfs_ops;
+	configfs_sb = sb;
+
+	inode = configfs_new_inode(S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO);
+	if (inode) {
+		inode->i_op = &configfs_dir_inode_operations;
+		inode->i_fop = &configfs_dir_operations;
+		/* directory inodes start off with i_nlink == 2 (for "." entry) */
+		inode->i_nlink++;
+	} else {
+		pr_debug("configfs: could not get root inode\n");
+		return -ENOMEM;
+	}
+
+	root = d_alloc_root(inode);
+	if (!root) {
+		pr_debug("%s: could not get root dentry!\n",__FUNCTION__);
+		iput(inode);
+		return -ENOMEM;
+	}
+	config_group_init(&configfs_root_group);
+	configfs_root_group.cg_item.ci_dentry = root;
+	root->d_fsdata = &configfs_root;
+	sb->s_root = root;
+	return 0;
+}
+
+static struct super_block *configfs_get_sb(struct file_system_type *fs_type,
+	int flags, const char *dev_name, void *data)
+{
+	return get_sb_single(fs_type, flags, data, configfs_fill_super);
+}
+
+static struct file_system_type configfs_fs_type = {
+	.owner		= THIS_MODULE,
+	.name		= "configfs",
+	.get_sb		= configfs_get_sb,
+	.kill_sb	= kill_litter_super,
+};
+
+int configfs_pin_fs(void)
+{
+	return simple_pin_fs("configfs", &configfs_mount,
+			     &configfs_mnt_count);
+}
+
+void configfs_release_fs(void)
+{
+	simple_release_fs(&configfs_mount, &configfs_mnt_count);
+}
+
+
+static decl_subsys(config, NULL, NULL);
+
+static int __init configfs_init(void)
+{
+	int err;
+
+	kset_set_kset_s(&config_subsys, kernel_subsys);
+	err = subsystem_register(&config_subsys);
+	if (err)
+		return err;
+
+	err = register_filesystem(&configfs_fs_type);
+	if (err) {
+		printk(KERN_ERR "configfs: Unable to register filesystem!\n");
+		subsystem_unregister(&config_subsys);
+	}
+
+	return err;
+}
+
+static void __exit configfs_exit(void)
+{
+	unregister_filesystem(&configfs_fs_type);
+	subsystem_unregister(&config_subsys);
+}
+
+MODULE_AUTHOR("Oracle");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("0.0.1");
+MODULE_DESCRIPTION("Simple RAM filesystem for user driven kernel subsystem configuration.");
+
+module_init(configfs_init);
+module_exit(configfs_exit);
diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
new file mode 100644
index 000000000000..50f5840521a9
--- /dev/null
+++ b/fs/configfs/symlink.c
@@ -0,0 +1,281 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * symlink.c - operations for configfs symlinks.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ */
+
+#include <linux/fs.h>
+#include <linux/module.h>
+#include <linux/namei.h>
+
+#include <linux/configfs.h>
+#include "configfs_internal.h"
+
+static int item_depth(struct config_item * item)
+{
+	struct config_item * p = item;
+	int depth = 0;
+	do { depth++; } while ((p = p->ci_parent) && !configfs_is_root(p));
+	return depth;
+}
+
+static int item_path_length(struct config_item * item)
+{
+	struct config_item * p = item;
+	int length = 1;
+	do {
+		length += strlen(config_item_name(p)) + 1;
+		p = p->ci_parent;
+	} while (p && !configfs_is_root(p));
+	return length;
+}
+
+static void fill_item_path(struct config_item * item, char * buffer, int length)
+{
+	struct config_item * p;
+
+	--length;
+	for (p = item; p && !configfs_is_root(p); p = p->ci_parent) {
+		int cur = strlen(config_item_name(p));
+
+		/* back up enough to print this bus id with '/' */
+		length -= cur;
+		strncpy(buffer + length,config_item_name(p),cur);
+		*(buffer + --length) = '/';
+	}
+}
+
+static int create_link(struct config_item *parent_item,
+ 		       struct config_item *item,
+		       struct dentry *dentry)
+{
+	struct configfs_dirent *target_sd = item->ci_dentry->d_fsdata;
+	struct configfs_symlink *sl;
+	int ret;
+
+	ret = -ENOMEM;
+	sl = kmalloc(sizeof(struct configfs_symlink), GFP_KERNEL);
+	if (sl) {
+		sl->sl_target = config_item_get(item);
+		/* FIXME: needs a lock, I'd bet */
+		list_add(&sl->sl_list, &target_sd->s_links);
+		ret = configfs_create_link(sl, parent_item->ci_dentry,
+					   dentry);
+		if (ret) {
+			list_del_init(&sl->sl_list);
+			config_item_put(item);
+			kfree(sl);
+		}
+	}
+
+	return ret;
+}
+
+
+static int get_target(const char *symname, struct nameidata *nd,
+		      struct config_item **target)
+{
+	int ret;
+
+	ret = path_lookup(symname, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, nd);
+	if (!ret) {
+		if (nd->dentry->d_sb == configfs_sb) {
+			*target = configfs_get_config_item(nd->dentry);
+			if (!*target) {
+				ret = -ENOENT;
+				path_release(nd);
+			}
+		} else
+			ret = -EPERM;
+	}
+
+	return ret;
+}
+
+
+int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+{
+	int ret;
+	struct nameidata nd;
+	struct config_item *parent_item;
+	struct config_item *target_item;
+	struct config_item_type *type;
+
+	ret = -EPERM;  /* What lack-of-symlink returns */
+	if (dentry->d_parent == configfs_sb->s_root)
+		goto out;
+
+	parent_item = configfs_get_config_item(dentry->d_parent);
+	type = parent_item->ci_type;
+
+	if (!type || !type->ct_item_ops ||
+	    !type->ct_item_ops->allow_link)
+		goto out_put;
+
+	ret = get_target(symname, &nd, &target_item);
+	if (ret)
+		goto out_put;
+
+	ret = type->ct_item_ops->allow_link(parent_item, target_item);
+	if (!ret)
+		ret = create_link(parent_item, target_item, dentry);
+
+	config_item_put(target_item);
+	path_release(&nd);
+
+out_put:
+	config_item_put(parent_item);
+
+out:
+	return ret;
+}
+
+int configfs_unlink(struct inode *dir, struct dentry *dentry)
+{
+	struct configfs_dirent *sd = dentry->d_fsdata;
+	struct configfs_symlink *sl;
+	struct config_item *parent_item;
+	struct config_item_type *type;
+	int ret;
+
+	ret = -EPERM;  /* What lack-of-symlink returns */
+	if (!(sd->s_type & CONFIGFS_ITEM_LINK))
+		goto out;
+
+	if (dentry->d_parent == configfs_sb->s_root)
+		BUG();
+
+	sl = sd->s_element;
+
+	parent_item = configfs_get_config_item(dentry->d_parent);
+	type = parent_item->ci_type;
+
+	list_del_init(&sd->s_sibling);
+	configfs_drop_dentry(sd, dentry->d_parent);
+	dput(dentry);
+	configfs_put(sd);
+
+	/*
+	 * drop_link() must be called before
+	 * list_del_init(&sl->sl_list), so that the order of
+	 * drop_link(this, target) and drop_item(target) is preserved.
+	 */
+	if (type && type->ct_item_ops &&
+	    type->ct_item_ops->drop_link)
+		type->ct_item_ops->drop_link(parent_item,
+					       sl->sl_target);
+
+	/* FIXME: Needs lock */
+	list_del_init(&sl->sl_list);
+
+	/* Put reference from create_link() */
+	config_item_put(sl->sl_target);
+	kfree(sl);
+
+	config_item_put(parent_item);
+
+	ret = 0;
+
+out:
+	return ret;
+}
+
+static int configfs_get_target_path(struct config_item * item, struct config_item * target,
+				   char *path)
+{
+	char * s;
+	int depth, size;
+
+	depth = item_depth(item);
+	size = item_path_length(target) + depth * 3 - 1;
+	if (size > PATH_MAX)
+		return -ENAMETOOLONG;
+
+	pr_debug("%s: depth = %d, size = %d\n", __FUNCTION__, depth, size);
+
+	for (s = path; depth--; s += 3)
+		strcpy(s,"../");
+
+	fill_item_path(target, path, size);
+	pr_debug("%s: path = '%s'\n", __FUNCTION__, path);
+
+	return 0;
+}
+
+static int configfs_getlink(struct dentry *dentry, char * path)
+{
+	struct config_item *item, *target_item;
+	int error = 0;
+
+	item = configfs_get_config_item(dentry->d_parent);
+	if (!item)
+		return -EINVAL;
+
+	target_item = configfs_get_config_item(dentry);
+	if (!target_item) {
+		config_item_put(item);
+		return -EINVAL;
+	}
+
+	down_read(&configfs_rename_sem);
+	error = configfs_get_target_path(item, target_item, path);
+	up_read(&configfs_rename_sem);
+
+	config_item_put(item);
+	config_item_put(target_item);
+	return error;
+
+}
+
+static void *configfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	int error = -ENOMEM;
+	unsigned long page = get_zeroed_page(GFP_KERNEL);
+
+	if (page) {
+		error = configfs_getlink(dentry, (char *)page);
+		if (!error) {
+			nd_set_link(nd, (char *)page);
+			return (void *)page;
+		}
+	}
+
+	nd_set_link(nd, ERR_PTR(error));
+	return NULL;
+}
+
+static void configfs_put_link(struct dentry *dentry, struct nameidata *nd,
+			      void *cookie)
+{
+	if (cookie) {
+		unsigned long page = (unsigned long)cookie;
+		free_page(page);
+	}
+}
+
+struct inode_operations configfs_symlink_inode_operations = {
+	.follow_link = configfs_follow_link,
+	.readlink = generic_readlink,
+	.put_link = configfs_put_link,
+};
+
diff --git a/include/linux/configfs.h b/include/linux/configfs.h
new file mode 100644
index 000000000000..acffb8c9073a
--- /dev/null
+++ b/include/linux/configfs.h
@@ -0,0 +1,205 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * configfs.h - definitions for the device driver filesystem
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ *
+ * Based on sysfs:
+ * 	sysfs is Copyright (C) 2001, 2002, 2003 Patrick Mochel
+ *
+ * Based on kobject.h:
+ *      Copyright (c) 2002-2003	Patrick Mochel
+ *      Copyright (c) 2002-2003	Open Source Development Labs
+ *
+ * configfs Copyright (C) 2005 Oracle.  All rights reserved.
+ *
+ * Please read Documentation/filesystems/configfs.txt before using the
+ * configfs interface, ESPECIALLY the parts about reference counts and
+ * item destructors.
+ */
+
+#ifndef _CONFIGFS_H_
+#define _CONFIGFS_H_
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/kref.h>
+
+#include <asm/atomic.h>
+#include <asm/semaphore.h>
+
+#define CONFIGFS_ITEM_NAME_LEN	20
+
+struct module;
+
+struct configfs_item_operations;
+struct configfs_group_operations;
+struct configfs_attribute;
+struct configfs_subsystem;
+
+struct config_item {
+	char			*ci_name;
+	char			ci_namebuf[CONFIGFS_ITEM_NAME_LEN];
+	struct kref		ci_kref;
+	struct list_head	ci_entry;
+	struct config_item	*ci_parent;
+	struct config_group	*ci_group;
+	struct config_item_type	*ci_type;
+	struct dentry		*ci_dentry;
+};
+
+extern int config_item_set_name(struct config_item *, const char *, ...);
+
+static inline char *config_item_name(struct config_item * item)
+{
+	return item->ci_name;
+}
+
+extern void config_item_init(struct config_item *);
+extern void config_item_init_type_name(struct config_item *item,
+				       const char *name,
+				       struct config_item_type *type);
+extern void config_item_cleanup(struct config_item *);
+
+extern struct config_item * config_item_get(struct config_item *);
+extern void config_item_put(struct config_item *);
+
+struct config_item_type {
+	struct module				*ct_owner;
+	struct configfs_item_operations		*ct_item_ops;
+	struct configfs_group_operations	*ct_group_ops;
+	struct configfs_attribute		**ct_attrs;
+};
+
+
+/**
+ *	group - a group of config_items of a specific type, belonging
+ *	to a specific subsystem.
+ */
+
+struct config_group {
+	struct config_item		cg_item;
+	struct list_head		cg_children;
+	struct configfs_subsystem 	*cg_subsys;
+	struct config_group		**default_groups;
+};
+
+
+extern void config_group_init(struct config_group *group);
+extern void config_group_init_type_name(struct config_group *group,
+					const char *name,
+					struct config_item_type *type);
+
+
+static inline struct config_group *to_config_group(struct config_item *item)
+{
+	return item ? container_of(item,struct config_group,cg_item) : NULL;
+}
+
+static inline struct config_group *config_group_get(struct config_group *group)
+{
+	return group ? to_config_group(config_item_get(&group->cg_item)) : NULL;
+}
+
+static inline void config_group_put(struct config_group *group)
+{
+	config_item_put(&group->cg_item);
+}
+
+extern struct config_item *config_group_find_obj(struct config_group *, const char *);
+
+
+struct configfs_attribute {
+	char			*ca_name;
+	struct module 		*ca_owner;
+	mode_t			ca_mode;
+};
+
+
+/*
+ * If allow_link() exists, the item can symlink(2) out to other
+ * items.  If the item is a group, it may support mkdir(2).
+ * Groups supply one of make_group() and make_item().  If the
+ * group supports make_group(), one can create group children.  If it
+ * supports make_item(), one can create config_item children.  If it has
+ * default_groups on group->default_groups, it has automatically created
+ * group children.  default_groups may coexist alongsize make_group() or
+ * make_item(), but if the group wishes to have only default_groups
+ * children (disallowing mkdir(2)), it need not provide either function.
+ * If the group has commit(), it supports pending and commited (active)
+ * items.
+ */
+struct configfs_item_operations {
+	void (*release)(struct config_item *);
+	ssize_t	(*show_attribute)(struct config_item *, struct configfs_attribute *,char *);
+	ssize_t	(*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t);
+	int (*allow_link)(struct config_item *src, struct config_item *target);
+	int (*drop_link)(struct config_item *src, struct config_item *target);
+};
+
+struct configfs_group_operations {
+	struct config_item *(*make_item)(struct config_group *group, const char *name);
+	struct config_group *(*make_group)(struct config_group *group, const char *name);
+	int (*commit_item)(struct config_item *item);
+	void (*drop_item)(struct config_group *group, struct config_item *item);
+};
+
+
+
+/**
+ * Use these macros to make defining attributes easier. See include/linux/device.h
+ * for examples..
+ */
+
+#if 0
+#define __ATTR(_name,_mode,_show,_store) { \
+	.attr = {.ca_name = __stringify(_name), .ca_mode = _mode, .ca_owner = THIS_MODULE },	\
+	.show	= _show,					\
+	.store	= _store,					\
+}
+
+#define __ATTR_RO(_name) { \
+	.attr	= { .ca_name = __stringify(_name), .ca_mode = 0444, .ca_owner = THIS_MODULE },	\
+	.show	= _name##_show,	\
+}
+
+#define __ATTR_NULL { .attr = { .name = NULL } }
+
+#define attr_name(_attr) (_attr).attr.name
+#endif
+
+
+struct configfs_subsystem {
+	struct config_group	su_group;
+	struct semaphore	su_sem;
+};
+
+static inline struct configfs_subsystem *to_configfs_subsystem(struct config_group *group)
+{
+	return group ?
+		container_of(group, struct configfs_subsystem, su_group) :
+		NULL;
+}
+
+int configfs_register_subsystem(struct configfs_subsystem *subsys);
+void configfs_unregister_subsystem(struct configfs_subsystem *subsys);
+
+#endif  /* __KERNEL__ */
+
+#endif /* _CONFIGFS_H_ */
-- 
cgit v1.2.3


From 994fc28c7b1e697ac56befe4aecabf23f0689f46 Mon Sep 17 00:00:00 2001
From: Zach Brown <zach.brown@oracle.com>
Date: Thu, 15 Dec 2005 14:28:17 -0800
Subject: [PATCH] add AOP_TRUNCATED_PAGE, prepend AOP_ to WRITEPAGE_ACTIVATE

readpage(), prepare_write(), and commit_write() callers are updated to
understand the special return code AOP_TRUNCATED_PAGE in the style of
writepage() and WRITEPAGE_ACTIVATE.  AOP_TRUNCATED_PAGE tells the caller that
the callee has unlocked the page and that the operation should be tried again
with a new page.  OCFS2 uses this to detect and work around a lock inversion in
its aop methods.  There should be no change in behaviour for methods that don't
return AOP_TRUNCATED_PAGE.

WRITEPAGE_ACTIVATE is also prepended with AOP_ for consistency and they are
made enums so that kerneldoc can be used to document their semantics.

Signed-off-by: Zach Brown <zach.brown@oracle.com>
---
 drivers/block/loop.c      | 23 +++++++++++----
 drivers/block/rd.c        |  4 +--
 fs/mpage.c                |  2 +-
 include/linux/fs.h        | 31 ++++++++++++++++++++
 include/linux/writeback.h |  6 ----
 mm/filemap.c              | 73 ++++++++++++++++++++++++++++++++---------------
 mm/readahead.c            | 15 ++++++----
 mm/shmem.c                |  2 +-
 mm/vmscan.c               |  2 +-
 9 files changed, 113 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 96c664af8d06..a452b13620a2 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -213,7 +213,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 	struct address_space_operations *aops = mapping->a_ops;
 	pgoff_t index;
 	unsigned offset, bv_offs;
-	int len, ret = 0;
+	int len, ret;
 
 	down(&mapping->host->i_sem);
 	index = pos >> PAGE_CACHE_SHIFT;
@@ -232,9 +232,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 		page = grab_cache_page(mapping, index);
 		if (unlikely(!page))
 			goto fail;
-		if (unlikely(aops->prepare_write(file, page, offset,
-				offset + size)))
+		ret = aops->prepare_write(file, page, offset,
+					  offset + size);
+		if (unlikely(ret)) {
+			if (ret == AOP_TRUNCATED_PAGE) {
+				page_cache_release(page);
+				continue;
+			}
 			goto unlock;
+		}
 		transfer_result = lo_do_transfer(lo, WRITE, page, offset,
 				bvec->bv_page, bv_offs, size, IV);
 		if (unlikely(transfer_result)) {
@@ -251,9 +257,15 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 			kunmap_atomic(kaddr, KM_USER0);
 		}
 		flush_dcache_page(page);
-		if (unlikely(aops->commit_write(file, page, offset,
-				offset + size)))
+		ret = aops->commit_write(file, page, offset,
+					 offset + size);
+		if (unlikely(ret)) {
+			if (ret == AOP_TRUNCATED_PAGE) {
+				page_cache_release(page);
+				continue;
+			}
 			goto unlock;
+		}
 		if (unlikely(transfer_result))
 			goto unlock;
 		bv_offs += size;
@@ -264,6 +276,7 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
 		unlock_page(page);
 		page_cache_release(page);
 	}
+	ret = 0;
 out:
 	up(&mapping->host->i_sem);
 	return ret;
diff --git a/drivers/block/rd.c b/drivers/block/rd.c
index 68c60a5bcdab..ffd6abd6d5a0 100644
--- a/drivers/block/rd.c
+++ b/drivers/block/rd.c
@@ -154,7 +154,7 @@ static int ramdisk_commit_write(struct file *file, struct page *page,
 
 /*
  * ->writepage to the the blockdev's mapping has to redirty the page so that the
- * VM doesn't go and steal it.  We return WRITEPAGE_ACTIVATE so that the VM
+ * VM doesn't go and steal it.  We return AOP_WRITEPAGE_ACTIVATE so that the VM
  * won't try to (pointlessly) write the page again for a while.
  *
  * Really, these pages should not be on the LRU at all.
@@ -165,7 +165,7 @@ static int ramdisk_writepage(struct page *page, struct writeback_control *wbc)
 		make_page_uptodate(page);
 	SetPageDirty(page);
 	if (wbc->for_reclaim)
-		return WRITEPAGE_ACTIVATE;
+		return AOP_WRITEPAGE_ACTIVATE;
 	unlock_page(page);
 	return 0;
 }
diff --git a/fs/mpage.c b/fs/mpage.c
index c5adcdddf3cc..f1d2d02bd4c8 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -721,7 +721,7 @@ retry:
 						&last_block_in_bio, &ret, wbc,
 						page->mapping->a_ops->writepage);
 			}
-			if (unlikely(ret == WRITEPAGE_ACTIVATE))
+			if (unlikely(ret == AOP_WRITEPAGE_ACTIVATE))
 				unlock_page(page);
 			if (ret || (--(wbc->nr_to_write) <= 0))
 				done = 1;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index cc35b6ac778d..ed9a41a71e8b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -302,6 +302,37 @@ struct iattr {
  */
 #include <linux/quota.h>
 
+/** 
+ * enum positive_aop_returns - aop return codes with specific semantics
+ *
+ * @AOP_WRITEPAGE_ACTIVATE: Informs the caller that page writeback has
+ * 			    completed, that the page is still locked, and
+ * 			    should be considered active.  The VM uses this hint
+ * 			    to return the page to the active list -- it won't
+ * 			    be a candidate for writeback again in the near
+ * 			    future.  Other callers must be careful to unlock
+ * 			    the page if they get this return.  Returned by
+ * 			    writepage(); 
+ *
+ * @AOP_TRUNCATED_PAGE: The AOP method that was handed a locked page has
+ *  			unlocked it and the page might have been truncated.
+ *  			The caller should back up to acquiring a new page and
+ *  			trying again.  The aop will be taking reasonable
+ *  			precautions not to livelock.  If the caller held a page
+ *  			reference, it should drop it before retrying.  Returned
+ *  			by readpage(), prepare_write(), and commit_write().
+ *
+ * address_space_operation functions return these large constants to indicate
+ * special semantics to the caller.  These are much larger than the bytes in a
+ * page to allow for functions that return the number of bytes operated on in a
+ * given page.
+ */
+
+enum positive_aop_returns {
+	AOP_WRITEPAGE_ACTIVATE	= 0x80000,
+	AOP_TRUNCATED_PAGE	= 0x80001,
+};
+
 /*
  * oh the beauties of C type declarations.
  */
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 343d883d69c5..64a36ba43b2f 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -59,12 +59,6 @@ struct writeback_control {
 	unsigned for_reclaim:1;			/* Invoked from the page allocator */
 };
 
-/*
- * ->writepage() return values (make these much larger than a pagesize, in
- * case some fs is returning number-of-bytes-written from writepage)
- */
-#define WRITEPAGE_ACTIVATE	0x80000	/* IO was not started: activate page */
-
 /*
  * fs/fs-writeback.c
  */	
diff --git a/mm/filemap.c b/mm/filemap.c
index 33a28bfde158..6e1d08a2b8b9 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -831,8 +831,13 @@ readpage:
 		/* Start the actual read. The read will unlock the page. */
 		error = mapping->a_ops->readpage(filp, page);
 
-		if (unlikely(error))
+		if (unlikely(error)) {
+			if (error == AOP_TRUNCATED_PAGE) {
+				page_cache_release(page);
+				goto find_page;
+			}
 			goto readpage_error;
+		}
 
 		if (!PageUptodate(page)) {
 			lock_page(page);
@@ -1152,26 +1157,24 @@ static int fastcall page_cache_read(struct file * file, unsigned long offset)
 {
 	struct address_space *mapping = file->f_mapping;
 	struct page *page; 
-	int error;
+	int ret;
 
-	page = page_cache_alloc_cold(mapping);
-	if (!page)
-		return -ENOMEM;
+	do {
+		page = page_cache_alloc_cold(mapping);
+		if (!page)
+			return -ENOMEM;
+
+		ret = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
+		if (ret == 0)
+			ret = mapping->a_ops->readpage(file, page);
+		else if (ret == -EEXIST)
+			ret = 0; /* losing race to add is OK */
 
-	error = add_to_page_cache_lru(page, mapping, offset, GFP_KERNEL);
-	if (!error) {
-		error = mapping->a_ops->readpage(file, page);
 		page_cache_release(page);
-		return error;
-	}
 
-	/*
-	 * We arrive here in the unlikely event that someone 
-	 * raced with us and added our page to the cache first
-	 * or we are out of memory for radix-tree nodes.
-	 */
-	page_cache_release(page);
-	return error == -EEXIST ? 0 : error;
+	} while (ret == AOP_TRUNCATED_PAGE);
+		
+	return ret;
 }
 
 #define MMAP_LOTSAMISS  (100)
@@ -1331,10 +1334,14 @@ page_not_uptodate:
 		goto success;
 	}
 
-	if (!mapping->a_ops->readpage(file, page)) {
+	error = mapping->a_ops->readpage(file, page);
+	if (!error) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
 			goto success;
+	} else if (error == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry_find;
 	}
 
 	/*
@@ -1358,10 +1365,14 @@ page_not_uptodate:
 		goto success;
 	}
 	ClearPageError(page);
-	if (!mapping->a_ops->readpage(file, page)) {
+	error = mapping->a_ops->readpage(file, page);
+	if (!error) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
 			goto success;
+	} else if (error == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry_find;
 	}
 
 	/*
@@ -1444,10 +1455,14 @@ page_not_uptodate:
 		goto success;
 	}
 
-	if (!mapping->a_ops->readpage(file, page)) {
+	error = mapping->a_ops->readpage(file, page);
+	if (!error) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
 			goto success;
+	} else if (error == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry_find;
 	}
 
 	/*
@@ -1470,10 +1485,14 @@ page_not_uptodate:
 	}
 
 	ClearPageError(page);
-	if (!mapping->a_ops->readpage(file, page)) {
+	error = mapping->a_ops->readpage(file, page);
+	if (!error) {
 		wait_on_page_locked(page);
 		if (PageUptodate(page))
 			goto success;
+	} else if (error == AOP_TRUNCATED_PAGE) {
+		page_cache_release(page);
+		goto retry_find;
 	}
 
 	/*
@@ -1934,12 +1953,16 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
 			loff_t isize = i_size_read(inode);
+
+			if (status != AOP_TRUNCATED_PAGE)
+				unlock_page(page);
+			page_cache_release(page);
+			if (status == AOP_TRUNCATED_PAGE)
+				continue;
 			/*
 			 * prepare_write() may have instantiated a few blocks
 			 * outside i_size.  Trim these off again.
 			 */
-			unlock_page(page);
-			page_cache_release(page);
 			if (pos + bytes > isize)
 				vmtruncate(inode, isize);
 			break;
@@ -1952,6 +1975,10 @@ generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
 						cur_iov, iov_base, bytes);
 		flush_dcache_page(page);
 		status = a_ops->commit_write(file, page, offset, offset+bytes);
+		if (status == AOP_TRUNCATED_PAGE) {
+			page_cache_release(page);
+			continue;
+		}
 		if (likely(copied > 0)) {
 			if (!status)
 				status = copied;
diff --git a/mm/readahead.c b/mm/readahead.c
index 72e7adbb87c7..8d6eeaaa6296 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -158,7 +158,7 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 {
 	unsigned page_idx;
 	struct pagevec lru_pvec;
-	int ret = 0;
+	int ret;
 
 	if (mapping->a_ops->readpages) {
 		ret = mapping->a_ops->readpages(filp, mapping, pages, nr_pages);
@@ -171,14 +171,17 @@ static int read_pages(struct address_space *mapping, struct file *filp,
 		list_del(&page->lru);
 		if (!add_to_page_cache(page, mapping,
 					page->index, GFP_KERNEL)) {
-			mapping->a_ops->readpage(filp, page);
-			if (!pagevec_add(&lru_pvec, page))
-				__pagevec_lru_add(&lru_pvec);
-		} else {
-			page_cache_release(page);
+			ret = mapping->a_ops->readpage(filp, page);
+			if (ret != AOP_TRUNCATED_PAGE) {
+				if (!pagevec_add(&lru_pvec, page))
+					__pagevec_lru_add(&lru_pvec);
+				continue;
+			} /* else fall through to release */
 		}
+		page_cache_release(page);
 	}
 	pagevec_lru_add(&lru_pvec);
+	ret = 0;
 out:
 	return ret;
 }
diff --git a/mm/shmem.c b/mm/shmem.c
index dc25565a61e9..d9fc277940da 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -855,7 +855,7 @@ unlock:
 	swap_free(swap);
 redirty:
 	set_page_dirty(page);
-	return WRITEPAGE_ACTIVATE;	/* Return with the page locked */
+	return AOP_WRITEPAGE_ACTIVATE;	/* Return with the page locked */
 }
 
 #ifdef CONFIG_NUMA
diff --git a/mm/vmscan.c b/mm/vmscan.c
index b0cd81c32de6..795a050fe471 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -367,7 +367,7 @@ static pageout_t pageout(struct page *page, struct address_space *mapping)
 		res = mapping->a_ops->writepage(page, &wbc);
 		if (res < 0)
 			handle_write_error(mapping, page, res);
-		if (res == WRITEPAGE_ACTIVATE) {
+		if (res == AOP_WRITEPAGE_ACTIVATE) {
 			ClearPageReclaim(page);
 			return PAGE_ACTIVATE;
 		}
-- 
cgit v1.2.3


From df71837d5024e2524cd51c93621e558aa7dd9f3f Mon Sep 17 00:00:00 2001
From: Trent Jaeger <tjaeger@cse.psu.edu>
Date: Tue, 13 Dec 2005 23:12:27 -0800
Subject: [LSM-IPSec]: Security association restriction.

This patch series implements per packet access control via the
extension of the Linux Security Modules (LSM) interface by hooks in
the XFRM and pfkey subsystems that leverage IPSec security
associations to label packets.  Extensions to the SELinux LSM are
included that leverage the patch for this purpose.

This patch implements the changes necessary to the XFRM subsystem,
pfkey interface, ipv4/ipv6, and xfrm_user interface to restrict a
socket to use only authorized security associations (or no security
association) to send/receive network packets.

Patch purpose:

The patch is designed to enable access control per packets based on
the strongly authenticated IPSec security association.  Such access
controls augment the existing ones based on network interface and IP
address.  The former are very coarse-grained, and the latter can be
spoofed.  By using IPSec, the system can control access to remote
hosts based on cryptographic keys generated using the IPSec mechanism.
This enables access control on a per-machine basis or per-application
if the remote machine is running the same mechanism and trusted to
enforce the access control policy.

Patch design approach:

The overall approach is that policy (xfrm_policy) entries set by
user-level programs (e.g., setkey for ipsec-tools) are extended with a
security context that is used at policy selection time in the XFRM
subsystem to restrict the sockets that can send/receive packets via
security associations (xfrm_states) that are built from those
policies.

A presentation available at
www.selinux-symposium.org/2005/presentations/session2/2-3-jaeger.pdf
from the SELinux symposium describes the overall approach.

Patch implementation details:

On output, the policy retrieved (via xfrm_policy_lookup or
xfrm_sk_policy_lookup) must be authorized for the security context of
the socket and the same security context is required for resultant
security association (retrieved or negotiated via racoon in
ipsec-tools).  This is enforced in xfrm_state_find.

On input, the policy retrieved must also be authorized for the socket
(at __xfrm_policy_check), and the security context of the policy must
also match the security association being used.

The patch has virtually no impact on packets that do not use IPSec.
The existing Netfilter (outgoing) and LSM rcv_skb hooks are used as
before.

Also, if IPSec is used without security contexts, the impact is
minimal.  The LSM must allow such policies to be selected for the
combination of socket and remote machine, but subsequent IPSec
processing proceeds as in the original case.

Testing:

The pfkey interface is tested using the ipsec-tools.  ipsec-tools have
been modified (a separate ipsec-tools patch is available for version
0.5) that supports assignment of xfrm_policy entries and security
associations with security contexts via setkey and the negotiation
using the security contexts via racoon.

The xfrm_user interface is tested via ad hoc programs that set
security contexts.  These programs are also available from me, and
contain programs for setting, getting, and deleting policy for testing
this interface.  Testing of sa functions was done by tracing kernel
behavior.

Signed-off-by: Trent Jaeger <tjaeger@cse.psu.edu>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pfkeyv2.h  |  13 +++-
 include/linux/security.h | 132 +++++++++++++++++++++++++++++++
 include/linux/xfrm.h     |  29 +++++++
 include/net/flow.h       |   7 +-
 include/net/xfrm.h       |  27 ++++++-
 net/core/flow.c          |   8 +-
 net/key/af_key.c         | 197 +++++++++++++++++++++++++++++++++++++++++++++--
 net/xfrm/xfrm_policy.c   |  88 +++++++++++++--------
 net/xfrm/xfrm_state.c    |   9 ++-
 net/xfrm/xfrm_user.c     | 148 +++++++++++++++++++++++++++++++++--
 security/Kconfig         |  13 ++++
 security/dummy.c         |  45 ++++++++++-
 12 files changed, 655 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
index 724066778aff..6351c4055ace 100644
--- a/include/linux/pfkeyv2.h
+++ b/include/linux/pfkeyv2.h
@@ -216,6 +216,16 @@ struct sadb_x_nat_t_port {
 } __attribute__((packed));
 /* sizeof(struct sadb_x_nat_t_port) == 8 */
 
+/* Generic LSM security context */
+struct sadb_x_sec_ctx {
+	uint16_t	sadb_x_sec_len;
+	uint16_t	sadb_x_sec_exttype;
+	uint8_t		sadb_x_ctx_alg;  /* LSMs: e.g., selinux == 1 */
+	uint8_t		sadb_x_ctx_doi;
+	uint16_t	sadb_x_ctx_len;
+} __attribute__((packed));
+/* sizeof(struct sadb_sec_ctx) = 8 */
+
 /* Message types */
 #define SADB_RESERVED		0
 #define SADB_GETSPI		1
@@ -325,7 +335,8 @@ struct sadb_x_nat_t_port {
 #define SADB_X_EXT_NAT_T_SPORT		21
 #define SADB_X_EXT_NAT_T_DPORT		22
 #define SADB_X_EXT_NAT_T_OA		23
-#define SADB_EXT_MAX			23
+#define SADB_X_EXT_SEC_CTX		24
+#define SADB_EXT_MAX			24
 
 /* Identity Extension values */
 #define SADB_IDENTTYPE_RESERVED	0
diff --git a/include/linux/security.h b/include/linux/security.h
index f7e0ae018712..ef753654daa5 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -59,6 +59,12 @@ struct sk_buff;
 struct sock;
 struct sockaddr;
 struct socket;
+struct flowi;
+struct dst_entry;
+struct xfrm_selector;
+struct xfrm_policy;
+struct xfrm_state;
+struct xfrm_user_sec_ctx;
 
 extern int cap_netlink_send(struct sock *sk, struct sk_buff *skb);
 extern int cap_netlink_recv(struct sk_buff *skb);
@@ -788,6 +794,52 @@ struct swap_info_struct;
  *      which is used to copy security attributes between local stream sockets.
  * @sk_free_security:
  *	Deallocate security structure.
+ * @sk_getsid:
+ *	Retrieve the LSM-specific sid for the sock to enable caching of network
+ *	authorizations.
+ *
+ * Security hooks for XFRM operations.
+ *
+ * @xfrm_policy_alloc_security:
+ *	@xp contains the xfrm_policy being added to Security Policy Database
+ *	used by the XFRM system.
+ *	@sec_ctx contains the security context information being provided by
+ *	the user-level policy update program (e.g., setkey).
+ *	Allocate a security structure to the xp->selector.security field.
+ *	The security field is initialized to NULL when the xfrm_policy is
+ *	allocated.
+ *	Return 0 if operation was successful (memory to allocate, legal context)
+ * @xfrm_policy_clone_security:
+ *	@old contains an existing xfrm_policy in the SPD.
+ *	@new contains a new xfrm_policy being cloned from old.
+ *	Allocate a security structure to the new->selector.security field
+ *	that contains the information from the old->selector.security field.
+ *	Return 0 if operation was successful (memory to allocate).
+ * @xfrm_policy_free_security:
+ *	@xp contains the xfrm_policy
+ *	Deallocate xp->selector.security.
+ * @xfrm_state_alloc_security:
+ *	@x contains the xfrm_state being added to the Security Association
+ *	Database by the XFRM system.
+ *	@sec_ctx contains the security context information being provided by
+ *	the user-level SA generation program (e.g., setkey or racoon).
+ *	Allocate a security structure to the x->sel.security field.  The
+ *	security field is initialized to NULL when the xfrm_state is
+ *	allocated.
+ *	Return 0 if operation was successful (memory to allocate, legal context).
+ * @xfrm_state_free_security:
+ *	@x contains the xfrm_state.
+ *	Deallocate x>sel.security.
+ * @xfrm_policy_lookup:
+ *	@xp contains the xfrm_policy for which the access control is being
+ *	checked.
+ *	@sk_sid contains the sock security label that is used to authorize
+ *	access to the policy xp.
+ *	@dir contains the direction of the flow (input or output).
+ *	Check permission when a sock selects a xfrm_policy for processing
+ *	XFRMs on a packet.  The hook is called when selecting either a
+ *	per-socket policy or a generic xfrm policy.
+ *	Return 0 if permission is granted.
  *
  * Security hooks affecting all Key Management operations
  *
@@ -1237,8 +1289,18 @@ struct security_operations {
 	int (*socket_getpeersec) (struct socket *sock, char __user *optval, int __user *optlen, unsigned len);
 	int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority);
 	void (*sk_free_security) (struct sock *sk);
+	unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir);
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+	int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx);
+	int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new);
+	void (*xfrm_policy_free_security) (struct xfrm_policy *xp);
+	int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx);
+	void (*xfrm_state_free_security) (struct xfrm_state *x);
+	int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir);
+#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
+
 	/* key management security hooks */
 #ifdef CONFIG_KEYS
 	int (*key_alloc)(struct key *key);
@@ -2679,6 +2741,11 @@ static inline void security_sk_free(struct sock *sk)
 {
 	return security_ops->sk_free_security(sk);
 }
+
+static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+{
+	return security_ops->sk_getsid(sk, fl, dir);
+}
 #else	/* CONFIG_SECURITY_NETWORK */
 static inline int security_unix_stream_connect(struct socket * sock,
 					       struct socket * other, 
@@ -2795,8 +2862,73 @@ static inline int security_sk_alloc(struct sock *sk, int family, gfp_t priority)
 static inline void security_sk_free(struct sock *sk)
 {
 }
+
+static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return security_ops->xfrm_policy_alloc_security(xp, sec_ctx);
+}
+
+static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
+{
+	return security_ops->xfrm_policy_clone_security(old, new);
+}
+
+static inline void security_xfrm_policy_free(struct xfrm_policy *xp)
+{
+	security_ops->xfrm_policy_free_security(xp);
+}
+
+static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return security_ops->xfrm_state_alloc_security(x, sec_ctx);
+}
+
+static inline void security_xfrm_state_free(struct xfrm_state *x)
+{
+	security_ops->xfrm_state_free_security(x);
+}
+
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+{
+	return security_ops->xfrm_policy_lookup(xp, sk_sid, dir);
+}
+#else	/* CONFIG_SECURITY_NETWORK_XFRM */
+static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new)
+{
+	return 0;
+}
+
+static inline void security_xfrm_policy_free(struct xfrm_policy *xp)
+{
+}
+
+static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline void security_xfrm_state_free(struct xfrm_state *x)
+{
+}
+
+static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+{
+	return 0;
+}
+#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
+
 #ifdef CONFIG_KEYS
 #ifdef CONFIG_SECURITY
 static inline int security_key_alloc(struct key *key)
diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h
index 0fb077d68441..82fbb758e28f 100644
--- a/include/linux/xfrm.h
+++ b/include/linux/xfrm.h
@@ -27,6 +27,22 @@ struct xfrm_id
 	__u8		proto;
 };
 
+struct xfrm_sec_ctx {
+	__u8	ctx_doi;
+	__u8	ctx_alg;
+	__u16	ctx_len;
+	__u32	ctx_sid;
+	char	ctx_str[0];
+};
+
+/* Security Context Domains of Interpretation */
+#define XFRM_SC_DOI_RESERVED 0
+#define XFRM_SC_DOI_LSM 1
+
+/* Security Context Algorithms */
+#define XFRM_SC_ALG_RESERVED 0
+#define XFRM_SC_ALG_SELINUX 1
+
 /* Selector, used as selector both on policy rules (SPD) and SAs. */
 
 struct xfrm_selector
@@ -146,6 +162,18 @@ enum {
 
 #define XFRM_NR_MSGTYPES (XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)
 
+/*
+ * Generic LSM security context for comunicating to user space
+ * NOTE: Same format as sadb_x_sec_ctx
+ */
+struct xfrm_user_sec_ctx {
+	__u16			len;
+	__u16			exttype;
+	__u8			ctx_alg;  /* LSMs: e.g., selinux == 1 */
+	__u8			ctx_doi;
+	__u16			ctx_len;
+};
+
 struct xfrm_user_tmpl {
 	struct xfrm_id		id;
 	__u16			family;
@@ -176,6 +204,7 @@ enum xfrm_attr_type_t {
 	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
 	XFRMA_SA,
 	XFRMA_POLICY,
+	XFRMA_SEC_CTX,		/* struct xfrm_sec_ctx */
 	__XFRMA_MAX
 
 #define XFRMA_MAX (__XFRMA_MAX - 1)
diff --git a/include/net/flow.h b/include/net/flow.h
index 9a5c94b1a0ec..ec7eb86eb203 100644
--- a/include/net/flow.h
+++ b/include/net/flow.h
@@ -84,11 +84,12 @@ struct flowi {
 #define FLOW_DIR_OUT	1
 #define FLOW_DIR_FWD	2
 
-typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
+struct sock;
+typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp);
 
-extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
-			       flow_resolve_t resolver);
+extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
+	 		       flow_resolve_t resolver);
 extern void flow_cache_flush(void);
 extern atomic_t flow_cache_genid;
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 1cdb87912137..487abca3ca6f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -144,6 +144,9 @@ struct xfrm_state
 	 * transformer. */
 	struct xfrm_type	*type;
 
+	/* Security context */
+	struct xfrm_sec_ctx	*security;
+
 	/* Private data of this transformer, format is opaque,
 	 * interpreted by xfrm_type methods. */
 	void			*data;
@@ -298,6 +301,7 @@ struct xfrm_policy
 	__u8			flags;
 	__u8			dead;
 	__u8			xfrm_nr;
+	struct xfrm_sec_ctx	*security;
 	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
 };
 
@@ -510,6 +514,25 @@ xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
 	return 0;
 }
 
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+/*	If neither has a context --> match
+ * 	Otherwise, both must have a context and the sids, doi, alg must match
+ */
+static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
+{
+	return ((!s1 && !s2) ||
+		(s1 && s2 &&
+		 (s1->ctx_sid == s2->ctx_sid) &&
+		 (s1->ctx_doi == s2->ctx_doi) &&
+		 (s1->ctx_alg == s2->ctx_alg)));
+}
+#else
+static inline int xfrm_sec_ctx_match(struct xfrm_sec_ctx *s1, struct xfrm_sec_ctx *s2)
+{
+	return 1;
+}
+#endif
+
 /* A struct encoding bundle of transformations to apply to some set of flow.
  *
  * dst->child points to the next element of bundle.
@@ -878,8 +901,8 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig
 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp);
 extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
-struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
-				      int delete);
+struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+					  struct xfrm_sec_ctx *ctx, int delete);
 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
 void xfrm_policy_flush(void);
 u32 xfrm_get_acqseq(void);
diff --git a/net/core/flow.c b/net/core/flow.c
index 7e95b39de9fd..c4f25385029f 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -23,6 +23,7 @@
 #include <net/flow.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
+#include <linux/security.h>
 
 struct flow_cache_entry {
 	struct flow_cache_entry	*next;
@@ -30,6 +31,7 @@ struct flow_cache_entry {
 	u8			dir;
 	struct flowi		key;
 	u32			genid;
+	u32			sk_sid;
 	void			*object;
 	atomic_t		*object_ref;
 };
@@ -162,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
 	return 0;
 }
 
-void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
+void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir,
 			flow_resolve_t resolver)
 {
 	struct flow_cache_entry *fle, **head;
@@ -186,6 +188,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 	for (fle = *head; fle; fle = fle->next) {
 		if (fle->family == family &&
 		    fle->dir == dir &&
+		    fle->sk_sid == sk_sid &&
 		    flow_key_compare(key, &fle->key) == 0) {
 			if (fle->genid == atomic_read(&flow_cache_genid)) {
 				void *ret = fle->object;
@@ -210,6 +213,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
 			*head = fle;
 			fle->family = family;
 			fle->dir = dir;
+			fle->sk_sid = sk_sid;
 			memcpy(&fle->key, key, sizeof(*key));
 			fle->object = NULL;
 			flow_count(cpu)++;
@@ -221,7 +225,7 @@ nocache:
 		void *obj;
 		atomic_t *obj_ref;
 
-		resolver(key, family, dir, &obj, &obj_ref);
+		resolver(key, sk_sid, family, dir, &obj, &obj_ref);
 
 		if (fle) {
 			fle->genid = atomic_read(&flow_cache_genid);
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 39031684b65c..d32f7791f1e4 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -336,6 +336,7 @@ static u8 sadb_ext_min_len[] = {
 	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
 	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
 	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
+	[SADB_X_EXT_SEC_CTX]		= (u8) sizeof(struct sadb_x_sec_ctx),
 };
 
 /* Verify sadb_address_{len,prefixlen} against sa_family.  */
@@ -383,6 +384,55 @@ static int verify_address_len(void *p)
 	return 0;
 }
 
+static inline int pfkey_sec_ctx_len(struct sadb_x_sec_ctx *sec_ctx)
+{
+	int len = 0;
+
+	len += sizeof(struct sadb_x_sec_ctx);
+	len += sec_ctx->sadb_x_ctx_len;
+	len += sizeof(uint64_t) - 1;
+	len /= sizeof(uint64_t);
+
+	return len;
+}
+
+static inline int verify_sec_ctx_len(void *p)
+{
+	struct sadb_x_sec_ctx *sec_ctx = (struct sadb_x_sec_ctx *)p;
+	int len;
+
+	if (sec_ctx->sadb_x_ctx_len > PAGE_SIZE)
+		return -EINVAL;
+
+	len = pfkey_sec_ctx_len(sec_ctx);
+
+	if (sec_ctx->sadb_x_sec_len != len)
+		return -EINVAL;
+
+	return 0;
+}
+
+static inline struct xfrm_user_sec_ctx *pfkey_sadb2xfrm_user_sec_ctx(struct sadb_x_sec_ctx *sec_ctx)
+{
+	struct xfrm_user_sec_ctx *uctx = NULL;
+	int ctx_size = sec_ctx->sadb_x_ctx_len;
+
+	uctx = kmalloc((sizeof(*uctx)+ctx_size), GFP_KERNEL);
+
+	if (!uctx)
+		return NULL;
+
+	uctx->len = pfkey_sec_ctx_len(sec_ctx);
+	uctx->exttype = sec_ctx->sadb_x_sec_exttype;
+	uctx->ctx_doi = sec_ctx->sadb_x_ctx_doi;
+	uctx->ctx_alg = sec_ctx->sadb_x_ctx_alg;
+	uctx->ctx_len = sec_ctx->sadb_x_ctx_len;
+	memcpy(uctx + 1, sec_ctx + 1,
+	       uctx->ctx_len);
+
+	return uctx;
+}
+
 static int present_and_same_family(struct sadb_address *src,
 				   struct sadb_address *dst)
 {
@@ -438,6 +488,10 @@ static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_h
 				if (verify_address_len(p))
 					return -EINVAL;
 			}				
+			if (ext_type == SADB_X_EXT_SEC_CTX) {
+				if (verify_sec_ctx_len(p))
+					return -EINVAL;
+			}
 			ext_hdrs[ext_type-1] = p;
 		}
 		p   += ext_len;
@@ -586,6 +640,9 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 	struct sadb_key *key;
 	struct sadb_x_sa2 *sa2;
 	struct sockaddr_in *sin;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
+	int ctx_size = 0;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct sockaddr_in6 *sin6;
 #endif
@@ -609,6 +666,12 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 			sizeof(struct sadb_address)*2 + 
 				sockaddr_size*2 +
 					sizeof(struct sadb_x_sa2);
+
+	if ((xfrm_ctx = x->security)) {
+		ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len);
+		size += sizeof(struct sadb_x_sec_ctx) + ctx_size;
+	}
+
 	/* identity & sensitivity */
 
 	if ((x->props.family == AF_INET &&
@@ -899,6 +962,20 @@ static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys,
 		n_port->sadb_x_nat_t_port_reserved = 0;
 	}
 
+	/* security context */
+	if (xfrm_ctx) {
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb,
+				sizeof(struct sadb_x_sec_ctx) + ctx_size);
+		sec_ctx->sadb_x_sec_len =
+		  (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	return skb;
 }
 
@@ -909,6 +986,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 	struct sadb_lifetime *lifetime;
 	struct sadb_sa *sa;
 	struct sadb_key *key;
+	struct sadb_x_sec_ctx *sec_ctx;
 	uint16_t proto;
 	int err;
 	
@@ -993,6 +1071,21 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr,
 		x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
 		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
 	}
+
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx)
+			goto out;
+
+		err = security_xfrm_state_alloc(x, uctx);
+		kfree(uctx);
+
+		if (err)
+			goto out;
+	}
+
 	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
 	if (sa->sadb_sa_auth) {
 		int keysize = 0;
@@ -1720,6 +1813,18 @@ parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
 	return 0;
 }
 
+static inline int pfkey_xfrm_policy2sec_ctx_size(struct xfrm_policy *xp)
+{
+  struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+
+	if (xfrm_ctx) {
+		int len = sizeof(struct sadb_x_sec_ctx);
+		len += xfrm_ctx->ctx_len;
+		return PFKEY_ALIGN8(len);
+	}
+	return 0;
+}
+
 static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 {
 	int sockaddr_size = pfkey_sockaddr_size(xp->family);
@@ -1733,7 +1838,8 @@ static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
 		(sockaddr_size * 2) +
 		sizeof(struct sadb_x_policy) +
 		(xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
-				(socklen * 2)));
+				(socklen * 2))) +
+		pfkey_xfrm_policy2sec_ctx_size(xp);
 }
 
 static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
@@ -1757,6 +1863,8 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 	struct sadb_lifetime *lifetime;
 	struct sadb_x_policy *pol;
 	struct sockaddr_in   *sin;
+	struct sadb_x_sec_ctx *sec_ctx;
+	struct xfrm_sec_ctx *xfrm_ctx;
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 	struct sockaddr_in6  *sin6;
 #endif
@@ -1941,6 +2049,21 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i
 			}
 		}
 	}
+
+	/* security context */
+	if ((xfrm_ctx = xp->security)) {
+		int ctx_size = pfkey_xfrm_policy2sec_ctx_size(xp);
+
+		sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, ctx_size);
+		sec_ctx->sadb_x_sec_len = ctx_size / sizeof(uint64_t);
+		sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX;
+		sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi;
+		sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg;
+		sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len;
+		memcpy(sec_ctx + 1, xfrm_ctx->ctx_str,
+		       xfrm_ctx->ctx_len);
+	}
+
 	hdr->sadb_msg_len = size / sizeof(uint64_t);
 	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
 }
@@ -1976,12 +2099,13 @@ out:
 
 static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
 {
-	int err;
+	int err = 0;
 	struct sadb_lifetime *lifetime;
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
 	struct xfrm_policy *xp;
 	struct km_event c;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2028,6 +2152,22 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	if (xp->selector.dport)
 		xp->selector.dport_mask = ~0;
 
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx) {
+			err = -ENOBUFS;
+			goto out;
+		}
+
+		err = security_xfrm_policy_alloc(xp, uctx);
+		kfree(uctx);
+
+		if (err)
+			goto out;
+	}
+
 	xp->lft.soft_byte_limit = XFRM_INF;
 	xp->lft.hard_byte_limit = XFRM_INF;
 	xp->lft.soft_packet_limit = XFRM_INF;
@@ -2051,10 +2191,9 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 
 	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
 				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
-	if (err) {
-		kfree(xp);
-		return err;
-	}
+
+	if (err)
+		goto out;
 
 	if (hdr->sadb_msg_type == SADB_X_SPDUPDATE)
 		c.event = XFRM_MSG_UPDPOLICY;
@@ -2069,6 +2208,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h
 	return 0;
 
 out:
+	security_xfrm_policy_free(xp);
 	kfree(xp);
 	return err;
 }
@@ -2078,9 +2218,10 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	int err;
 	struct sadb_address *sa;
 	struct sadb_x_policy *pol;
-	struct xfrm_policy *xp;
+	struct xfrm_policy *xp, tmp;
 	struct xfrm_selector sel;
 	struct km_event c;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
 				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
@@ -2109,7 +2250,24 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg
 	if (sel.dport)
 		sel.dport_mask = ~0;
 
-	xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1);
+	sec_ctx = (struct sadb_x_sec_ctx *) ext_hdrs[SADB_X_EXT_SEC_CTX-1];
+	memset(&tmp, 0, sizeof(struct xfrm_policy));
+
+	if (sec_ctx != NULL) {
+		struct xfrm_user_sec_ctx *uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+
+		if (!uctx)
+			return -ENOMEM;
+
+		err = security_xfrm_policy_alloc(&tmp, uctx);
+		kfree(uctx);
+
+		if (err)
+			return err;
+	}
+
+	xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1);
+	security_xfrm_policy_free(&tmp);
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -2660,6 +2818,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 {
 	struct xfrm_policy *xp;
 	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
+	struct sadb_x_sec_ctx *sec_ctx;
 
 	switch (family) {
 	case AF_INET:
@@ -2709,10 +2868,32 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
 	    (*dir = parse_ipsecrequests(xp, pol)) < 0)
 		goto out;
 
+	/* security context too */
+	if (len >= (pol->sadb_x_policy_len*8 +
+	    sizeof(struct sadb_x_sec_ctx))) {
+		char *p = (char *)pol;
+		struct xfrm_user_sec_ctx *uctx;
+
+		p += pol->sadb_x_policy_len*8;
+		sec_ctx = (struct sadb_x_sec_ctx *)p;
+		if (len < pol->sadb_x_policy_len*8 +
+		    sec_ctx->sadb_x_sec_len)
+			goto out;
+		if ((*dir = verify_sec_ctx_len(p)))
+			goto out;
+		uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx);
+		*dir = security_xfrm_policy_alloc(xp, uctx);
+		kfree(uctx);
+
+		if (*dir)
+			goto out;
+	}
+
 	*dir = pol->sadb_x_policy_dir-1;
 	return xp;
 
 out:
+	security_xfrm_policy_free(xp);
 	kfree(xp);
 	return NULL;
 }
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index d19e274b9c4a..64a447375fdb 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -10,7 +10,7 @@
  * 	YOSHIFUJI Hideaki
  * 		Split up af-specific portion
  *	Derek Atkins <derek@ihtfp.com>		Add the post_input processor
- * 	
+ *
  */
 
 #include <asm/bug.h>
@@ -256,6 +256,7 @@ void __xfrm_policy_destroy(struct xfrm_policy *policy)
 	if (del_timer(&policy->timer))
 		BUG();
 
+	security_xfrm_policy_free(policy);
 	kfree(policy);
 }
 EXPORT_SYMBOL(__xfrm_policy_destroy);
@@ -350,7 +351,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
-		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
+		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
+		    xfrm_sec_ctx_match(pol->security, policy->security)) {
 			if (excl) {
 				write_unlock_bh(&xfrm_policy_lock);
 				return -EEXIST;
@@ -416,14 +418,15 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 }
 EXPORT_SYMBOL(xfrm_policy_insert);
 
-struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
-				      int delete)
+struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
+					  struct xfrm_sec_ctx *ctx, int delete)
 {
 	struct xfrm_policy *pol, **p;
 
 	write_lock_bh(&xfrm_policy_lock);
 	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
-		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
+		if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
+		    (xfrm_sec_ctx_match(ctx, pol->security))) {
 			xfrm_pol_hold(pol);
 			if (delete)
 				*p = pol->next;
@@ -438,7 +441,7 @@ struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
 	}
 	return pol;
 }
-EXPORT_SYMBOL(xfrm_policy_bysel);
+EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
 {
@@ -519,7 +522,7 @@ EXPORT_SYMBOL(xfrm_policy_walk);
 
 /* Find policy to apply to this flow. */
 
-static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
+static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
 			       void **objp, atomic_t **obj_refp)
 {
 	struct xfrm_policy *pol;
@@ -533,9 +536,12 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 			continue;
 
 		match = xfrm_selector_match(sel, fl, family);
+
 		if (match) {
-			xfrm_pol_hold(pol);
-			break;
+ 			if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
+				xfrm_pol_hold(pol);
+				break;
+			}
 		}
 	}
 	read_unlock_bh(&xfrm_policy_lock);
@@ -543,15 +549,37 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
 		*obj_refp = &pol->refcnt;
 }
 
-static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
+static inline int policy_to_flow_dir(int dir)
+{
+	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
+ 	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
+ 	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
+ 		return dir;
+ 	switch (dir) {
+ 	default:
+ 	case XFRM_POLICY_IN:
+ 		return FLOW_DIR_IN;
+ 	case XFRM_POLICY_OUT:
+ 		return FLOW_DIR_OUT;
+ 	case XFRM_POLICY_FWD:
+ 		return FLOW_DIR_FWD;
+	};
+}
+
+static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
 {
 	struct xfrm_policy *pol;
 
 	read_lock_bh(&xfrm_policy_lock);
 	if ((pol = sk->sk_policy[dir]) != NULL) {
-		int match = xfrm_selector_match(&pol->selector, fl,
+ 		int match = xfrm_selector_match(&pol->selector, fl,
 						sk->sk_family);
+ 		int err = 0;
+
 		if (match)
+		  err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
+
+ 		if (match && !err)
 			xfrm_pol_hold(pol);
 		else
 			pol = NULL;
@@ -624,6 +652,10 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 
 	if (newp) {
 		newp->selector = old->selector;
+		if (security_xfrm_policy_clone(old, newp)) {
+			kfree(newp);
+			return NULL;  /* ENOMEM */
+		}
 		newp->lft = old->lft;
 		newp->curlft = old->curlft;
 		newp->action = old->action;
@@ -735,22 +767,6 @@ xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
 	return err;
 }
 
-static inline int policy_to_flow_dir(int dir)
-{
-	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
-	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
-	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
-		return dir;
-	switch (dir) {
-	default:
-	case XFRM_POLICY_IN:
-		return FLOW_DIR_IN;
-	case XFRM_POLICY_OUT:
-		return FLOW_DIR_OUT;
-	case XFRM_POLICY_FWD:
-		return FLOW_DIR_FWD;
-	};
-}
 
 static int stale_bundle(struct dst_entry *dst);
 
@@ -769,19 +785,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 	int err;
 	u32 genid;
 	u16 family = dst_orig->ops->family;
+	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
+	u32 sk_sid = security_sk_sid(sk, fl, dir);
 restart:
 	genid = atomic_read(&flow_cache_genid);
 	policy = NULL;
 	if (sk && sk->sk_policy[1])
-		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
 
 	if (!policy) {
 		/* To accelerate a bit...  */
 		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 			return 0;
 
-		policy = flow_cache_lookup(fl, family,
-					   policy_to_flow_dir(XFRM_POLICY_OUT),
+		policy = flow_cache_lookup(fl, sk_sid, family, dir,
 					   xfrm_policy_lookup);
 	}
 
@@ -962,16 +979,20 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 {
 	struct xfrm_policy *pol;
 	struct flowi fl;
+	u8 fl_dir = policy_to_flow_dir(dir);
+	u32 sk_sid;
 
 	if (_decode_session(skb, &fl, family) < 0)
 		return 0;
 
+	sk_sid = security_sk_sid(sk, &fl, fl_dir);
+
 	/* First, check used SA against their selectors. */
 	if (skb->sp) {
 		int i;
 
 		for (i=skb->sp->len-1; i>=0; i--) {
-		  struct sec_decap_state *xvec = &(skb->sp->x[i]);
+			struct sec_decap_state *xvec = &(skb->sp->x[i]);
 			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
 				return 0;
 
@@ -986,11 +1007,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
 
 	pol = NULL;
 	if (sk && sk->sk_policy[dir])
-		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
+		pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
 
 	if (!pol)
-		pol = flow_cache_lookup(&fl, family,
-					policy_to_flow_dir(dir),
+		pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
 					xfrm_policy_lookup);
 
 	if (!pol)
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 479effc97666..e12d0be5f976 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -10,7 +10,7 @@
  * 		Split up af-specific functions
  *	Derek Atkins <derek@ihtfp.com>
  *		Add UDP Encapsulation
- * 	
+ *
  */
 
 #include <linux/workqueue.h>
@@ -70,6 +70,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x)
 		x->type->destructor(x);
 		xfrm_put_type(x->type);
 	}
+	security_xfrm_state_free(x);
 	kfree(x);
 }
 
@@ -343,7 +344,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 			      selector.
 			 */
 			if (x->km.state == XFRM_STATE_VALID) {
-				if (!xfrm_selector_match(&x->sel, fl, family))
+				if (!xfrm_selector_match(&x->sel, fl, family) ||
+				    !xfrm_sec_ctx_match(pol->security, x->security))
 					continue;
 				if (!best ||
 				    best->km.dying > x->km.dying ||
@@ -354,7 +356,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
 				acquire_in_progress = 1;
 			} else if (x->km.state == XFRM_STATE_ERROR ||
 				   x->km.state == XFRM_STATE_EXPIRED) {
-				if (xfrm_selector_match(&x->sel, fl, family))
+ 				if (xfrm_selector_match(&x->sel, fl, family) &&
+				    xfrm_sec_ctx_match(pol->security, x->security))
 					error = -ESRCH;
 			}
 		}
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 0cdd9a07e043..92e2b804c606 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -7,7 +7,7 @@
  * 	Kazunori MIYAZAWA @USAGI
  * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
  * 		IPv6 support
- * 	
+ *
  */
 
 #include <linux/module.h>
@@ -88,6 +88,34 @@ static int verify_encap_tmpl(struct rtattr **xfrma)
 	return 0;
 }
 
+
+static inline int verify_sec_ctx_len(struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_SEC_CTX - 1];
+	struct xfrm_user_sec_ctx *uctx;
+	int len = 0;
+
+	if (!rt)
+		return 0;
+
+	if (rt->rta_len < sizeof(*uctx))
+		return -EINVAL;
+
+	uctx = RTA_DATA(rt);
+
+	if (uctx->ctx_len > PAGE_SIZE)
+		return -EINVAL;
+
+	len += sizeof(struct xfrm_user_sec_ctx);
+	len += uctx->ctx_len;
+
+	if (uctx->len != len)
+		return -EINVAL;
+
+	return 0;
+}
+
+
 static int verify_newsa_info(struct xfrm_usersa_info *p,
 			     struct rtattr **xfrma)
 {
@@ -145,6 +173,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
 		goto out;
 	if ((err = verify_encap_tmpl(xfrma)))
 		goto out;
+	if ((err = verify_sec_ctx_len(xfrma)))
+		goto out;
 
 	err = -EINVAL;
 	switch (p->mode) {
@@ -209,6 +239,30 @@ static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_a
 	return 0;
 }
 
+
+static inline int xfrm_user_sec_ctx_size(struct xfrm_policy *xp)
+{
+	struct xfrm_sec_ctx *xfrm_ctx = xp->security;
+	int len = 0;
+
+	if (xfrm_ctx) {
+		len += sizeof(struct xfrm_user_sec_ctx);
+		len += xfrm_ctx->ctx_len;
+	}
+	return len;
+}
+
+static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg)
+{
+	struct xfrm_user_sec_ctx *uctx;
+
+	if (!u_arg)
+		return 0;
+
+	uctx = RTA_DATA(u_arg);
+	return security_xfrm_state_alloc(x, uctx);
+}
+
 static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
 {
 	memcpy(&x->id, &p->id, sizeof(x->id));
@@ -253,6 +307,9 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
 	if (err)
 		goto error;
 
+	if ((err = attach_sec_ctx(x, xfrma[XFRMA_SEC_CTX-1])))
+		goto error;
+
 	x->km.seq = p->seq;
 
 	return x;
@@ -272,11 +329,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
 	int err;
 	struct km_event c;
 
-	err = verify_newsa_info(p, (struct rtattr **) xfrma);
+	err = verify_newsa_info(p, (struct rtattr **)xfrma);
 	if (err)
 		return err;
 
-	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
+	x = xfrm_state_construct(p, (struct rtattr **)xfrma, &err);
 	if (!x)
 		return err;
 
@@ -390,6 +447,19 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
 	if (x->encap)
 		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
 
+	if (x->security) {
+		int ctx_size = sizeof(struct xfrm_sec_ctx) +
+				x->security->ctx_len;
+		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+		uctx->exttype = XFRMA_SEC_CTX;
+		uctx->len = ctx_size;
+		uctx->ctx_doi = x->security->ctx_doi;
+		uctx->ctx_alg = x->security->ctx_alg;
+		uctx->ctx_len = x->security->ctx_len;
+		memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len);
+	}
 	nlh->nlmsg_len = skb->tail - b;
 out:
 	sp->this_idx++;
@@ -603,6 +673,18 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
 	return verify_policy_dir(p->dir);
 }
 
+static int copy_from_user_sec_ctx(struct xfrm_policy *pol, struct rtattr **xfrma)
+{
+	struct rtattr *rt = xfrma[XFRMA_SEC_CTX-1];
+	struct xfrm_user_sec_ctx *uctx;
+
+	if (!rt)
+		return 0;
+
+	uctx = RTA_DATA(rt);
+	return security_xfrm_policy_alloc(pol, uctx);
+}
+
 static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
 			   int nr)
 {
@@ -681,7 +763,10 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p,
 	}
 
 	copy_from_user_policy(xp, p);
-	err = copy_from_user_tmpl(xp, xfrma);
+
+	if (!(err = copy_from_user_tmpl(xp, xfrma)))
+		err = copy_from_user_sec_ctx(xp, xfrma);
+
 	if (err) {
 		*errp = err;
 		kfree(xp);
@@ -700,10 +785,13 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 	int excl;
 
 	err = verify_newpolicy_info(p);
+	if (err)
+		return err;
+	err = verify_sec_ctx_len((struct rtattr **)xfrma);
 	if (err)
 		return err;
 
-	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
+	xp = xfrm_policy_construct(p, (struct rtattr **)xfrma, &err);
 	if (!xp)
 		return err;
 
@@ -761,6 +849,27 @@ rtattr_failure:
 	return -1;
 }
 
+static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb)
+{
+	if (xp->security) {
+		int ctx_size = sizeof(struct xfrm_sec_ctx) +
+				xp->security->ctx_len;
+		struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size);
+		struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+		uctx->exttype = XFRMA_SEC_CTX;
+		uctx->len = ctx_size;
+		uctx->ctx_doi = xp->security->ctx_doi;
+		uctx->ctx_alg = xp->security->ctx_alg;
+		uctx->ctx_len = xp->security->ctx_len;
+		memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len);
+	}
+	return 0;
+
+ rtattr_failure:
+	return -1;
+}
+
 static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
 {
 	struct xfrm_dump_info *sp = ptr;
@@ -782,6 +891,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr
 	copy_to_user_policy(xp, p, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 out:
@@ -852,8 +963,25 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr
 
 	if (p->index)
 		xp = xfrm_policy_byid(p->dir, p->index, delete);
-	else
-		xp = xfrm_policy_bysel(p->dir, &p->sel, delete);
+	else {
+		struct rtattr **rtattrs = (struct rtattr **)xfrma;
+		struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1];
+		struct xfrm_policy tmp;
+
+		err = verify_sec_ctx_len(rtattrs);
+		if (err)
+			return err;
+
+		memset(&tmp, 0, sizeof(struct xfrm_policy));
+		if (rt) {
+			struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt);
+
+			if ((err = security_xfrm_policy_alloc(&tmp, uctx)))
+				return err;
+		}
+		xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete);
+		security_xfrm_policy_free(&tmp);
+	}
 	if (xp == NULL)
 		return -ENOENT;
 
@@ -1224,6 +1352,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
 
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 
 	nlh->nlmsg_len = skb->tail - b;
 	return skb->len;
@@ -1241,6 +1371,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
@@ -1324,6 +1455,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
 	copy_to_user_policy(xp, &upe->pol, dir);
 	if (copy_to_user_tmpl(xp, skb) < 0)
 		goto nlmsg_failure;
+	if (copy_to_user_sec_ctx(xp, skb))
+		goto nlmsg_failure;
 	upe->hard = !!hard;
 
 	nlh->nlmsg_len = skb->tail - b;
@@ -1341,6 +1474,7 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, struct km_eve
 
 	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
 	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
+	len += RTA_SPACE(xfrm_user_sec_ctx_size(xp));
 	skb = alloc_skb(len, GFP_ATOMIC);
 	if (skb == NULL)
 		return -ENOMEM;
diff --git a/security/Kconfig b/security/Kconfig
index 64d3f1e9ca85..34f593410d57 100644
--- a/security/Kconfig
+++ b/security/Kconfig
@@ -54,6 +54,19 @@ config SECURITY_NETWORK
 	  implement socket and networking access controls.
 	  If you are unsure how to answer this question, answer N.
 
+config SECURITY_NETWORK_XFRM
+	bool "XFRM (IPSec) Networking Security Hooks"
+	depends on XFRM && SECURITY_NETWORK
+	help
+	  This enables the XFRM (IPSec) networking security hooks.
+	  If enabled, a security module can use these hooks to
+	  implement per-packet access controls based on labels
+	  derived from IPSec policy.  Non-IPSec communications are
+	  designated as unlabelled, and only sockets authorized
+	  to communicate unlabelled data can send without using
+	  IPSec.
+	  If you are unsure how to answer this question, answer N.
+
 config SECURITY_CAPABILITIES
 	tristate "Default Linux Capabilities"
 	depends on SECURITY
diff --git a/security/dummy.c b/security/dummy.c
index 3ca5f2b828a0..a15c54709fde 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -776,8 +776,42 @@ static inline int dummy_sk_alloc_security (struct sock *sk, int family, gfp_t pr
 static inline void dummy_sk_free_security (struct sock *sk)
 {
 }
+
+static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir)
+{
+	return 0;
+}
 #endif	/* CONFIG_SECURITY_NETWORK */
 
+#ifdef CONFIG_SECURITY_NETWORK_XFRM
+static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static inline int dummy_xfrm_policy_clone_security(struct xfrm_policy *old, struct xfrm_policy *new)
+{
+	return 0;
+}
+
+static void dummy_xfrm_policy_free_security(struct xfrm_policy *xp)
+{
+}
+
+static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx)
+{
+	return 0;
+}
+
+static void dummy_xfrm_state_free_security(struct xfrm_state *x)
+{
+}
+
+static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir)
+{
+	return 0;
+}
+#endif /* CONFIG_SECURITY_NETWORK_XFRM */
 static int dummy_register_security (const char *name, struct security_operations *ops)
 {
 	return -EINVAL;
@@ -970,7 +1004,16 @@ void security_fixup_ops (struct security_operations *ops)
 	set_to_dummy_if_null(ops, socket_getpeersec);
 	set_to_dummy_if_null(ops, sk_alloc_security);
 	set_to_dummy_if_null(ops, sk_free_security);
-#endif	/* CONFIG_SECURITY_NETWORK */
+	set_to_dummy_if_null(ops, sk_getsid);
+ #endif	/* CONFIG_SECURITY_NETWORK */
+#ifdef  CONFIG_SECURITY_NETWORK_XFRM
+	set_to_dummy_if_null(ops, xfrm_policy_alloc_security);
+	set_to_dummy_if_null(ops, xfrm_policy_clone_security);
+	set_to_dummy_if_null(ops, xfrm_policy_free_security);
+	set_to_dummy_if_null(ops, xfrm_state_alloc_security);
+	set_to_dummy_if_null(ops, xfrm_state_free_security);
+	set_to_dummy_if_null(ops, xfrm_policy_lookup);
+#endif	/* CONFIG_SECURITY_NETWORK_XFRM */
 #ifdef CONFIG_KEYS
 	set_to_dummy_if_null(ops, key_alloc);
 	set_to_dummy_if_null(ops, key_free);
-- 
cgit v1.2.3


From 89cee8b1cbb9dac40c92ef1968aea2b45f82fd18 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Dec 2005 23:14:27 -0800
Subject: [IPV4]: Safer reassembly

Another spin of Herbert Xu's "safer ip reassembly" patch
for 2.6.16.

(The original patch is here:
http://marc.theaimsgroup.com/?l=linux-netdev&m=112281936522415&w=2
and my only contribution is to have tested it.)

This patch (optionally) does additional checks before accepting IP
fragments, which can greatly reduce the possibility of reassembling
fragments which originated from different IP datagrams.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Arthur Kepner <akepner@sgi.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 23 ++++++++++++
 include/linux/sysctl.h                 |  1 +
 include/net/inetpeer.h                 |  1 +
 include/net/ip.h                       |  2 +
 net/ipv4/inetpeer.c                    |  1 +
 net/ipv4/ip_fragment.c                 | 68 +++++++++++++++++++++++++++++++++-
 net/ipv4/ip_output.c                   |  1 +
 net/ipv4/sysctl_net_ipv4.c             | 10 +++++
 8 files changed, 106 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index ebc09a159f62..2b7cf19a06ad 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -46,6 +46,29 @@ ipfrag_secret_interval - INTEGER
 	for the hash secret) for IP fragments.
 	Default: 600
 
+ipfrag_max_dist - INTEGER
+	ipfrag_max_dist is a non-negative integer value which defines the 
+	maximum "disorder" which is allowed among fragments which share a 
+	common IP source address. Note that reordering of packets is 
+	not unusual, but if a large number of fragments arrive from a source 
+	IP address while a particular fragment queue remains incomplete, it 
+	probably indicates that one or more fragments belonging to that queue 
+	have been lost. When ipfrag_max_dist is positive, an additional check 
+	is done on fragments before they are added to a reassembly queue - if 
+	ipfrag_max_dist (or more) fragments have arrived from a particular IP 
+	address between additions to any IP fragment queue using that source 
+	address, it's presumed that one or more fragments in the queue are 
+	lost. The existing fragment queue will be dropped, and a new one 
+	started. An ipfrag_max_dist value of zero disables this check.
+
+	Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
+	result in unnecessarily dropping fragment queues when normal
+	reordering of packets occurs, which could lead to poor application 
+	performance. Using a very large value, e.g. 50000, increases the 
+	likelihood of incorrectly reassembling IP fragments that originate 
+	from different IP datagrams, which could result in data corruption.
+	Default: 64
+
 INET peer storage:
 
 inet_peer_threshold - INTEGER
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 4be34ef8c2f7..93fa765e47d3 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -390,6 +390,7 @@ enum
 	NET_IPV4_ICMP_ERRORS_USE_INBOUND_IFADDR=109,
 	NET_TCP_CONG_CONTROL=110,
 	NET_TCP_ABC=111,
+	NET_IPV4_IPFRAG_MAX_DIST=112,
 };
 
 enum {
diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h
index 7fda471002b6..0965515f40cf 100644
--- a/include/net/inetpeer.h
+++ b/include/net/inetpeer.h
@@ -25,6 +25,7 @@ struct inet_peer
 	__u32			v4daddr;	/* peer's address */
 	__u16			avl_height;
 	__u16			ip_id_count;	/* IP ID for the next packet */
+	atomic_t		rid;		/* Frag reception counter */
 	__u32			tcp_ts;
 	unsigned long		tcp_ts_stamp;
 };
diff --git a/include/net/ip.h b/include/net/ip.h
index e4563bbee6ea..4d6294ba038e 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -45,6 +45,7 @@ struct inet_skb_parm
 #define IPSKB_TRANSLATED	2
 #define IPSKB_FORWARDED		4
 #define IPSKB_XFRM_TUNNEL_SIZE	8
+#define IPSKB_FRAG_COMPLETE	16
 };
 
 struct ipcm_cookie
@@ -168,6 +169,7 @@ extern int sysctl_ipfrag_high_thresh;
 extern int sysctl_ipfrag_low_thresh;
 extern int sysctl_ipfrag_time;
 extern int sysctl_ipfrag_secret_interval;
+extern int sysctl_ipfrag_max_dist;
 
 /* From inetpeer.c */
 extern int inet_peer_threshold;
diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c
index 2fc3fd38924f..ce5fe3f74a3d 100644
--- a/net/ipv4/inetpeer.c
+++ b/net/ipv4/inetpeer.c
@@ -401,6 +401,7 @@ struct inet_peer *inet_getpeer(__u32 daddr, int create)
 		return NULL;
 	n->v4daddr = daddr;
 	atomic_set(&n->refcnt, 1);
+	atomic_set(&n->rid, 0);
 	n->ip_id_count = secure_ip_id(daddr);
 	n->tcp_ts_stamp = 0;
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index 8ce0ce2ee48e..ce2b70ce4018 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -22,6 +22,7 @@
  *		Patrick McHardy :	LRU queue of frag heads for evictor.
  */
 
+#include <linux/compiler.h>
 #include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
@@ -38,6 +39,7 @@
 #include <net/ip.h>
 #include <net/icmp.h>
 #include <net/checksum.h>
+#include <net/inetpeer.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 #include <linux/inet.h>
@@ -56,6 +58,8 @@
 int sysctl_ipfrag_high_thresh = 256*1024;
 int sysctl_ipfrag_low_thresh = 192*1024;
 
+int sysctl_ipfrag_max_dist = 64;
+
 /* Important NOTE! Fragment queue must be destroyed before MSL expires.
  * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL.
  */
@@ -89,8 +93,10 @@ struct ipq {
 	spinlock_t	lock;
 	atomic_t	refcnt;
 	struct timer_list timer;	/* when will this queue expire?		*/
-	int		iif;
 	struct timeval	stamp;
+	int             iif;
+	unsigned int    rid;
+	struct inet_peer *peer;
 };
 
 /* Hash table. */
@@ -195,6 +201,9 @@ static void ip_frag_destroy(struct ipq *qp, int *work)
 	BUG_TRAP(qp->last_in&COMPLETE);
 	BUG_TRAP(del_timer(&qp->timer) == 0);
 
+	if (qp->peer)
+		inet_putpeer(qp->peer);
+
 	/* Release all fragment data. */
 	fp = qp->fragments;
 	while (fp) {
@@ -353,6 +362,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph, u32 user)
 	qp->meat = 0;
 	qp->fragments = NULL;
 	qp->iif = 0;
+	qp->peer = sysctl_ipfrag_max_dist ? inet_getpeer(iph->saddr, 1) : NULL;
 
 	/* Initialize a timer for this entry. */
 	init_timer(&qp->timer);
@@ -398,6 +408,56 @@ static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 	return ip_frag_create(hash, iph, user);
 }
 
+/* Is the fragment too far ahead to be part of ipq? */
+static inline int ip_frag_too_far(struct ipq *qp)
+{
+	struct inet_peer *peer = qp->peer;
+	unsigned int max = sysctl_ipfrag_max_dist;
+	unsigned int start, end;
+
+	int rc;
+
+	if (!peer || !max)
+		return 0;
+
+	start = qp->rid;
+	end = atomic_inc_return(&peer->rid);
+	qp->rid = end;
+
+	rc = qp->fragments && (end - start) > max;
+
+	if (rc) {
+		IP_INC_STATS_BH(IPSTATS_MIB_REASMFAILS);
+	}
+
+	return rc;
+}
+
+static int ip_frag_reinit(struct ipq *qp)
+{
+	struct sk_buff *fp;
+
+	if (!mod_timer(&qp->timer, jiffies + sysctl_ipfrag_time)) {
+		atomic_inc(&qp->refcnt);
+		return -ETIMEDOUT;
+	}
+
+	fp = qp->fragments;
+	do {
+		struct sk_buff *xp = fp->next;
+		frag_kfree_skb(fp, NULL);
+		fp = xp;
+	} while (fp);
+
+	qp->last_in = 0;
+	qp->len = 0;
+	qp->meat = 0;
+	qp->fragments = NULL;
+	qp->iif = 0;
+
+	return 0;
+}
+
 /* Add new segment to existing queue. */
 static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 {
@@ -408,6 +468,12 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
 	if (qp->last_in & COMPLETE)
 		goto err;
 
+	if (!(IPCB(skb)->flags & IPSKB_FRAG_COMPLETE) &&
+	    unlikely(ip_frag_too_far(qp)) && unlikely(ip_frag_reinit(qp))) {
+		ipq_kill(qp);
+		goto err;
+	}
+
  	offset = ntohs(skb->nh.iph->frag_off);
 	flags = offset & ~IP_OFFSET;
 	offset &= IP_OFFSET;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index eba64e2bd397..2a830de3a699 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -445,6 +445,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 
 	hlen = iph->ihl * 4;
 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
 	 * some transformers could create wrong frag_list or break existing
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 01444a02b48b..dbf82955aabe 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -22,6 +22,7 @@
 extern int sysctl_ip_nonlocal_bind;
 
 #ifdef CONFIG_SYSCTL
+static int zero;
 static int tcp_retr1_max = 255; 
 static int ip_local_port_range_min[] = { 1, 1 };
 static int ip_local_port_range_max[] = { 65535, 65535 };
@@ -613,6 +614,15 @@ ctl_table ipv4_table[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies
 	},
+	{
+		.ctl_name	= NET_IPV4_IPFRAG_MAX_DIST,
+		.procname	= "ipfrag_max_dist",
+		.data		= &sysctl_ipfrag_max_dist,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec_minmax,
+		.extra1		= &zero
+	},
 	{
 		.ctl_name	= NET_TCP_NO_METRICS_SAVE,
 		.procname	= "tcp_no_metrics_save",
-- 
cgit v1.2.3


From ca304b6104ffdd120bb6687a88a0625e58bc71cd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:15:40 -0800
Subject: [IPV6]: Introduce inet6_rsk()

And inet6_rsk_offset in inet_request_sock, for the same reasons as
inet_sock's pinfo6 member.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h               |  4 ++++
 include/linux/ipv6.h             | 39 +++++++++++++++++++++++++++++++++------
 net/ipv4/inet_diag.c             |  8 ++++----
 net/ipv6/inet6_connection_sock.c |  4 ++--
 net/ipv6/tcp_ipv6.c              | 19 +++++++++----------
 5 files changed, 52 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 33e8a19a1a0f..5a560daeade5 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -110,6 +110,10 @@ struct ip_options {
 
 struct inet_request_sock {
 	struct request_sock	req;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	u16			inet6_rsk_offset;
+	/* 2 bytes hole, try to pack */
+#endif
 	u32			loc_addr;
 	u32			rmt_addr;
 	u16			rmt_port;
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0b922785d98..7d3e86d9576e 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -199,18 +199,17 @@ static inline int inet6_iif(const struct sk_buff *skb)
 	return IP6CB(skb)->iif;
 }
 
-struct tcp6_request_sock {
-	struct tcp_request_sock	req;
+struct inet6_request_sock {
 	struct in6_addr		loc_addr;
 	struct in6_addr		rmt_addr;
 	struct sk_buff		*pktopts;
 	int			iif;
 };
 
-static inline struct tcp6_request_sock *tcp6_rsk(const struct request_sock *sk)
-{
-	return (struct tcp6_request_sock *)sk;
-}
+struct tcp6_request_sock {
+	struct tcp_request_sock	  tcp6rsk_tcp;
+	struct inet6_request_sock tcp6rsk_inet6;
+};
 
 /**
  * struct ipv6_pinfo - ipv6 private area
@@ -304,6 +303,28 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 	return inet_sk(__sk)->pinet6;
 }
 
+static inline struct inet6_request_sock *
+			inet6_rsk(const struct request_sock *rsk)
+{
+	return (struct inet6_request_sock *)(((u8 *)rsk) +
+					     inet_rsk(rsk)->inet6_rsk_offset);
+}
+
+static inline u32 inet6_rsk_offset(struct request_sock *rsk)
+{
+	return rsk->rsk_ops->obj_size - sizeof(struct inet6_request_sock);
+}
+
+static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *ops)
+{
+	struct request_sock *req = reqsk_alloc(ops);
+
+	if (req != NULL)
+		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
+
+	return req;
+}
+
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 {
 	return (struct raw6_sock *)sk;
@@ -361,6 +382,12 @@ static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 	return NULL;
 }
 
+static inline struct inet6_request_sock *
+			inet6_rsk(const struct request_sock *rsk)
+{
+	return NULL;
+}
+
 static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 {
 	return NULL;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 39061ed53cfd..3ce73b141d7e 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk,
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 	if (r->idiag_family == AF_INET6) {
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-			       &tcp6_rsk(req)->loc_addr);
+			       &inet6_rsk(req)->loc_addr);
 		ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-			       &tcp6_rsk(req)->rmt_addr);
+			       &inet6_rsk(req)->rmt_addr);
 	}
 #endif
 	nlh->nlmsg_len = skb->tail - b;
@@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk,
 				entry.saddr =
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->loc_addr.s6_addr32 :
+					inet6_rsk(req)->loc_addr.s6_addr32 :
 #endif
 					&ireq->loc_addr;
 				entry.daddr = 
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 					(entry.family == AF_INET6) ?
-					tcp6_rsk(req)->rmt_addr.s6_addr32 :
+					inet6_rsk(req)->rmt_addr.s6_addr32 :
 #endif
 					&ireq->rmt_addr;
 				entry.dport = ntohs(ireq->rmt_port);
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 04ff44344f90..fe874eeaa40c 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -61,7 +61,7 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,
 						     lopt->nr_table_entries)];
 	     (req = *prev) != NULL;
 	     prev = &req->dl_next) {
-		const struct tcp6_request_sock *treq = tcp6_rsk(req);
+		const struct inet6_request_sock *treq = inet6_rsk(req);
 
 		if (inet_rsk(req)->rmt_port == rport &&
 		    req->rsk_ops->family == AF_INET6 &&
@@ -85,7 +85,7 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
-	const u32 h = inet6_synq_hash(&tcp6_rsk(req)->rmt_addr,
+	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr,
 				      inet_rsk(req)->rmt_port,
 				      lopt->hash_rnd, lopt->nr_table_entries);
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 5a10d30cec4a..c2472d771664 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -656,7 +656,7 @@ out:
 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
 			      struct dst_entry *dst)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct sk_buff * skb;
 	struct ipv6_txoptions *opt = NULL;
@@ -722,8 +722,8 @@ done:
 
 static void tcp_v6_reqsk_destructor(struct request_sock *req)
 {
-	if (tcp6_rsk(req)->pktopts)
-		kfree_skb(tcp6_rsk(req)->pktopts);
+	if (inet6_rsk(req)->pktopts)
+		kfree_skb(inet6_rsk(req)->pktopts);
 }
 
 static struct request_sock_ops tcp6_request_sock_ops = {
@@ -956,7 +956,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
  */
 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 {
-	struct tcp6_request_sock *treq;
+	struct inet6_request_sock *treq;
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_options_received tmp_opt;
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -981,7 +981,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp6_request_sock_ops);
+	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -994,7 +994,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
 	tcp_openreq_init(req, &tmp_opt, skb);
 
-	treq = tcp6_rsk(req);
+	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
 	TCP_ECN_create_request(req, skb->h.th);
@@ -1035,7 +1035,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 					  struct request_sock *req,
 					  struct dst_entry *dst)
 {
-	struct tcp6_request_sock *treq = tcp6_rsk(req);
+	struct inet6_request_sock *treq = inet6_rsk(req);
 	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
 	struct tcp6_sock *newtcp6sk;
 	struct inet_sock *newinet;
@@ -1723,14 +1723,13 @@ static int tcp_v6_destroy_sock(struct sock *sk)
 static void get_openreq6(struct seq_file *seq, 
 			 struct sock *sk, struct request_sock *req, int i, int uid)
 {
-	struct in6_addr *dest, *src;
 	int ttd = req->expires - jiffies;
+	struct in6_addr *src = &inet6_rsk(req)->loc_addr;
+	struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
 
 	if (ttd < 0)
 		ttd = 0;
 
-	src = &tcp6_rsk(req)->loc_addr;
-	dest = &tcp6_rsk(req)->rmt_addr;
 	seq_printf(seq,
 		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
 		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
-- 
cgit v1.2.3


From 8292a17a399ffb7c5c8b083db4ad994e090055f7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:15:52 -0800
Subject: [ICSK]: Rename struct tcp_func to struct inet_connection_sock_af_ops

And move it to struct inet_connection_sock. DCCP will use it in the
upcoming changesets.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h                |  2 --
 include/net/inet_connection_sock.h | 26 ++++++++++++++++++++
 include/net/tcp.h                  | 50 +-------------------------------------
 include/net/transp_v6.h            |  2 +-
 net/ipv4/syncookies.c              |  4 +--
 net/ipv4/tcp.c                     |  8 +++---
 net/ipv4/tcp_input.c               | 11 +++++----
 net/ipv4/tcp_ipv4.c                | 11 ++++-----
 net/ipv4/tcp_minisocks.c           |  8 +++---
 net/ipv4/tcp_output.c              | 17 ++++++-------
 net/ipv6/ipv6_sockglue.c           |  2 +-
 net/ipv6/tcp_ipv6.c                | 28 ++++++++++-----------
 12 files changed, 71 insertions(+), 98 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 0e1da6602e05..4e1434007f44 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -295,8 +295,6 @@ struct tcp_sock {
 
 	struct sk_buff_head	out_of_order_queue; /* Out of order segments go here */
 
-	struct tcp_func		*af_specific;	/* Operations which are AF_INET{4,6} specific	*/
-
  	__u32	rcv_wnd;	/* Current receiver window		*/
 	__u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
 	__u32	write_seq;	/* Tail(+1) of data held in tcp send buffer */
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index ccc81a1c550c..9e20d201e951 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -15,6 +15,7 @@
 #ifndef _INET_CONNECTION_SOCK_H
 #define _INET_CONNECTION_SOCK_H
 
+#include <linux/compiler.h>
 #include <linux/ip.h>
 #include <linux/string.h>
 #include <linux/timer.h>
@@ -29,6 +30,29 @@ struct inet_bind_bucket;
 struct inet_hashinfo;
 struct tcp_congestion_ops;
 
+/*
+ * Pointers to address related TCP functions
+ * (i.e. things that depend on the address family)
+ */
+struct inet_connection_sock_af_ops {
+	int	    (*queue_xmit)(struct sk_buff *skb, int ipfragok);
+	void	    (*send_check)(struct sock *sk, int len,
+				  struct sk_buff *skb);
+	int	    (*rebuild_header)(struct sock *sk);
+	int	    (*conn_request)(struct sock *sk, struct sk_buff *skb);
+	struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb,
+				      struct request_sock *req,
+				      struct dst_entry *dst);
+	int	    (*remember_stamp)(struct sock *sk);
+	__u16	    net_header_len;
+	int	    (*setsockopt)(struct sock *sk, int level, int optname, 
+				  char __user *optval, int optlen);
+	int	    (*getsockopt)(struct sock *sk, int level, int optname, 
+				  char __user *optval, int __user *optlen);
+	void	    (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
+	int sockaddr_len;
+};
+
 /** inet_connection_sock - INET connection oriented sock
  *
  * @icsk_accept_queue:	   FIFO of established children 
@@ -37,6 +61,7 @@ struct tcp_congestion_ops;
  * @icsk_retransmit_timer: Resend (no ack)
  * @icsk_rto:		   Retransmit timeout
  * @icsk_ca_ops		   Pluggable congestion control hook
+ * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
  * @icsk_ca_state:	   Congestion control state
  * @icsk_retransmits:	   Number of unrecovered [RTO] timeouts
  * @icsk_pending:	   Scheduled timer event
@@ -55,6 +80,7 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
 	struct tcp_congestion_ops *icsk_ca_ops;
+	struct inet_connection_sock_af_ops *icsk_af_ops;
 	__u8			  icsk_ca_state;
 	__u8			  icsk_retransmits;
 	__u8			  icsk_pending;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d78025f9fbea..83b117a25c2a 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -224,53 +224,6 @@ extern atomic_t tcp_memory_allocated;
 extern atomic_t tcp_sockets_allocated;
 extern int tcp_memory_pressure;
 
-/*
- *	Pointers to address related TCP functions
- *	(i.e. things that depend on the address family)
- */
-
-struct tcp_func {
-	int			(*queue_xmit)		(struct sk_buff *skb,
-							 int ipfragok);
-
-	void			(*send_check)		(struct sock *sk,
-							 struct tcphdr *th,
-							 int len,
-							 struct sk_buff *skb);
-
-	int			(*rebuild_header)	(struct sock *sk);
-
-	int			(*conn_request)		(struct sock *sk,
-							 struct sk_buff *skb);
-
-	struct sock *		(*syn_recv_sock)	(struct sock *sk,
-							 struct sk_buff *skb,
-							 struct request_sock *req,
-							 struct dst_entry *dst);
-    
-	int			(*remember_stamp)	(struct sock *sk);
-
-	__u16			net_header_len;
-
-	int			(*setsockopt)		(struct sock *sk, 
-							 int level, 
-							 int optname, 
-							 char __user *optval, 
-							 int optlen);
-
-	int			(*getsockopt)		(struct sock *sk, 
-							 int level, 
-							 int optname, 
-							 char __user *optval, 
-							 int __user *optlen);
-
-
-	void			(*addr2sockaddr)	(struct sock *sk,
-							 struct sockaddr *);
-
-	int sockaddr_len;
-};
-
 /*
  * The next routines deal with comparing 32 bit unsigned ints
  * and worry about wraparound (automatic with unsigned arithmetic).
@@ -405,8 +358,7 @@ extern void			tcp_parse_options(struct sk_buff *skb,
  *	TCP v4 functions exported for the inet6 API
  */
 
-extern void		       	tcp_v4_send_check(struct sock *sk, 
-						  struct tcphdr *th, int len, 
+extern void		       	tcp_v4_send_check(struct sock *sk, int len,
 						  struct sk_buff *skb);
 
 extern int			tcp_v4_conn_request(struct sock *sk,
diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 4e86f2de6638..61f724c1036f 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -44,7 +44,7 @@ extern int			datagram_send_ctl(struct msghdr *msg,
 /*
  *	address family specific functions
  */
-extern struct tcp_func	ipv4_specific;
+extern struct inet_connection_sock_af_ops ipv4_specific;
 
 extern int inet6_destroy_sock(struct sock *sk);
 
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index a34e60ea48a1..e20be3331f67 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -173,10 +173,10 @@ static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,
 					   struct request_sock *req,
 					   struct dst_entry *dst)
 {
-	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sock *child;
 
-	child = tp->af_specific->syn_recv_sock(sk, skb, req, dst);
+	child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst);
 	if (child)
 		inet_csk_reqsk_queue_add(sk, req, child);
 	else
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ef98b14ac56d..eacfe6a3442c 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1696,8 +1696,8 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int err = 0;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->setsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->setsockopt(sk, level, optname,
+						     optval, optlen);
 
 	/* This is a string value all the others are int's */
 	if (optname == TCP_CONGESTION) {
@@ -1939,8 +1939,8 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval,
 	int val, len;
 
 	if (level != SOL_TCP)
-		return tp->af_specific->getsockopt(sk, level, optname,
-						   optval, optlen);
+		return icsk->icsk_af_ops->getsockopt(sk, level, optname,
+						     optval, optlen);
 
 	if (get_user(len, optlen))
 		return -EFAULT;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf2e23086bce..7de6184d4bd8 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4071,8 +4071,10 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
+		icsk = inet_csk(sk);
+
 		/* Make sure socket is routed, for correct metrics.  */
-		tp->af_specific->rebuild_header(sk);
+		icsk->icsk_af_ops->rebuild_header(sk);
 
 		tcp_init_metrics(sk);
 
@@ -4098,8 +4100,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 			sk_wake_async(sk, 0, POLL_OUT);
 		}
 
-		icsk = inet_csk(sk);
-
 		if (sk->sk_write_pending ||
 		    icsk->icsk_accept_queue.rskq_defer_accept ||
 		    icsk->icsk_ack.pingpong) {
@@ -4220,6 +4220,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			  struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int queued = 0;
 
 	tp->rx_opt.saw_tstamp = 0;
@@ -4236,7 +4237,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 			goto discard;
 
 		if(th->syn) {
-			if(tp->af_specific->conn_request(sk, skb) < 0)
+			if (icsk->icsk_af_ops->conn_request(sk, skb) < 0)
 				return 1;
 
 			/* Now we have several options: In theory there is 
@@ -4349,7 +4350,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
 				/* Make sure socket is routed, for
 				 * correct metrics.
 				 */
-				tp->af_specific->rebuild_header(sk);
+				icsk->icsk_af_ops->rebuild_header(sk);
 
 				tcp_init_metrics(sk);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 2aa19c89a94a..704cf2105795 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -86,8 +86,7 @@ int sysctl_tcp_low_latency;
 /* Socket used for sending RSTs */
 static struct socket *tcp_socket;
 
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb);
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb);
 
 struct inet_hashinfo __cacheline_aligned tcp_hashinfo = {
 	.lhash_lock	= RW_LOCK_UNLOCKED,
@@ -645,10 +644,10 @@ out:
 }
 
 /* This routine computes an IPv4 TCP checksum. */
-void tcp_v4_send_check(struct sock *sk, struct tcphdr *th, int len,
-		       struct sk_buff *skb)
+void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct inet_sock *inet = inet_sk(sk);
+	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_HW) {
 		th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0);
@@ -1383,7 +1382,7 @@ int tcp_v4_tw_remember_stamp(struct inet_timewait_sock *tw)
 	return 0;
 }
 
-struct tcp_func ipv4_specific = {
+struct inet_connection_sock_af_ops ipv4_specific = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1434,7 +1433,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
-	tp->af_specific = &ipv4_specific;
+	icsk->icsk_af_ops = &ipv4_specific;
 
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 1b66a2ac4321..9c029683a626 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -274,18 +274,18 @@ kill:
 void tcp_time_wait(struct sock *sk, int state, int timeo)
 {
 	struct inet_timewait_sock *tw = NULL;
+	const struct inet_connection_sock *icsk = inet_csk(sk);
 	const struct tcp_sock *tp = tcp_sk(sk);
 	int recycle_ok = 0;
 
 	if (tcp_death_row.sysctl_tw_recycle && tp->rx_opt.ts_recent_stamp)
-		recycle_ok = tp->af_specific->remember_stamp(sk);
+		recycle_ok = icsk->icsk_af_ops->remember_stamp(sk);
 
 	if (tcp_death_row.tw_count < tcp_death_row.sysctl_max_tw_buckets)
 		tw = inet_twsk_alloc(sk, state);
 
 	if (tw != NULL) {
 		struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw);
-		const struct inet_connection_sock *icsk = inet_csk(sk);
 		const int rto = (icsk->icsk_rto << 2) - (icsk->icsk_rto >> 1);
 
 		tw->tw_rcv_wscale	= tp->rx_opt.rcv_wscale;
@@ -456,7 +456,6 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 			   struct request_sock **prev)
 {
 	struct tcphdr *th = skb->h.th;
-	struct tcp_sock *tp = tcp_sk(sk);
 	u32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK);
 	int paws_reject = 0;
 	struct tcp_options_received tmp_opt;
@@ -613,7 +612,8 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		 * ESTABLISHED STATE. If it will be dropped after
 		 * socket is created, wait for troubles.
 		 */
-		child = tp->af_specific->syn_recv_sock(sk, skb, req, NULL);
+		child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb,
+								 req, NULL);
 		if (child == NULL)
 			goto listen_overflow;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index b7325e0b406a..af1946c52c37 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -371,7 +371,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 		TCP_ECN_send(sk, tp, skb, tcp_header_size);
 	}
 
-	tp->af_specific->send_check(sk, th, skb->len, skb);
+	icsk->icsk_af_ops->send_check(sk, skb->len, skb);
 
 	if (likely(tcb->flags & TCPCB_FLAG_ACK))
 		tcp_event_ack_sent(sk, tcp_skb_pcount(skb));
@@ -381,7 +381,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
 
 	TCP_INC_STATS(TCP_MIB_OUTSEGS);
 
-	err = tp->af_specific->queue_xmit(skb, 0);
+	err = icsk->icsk_af_ops->queue_xmit(skb, 0);
 	if (unlikely(err <= 0))
 		return err;
 
@@ -638,12 +638,11 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
-	int mss_now;
-
 	/* Calculate base mss without TCP options:
 	   It is MMS_S - sizeof(tcphdr) of rfc1122
 	 */
-	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+	int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len -
+		       sizeof(struct tcphdr));
 
 	/* Clamp it (mss_clamp does not include tcp options) */
 	if (mss_now > tp->rx_opt.mss_clamp)
@@ -705,9 +704,9 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	xmit_size_goal = mss_now;
 
 	if (doing_tso) {
-		xmit_size_goal = 65535 -
-			tp->af_specific->net_header_len -
-			tp->ext_header_len - tp->tcp_header_len;
+		xmit_size_goal = (65535 -
+				  inet_csk(sk)->icsk_af_ops->net_header_len -
+				  tp->ext_header_len - tp->tcp_header_len);
 
 		if (tp->max_window &&
 		    (xmit_size_goal > (tp->max_window >> 1)))
@@ -1422,7 +1421,7 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
 	   (sysctl_tcp_retrans_collapse != 0))
 		tcp_retrans_try_collapse(sk, skb, cur_mss);
 
-	if(tp->af_specific->rebuild_header(sk))
+	if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
 		return -EHOSTUNREACH; /* Routing failure or similar. */
 
 	/* Some Solaris stacks overoptimize and ignore the FIN on a
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 3620718defe6..b6b63fa8454c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -170,7 +170,7 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 				sock_prot_inc_use(&tcp_prot);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
-				tp->af_specific = &ipv4_specific;
+				inet_csk(sk)->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
 				tcp_sync_mss(sk, tp->pmtu_cookie);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index c2472d771664..8ce8a1359d2b 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -68,14 +68,14 @@
 
 static void	tcp_v6_send_reset(struct sk_buff *skb);
 static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
-static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
+static void	tcp_v6_send_check(struct sock *sk, int len, 
 				  struct sk_buff *skb);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
 static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
 
-static struct tcp_func ipv6_mapped;
-static struct tcp_func ipv6_specific;
+static struct inet_connection_sock_af_ops ipv6_mapped;
+static struct inet_connection_sock_af_ops ipv6_specific;
 
 int inet6_csk_bind_conflict(const struct sock *sk,
 			    const struct inet_bind_bucket *tb)
@@ -107,9 +107,7 @@ static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 static void tcp_v6_hash(struct sock *sk)
 {
 	if (sk->sk_state != TCP_CLOSE) {
-		struct tcp_sock *tp = tcp_sk(sk);
-
-		if (tp->af_specific == &ipv6_mapped) {
+		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
 			tcp_prot.hash(sk);
 			return;
 		}
@@ -417,14 +415,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		sin.sin_port = usin->sin6_port;
 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-		tp->af_specific = &ipv6_mapped;
+		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
 
 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 
 		if (err) {
 			tp->ext_header_len = exthdrlen;
-			tp->af_specific = &ipv6_specific;
+			inet_csk(sk)->icsk_af_ops = &ipv6_specific;
 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
 			goto failure;
 		} else {
@@ -751,10 +749,10 @@ static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
 }
 
 
-static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len, 
-			      struct sk_buff *skb)
+static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct tcphdr *th = skb->h.th;
 
 	if (skb->ip_summed == CHECKSUM_HW) {
 		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,  0);
@@ -1070,7 +1068,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 
 		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
 
-		newtp->af_specific = &ipv6_mapped;
+		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
 		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
 		newnp->pktoptions  = NULL;
 		newnp->opt	   = NULL;
@@ -1084,7 +1082,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		 */
 
 		/* It is tricky place. Until this moment IPv4 tcp
-		   worked with IPv6 af_tcp.af_specific.
+		   worked with IPv6 icsk.icsk_af_ops.
 		   Sync it now.
 		 */
 		tcp_sync_mss(newsk, newtp->pmtu_cookie);
@@ -1631,7 +1629,7 @@ static int tcp_v6_remember_stamp(struct sock *sk)
 	return 0;
 }
 
-static struct tcp_func ipv6_specific = {
+static struct inet_connection_sock_af_ops ipv6_specific = {
 	.queue_xmit	=	tcp_v6_xmit,
 	.send_check	=	tcp_v6_send_check,
 	.rebuild_header	=	tcp_v6_rebuild_header,
@@ -1650,7 +1648,7 @@ static struct tcp_func ipv6_specific = {
  *	TCP over IPv4 via INET6 API
  */
 
-static struct tcp_func ipv6_mapped = {
+static struct inet_connection_sock_af_ops ipv6_mapped = {
 	.queue_xmit	=	ip_queue_xmit,
 	.send_check	=	tcp_v4_send_check,
 	.rebuild_header	=	inet_sk_rebuild_header,
@@ -1700,7 +1698,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 
 	sk->sk_state = TCP_CLOSE;
 
-	tp->af_specific = &ipv6_specific;
+	icsk->icsk_af_ops = &ipv6_specific;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
-- 
cgit v1.2.3


From 3305b80c214c642b89cd5c21af83bc91ec13f8bd Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 13 Dec 2005 23:16:37 -0800
Subject: [IP]: Simplify and consolidate MSG_PEEK error handling

When a packet is obtained from skb_recv_datagram with MSG_PEEK enabled
it is left on the socket receive queue.  This means that when we detect
a checksum error we have to be careful when trying to free the packet
as someone could have dequeued it in the time being.

Currently this delicate logic is duplicated three times between UDPv4,
UDPv6 and RAWv6.  This patch moves them into a one place and simplifies
the code somewhat.

This is based on a suggestion by Eric Dumazet.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 ++
 net/core/datagram.c    | 36 ++++++++++++++++++++++++++++++++++++
 net/ipv4/udp.c         | 15 +--------------
 net/ipv6/raw.c         | 16 +++-------------
 net/ipv6/udp.c         | 16 ++--------------
 5 files changed, 44 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 8c5d6001a923..97f6580ce039 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1239,6 +1239,8 @@ extern int	       skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
 							int hlen,
 							struct iovec *iov);
 extern void	       skb_free_datagram(struct sock *sk, struct sk_buff *skb);
+extern void	       skb_kill_datagram(struct sock *sk, struct sk_buff *skb,
+					 unsigned int flags);
 extern unsigned int    skb_checksum(const struct sk_buff *skb, int offset,
 				    int len, unsigned int csum);
 extern int	       skb_copy_bits(const struct sk_buff *skb, int offset,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 1bcfef51ac58..f8d322e1ea92 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -47,6 +47,7 @@
 #include <linux/rtnetlink.h>
 #include <linux/poll.h>
 #include <linux/highmem.h>
+#include <linux/spinlock.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -199,6 +200,41 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 	kfree_skb(skb);
 }
 
+/**
+ *	skb_kill_datagram - Free a datagram skbuff forcibly
+ *	@sk: socket
+ *	@skb: datagram skbuff
+ *	@flags: MSG_ flags
+ *
+ *	This function frees a datagram skbuff that was received by
+ *	skb_recv_datagram.  The flags argument must match the one
+ *	used for skb_recv_datagram.
+ *
+ *	If the MSG_PEEK flag is set, and the packet is still on the
+ *	receive queue of the socket, it will be taken off the queue
+ *	before it is freed.
+ *
+ *	This function currently only disables BH when acquiring the
+ *	sk_receive_queue lock.  Therefore it must not be used in a
+ *	context where that lock is acquired in an IRQ context.
+ */
+
+void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+{
+	if (flags & MSG_PEEK) {
+		spin_lock_bh(&sk->sk_receive_queue.lock);
+		if (skb == skb_peek(&sk->sk_receive_queue)) {
+			__skb_unlink(skb, &sk->sk_receive_queue);
+			atomic_dec(&skb->users);
+		}
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+	}
+
+	kfree_skb(skb);
+}
+
+EXPORT_SYMBOL(skb_kill_datagram);
+
 /**
  *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
  *	@skb: buffer to copy
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 2422a5f7195d..012c4621e40a 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -846,20 +846,7 @@ out:
 csum_copy_err:
 	UDP_INC_STATS_BH(UDP_MIB_INERRORS);
 
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
-
-	skb_free_datagram(sk, skb);
+	skb_kill_datagram(sk, skb, flags);
 
 	if (noblock)
 		return -EAGAIN;	
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a66900cda2af..66f1d12ea578 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -32,6 +32,7 @@
 #include <linux/icmpv6.h>
 #include <linux/netfilter.h>
 #include <linux/netfilter_ipv6.h>
+#include <linux/skbuff.h>
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
 #include <asm/bug.h>
@@ -433,25 +434,14 @@ out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
+	skb_kill_datagram(sk, skb, flags);
 
 	/* Error for blocking case is chosen to masquerade
 	   as some normal condition.
 	 */
 	err = (flags&MSG_DONTWAIT) ? -EAGAIN : -EHOSTUNREACH;
 	/* FIXME: increment a raw6 drops counter here */
-	goto out_free;
+	goto out;
 }
 
 static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl,
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 5cc8731eb55b..d8538dcea813 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -36,6 +36,7 @@
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 #include <linux/init.h>
+#include <linux/skbuff.h>
 #include <asm/uaccess.h>
 
 #include <net/sock.h>
@@ -300,20 +301,7 @@ out:
 	return err;
 
 csum_copy_err:
-	/* Clear queue. */
-	if (flags&MSG_PEEK) {
-		int clear = 0;
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (skb == skb_peek(&sk->sk_receive_queue)) {
-			__skb_unlink(skb, &sk->sk_receive_queue);
-			clear = 1;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		if (clear)
-			kfree_skb(skb);
-	}
-
-	skb_free_datagram(sk, skb);
+	skb_kill_datagram(sk, skb, flags);
 
 	if (flags & MSG_DONTWAIT) {
 		UDP6_INC_STATS_USER(UDP_MIB_INERRORS);
-- 
cgit v1.2.3


From b9750ce13c08aa8a71a9b138d741f3046aefd991 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:22:54 -0800
Subject: [IPV6]: Generalise some functions

Using sk->sk_protocol instead of IPPROTO_TCP.

Will be used by DCCPv6 in the next changesets.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h                |   2 +
 include/net/inet6_connection_sock.h |  13 ++-
 net/ipv6/af_inet6.c                 |  52 ++++++++++++
 net/ipv6/inet6_connection_sock.c    | 103 ++++++++++++++++++++++++
 net/ipv6/tcp_ipv6.c                 | 153 +-----------------------------------
 5 files changed, 173 insertions(+), 150 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7d3e86d9576e..69a0decfbdf4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -297,6 +297,8 @@ struct tcp6_sock {
 	struct ipv6_pinfo inet6;
 };
 
+extern int inet6_sk_rebuild_header(struct sock *sk);
+
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk)
 {
diff --git a/include/net/inet6_connection_sock.h b/include/net/inet6_connection_sock.h
index aa30ebde70dc..b33b438bffcc 100644
--- a/include/net/inet6_connection_sock.h
+++ b/include/net/inet6_connection_sock.h
@@ -15,8 +15,15 @@
 
 #include <linux/types.h>
 
-struct sock;
+struct in6_addr;
+struct inet_bind_bucket;
 struct request_sock;
+struct sk_buff;
+struct sock;
+struct sockaddr;
+
+extern int inet6_csk_bind_conflict(const struct sock *sk,
+				   const struct inet_bind_bucket *tb);
 
 extern struct request_sock *inet6_csk_search_req(const struct sock *sk,
 						 struct request_sock ***prevp,
@@ -28,4 +35,8 @@ extern struct request_sock *inet6_csk_search_req(const struct sock *sk,
 extern void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 					   struct request_sock *req,
 					   const unsigned long timeout);
+
+extern void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr);
+
+extern int inet6_csk_xmit(struct sk_buff *skb, int ipfragok);
 #endif /* _INET6_CONNECTION_SOCK_H */
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index d9546380fa04..fd040e9a1f47 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -609,6 +609,58 @@ inet6_unregister_protosw(struct inet_protosw *p)
 	}
 }
 
+int inet6_sk_rebuild_header(struct sock *sk)
+{
+	int err;
+	struct dst_entry *dst;
+	struct ipv6_pinfo *np = inet6_sk(sk);
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct in6_addr *final_p = NULL, final;
+		struct flowi fl;
+
+		memset(&fl, 0, sizeof(fl));
+		fl.proto = sk->sk_protocol;
+		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+		fl.fl6_flowlabel = np->flow_label;
+		fl.oif = sk->sk_bound_dev_if;
+		fl.fl_ip_dport = inet->dport;
+		fl.fl_ip_sport = inet->sport;
+
+		if (np->opt && np->opt->srcrt) {
+			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+			ipv6_addr_copy(&final, &fl.fl6_dst);
+			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+			final_p = &final;
+		}
+
+		err = ip6_dst_lookup(sk, &dst, &fl);
+		if (err) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(inet6_sk_rebuild_header);
+
 int
 snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
 {
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index fe874eeaa40c..792f90f0f9ec 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -21,8 +21,34 @@
 
 #include <net/addrconf.h>
 #include <net/inet_connection_sock.h>
+#include <net/inet_ecn.h>
+#include <net/inet_hashtables.h>
+#include <net/ip6_route.h>
 #include <net/sock.h>
 
+int inet6_csk_bind_conflict(const struct sock *sk,
+			    const struct inet_bind_bucket *tb)
+{
+	const struct sock *sk2;
+	const struct hlist_node *node;
+
+	/* We must walk the whole port owner list in this case. -DaveM */
+	sk_for_each_bound(sk2, node, &tb->owners) {
+		if (sk != sk2 &&
+		    (!sk->sk_bound_dev_if ||
+		     !sk2->sk_bound_dev_if ||
+		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
+		    (!sk->sk_reuse || !sk2->sk_reuse ||
+		     sk2->sk_state == TCP_LISTEN) &&
+		     ipv6_rcv_saddr_equal(sk, sk2))
+			break;
+	}
+
+	return node != NULL;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_bind_conflict);
+
 /*
  * request_sock (formerly open request) hash tables.
  */
@@ -94,3 +120,80 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,
 }
 
 EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);
+
+void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
+
+	sin6->sin6_family = AF_INET6;
+	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
+	sin6->sin6_port	= inet_sk(sk)->dport;
+	/* We do not store received flowlabel for TCP */
+	sin6->sin6_flowinfo = 0;
+	sin6->sin6_scope_id = 0;
+	if (sk->sk_bound_dev_if &&
+	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
+		sin6->sin6_scope_id = sk->sk_bound_dev_if;
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_addr2sockaddr);
+
+int inet6_csk_xmit(struct sk_buff *skb, int ipfragok)
+{
+	struct sock *sk = skb->sk;
+	struct inet_sock *inet = inet_sk(sk);
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct flowi fl;
+	struct dst_entry *dst;
+	struct in6_addr *final_p = NULL, final;
+
+	memset(&fl, 0, sizeof(fl));
+	fl.proto = sk->sk_protocol;
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+	fl.fl6_flowlabel = np->flow_label;
+	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+	fl.oif = sk->sk_bound_dev_if;
+	fl.fl_ip_sport = inet->sport;
+	fl.fl_ip_dport = inet->dport;
+
+	if (np->opt && np->opt->srcrt) {
+		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
+		ipv6_addr_copy(&final, &fl.fl6_dst);
+		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+		final_p = &final;
+	}
+
+	dst = __sk_dst_check(sk, np->dst_cookie);
+
+	if (dst == NULL) {
+		int err = ip6_dst_lookup(sk, &dst, &fl);
+
+		if (err) {
+			sk->sk_err_soft = -err;
+			return err;
+		}
+
+		if (final_p)
+			ipv6_addr_copy(&fl.fl6_dst, final_p);
+
+		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
+			sk->sk_route_caps = 0;
+			return err;
+		}
+
+		ip6_dst_store(sk, dst, NULL);
+		sk->sk_route_caps = dst->dev->features &
+			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
+	}
+
+	skb->dst = dst_clone(dst);
+
+	/* Restore final destination back after routing done */
+	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+
+	return ip6_xmit(sk, skb, &fl, np->opt, 0);
+}
+
+EXPORT_SYMBOL_GPL(inet6_csk_xmit);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 8ce8a1359d2b..2f932ce72610 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -72,32 +72,10 @@ static void	tcp_v6_send_check(struct sock *sk, int len,
 				  struct sk_buff *skb);
 
 static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
-static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
 
 static struct inet_connection_sock_af_ops ipv6_mapped;
 static struct inet_connection_sock_af_ops ipv6_specific;
 
-int inet6_csk_bind_conflict(const struct sock *sk,
-			    const struct inet_bind_bucket *tb)
-{
-	const struct sock *sk2;
-	const struct hlist_node *node;
-
-	/* We must walk the whole port owner list in this case. -DaveM */
-	sk_for_each_bound(sk2, node, &tb->owners) {
-		if (sk != sk2 &&
-		    (!sk->sk_bound_dev_if ||
-		     !sk2->sk_bound_dev_if ||
-		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
-		    (!sk->sk_reuse || !sk2->sk_reuse ||
-		     sk2->sk_state == TCP_LISTEN) &&
-		     ipv6_rcv_saddr_equal(sk, sk2))
-			break;
-	}
-
-	return node != NULL;
-}
-
 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
 {
 	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
@@ -1500,129 +1478,6 @@ do_time_wait:
 	goto discard_it;
 }
 
-static int tcp_v6_rebuild_header(struct sock *sk)
-{
-	int err;
-	struct dst_entry *dst;
-	struct ipv6_pinfo *np = inet6_sk(sk);
-
-	dst = __sk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
-		struct inet_sock *inet = inet_sk(sk);
-		struct in6_addr *final_p = NULL, final;
-		struct flowi fl;
-
-		memset(&fl, 0, sizeof(fl));
-		fl.proto = IPPROTO_TCP;
-		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-		fl.fl6_flowlabel = np->flow_label;
-		fl.oif = sk->sk_bound_dev_if;
-		fl.fl_ip_dport = inet->dport;
-		fl.fl_ip_sport = inet->sport;
-
-		if (np->opt && np->opt->srcrt) {
-			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-			ipv6_addr_copy(&final, &fl.fl6_dst);
-			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-			final_p = &final;
-		}
-
-		err = ip6_dst_lookup(sk, &dst, &fl);
-		if (err) {
-			sk->sk_route_caps = 0;
-			return err;
-		}
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-	}
-
-	return 0;
-}
-
-static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
-{
-	struct sock *sk = skb->sk;
-	struct inet_sock *inet = inet_sk(sk);
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct flowi fl;
-	struct dst_entry *dst;
-	struct in6_addr *final_p = NULL, final;
-
-	memset(&fl, 0, sizeof(fl));
-	fl.proto = IPPROTO_TCP;
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
-	fl.fl6_flowlabel = np->flow_label;
-	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
-	fl.oif = sk->sk_bound_dev_if;
-	fl.fl_ip_sport = inet->sport;
-	fl.fl_ip_dport = inet->dport;
-
-	if (np->opt && np->opt->srcrt) {
-		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
-		ipv6_addr_copy(&final, &fl.fl6_dst);
-		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
-		final_p = &final;
-	}
-
-	dst = __sk_dst_check(sk, np->dst_cookie);
-
-	if (dst == NULL) {
-		int err = ip6_dst_lookup(sk, &dst, &fl);
-
-		if (err) {
-			sk->sk_err_soft = -err;
-			return err;
-		}
-
-		if (final_p)
-			ipv6_addr_copy(&fl.fl6_dst, final_p);
-
-		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
-			sk->sk_route_caps = 0;
-			return err;
-		}
-
-		ip6_dst_store(sk, dst, NULL);
-		sk->sk_route_caps = dst->dev->features &
-			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
-	}
-
-	skb->dst = dst_clone(dst);
-
-	/* Restore final destination back after routing done */
-	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
-
-	return ip6_xmit(sk, skb, &fl, np->opt, 0);
-}
-
-static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
-{
-	struct ipv6_pinfo *np = inet6_sk(sk);
-	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
-
-	sin6->sin6_family = AF_INET6;
-	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
-	sin6->sin6_port	= inet_sk(sk)->dport;
-	/* We do not store received flowlabel for TCP */
-	sin6->sin6_flowinfo = 0;
-	sin6->sin6_scope_id = 0;
-	if (sk->sk_bound_dev_if &&
-	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
-		sin6->sin6_scope_id = sk->sk_bound_dev_if;
-}
-
 static int tcp_v6_remember_stamp(struct sock *sk)
 {
 	/* Alas, not yet... */
@@ -1630,9 +1485,9 @@ static int tcp_v6_remember_stamp(struct sock *sk)
 }
 
 static struct inet_connection_sock_af_ops ipv6_specific = {
-	.queue_xmit	=	tcp_v6_xmit,
+	.queue_xmit	=	inet6_csk_xmit,
 	.send_check	=	tcp_v6_send_check,
-	.rebuild_header	=	tcp_v6_rebuild_header,
+	.rebuild_header	=	inet6_sk_rebuild_header,
 	.conn_request	=	tcp_v6_conn_request,
 	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
 	.remember_stamp	=	tcp_v6_remember_stamp,
@@ -1640,7 +1495,7 @@ static struct inet_connection_sock_af_ops ipv6_specific = {
 
 	.setsockopt	=	ipv6_setsockopt,
 	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
 
@@ -1659,7 +1514,7 @@ static struct inet_connection_sock_af_ops ipv6_mapped = {
 
 	.setsockopt	=	ipv6_setsockopt,
 	.getsockopt	=	ipv6_getsockopt,
-	.addr2sockaddr	=	v6_addr2sockaddr,
+	.addr2sockaddr	=	inet6_csk_addr2sockaddr,
 	.sockaddr_len	=	sizeof(struct sockaddr_in6)
 };
 
-- 
cgit v1.2.3


From 0fa1a53e1f055a6c790f40e7728f42a825b29248 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:23:09 -0800
Subject: [IPV6]: Introduce inet6_timewait_sock

Out of tcp6_timewait_sock, that now is just an aggregation of
inet_timewait_sock and inet6_timewait_sock, using tw_ipv6_offset in struct
inet_timewait_sock, that is common to the IPv6 transport protocols that use
timewait sockets, like DCCP and TCP.

tw_ipv6_offset plays the struct inet_sock pinfo6 role, i.e. for the generic
code to find the IPv6 area in a timewait sock.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             | 32 +++++++++++++++++++++-----------
 include/net/inet6_hashtables.h   |  6 +++---
 include/net/inet_timewait_sock.h |  3 ++-
 net/ipv4/inet_diag.c             |  6 +++---
 net/ipv4/tcp_minisocks.c         |  8 +++++---
 net/ipv6/addrconf.c              |  2 +-
 net/ipv6/tcp_ipv6.c              | 12 ++++++------
 7 files changed, 41 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 69a0decfbdf4..7d3908594fac 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -348,26 +348,36 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 
 #include <linux/tcp.h>
 
+struct inet6_timewait_sock {
+	struct in6_addr tw_v6_daddr;
+	struct in6_addr	tw_v6_rcv_saddr;
+};
+
 struct tcp6_timewait_sock {
-	struct tcp_timewait_sock tw_v6_sk;
-	struct in6_addr		 tw_v6_daddr;
-	struct in6_addr		 tw_v6_rcv_saddr;
+	struct tcp_timewait_sock   tcp6tw_tcp;
+	struct inet6_timewait_sock tcp6tw_inet6;
 };
 
-static inline struct tcp6_timewait_sock *tcp6_twsk(const struct sock *sk)
+static inline u16 inet6_tw_offset(const struct proto *prot)
+{
+	return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock);
+}
+
+static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
 {
-	return (struct tcp6_timewait_sock *)sk;
+	return (struct inet6_timewait_sock *)(((u8 *)sk) +
+					      inet_twsk(sk)->tw_ipv6_offset);
 }
 
-static inline struct in6_addr *__tcp_v6_rcv_saddr(const struct sock *sk)
+static inline struct in6_addr *__inet6_rcv_saddr(const struct sock *sk)
 {
 	return likely(sk->sk_state != TCP_TIME_WAIT) ?
-		&inet6_sk(sk)->rcv_saddr : &tcp6_twsk(sk)->tw_v6_rcv_saddr;
+		&inet6_sk(sk)->rcv_saddr : &inet6_twsk(sk)->tw_v6_rcv_saddr;
 }
 
-static inline struct in6_addr *tcp_v6_rcv_saddr(const struct sock *sk)
+static inline struct in6_addr *inet6_rcv_saddr(const struct sock *sk)
 {
-	return sk->sk_family == AF_INET6 ? __tcp_v6_rcv_saddr(sk) : NULL;
+	return sk->sk_family == AF_INET6 ? __inet6_rcv_saddr(sk) : NULL;
 }
 
 static inline int inet_v6_ipv6only(const struct sock *sk)
@@ -395,8 +405,8 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk)
 	return NULL;
 }
 
-#define __tcp_v6_rcv_saddr(__sk)	NULL
-#define tcp_v6_rcv_saddr(__sk)		NULL
+#define __inet6_rcv_saddr(__sk)	NULL
+#define inet6_rcv_saddr(__sk)	NULL
 #define tcp_twsk_ipv6only(__sk)		0
 #define inet_v6_ipv6only(__sk)		0
 #endif /* defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) */
diff --git a/include/net/inet6_hashtables.h b/include/net/inet6_hashtables.h
index a4a204f99ea6..25f708ff020e 100644
--- a/include/net/inet6_hashtables.h
+++ b/include/net/inet6_hashtables.h
@@ -110,10 +110,10 @@ static inline struct sock *
 
 		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
 		   sk->sk_family		== PF_INET6) {
-			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
+			const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 
-			if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
-			    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
+			if (ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
+			    ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
 			    (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
 				goto hit;
 		}
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index 28f7b2103505..ca240f856c46 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -127,7 +127,8 @@ struct inet_timewait_sock {
 	__u16			tw_num;
 	/* And these are ours. */
 	__u8			tw_ipv6only:1;
-	/* 31 bits hole, try to pack */
+	/* 15 bits hole, try to pack */
+	__u16			tw_ipv6_offset;
 	int			tw_timeout;
 	unsigned long		tw_ttd;
 	struct inet_bind_bucket	*tw_tb;
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 3ce73b141d7e..c49908192047 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk,
 		r->idiag_inode = 0;
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
 		if (r->idiag_family == AF_INET6) {
-			const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
+			const struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
 
 			ipv6_addr_copy((struct in6_addr *)r->id.idiag_src,
-				       &tcp6tw->tw_v6_rcv_saddr);
+				       &tw6->tw_v6_rcv_saddr);
 			ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst,
-				       &tcp6tw->tw_v6_daddr);
+				       &tw6->tw_v6_daddr);
 		}
 #endif
 		nlh->nlmsg_len = skb->tail - b;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 9c029683a626..2b9b7f6c7f7c 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -298,10 +298,12 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		if (tw->tw_family == PF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
-			struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+			struct inet6_timewait_sock *tw6;
 
-			ipv6_addr_copy(&tcp6tw->tw_v6_daddr, &np->daddr);
-			ipv6_addr_copy(&tcp6tw->tw_v6_rcv_saddr, &np->rcv_saddr);
+			tw->tw_ipv6_offset = inet6_tw_offset(sk->sk_prot);
+			tw6 = inet6_twsk((struct sock *)tw);
+			ipv6_addr_copy(&tw6->tw_v6_daddr, &np->daddr);
+			ipv6_addr_copy(&tw6->tw_v6_rcv_saddr, &np->rcv_saddr);
 			tw->tw_ipv6only = np->ipv6only;
 		}
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index a60585fd85ad..704fb73e6c5f 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1195,7 +1195,7 @@ struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *
 int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)
 {
 	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;
-	const struct in6_addr *sk2_rcv_saddr6 = tcp_v6_rcv_saddr(sk2);
+	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2);
 	u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr;
 	u32 sk2_rcv_saddr = inet_rcv_saddr(sk2);
 	int sk_ipv6only = ipv6_only_sock(sk);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2f932ce72610..cb880079daf3 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -138,14 +138,14 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
 
 	/* Check TIME-WAIT sockets first. */
 	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
+		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
 
 		tw = inet_twsk(sk2);
 
 		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
 		   sk2->sk_family		== PF_INET6	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr)	&&
-		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr)	&&
+		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
+		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
 			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
 			struct tcp_sock *tp = tcp_sk(sk);
@@ -1663,14 +1663,14 @@ static void get_timewait6_sock(struct seq_file *seq,
 {
 	struct in6_addr *dest, *src;
 	__u16 destp, srcp;
-	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
+	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
 	int ttd = tw->tw_ttd - jiffies;
 
 	if (ttd < 0)
 		ttd = 0;
 
-	dest = &tcp6tw->tw_v6_daddr;
-	src  = &tcp6tw->tw_v6_rcv_saddr;
+	dest = &tw6->tw_v6_daddr;
+	src  = &tw6->tw_v6_rcv_saddr;
 	destp = ntohs(tw->tw_dport);
 	srcp  = ntohs(tw->tw_sport);
 
-- 
cgit v1.2.3


From 6d6ee43e0b8b8d4847627fd43739b98ec2b9404f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:25:19 -0800
Subject: [TWSK]: Introduce struct timewait_sock_ops

So that we can share several timewait sockets related functions and
make the timewait mini sockets infrastructure closer to the request
mini sockets one.

Next changesets will take advantage of this, moving more code out of
TCP and DCCP v4 and v6 to common infrastructure.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h             |  3 +-
 include/net/inet_timewait_sock.h |  3 +-
 include/net/sock.h               |  4 +--
 include/net/tcp.h                |  3 ++
 include/net/timewait_sock.h      | 31 ++++++++++++++++++
 net/core/sock.c                  | 21 ++++++------
 net/dccp/ipv4.c                  |  9 ++++-
 net/dccp/ipv6.c                  |  6 +++-
 net/ipv4/inet_timewait_sock.c    |  5 +--
 net/ipv4/tcp_ipv4.c              | 71 ++++++++++++++++++++++++----------------
 net/ipv6/tcp_ipv6.c              | 25 +++++---------
 11 files changed, 118 insertions(+), 63 deletions(-)
 create mode 100644 include/net/timewait_sock.h

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 7d3908594fac..a0d04891fe12 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -360,7 +360,8 @@ struct tcp6_timewait_sock {
 
 static inline u16 inet6_tw_offset(const struct proto *prot)
 {
-	return prot->twsk_obj_size - sizeof(struct inet6_timewait_sock);
+	return prot->twsk_prot->twsk_obj_size -
+			sizeof(struct inet6_timewait_sock);
 }
 
 static inline struct inet6_timewait_sock *inet6_twsk(const struct sock *sk)
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index ca240f856c46..e396a65473d7 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -26,6 +26,7 @@
 
 #include <net/sock.h>
 #include <net/tcp_states.h>
+#include <net/timewait_sock.h>
 
 #include <asm/atomic.h>
 
@@ -200,7 +201,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw)
 		printk(KERN_DEBUG "%s timewait_sock %p released\n",
 		       tw->tw_prot->name, tw);
 #endif
-		kmem_cache_free(tw->tw_prot->twsk_slab, tw);
+		kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
 		module_put(owner);
 	}
 }
diff --git a/include/net/sock.h b/include/net/sock.h
index 0fbae85c6d55..91d28957dc10 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -493,6 +493,7 @@ extern void sk_stream_kill_queues(struct sock *sk);
 extern int sk_wait_data(struct sock *sk, long *timeo);
 
 struct request_sock_ops;
+struct timewait_sock_ops;
 
 /* Networking protocol blocks we attach to sockets.
  * socket layer -> transport layer interface
@@ -557,11 +558,10 @@ struct proto {
 	kmem_cache_t		*slab;
 	unsigned int		obj_size;
 
-	kmem_cache_t		*twsk_slab;
-	unsigned int		twsk_obj_size;
 	atomic_t		*orphan_count;
 
 	struct request_sock_ops	*rsk_prot;
+	struct timewait_sock_ops *twsk_prot;
 
 	struct module		*owner;
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 83b117a25c2a..176221cd0cce 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -287,6 +287,9 @@ extern int			tcp_rcv_established(struct sock *sk,
 
 extern void			tcp_rcv_space_adjust(struct sock *sk);
 
+extern int			tcp_twsk_unique(struct sock *sk,
+						struct sock *sktw, void *twp);
+
 static inline void tcp_dec_quickack_mode(struct sock *sk,
 					 const unsigned int pkts)
 {
diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h
new file mode 100644
index 000000000000..2544281e1d5e
--- /dev/null
+++ b/include/net/timewait_sock.h
@@ -0,0 +1,31 @@
+/*
+ * NET		Generic infrastructure for Network protocols.
+ *
+ * Authors:	Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#ifndef _TIMEWAIT_SOCK_H
+#define _TIMEWAIT_SOCK_H
+
+#include <linux/slab.h>
+#include <net/sock.h>
+
+struct timewait_sock_ops {
+	kmem_cache_t	*twsk_slab;
+	unsigned int	twsk_obj_size;
+	int		(*twsk_unique)(struct sock *sk,
+				       struct sock *sktw, void *twp);
+};
+
+static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
+{
+	if (sk->sk_prot->twsk_prot->twsk_unique != NULL)
+		return sk->sk_prot->twsk_prot->twsk_unique(sk, sktw, twp);
+	return 0;
+}
+
+#endif /* _TIMEWAIT_SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 13cc3be4f056..6465b0e4c8cb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1488,7 +1488,7 @@ int proto_register(struct proto *prot, int alloc_slab)
 			}
 		}
 
-		if (prot->twsk_obj_size) {
+		if (prot->twsk_prot != NULL) {
 			static const char mask[] = "tw_sock_%s";
 
 			timewait_sock_slab_name = kmalloc(strlen(prot->name) + sizeof(mask) - 1, GFP_KERNEL);
@@ -1497,11 +1497,12 @@ int proto_register(struct proto *prot, int alloc_slab)
 				goto out_free_request_sock_slab;
 
 			sprintf(timewait_sock_slab_name, mask, prot->name);
-			prot->twsk_slab = kmem_cache_create(timewait_sock_slab_name,
-							    prot->twsk_obj_size,
-							    0, SLAB_HWCACHE_ALIGN,
-							    NULL, NULL);
-			if (prot->twsk_slab == NULL)
+			prot->twsk_prot->twsk_slab =
+				kmem_cache_create(timewait_sock_slab_name,
+						  prot->twsk_prot->twsk_obj_size,
+						  0, SLAB_HWCACHE_ALIGN,
+						  NULL, NULL);
+			if (prot->twsk_prot->twsk_slab == NULL)
 				goto out_free_timewait_sock_slab_name;
 		}
 	}
@@ -1548,12 +1549,12 @@ void proto_unregister(struct proto *prot)
 		prot->rsk_prot->slab = NULL;
 	}
 
-	if (prot->twsk_slab != NULL) {
-		const char *name = kmem_cache_name(prot->twsk_slab);
+	if (prot->twsk_prot != NULL && prot->twsk_prot->twsk_slab != NULL) {
+		const char *name = kmem_cache_name(prot->twsk_prot->twsk_slab);
 
-		kmem_cache_destroy(prot->twsk_slab);
+		kmem_cache_destroy(prot->twsk_prot->twsk_slab);
 		kfree(name);
-		prot->twsk_slab = NULL;
+		prot->twsk_prot->twsk_slab = NULL;
 	}
 }
 
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index bc28d71905e2..e11cda0cb6b3 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -20,6 +20,7 @@
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
 #include <net/sock.h>
+#include <net/timewait_sock.h>
 #include <net/tcp_states.h>
 #include <net/xfrm.h>
 
@@ -1309,6 +1310,10 @@ static struct request_sock_ops dccp_request_sock_ops = {
 	.send_reset	= dccp_v4_ctl_send_reset,
 };
 
+static struct timewait_sock_ops dccp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct inet_timewait_sock),
+};
+
 struct proto dccp_prot = {
 	.name			= "DCCP",
 	.owner			= THIS_MODULE,
@@ -1332,5 +1337,7 @@ struct proto dccp_prot = {
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp_sock),
 	.rsk_prot		= &dccp_request_sock_ops,
-	.twsk_obj_size		= sizeof(struct inet_timewait_sock),
+	.twsk_prot		= &dccp_timewait_sock_ops,
 };
+
+EXPORT_SYMBOL_GPL(dccp_prot);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index a7d2aee5b3af..4d078f5b911b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -652,6 +652,10 @@ static struct request_sock_ops dccp6_request_sock_ops = {
 	.send_reset	= dccp_v6_ctl_send_reset,
 };
 
+static struct timewait_sock_ops dccp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct dccp6_timewait_sock),
+};
+
 static void dccp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1359,7 +1363,7 @@ static struct proto dccp_v6_prot = {
 	.max_header		= MAX_DCCP_HEADER,
 	.obj_size		= sizeof(struct dccp6_sock),
 	.rsk_prot		= &dccp6_request_sock_ops,
-	.twsk_obj_size		= sizeof(struct dccp6_timewait_sock),
+	.twsk_prot		= &dccp6_timewait_sock_ops,
 };
 
 static struct inet6_protocol dccp_v6_protocol = {
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index a010e9a68811..417f126c749e 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -90,8 +90,9 @@ EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
 
 struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, const int state)
 {
-	struct inet_timewait_sock *tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_slab,
-							 SLAB_ATOMIC);
+	struct inet_timewait_sock *tw =
+		kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
+				 SLAB_ATOMIC);
 	if (tw != NULL) {
 		const struct inet_sock *inet = inet_sk(sk);
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 0b5ab04d3c5a..6728772a943a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -69,6 +69,7 @@
 #include <net/transp_v6.h>
 #include <net/ipv6.h>
 #include <net/inet_common.h>
+#include <net/timewait_sock.h>
 #include <net/xfrm.h>
 
 #include <linux/inet.h>
@@ -118,6 +119,39 @@ static inline __u32 tcp_v4_init_sequence(struct sock *sk, struct sk_buff *skb)
 					  skb->h.th->source);
 }
 
+int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
+{
+	const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	/* With PAWS, it is safe from the viewpoint
+	   of data integrity. Even without PAWS it is safe provided sequence
+	   spaces do not overlap i.e. at data rates <= 80Mbit/sec.
+
+	   Actually, the idea is close to VJ's one, only timestamp cache is
+	   held not per host, but per port pair and TW bucket is used as state
+	   holder.
+
+	   If TW bucket has been already destroyed we fall back to VJ's scheme
+	   and use initial timestamp retrieved from peer table.
+	 */
+	if (tcptw->tw_ts_recent_stamp &&
+	    (twp == NULL || (sysctl_tcp_tw_reuse &&
+			     xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
+		tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
+		if (tp->write_seq == 0)
+			tp->write_seq = 1;
+		tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
+		tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
+		sock_hold(sktw);
+		return 1;
+	}
+
+	return 0;
+}
+
+EXPORT_SYMBOL_GPL(tcp_twsk_unique);
+
 /* called with local bh disabled */
 static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
 				      struct inet_timewait_sock **twp)
@@ -142,35 +176,9 @@ static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
 		tw = inet_twsk(sk2);
 
 		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			/* With PAWS, it is safe from the viewpoint
-			   of data integrity. Even without PAWS it
-			   is safe provided sequence spaces do not
-			   overlap i.e. at data rates <= 80Mbit/sec.
-
-			   Actually, the idea is close to VJ's one,
-			   only timestamp cache is held not per host,
-			   but per port pair and TW bucket is used
-			   as state holder.
-
-			   If TW bucket has been already destroyed we
-			   fall back to VJ's scheme and use initial
-			   timestamp retrieved from peer table.
-			 */
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp || (sysctl_tcp_tw_reuse &&
-				      xtime.tv_sec -
-				      tcptw->tw_ts_recent_stamp > 1))) {
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (tp->write_seq == 0)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
+			if (twsk_unique(sk, sk2, twp))
 				goto unique;
-			} else
+			else
 				goto not_unique;
 		}
 	}
@@ -869,6 +877,11 @@ struct request_sock_ops tcp_request_sock_ops = {
 	.send_reset	=	tcp_v4_send_reset,
 };
 
+static struct timewait_sock_ops tcp_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+};
+
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct inet_request_sock *ireq;
@@ -1979,7 +1992,7 @@ struct proto tcp_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp_sock),
-	.twsk_obj_size		= sizeof(struct tcp_timewait_sock),
+	.twsk_prot		= &tcp_timewait_sock_ops,
 	.rsk_prot		= &tcp_request_sock_ops,
 };
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index e5c8a669e84e..514b57bb80b7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -60,6 +60,7 @@
 #include <net/addrconf.h>
 #include <net/snmp.h>
 #include <net/dsfield.h>
+#include <net/timewait_sock.h>
 
 #include <asm/uaccess.h>
 
@@ -147,22 +148,9 @@ static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
 		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
 		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
 		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
-			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
-			struct tcp_sock *tp = tcp_sk(sk);
-
-			if (tcptw->tw_ts_recent_stamp &&
-			    (!twp ||
-			     (sysctl_tcp_tw_reuse &&
-			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
-				/* See comment in tcp_ipv4.c */
-				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
-				if (!tp->write_seq)
-					tp->write_seq = 1;
-				tp->rx_opt.ts_recent	   = tcptw->tw_ts_recent;
-				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
-				sock_hold(sk2);
+			if (twsk_unique(sk, sk2, twp))
 				goto unique;
-			} else
+			else
 				goto not_unique;
 		}
 	}
@@ -711,6 +699,11 @@ static struct request_sock_ops tcp6_request_sock_ops = {
 	.send_reset	=	tcp_v6_send_reset
 };
 
+static struct timewait_sock_ops tcp6_timewait_sock_ops = {
+	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
+	.twsk_unique	= tcp_twsk_unique,
+};
+
 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
 {
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -1752,7 +1745,7 @@ struct proto tcpv6_prot = {
 	.sysctl_rmem		= sysctl_tcp_rmem,
 	.max_header		= MAX_TCP_HEADER,
 	.obj_size		= sizeof(struct tcp6_sock),
-	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
+	.twsk_prot		= &tcp6_timewait_sock_ops,
 	.rsk_prot		= &tcp6_request_sock_ops,
 };
 
-- 
cgit v1.2.3


From a7f5e7f164788a22eb5d3de8e2d3cee1bf58fdca Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:25:31 -0800
Subject: [INET]: Generalise tcp_v4_hash_connect

Renaming it to inet_hash_connect, making it possible to ditch
dccp_v4_hash_connect and share the same code with TCP instead.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c         |   6 +-
 include/linux/random.h        |   2 +-
 include/net/inet_hashtables.h |   3 +
 net/dccp/ipv4.c               | 160 +------------------------------------
 net/ipv4/inet_hashtables.c    | 178 ++++++++++++++++++++++++++++++++++++++++++
 net/ipv4/tcp_ipv4.c           | 173 +---------------------------------------
 6 files changed, 186 insertions(+), 336 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 7999da25fe40..79b59d986af4 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1554,10 +1554,8 @@ __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
 
 EXPORT_SYMBOL(secure_tcp_sequence_number);
 
-
-
-/* Generate secure starting point for ephemeral TCP port search */
-u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport)
+/* Generate secure starting point for ephemeral IPV4 transport port search */
+u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport)
 {
 	struct keydata *keyptr = get_keyptr();
 	u32 hash[4];
diff --git a/include/linux/random.h b/include/linux/random.h
index 7b2adb3322d5..01424a8e621c 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -52,7 +52,7 @@ extern void get_random_bytes(void *buf, int nbytes);
 void generate_random_uuid(unsigned char uuid_out[16]);
 
 extern __u32 secure_ip_id(__u32 daddr);
-extern u32 secure_tcp_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport);
+extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport);
 extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, 
 				       __u16 dport);
 extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index 07840baa9341..c83baa79f66e 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -434,4 +434,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo,
 
 	return sk;
 }
+
+extern int inet_hash_connect(struct inet_timewait_death_row *death_row,
+			     struct sock *sk);
 #endif /* _INET_HASHTABLES_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index e11cda0cb6b3..671fbf3b2379 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -54,164 +54,6 @@ void dccp_unhash(struct sock *sk)
 
 EXPORT_SYMBOL_GPL(dccp_unhash);
 
-/* called with local bh disabled */
-static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const u32 daddr = inet->rcv_saddr;
-	const u32 saddr = inet->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash);
-	const struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp != NULL) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw != NULL) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &dccp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static int dccp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (snum == 0) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
- 		int remaining = (high - low) + 1;
- 		int rover = net_random() % (high - low) + low;
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
- 		do {
- 			head = &dccp_hashinfo.bhash[inet_bhashfn(rover,
-						    dccp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == rover) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__dccp_v4_check_established(sk,
-									 rover,
-									 &tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
-						     head, rover);
- 			if (tb == NULL) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 			if (++rover > high)
- 				rover = low;
- 		} while (--remaining > 0);
-
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
- 		/* All locks still held and bhs disabled */
- 		inet_bind_hash(sk, tb, rover);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(rover);
- 			__inet_hash(&dccp_hashinfo, sk, 0);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw != NULL) {
- 			inet_twsk_deschedule(tw, &dccp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
-						 dccp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
-		__inet_hash(&dccp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __dccp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
 	struct inet_sock *inet = inet_sk(sk);
@@ -272,7 +114,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * complete initialization after this.
 	 */
 	dccp_set_state(sk, DCCP_REQUESTING);
-	err = dccp_v4_hash_connect(sk);
+	err = inet_hash_connect(&dccp_death_row, sk);
 	if (err != 0)
 		goto failure;
 
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index e8d29fe736d2..33228115cda4 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -15,12 +15,14 @@
 
 #include <linux/config.h>
 #include <linux/module.h>
+#include <linux/random.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/wait.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
+#include <net/ip.h>
 
 /*
  * Allocate and initialize a new local port bind bucket.
@@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad
 }
 
 EXPORT_SYMBOL_GPL(__inet_lookup_listener);
+
+/* called with local bh disabled */
+static int __inet_check_established(struct inet_timewait_death_row *death_row,
+				    struct sock *sk, __u16 lport,
+				    struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	struct inet_sock *inet = inet_sk(sk);
+	u32 daddr = inet->rcv_saddr;
+	u32 saddr = inet->daddr;
+	int dif = sk->sk_bound_dev_if;
+	INET_ADDR_COOKIE(acookie, saddr, daddr)
+	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		tw = inet_twsk(sk2);
+
+		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	/* Must record num and sport now. Otherwise we will see
+	 * in hash table socket with a funny identity. */
+	inet->num = lport;
+	inet->sport = htons(lport);
+	sk->sk_hash = hash;
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	return secure_ipv4_port_ephemeral(inet->rcv_saddr, inet->daddr, 
+					  inet->dport);
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ */
+int inet_hash_connect(struct inet_timewait_death_row *death_row,
+		      struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+ 	struct inet_bind_hashbucket *head;
+ 	struct inet_bind_bucket *tb;
+	int ret;
+
+ 	if (!snum) {
+ 		int low = sysctl_local_port_range[0];
+ 		int high = sysctl_local_port_range[1];
+		int range = high - low;
+ 		int i;
+		int port;
+		static u32 hint;
+		u32 offset = hint + inet_sk_port_offset(sk);
+		struct hlist_node *node;
+ 		struct inet_timewait_sock *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__inet_check_established(death_row,
+								      sk, port,
+								      &tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__inet_hash(hinfo, sk, 0);
+ 		}
+ 		spin_unlock(&head->lock);
+
+ 		if (tw) {
+ 			inet_twsk_deschedule(tw, death_row);;
+ 			inet_twsk_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
+
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	tb  = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
+		__inet_hash(hinfo, sk, 0);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __inet_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet_hash_connect);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6728772a943a..c2fe61becd61 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -152,177 +152,6 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
 
 EXPORT_SYMBOL_GPL(tcp_twsk_unique);
 
-/* called with local bh disabled */
-static int __tcp_v4_check_established(struct sock *sk, __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	u32 daddr = inet->rcv_saddr;
-	u32 saddr = inet->daddr;
-	int dif = sk->sk_bound_dev_if;
-	INET_ADDR_COOKIE(acookie, saddr, daddr)
-	const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		tw = inet_twsk(sk2);
-
-		if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) {
-			if (twsk_unique(sk, sk2, twp))
-				goto unique;
-			else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-unique:
-	/* Must record num and sport now. Otherwise we will see
-	 * in hash table socket with a funny identity. */
-	inet->num = lport;
-	inet->sport = htons(lport);
-	sk->sk_hash = hash;
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 connect_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-
-	return secure_tcp_port_ephemeral(inet->rcv_saddr, inet->daddr, 
-					 inet->dport);
-}
-
-/*
- * Bind a port for a connect operation and hash it.
- */
-static inline int tcp_v4_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + connect_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v4_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__inet_hash(&tcp_hashinfo, sk, 0);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);;
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb  = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet_hash(&tcp_hashinfo, sk, 0);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v4_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 /* This will initiate an outgoing connection. */
 int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 {
@@ -403,7 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	 * complete initialization after this.
 	 */
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v4_hash_connect(sk);
+	err = inet_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto failure;
 
-- 
cgit v1.2.3


From d8313f5ca2b1f86b7df6c99fc4b3fffa1f84e92b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:25:44 -0800
Subject: [INET6]: Generalise tcp_v6_hash_connect

Renaming it to inet6_hash_connect, making it possible to ditch
dccp_v6_hash_connect and share the same code with TCP instead.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/char/random.c       |   4 +-
 include/linux/random.h      |   4 +-
 include/net/ipv6.h          |   3 +
 net/dccp/ipv6.c             | 171 +----------------------------------------
 net/ipv6/inet6_hashtables.c | 183 +++++++++++++++++++++++++++++++++++++++++++-
 net/ipv6/tcp_ipv6.c         | 173 +----------------------------------------
 6 files changed, 190 insertions(+), 348 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index 79b59d986af4..bdfdfd28594d 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1573,7 +1573,7 @@ u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport)
 }
 
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport)
+u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dport)
 {
 	struct keydata *keyptr = get_keyptr();
 	u32 hash[12];
@@ -1584,7 +1584,7 @@ u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, __u16 dp
 
 	return twothirdsMD4Transform(daddr, hash);
 }
-EXPORT_SYMBOL(secure_tcpv6_port_ephemeral);
+EXPORT_SYMBOL(secure_ipv6_port_ephemeral);
 #endif
 
 #if defined(CONFIG_IP_DCCP) || defined(CONFIG_IP_DCCP_MODULE)
diff --git a/include/linux/random.h b/include/linux/random.h
index 01424a8e621c..5d6456bcdeba 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -53,8 +53,8 @@ void generate_random_uuid(unsigned char uuid_out[16]);
 
 extern __u32 secure_ip_id(__u32 daddr);
 extern u32 secure_ipv4_port_ephemeral(__u32 saddr, __u32 daddr, __u16 dport);
-extern u32 secure_tcpv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, 
-				       __u16 dport);
+extern u32 secure_ipv6_port_ephemeral(const __u32 *saddr, const __u32 *daddr, 
+				      __u16 dport);
 extern __u32 secure_tcp_sequence_number(__u32 saddr, __u32 daddr,
 					__u16 sport, __u16 dport);
 extern __u32 secure_tcpv6_sequence_number(__u32 *saddr, __u32 *daddr,
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 851376108ac2..e3d5d7bc8837 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -527,6 +527,9 @@ extern int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 extern int inet6_ioctl(struct socket *sock, unsigned int cmd, 
 		       unsigned long arg);
 
+extern int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+			      struct sock *sk);
+
 /*
  * reassembly.c
  */
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4d078f5b911b..71bf04eb21e1 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -84,175 +84,6 @@ static __u32 dccp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
 						   dh->dccph_sport);
 }
 
-static int __dccp_v6_check_established(struct sock *sk, const __u16 lport,
-				       struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	const unsigned int hash = inet6_ehashfn(daddr, inet->num,
-						saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&dccp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
-		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
-
-		tw = inet_twsk(sk2);
-
-		if(*((__u32 *)&(tw->tw_dport))	== ports	 &&
-		   sk2->sk_family		== PF_INET6	 &&
-		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
-		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if)
-			goto not_unique;
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &dccp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 dccp_v6_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					   np->daddr.s6_addr32,
-					   inet->dport);
-}
-
-static int dccp_v6_hash_connect(struct sock *sk)
-{
-	const unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (snum == 0) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + dccp_v6_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &dccp_hashinfo.bhash[inet_bhashfn(port,
-						    dccp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__dccp_v6_check_established(sk,
-									 port,
-									 &tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep,
-						     head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__inet6_hash(&dccp_hashinfo, sk);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &dccp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &dccp_hashinfo.bhash[inet_bhashfn(snum,
-						 dccp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet6_hash(&dccp_hashinfo, sk);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __dccp_v6_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
 			   int addr_len)
 {
@@ -403,7 +234,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->dport = usin->sin6_port;
 
 	dccp_set_state(sk, DCCP_REQUESTING);
-	err = dccp_v6_hash_connect(sk);
+	err = inet6_hash_connect(&dccp_death_row, sk);
 	if (err)
 		goto late_failure;
 	/* FIXME */
diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c
index 01d5f46d4e40..4154f3a8b6cf 100644
--- a/net/ipv6/inet6_hashtables.c
+++ b/net/ipv6/inet6_hashtables.c
@@ -5,7 +5,8 @@
  *
  *		Generic INET6 transport hashtables
  *
- * Authors:	Lotsa people, from code originally in tcp
+ * Authors:	Lotsa people, from code originally in tcp, generalised here
+ * 		by Arnaldo Carvalho de Melo <acme@mandriva.com>
  *
  *	This program is free software; you can redistribute it and/or
  *      modify it under the terms of the GNU General Public License
@@ -14,12 +15,13 @@
  */
 
 #include <linux/config.h>
-
 #include <linux/module.h>
+#include <linux/random.h>
 
 #include <net/inet_connection_sock.h>
 #include <net/inet_hashtables.h>
 #include <net/inet6_hashtables.h>
+#include <net/ip.h>
 
 struct sock *inet6_lookup_listener(struct inet_hashinfo *hashinfo,
 				   const struct in6_addr *daddr,
@@ -79,3 +81,180 @@ struct sock *inet6_lookup(struct inet_hashinfo *hashinfo,
 }
 
 EXPORT_SYMBOL_GPL(inet6_lookup);
+
+static int __inet6_check_established(struct inet_timewait_death_row *death_row,
+				     struct sock *sk, const __u16 lport,
+				     struct inet_timewait_sock **twp)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	const struct in6_addr *daddr = &np->rcv_saddr;
+	const struct in6_addr *saddr = &np->daddr;
+	const int dif = sk->sk_bound_dev_if;
+	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+	const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr,
+						inet->dport);
+	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
+	struct sock *sk2;
+	const struct hlist_node *node;
+	struct inet_timewait_sock *tw;
+
+	prefetch(head->chain.first);
+	write_lock(&head->lock);
+
+	/* Check TIME-WAIT sockets first. */
+	sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) {
+		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
+
+		tw = inet_twsk(sk2);
+
+		if(*((__u32 *)&(tw->tw_dport)) == ports		 &&
+		   sk2->sk_family	       == PF_INET6	 &&
+		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	 &&
+		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) &&
+		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
+			if (twsk_unique(sk, sk2, twp))
+				goto unique;
+			else
+				goto not_unique;
+		}
+	}
+	tw = NULL;
+
+	/* And established part... */
+	sk_for_each(sk2, node, &head->chain) {
+		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
+			goto not_unique;
+	}
+
+unique:
+	BUG_TRAP(sk_unhashed(sk));
+	__sk_add_node(sk, &head->chain);
+	sk->sk_hash = hash;
+	sock_prot_inc_use(sk->sk_prot);
+	write_unlock(&head->lock);
+
+	if (twp != NULL) {
+		*twp = tw;
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+	} else if (tw != NULL) {
+		/* Silly. Should hash-dance instead... */
+		inet_twsk_deschedule(tw, death_row);
+		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+		inet_twsk_put(tw);
+	}
+	return 0;
+
+not_unique:
+	write_unlock(&head->lock);
+	return -EADDRNOTAVAIL;
+}
+
+static inline u32 inet6_sk_port_offset(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const struct ipv6_pinfo *np = inet6_sk(sk);
+	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32,
+					  np->daddr.s6_addr32,
+					  inet->dport);
+}
+
+int inet6_hash_connect(struct inet_timewait_death_row *death_row,
+		       struct sock *sk)
+{
+	struct inet_hashinfo *hinfo = death_row->hashinfo;
+	const unsigned short snum = inet_sk(sk)->num;
+ 	struct inet_bind_hashbucket *head;
+ 	struct inet_bind_bucket *tb;
+	int ret;
+
+ 	if (snum == 0) {
+ 		const int low = sysctl_local_port_range[0];
+ 		const int high = sysctl_local_port_range[1];
+		const int range = high - low;
+ 		int i, port;
+		static u32 hint;
+		const u32 offset = hint + inet6_sk_port_offset(sk);
+		struct hlist_node *node;
+ 		struct inet_timewait_sock *tw = NULL;
+
+ 		local_bh_disable();
+		for (i = 1; i <= range; i++) {
+			port = low + (i + offset) % range;
+ 			head = &hinfo->bhash[inet_bhashfn(port, hinfo->bhash_size)];
+ 			spin_lock(&head->lock);
+
+ 			/* Does not bother with rcv_saddr checks,
+ 			 * because the established check is already
+ 			 * unique enough.
+ 			 */
+			inet_bind_bucket_for_each(tb, node, &head->chain) {
+ 				if (tb->port == port) {
+ 					BUG_TRAP(!hlist_empty(&tb->owners));
+ 					if (tb->fastreuse >= 0)
+ 						goto next_port;
+ 					if (!__inet6_check_established(death_row,
+								       sk, port,
+								       &tw))
+ 						goto ok;
+ 					goto next_port;
+ 				}
+ 			}
+
+ 			tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+						     head, port);
+ 			if (!tb) {
+ 				spin_unlock(&head->lock);
+ 				break;
+ 			}
+ 			tb->fastreuse = -1;
+ 			goto ok;
+
+ 		next_port:
+ 			spin_unlock(&head->lock);
+ 		}
+ 		local_bh_enable();
+
+ 		return -EADDRNOTAVAIL;
+
+ok:
+		hint += i;
+
+ 		/* Head lock still held and bh's disabled */
+ 		inet_bind_hash(sk, tb, port);
+		if (sk_unhashed(sk)) {
+ 			inet_sk(sk)->sport = htons(port);
+ 			__inet6_hash(hinfo, sk);
+ 		}
+ 		spin_unlock(&head->lock);
+
+ 		if (tw) {
+ 			inet_twsk_deschedule(tw, death_row);
+ 			inet_twsk_put(tw);
+ 		}
+
+		ret = 0;
+		goto out;
+ 	}
+
+ 	head = &hinfo->bhash[inet_bhashfn(snum, hinfo->bhash_size)];
+ 	tb   = inet_csk(sk)->icsk_bind_hash;
+	spin_lock_bh(&head->lock);
+
+	if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+		__inet6_hash(hinfo, sk);
+		spin_unlock_bh(&head->lock);
+		return 0;
+	} else {
+		spin_unlock(&head->lock);
+		/* No definite answer... Walk to established hash table */
+		ret = __inet6_check_established(death_row, sk, snum, NULL);
+out:
+		local_bh_enable();
+		return ret;
+	}
+}
+
+EXPORT_SYMBOL_GPL(inet6_hash_connect);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 514b57bb80b7..a682eb9093e1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -119,177 +119,6 @@ static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
 	}
 }
 
-static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
-				      struct inet_timewait_sock **twp)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-	const struct in6_addr *daddr = &np->rcv_saddr;
-	const struct in6_addr *saddr = &np->daddr;
-	const int dif = sk->sk_bound_dev_if;
-	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
-	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
-	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
-	struct sock *sk2;
-	const struct hlist_node *node;
-	struct inet_timewait_sock *tw;
-
-	prefetch(head->chain.first);
-	write_lock(&head->lock);
-
-	/* Check TIME-WAIT sockets first. */
-	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
-		const struct inet6_timewait_sock *tw6 = inet6_twsk(sk2);
-
-		tw = inet_twsk(sk2);
-
-		if(*((__u32 *)&(tw->tw_dport))	== ports	&&
-		   sk2->sk_family		== PF_INET6	&&
-		   ipv6_addr_equal(&tw6->tw_v6_daddr, saddr)	&&
-		   ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr)	&&
-		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
-			if (twsk_unique(sk, sk2, twp))
-				goto unique;
-			else
-				goto not_unique;
-		}
-	}
-	tw = NULL;
-
-	/* And established part... */
-	sk_for_each(sk2, node, &head->chain) {
-		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
-			goto not_unique;
-	}
-
-unique:
-	BUG_TRAP(sk_unhashed(sk));
-	__sk_add_node(sk, &head->chain);
-	sk->sk_hash = hash;
-	sock_prot_inc_use(sk->sk_prot);
-	write_unlock(&head->lock);
-
-	if (twp) {
-		*twp = tw;
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-	} else if (tw) {
-		/* Silly. Should hash-dance instead... */
-		inet_twsk_deschedule(tw, &tcp_death_row);
-		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
-
-		inet_twsk_put(tw);
-	}
-	return 0;
-
-not_unique:
-	write_unlock(&head->lock);
-	return -EADDRNOTAVAIL;
-}
-
-static inline u32 tcpv6_port_offset(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const struct ipv6_pinfo *np = inet6_sk(sk);
-
-	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
-					   np->daddr.s6_addr32,
-					   inet->dport);
-}
-
-static int tcp_v6_hash_connect(struct sock *sk)
-{
-	unsigned short snum = inet_sk(sk)->num;
- 	struct inet_bind_hashbucket *head;
- 	struct inet_bind_bucket *tb;
-	int ret;
-
- 	if (!snum) {
- 		int low = sysctl_local_port_range[0];
- 		int high = sysctl_local_port_range[1];
-		int range = high - low;
- 		int i;
-		int port;
-		static u32 hint;
-		u32 offset = hint + tcpv6_port_offset(sk);
-		struct hlist_node *node;
- 		struct inet_timewait_sock *tw = NULL;
-
- 		local_bh_disable();
-		for (i = 1; i <= range; i++) {
-			port = low + (i + offset) % range;
- 			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
- 			spin_lock(&head->lock);
-
- 			/* Does not bother with rcv_saddr checks,
- 			 * because the established check is already
- 			 * unique enough.
- 			 */
-			inet_bind_bucket_for_each(tb, node, &head->chain) {
- 				if (tb->port == port) {
- 					BUG_TRAP(!hlist_empty(&tb->owners));
- 					if (tb->fastreuse >= 0)
- 						goto next_port;
- 					if (!__tcp_v6_check_established(sk,
-									port,
-									&tw))
- 						goto ok;
- 					goto next_port;
- 				}
- 			}
-
- 			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
- 			if (!tb) {
- 				spin_unlock(&head->lock);
- 				break;
- 			}
- 			tb->fastreuse = -1;
- 			goto ok;
-
- 		next_port:
- 			spin_unlock(&head->lock);
- 		}
- 		local_bh_enable();
-
- 		return -EADDRNOTAVAIL;
-
-ok:
-		hint += i;
-
- 		/* Head lock still held and bh's disabled */
- 		inet_bind_hash(sk, tb, port);
-		if (sk_unhashed(sk)) {
- 			inet_sk(sk)->sport = htons(port);
- 			__inet6_hash(&tcp_hashinfo, sk);
- 		}
- 		spin_unlock(&head->lock);
-
- 		if (tw) {
- 			inet_twsk_deschedule(tw, &tcp_death_row);
- 			inet_twsk_put(tw);
- 		}
-
-		ret = 0;
-		goto out;
- 	}
-
- 	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
- 	tb   = inet_csk(sk)->icsk_bind_hash;
-	spin_lock_bh(&head->lock);
-
-	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
-		__inet6_hash(&tcp_hashinfo, sk);
-		spin_unlock_bh(&head->lock);
-		return 0;
-	} else {
-		spin_unlock(&head->lock);
-		/* No definite answer... Walk to established hash table */
-		ret = __tcp_v6_check_established(sk, snum, NULL);
-out:
-		local_bh_enable();
-		return ret;
-	}
-}
-
 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, 
 			  int addr_len)
 {
@@ -450,7 +279,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	inet->dport = usin->sin6_port;
 
 	tcp_set_state(sk, TCP_SYN_SENT);
-	err = tcp_v6_hash_connect(sk);
+	err = inet6_hash_connect(&tcp_death_row, sk);
 	if (err)
 		goto late_failure;
 
-- 
cgit v1.2.3


From 22712813620fa8e682dbfb253a60ca0131da1e07 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:25:56 -0800
Subject: [TCP]: Move the TCPF_ enum to tcp_states.h

Upcoming patches will make, for instance, ip_sockglue.c need just this enum
and not all of tcp.h.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      | 16 ----------------
 include/net/tcp_states.h | 16 ++++++++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e1434007f44..da38eea1994b 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -55,22 +55,6 @@ struct tcphdr {
 	__u16	urg_ptr;
 };
 
-#define TCP_ACTION_FIN	(1 << 7)
-
-enum {
-  TCPF_ESTABLISHED = (1 << 1),
-  TCPF_SYN_SENT  = (1 << 2),
-  TCPF_SYN_RECV  = (1 << 3),
-  TCPF_FIN_WAIT1 = (1 << 4),
-  TCPF_FIN_WAIT2 = (1 << 5),
-  TCPF_TIME_WAIT = (1 << 6),
-  TCPF_CLOSE     = (1 << 7),
-  TCPF_CLOSE_WAIT = (1 << 8),
-  TCPF_LAST_ACK  = (1 << 9),
-  TCPF_LISTEN    = (1 << 10),
-  TCPF_CLOSING   = (1 << 11) 
-};
-
 /*
  *	The union cast uses a gcc extension to avoid aliasing problems
  *  (union is compatible to any of its members)
diff --git a/include/net/tcp_states.h b/include/net/tcp_states.h
index b9d4176b2d15..b0b645988bd8 100644
--- a/include/net/tcp_states.h
+++ b/include/net/tcp_states.h
@@ -31,4 +31,20 @@ enum {
 
 #define TCP_STATE_MASK	0xF
 
+#define TCP_ACTION_FIN	(1 << 7)
+
+enum {
+	TCPF_ESTABLISHED = (1 << 1),
+	TCPF_SYN_SENT	 = (1 << 2),
+	TCPF_SYN_RECV	 = (1 << 3),
+	TCPF_FIN_WAIT1	 = (1 << 4),
+	TCPF_FIN_WAIT2	 = (1 << 5),
+	TCPF_TIME_WAIT	 = (1 << 6),
+	TCPF_CLOSE	 = (1 << 7),
+	TCPF_CLOSE_WAIT	 = (1 << 8),
+	TCPF_LAST_ACK	 = (1 << 9),
+	TCPF_LISTEN	 = (1 << 10),
+	TCPF_CLOSING	 = (1 << 11) 
+};
+
 #endif	/* _LINUX_TCP_STATES_H */
-- 
cgit v1.2.3


From d83d8461f902c672bc1bd8fbc6a94e19f092da97 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 13 Dec 2005 23:26:10 -0800
Subject: [IP_SOCKGLUE]: Remove most of the tcp specific calls

As DCCP needs to be called in the same spots.

Now we have a member in inet_sock (is_icsk), set at sock creation time from
struct inet_protosw->flags (if INET_PROTOSW_ICSK is set, like for TCP and
DCCP) to see if a struct sock instance is a inet_connection_sock for places
like the ones in ip_sockglue.c (v4 and v6) where we previously were looking if
sk_type was SOCK_STREAM, that is insufficient because we now use the same code
for DCCP, that has sk_type SOCK_DCCP.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dccp.h               |  4 ----
 include/linux/ip.h                 |  1 +
 include/linux/tcp.h                |  3 +--
 include/net/inet_connection_sock.h |  6 +++++-
 include/net/protocol.h             |  1 +
 net/dccp/diag.c                    |  2 +-
 net/dccp/input.c                   |  2 +-
 net/dccp/ipv4.c                    | 12 +++++++-----
 net/dccp/ipv6.c                    | 26 ++++++++++++++------------
 net/dccp/output.c                  |  7 ++++---
 net/dccp/proto.c                   |  2 +-
 net/ipv4/af_inet.c                 |  4 +++-
 net/ipv4/ip_sockglue.c             | 13 ++++++-------
 net/ipv4/tcp.c                     |  2 +-
 net/ipv4/tcp_input.c               | 10 ++++------
 net/ipv4/tcp_ipv4.c                | 12 ++++++------
 net/ipv4/tcp_output.c              | 18 ++++++++++--------
 net/ipv6/af_inet6.c                |  1 +
 net/ipv6/ipv6_sockglue.c           | 24 +++++++++++++-----------
 net/ipv6/tcp_ipv6.c                | 30 +++++++++++++++++-------------
 20 files changed, 97 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index 71fab4311e92..d0bdb499cf8d 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -408,8 +408,6 @@ struct dccp_ackvec;
  * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
  * @dccps_timestamp_time - time of latest TIMESTAMP option
  * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
- * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
- * @dccps_pmtu_cookie - Last pmtu seen by socket
  * @dccps_packet_size - Set thru setsockopt
  * @dccps_role - Role of this sock, one of %dccp_role
  * @dccps_ndp_count - number of Non Data Packets since last data packet
@@ -434,8 +432,6 @@ struct dccp_sock {
 	__u32				dccps_timestamp_echo;
 	__u32				dccps_packet_size;
 	unsigned long			dccps_ndp_count;
-	__u16				dccps_ext_header_len;
-	__u32				dccps_pmtu_cookie;
 	__u32				dccps_mss_cache;
 	struct dccp_options		dccps_options;
 	struct dccp_ackvec		*dccps_hc_rx_ackvec;
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 5a560daeade5..6ccc596c19c8 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -155,6 +155,7 @@ struct inet_sock {
 	__u8			mc_ttl;		/* Multicasting TTL */
 	__u8			pmtudisc;
 	unsigned		recverr : 1,
+				is_icsk : 1,	/* inet_connection_sock? */
 				freebind : 1,
 				hdrincl : 1,
 				mc_loop : 1;
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index da38eea1994b..f2bb2396853f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -238,10 +238,9 @@ struct tcp_sock {
 	__u32	snd_wl1;	/* Sequence for window update		*/
 	__u32	snd_wnd;	/* The window we expect to receive	*/
 	__u32	max_window;	/* Maximal window ever seen from peer	*/
-	__u32	pmtu_cookie;	/* Last pmtu seen by socket		*/
 	__u32	mss_cache;	/* Cached effective mss, not including SACKS */
 	__u16	xmit_size_goal;	/* Goal for segmenting output packets	*/
-	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
+	/* XXX Two bytes hole, try to pack */
 
 	__u32	window_clamp;	/* Maximal window to advertise		*/
 	__u32	rcv_ssthresh;	/* Current window clamp			*/
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index e50e2b890c6d..91888967d3e3 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -60,6 +60,7 @@ struct inet_connection_sock_af_ops {
  * @icsk_timeout:	   Timeout
  * @icsk_retransmit_timer: Resend (no ack)
  * @icsk_rto:		   Retransmit timeout
+ * @icsk_pmtu_cookie	   Last pmtu seen by socket
  * @icsk_ca_ops		   Pluggable congestion control hook
  * @icsk_af_ops		   Operations which are AF_INET{4,6} specific
  * @icsk_ca_state:	   Congestion control state
@@ -68,6 +69,7 @@ struct inet_connection_sock_af_ops {
  * @icsk_backoff:	   Backoff
  * @icsk_syn_retries:      Number of allowed SYN (or equivalent) retries
  * @icsk_probes_out:	   unanswered 0 window probes
+ * @icsk_ext_hdr_len:	   Network protocol overhead (IP/IPv6 options)
  * @icsk_ack:		   Delayed ACK control data
  */
 struct inet_connection_sock {
@@ -79,15 +81,17 @@ struct inet_connection_sock {
  	struct timer_list	  icsk_retransmit_timer;
  	struct timer_list	  icsk_delack_timer;
 	__u32			  icsk_rto;
+	__u32			  icsk_pmtu_cookie;
 	struct tcp_congestion_ops *icsk_ca_ops;
 	struct inet_connection_sock_af_ops *icsk_af_ops;
+	unsigned int		  (*icsk_sync_mss)(struct sock *sk, u32 pmtu);
 	__u8			  icsk_ca_state;
 	__u8			  icsk_retransmits;
 	__u8			  icsk_pending;
 	__u8			  icsk_backoff;
 	__u8			  icsk_syn_retries;
 	__u8			  icsk_probes_out;
-	/* 2 BYTES HOLE, TRY TO PACK! */
+	__u16			  icsk_ext_hdr_len;
 	struct {
 		__u8		  pending;	 /* ACK is pending			   */
 		__u8		  quick;	 /* Scheduled number of quick acks	   */
diff --git a/include/net/protocol.h b/include/net/protocol.h
index 357691f6a45f..a29cb29647d0 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -76,6 +76,7 @@ struct inet_protosw {
 };
 #define INET_PROTOSW_REUSE 0x01	     /* Are ports automatically reusable? */
 #define INET_PROTOSW_PERMANENT 0x02  /* Permanent protocols are unremovable. */
+#define INET_PROTOSW_ICSK      0x04  /* Is this an inet_connection_sock? */
 
 extern struct net_protocol *inet_protocol_base;
 extern struct net_protocol *inet_protos[MAX_INET_PROTOS];
diff --git a/net/dccp/diag.c b/net/dccp/diag.c
index f675d8e642d3..3f78c00e3822 100644
--- a/net/dccp/diag.c
+++ b/net/dccp/diag.c
@@ -28,7 +28,7 @@ static void dccp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_retransmits	= icsk->icsk_retransmits;
 	info->tcpi_probes	= icsk->icsk_probes_out;
 	info->tcpi_backoff	= icsk->icsk_backoff;
-	info->tcpi_pmtu		= dp->dccps_pmtu_cookie;
+	info->tcpi_pmtu		= icsk->icsk_pmtu_cookie;
 
 	if (dp->dccps_options.dccpo_send_ack_vector)
 		info->tcpi_options |= TCPI_OPT_SACK;
diff --git a/net/dccp/input.c b/net/dccp/input.c
index 9a724ff2a622..55e921bdd131 100644
--- a/net/dccp/input.c
+++ b/net/dccp/input.c
@@ -311,7 +311,7 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
 			goto out_invalid_packet;
 		}
 
-		dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
+		dccp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 
 		/*
 		 *    Step 10: Process REQUEST state (second part)
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 671fbf3b2379..c363051a7f16 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -104,9 +104,9 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->dport = usin->sin_port;
 	inet->daddr = daddr;
 
-	dp->dccps_ext_header_len = 0;
+	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt != NULL)
-		dp->dccps_ext_header_len = inet->opt->optlen;
+		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 	/*
 	 * Socket identity is still unknown (sport may be zero).
 	 * However we set state to DCCP_REQUESTING and not releasing socket
@@ -191,7 +191,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
 	mtu = dst_mtu(dst);
 
 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-	    dp->dccps_pmtu_cookie > mtu) {
+	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 		dccp_sync_mss(sk, mtu);
 
 		/*
@@ -1051,6 +1051,7 @@ struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
 int dccp_v4_init_sock(struct sock *sk)
 {
 	struct dccp_sock *dp = dccp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	static int dccp_ctl_socket_init = 1;
 
 	dccp_options_init(&dp->dccps_options);
@@ -1090,10 +1091,11 @@ int dccp_v4_init_sock(struct sock *sk)
 		dccp_ctl_socket_init = 0;
 
 	dccp_init_xmit_timers(sk);
-	inet_csk(sk)->icsk_rto = DCCP_TIMEOUT_INIT;
+	icsk->icsk_rto = DCCP_TIMEOUT_INIT;
 	sk->sk_state = DCCP_CLOSED;
 	sk->sk_write_space = dccp_write_space;
-	inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;
+	icsk->icsk_af_ops = &dccp_ipv4_af_ops;
+	icsk->icsk_sync_mss = dccp_sync_mss;
 	dp->dccps_mss_cache = 536;
 	dp->dccps_role = DCCP_ROLE_UNDEFINED;
 	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 71bf04eb21e1..599b0be21515 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -88,6 +88,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			   int addr_len)
 {
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_sock *inet = inet_sk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
@@ -158,7 +159,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	 */
 
 	if (addr_type == IPV6_ADDR_MAPPED) {
-		u32 exthdrlen = dp->dccps_ext_header_len;
+		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -170,14 +171,14 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		sin.sin_port = usin->sin6_port;
 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-		inet_csk(sk)->icsk_af_ops = &dccp_ipv6_mapped;
+		icsk->icsk_af_ops = &dccp_ipv6_mapped;
 		sk->sk_backlog_rcv = dccp_v4_do_rcv;
 
 		err = dccp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 
 		if (err) {
-			dp->dccps_ext_header_len = exthdrlen;
-			inet_csk(sk)->icsk_af_ops = &dccp_ipv6_af_ops;
+			icsk->icsk_ext_hdr_len = exthdrlen;
+			icsk->icsk_af_ops = &dccp_ipv6_af_ops;
 			sk->sk_backlog_rcv = dccp_v6_do_rcv;
 			goto failure;
 		} else {
@@ -227,9 +228,10 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 
 	ip6_dst_store(sk, dst, NULL);
 
-	dp->dccps_ext_header_len = 0;
+	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
-		dp->dccps_ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+					  np->opt->opt_nflen);
 
 	inet->dport = usin->sin6_port;
 
@@ -292,7 +294,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	np = inet6_sk(sk);
 
 	if (type == ICMPV6_PKT_TOOBIG) {
-		struct dccp_sock *dp = dccp_sk(sk);
 		struct dst_entry *dst = NULL;
 
 		if (sock_owned_by_user(sk))
@@ -332,7 +333,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
-		if (dp->dccps_pmtu_cookie > dst_mtu(dst)) {
+		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			dccp_sync_mss(sk, dst_mtu(dst));
 		} /* else let the usual retransmit timer handle it */
 		dst_release(dst);
@@ -808,7 +809,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 		   worked with IPv6 icsk.icsk_af_ops.
 		   Sync it now.
 		 */
-		dccp_sync_mss(newsk, newdp->dccps_pmtu_cookie);
+		dccp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
 
 		return newsk;
 	}
@@ -916,10 +917,10 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk,
 			sock_kfree_s(sk, opt, opt->tot_len);
 	}
 
-	newdp->dccps_ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newnp->opt)
-		newdp->dccps_ext_header_len = newnp->opt->opt_nflen +
-					      newnp->opt->opt_flen;
+		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+						     newnp->opt->opt_flen);
 
 	dccp_sync_mss(newsk, dst_mtu(dst));
 
@@ -1230,6 +1231,7 @@ static struct inet_protosw dccp_v6_protosw = {
 	.prot		= &dccp_v6_prot,
 	.ops		= &inet6_dccp_ops,
 	.capability	= -1,
+	.flags		= INET_PROTOSW_ICSK,
 };
 
 static int __init dccp_v6_init(void)
diff --git a/net/dccp/output.c b/net/dccp/output.c
index c40f7f8a328b..95a3c2c6a3ce 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -134,12 +134,13 @@ static int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
 
 unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 {
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct dccp_sock *dp = dccp_sk(sk);
-	int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len -
+	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
 		       sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext));
 
 	/* Now subtract optional transport overhead */
-	mss_now -= dp->dccps_ext_header_len;
+	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/*
 	 * FIXME: this should come from the CCID infrastructure, where, say,
@@ -152,7 +153,7 @@ unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
 	mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
 
 	/* And store cached results */
-	dp->dccps_pmtu_cookie = pmtu;
+	icsk->icsk_pmtu_cookie = pmtu;
 	dp->dccps_mss_cache = mss_now;
 
 	return mss_now;
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 51dfacd22a6e..40a4c6899051 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -712,7 +712,7 @@ static struct inet_protosw dccp_v4_protosw = {
 	.ops		= &inet_dccp_ops,
 	.capability	= -1,
 	.no_check	= 0,
-	.flags		= 0,
+	.flags		= INET_PROTOSW_ICSK,
 };
 
 /*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index d368cf249000..617e858beff1 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -302,6 +302,7 @@ lookup_protocol:
 		sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
@@ -869,7 +870,8 @@ static struct inet_protosw inetsw_array[] =
                 .ops =        &inet_stream_ops,
                 .capability = -1,
                 .no_check =   0,
-                .flags =      INET_PROTOSW_PERMANENT,
+                .flags =      INET_PROTOSW_PERMANENT |
+			      INET_PROTOSW_ICSK,
         },
 
         {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 4f2d87257309..add019c746f8 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -29,8 +29,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
-#include <net/tcp.h>
-#include <linux/tcp.h>
+#include <net/tcp_states.h>
 #include <linux/udp.h>
 #include <linux/igmp.h>
 #include <linux/netfilter.h>
@@ -427,8 +426,8 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 			err = ip_options_get_from_user(&opt, optval, optlen);
 			if (err)
 				break;
-			if (sk->sk_type == SOCK_STREAM) {
-				struct tcp_sock *tp = tcp_sk(sk);
+			if (inet->is_icsk) {
+				struct inet_connection_sock *icsk = inet_csk(sk);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				if (sk->sk_family == PF_INET ||
 				    (!((1 << sk->sk_state) &
@@ -436,10 +435,10 @@ int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval,
 				     inet->daddr != LOOPBACK4_IPV6)) {
 #endif
 					if (inet->opt)
-						tp->ext_header_len -= inet->opt->optlen;
+						icsk->icsk_ext_hdr_len -= inet->opt->optlen;
 					if (opt)
-						tp->ext_header_len += opt->optlen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+						icsk->icsk_ext_hdr_len += opt->optlen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 				}
 #endif
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index eacfe6a3442c..00aa80e93243 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1914,7 +1914,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info)
 	info->tcpi_last_data_recv = jiffies_to_msecs(now - icsk->icsk_ack.lrcvtime);
 	info->tcpi_last_ack_recv = jiffies_to_msecs(now - tp->rcv_tstamp);
 
-	info->tcpi_pmtu = tp->pmtu_cookie;
+	info->tcpi_pmtu = icsk->icsk_pmtu_cookie;
 	info->tcpi_rcv_ssthresh = tp->rcv_ssthresh;
 	info->tcpi_rtt = jiffies_to_usecs(tp->srtt)>>3;
 	info->tcpi_rttvar = jiffies_to_usecs(tp->mdev)>>2;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7de6184d4bd8..981d1203b152 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2342,7 +2342,7 @@ static int tcp_ack_update_window(struct sock *sk, struct tcp_sock *tp,
 
 			if (nwin > tp->max_window) {
 				tp->max_window = nwin;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, inet_csk(sk)->icsk_pmtu_cookie);
 			}
 		}
 	}
@@ -3967,12 +3967,12 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 					 struct tcphdr *th, unsigned len)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	int saved_clamp = tp->rx_opt.mss_clamp;
 
 	tcp_parse_options(skb, &tp->rx_opt, 0);
 
 	if (th->ack) {
-		struct inet_connection_sock *icsk;
 		/* rfc793:
 		 * "If the state is SYN-SENT then
 		 *    first check the ACK bit
@@ -4061,7 +4061,7 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		if (tp->rx_opt.sack_ok && sysctl_tcp_fack)
 			tp->rx_opt.sack_ok |= 2;
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 		/* Remember, tcp_poll() does not lock socket!
@@ -4071,8 +4071,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb,
 		mb();
 		tcp_set_state(sk, TCP_ESTABLISHED);
 
-		icsk = inet_csk(sk);
-
 		/* Make sure socket is routed, for correct metrics.  */
 		icsk->icsk_af_ops->rebuild_header(sk);
 
@@ -4173,7 +4171,7 @@ discard:
 		if (tp->ecn_flags&TCP_ECN_OK)
 			sock_set_flag(sk, SOCK_NO_LARGESEND);
 
-		tcp_sync_mss(sk, tp->pmtu_cookie);
+		tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 		tcp_initialize_rcv_mss(sk);
 
 
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index c2fe61becd61..9b62d80bb20f 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -220,9 +220,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
 	inet->dport = usin->sin_port;
 	inet->daddr = daddr;
 
-	tp->ext_header_len = 0;
+	inet_csk(sk)->icsk_ext_hdr_len = 0;
 	if (inet->opt)
-		tp->ext_header_len = inet->opt->optlen;
+		inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen;
 
 	tp->rx_opt.mss_clamp = 536;
 
@@ -275,7 +275,6 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
 {
 	struct dst_entry *dst;
 	struct inet_sock *inet = inet_sk(sk);
-	struct tcp_sock *tp = tcp_sk(sk);
 
 	/* We are not interested in TCP_LISTEN and open_requests (SYN-ACKs
 	 * send out by Linux are always <576bytes so they should go through
@@ -304,7 +303,7 @@ static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *iph,
 	mtu = dst_mtu(dst);
 
 	if (inet->pmtudisc != IP_PMTUDISC_DONT &&
-	    tp->pmtu_cookie > mtu) {
+	    inet_csk(sk)->icsk_pmtu_cookie > mtu) {
 		tcp_sync_mss(sk, mtu);
 
 		/* Resend the TCP packet because it's
@@ -895,9 +894,9 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 	ireq->opt	      = NULL;
 	newinet->mc_index     = inet_iif(skb);
 	newinet->mc_ttl	      = skb->nh.iph->ttl;
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newinet->opt)
-		newtp->ext_header_len = newinet->opt->optlen;
+		inet_csk(newsk)->icsk_ext_hdr_len = newinet->opt->optlen;
 	newinet->id = newtp->write_seq ^ jiffies;
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1266,6 +1265,7 @@ static int tcp_v4_init_sock(struct sock *sk)
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
 	icsk->icsk_af_ops = &ipv4_specific;
+	icsk->icsk_sync_mss = tcp_sync_mss;
 
 	sk->sk_sndbuf = sysctl_tcp_wmem[1];
 	sk->sk_rcvbuf = sysctl_tcp_rmem[1];
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index af1946c52c37..3a0a914de917 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -621,7 +621,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    It is minimum of user_mss and mss received with SYN.
    It also does not include TCP options.
 
-   tp->pmtu_cookie is last pmtu, seen by this function.
+   inet_csk(sk)->icsk_pmtu_cookie is last pmtu, seen by this function.
 
    tp->mss_cache is current effective sending mss, including
    all tcp options except for SACKs. It is evaluated,
@@ -631,17 +631,18 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len)
    NOTE1. rfc1122 clearly states that advertised MSS
    DOES NOT include either tcp or ip options.
 
-   NOTE2. tp->pmtu_cookie and tp->mss_cache are READ ONLY outside
-   this function.			--ANK (980731)
+   NOTE2. inet_csk(sk)->icsk_pmtu_cookie and tp->mss_cache
+   are READ ONLY outside this function.		--ANK (980731)
  */
 
 unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	/* Calculate base mss without TCP options:
 	   It is MMS_S - sizeof(tcphdr) of rfc1122
 	 */
-	int mss_now = (pmtu - inet_csk(sk)->icsk_af_ops->net_header_len -
+	int mss_now = (pmtu - icsk->icsk_af_ops->net_header_len -
 		       sizeof(struct tcphdr));
 
 	/* Clamp it (mss_clamp does not include tcp options) */
@@ -649,7 +650,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 		mss_now = tp->rx_opt.mss_clamp;
 
 	/* Now subtract optional transport overhead */
-	mss_now -= tp->ext_header_len;
+	mss_now -= icsk->icsk_ext_hdr_len;
 
 	/* Then reserve room for full set of TCP options and 8 bytes of data */
 	if (mss_now < 48)
@@ -663,7 +664,7 @@ unsigned int tcp_sync_mss(struct sock *sk, u32 pmtu)
 		mss_now = max((tp->max_window>>1), 68U - tp->tcp_header_len);
 
 	/* And store cached results */
-	tp->pmtu_cookie = pmtu;
+	icsk->icsk_pmtu_cookie = pmtu;
 	tp->mss_cache = mss_now;
 
 	return mss_now;
@@ -693,7 +694,7 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 
 	if (dst) {
 		u32 mtu = dst_mtu(dst);
-		if (mtu != tp->pmtu_cookie)
+		if (mtu != inet_csk(sk)->icsk_pmtu_cookie)
 			mss_now = tcp_sync_mss(sk, mtu);
 	}
 
@@ -706,7 +707,8 @@ unsigned int tcp_current_mss(struct sock *sk, int large_allowed)
 	if (doing_tso) {
 		xmit_size_goal = (65535 -
 				  inet_csk(sk)->icsk_af_ops->net_header_len -
-				  tp->ext_header_len - tp->tcp_header_len);
+				  inet_csk(sk)->icsk_ext_hdr_len -
+				  tp->tcp_header_len);
 
 		if (tp->max_window &&
 		    (xmit_size_goal > (tp->max_window >> 1)))
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index bf17aab9b776..70a510ff31ee 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -167,6 +167,7 @@ lookup_protocol:
 		sk->sk_reuse = 1;
 
 	inet = inet_sk(sk);
+	inet->is_icsk = INET_PROTOSW_ICSK & answer_flags;
 
 	if (SOCK_RAW == sock->type) {
 		inet->num = protocol;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index b6b63fa8454c..c63868dd2ca2 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -163,17 +163,17 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 			sk_refcnt_debug_dec(sk);
 
 			if (sk->sk_protocol == IPPROTO_TCP) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
 				sock_prot_inc_use(&tcp_prot);
 				local_bh_enable();
 				sk->sk_prot = &tcp_prot;
-				inet_csk(sk)->icsk_af_ops = &ipv4_specific;
+				icsk->icsk_af_ops = &ipv4_specific;
 				sk->sk_socket->ops = &inet_stream_ops;
 				sk->sk_family = PF_INET;
-				tcp_sync_mss(sk, tp->pmtu_cookie);
+				tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
 			} else {
 				local_bh_disable();
 				sock_prot_dec_use(sk->sk_prot);
@@ -317,14 +317,15 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
 		}
 
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
@@ -380,14 +381,15 @@ sticky_done:
 			goto done;
 update:
 		retv = 0;
-		if (sk->sk_type == SOCK_STREAM) {
+		if (inet_sk(sk)->is_icsk) {
 			if (opt) {
-				struct tcp_sock *tp = tcp_sk(sk);
+				struct inet_connection_sock *icsk = inet_csk(sk);
 				if (!((1 << sk->sk_state) &
 				      (TCPF_LISTEN | TCPF_CLOSE))
 				    && inet_sk(sk)->daddr != LOOPBACK4_IPV6) {
-					tp->ext_header_len = opt->opt_flen + opt->opt_nflen;
-					tcp_sync_mss(sk, tp->pmtu_cookie);
+					icsk->icsk_ext_hdr_len =
+						opt->opt_flen + opt->opt_nflen;
+					icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
 				}
 			}
 			opt = xchg(&np->opt, opt);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index a682eb9093e1..2947bc56d8a0 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -123,7 +123,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 			  int addr_len)
 {
 	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
-	struct inet_sock *inet = inet_sk(sk);
+ 	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct ipv6_pinfo *np = inet6_sk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct in6_addr *saddr = NULL, *final_p = NULL, final;
@@ -198,7 +199,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	 */
 
 	if (addr_type == IPV6_ADDR_MAPPED) {
-		u32 exthdrlen = tp->ext_header_len;
+		u32 exthdrlen = icsk->icsk_ext_hdr_len;
 		struct sockaddr_in sin;
 
 		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
@@ -210,14 +211,14 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 		sin.sin_port = usin->sin6_port;
 		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
 
-		inet_csk(sk)->icsk_af_ops = &ipv6_mapped;
+		icsk->icsk_af_ops = &ipv6_mapped;
 		sk->sk_backlog_rcv = tcp_v4_do_rcv;
 
 		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
 
 		if (err) {
-			tp->ext_header_len = exthdrlen;
-			inet_csk(sk)->icsk_af_ops = &ipv6_specific;
+			icsk->icsk_ext_hdr_len = exthdrlen;
+			icsk->icsk_af_ops = &ipv6_specific;
 			sk->sk_backlog_rcv = tcp_v6_do_rcv;
 			goto failure;
 		} else {
@@ -270,9 +271,10 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
 	sk->sk_route_caps = dst->dev->features &
 		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
 
-	tp->ext_header_len = 0;
+	icsk->icsk_ext_hdr_len = 0;
 	if (np->opt)
-		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
+		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
+					  np->opt->opt_nflen);
 
 	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 
@@ -385,7 +387,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			dst_hold(dst);
 
-		if (tp->pmtu_cookie > dst_mtu(dst)) {
+		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
 			tcp_sync_mss(sk, dst_mtu(dst));
 			tcp_simple_retransmit(sk);
 		} /* else let the usual retransmit timer handle it */
@@ -869,7 +871,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 		   worked with IPv6 icsk.icsk_af_ops.
 		   Sync it now.
 		 */
-		tcp_sync_mss(newsk, newtp->pmtu_cookie);
+		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
 
 		return newsk;
 	}
@@ -976,10 +978,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
 			sock_kfree_s(sk, opt, opt->tot_len);
 	}
 
-	newtp->ext_header_len = 0;
+	inet_csk(newsk)->icsk_ext_hdr_len = 0;
 	if (newnp->opt)
-		newtp->ext_header_len = newnp->opt->opt_nflen +
-					newnp->opt->opt_flen;
+		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
+						     newnp->opt->opt_flen);
 
 	tcp_sync_mss(newsk, dst_mtu(dst));
 	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
@@ -1361,6 +1363,7 @@ static int tcp_v6_init_sock(struct sock *sk)
 
 	icsk->icsk_af_ops = &ipv6_specific;
 	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
+	icsk->icsk_sync_mss = tcp_sync_mss;
 	sk->sk_write_space = sk_stream_write_space;
 	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
 
@@ -1591,7 +1594,8 @@ static struct inet_protosw tcpv6_protosw = {
 	.ops		=	&inet6_stream_ops,
 	.capability	=	-1,
 	.no_check	=	0,
-	.flags		=	INET_PROTOSW_PERMANENT,
+	.flags		=	INET_PROTOSW_PERMANENT |
+				INET_PROTOSW_ICSK,
 };
 
 void __init tcpv6_init(void)
-- 
cgit v1.2.3


From c865e5d99e25a171e8262fc0f7ba608568633c64 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 21 Dec 2005 19:03:44 -0800
Subject: [PKT_SCHED] netem: packet corruption option

Here is a new feature for netem in 2.6.16. It adds the ability to
randomly corrupt packets with netem. A version was done by
Hagen Paul Pfeifer, but I redid it to handle the cases of backwards
compatibility with netlink interface and presence of hardware checksum
offload. It is useful for testing hardware offload in devices.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/pkt_sched.h |  7 +++++++
 net/sched/sch_netem.c     | 49 ++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pkt_sched.h b/include/linux/pkt_sched.h
index e87b233615b3..d10f35338507 100644
--- a/include/linux/pkt_sched.h
+++ b/include/linux/pkt_sched.h
@@ -429,6 +429,7 @@ enum
 	TCA_NETEM_CORR,
 	TCA_NETEM_DELAY_DIST,
 	TCA_NETEM_REORDER,
+	TCA_NETEM_CORRUPT,
 	__TCA_NETEM_MAX,
 };
 
@@ -457,6 +458,12 @@ struct tc_netem_reorder
 	__u32	correlation;
 };
 
+struct tc_netem_corrupt
+{
+	__u32	probability;
+	__u32	correlation;
+};
+
 #define NETEM_DIST_SCALE	8192
 
 #endif
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 82fb07aa06a5..ba5283204837 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -25,7 +25,7 @@
 
 #include <net/pkt_sched.h>
 
-#define VERSION "1.1"
+#define VERSION "1.2"
 
 /*	Network Emulation Queuing algorithm.
 	====================================
@@ -65,11 +65,12 @@ struct netem_sched_data {
 	u32 jitter;
 	u32 duplicate;
 	u32 reorder;
+	u32 corrupt;
 
 	struct crndstate {
 		unsigned long last;
 		unsigned long rho;
-	} delay_cor, loss_cor, dup_cor, reorder_cor;
+	} delay_cor, loss_cor, dup_cor, reorder_cor, corrupt_cor;
 
 	struct disttable {
 		u32  size;
@@ -183,6 +184,23 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 		q->duplicate = dupsave;
 	}
 
+	/*
+	 * Randomized packet corruption.
+	 * Make copy if needed since we are modifying
+	 * If packet is going to be hardware checksummed, then
+	 * do it now in software before we mangle it.
+	 */
+	if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) {
+		if (!(skb = skb_unshare(skb, GFP_ATOMIC))
+		    || (skb->ip_summed == CHECKSUM_HW
+			&& skb_checksum_help(skb, 0))) {
+			sch->qstats.drops++;
+			return NET_XMIT_DROP;
+		}
+
+		skb->data[net_random() % skb_headlen(skb)] ^= 1<<(net_random() % 8);
+	}
+
 	if (q->gap == 0 		/* not doing reordering */
 	    || q->counter < q->gap 	/* inside last reordering gap */
 	    || q->reorder < get_crandom(&q->reorder_cor)) {
@@ -382,6 +400,20 @@ static int get_reorder(struct Qdisc *sch, const struct rtattr *attr)
 	return 0;
 }
 
+static int get_corrupt(struct Qdisc *sch, const struct rtattr *attr)
+{
+	struct netem_sched_data *q = qdisc_priv(sch);
+	const struct tc_netem_corrupt *r = RTA_DATA(attr);
+
+	if (RTA_PAYLOAD(attr) != sizeof(*r))
+		return -EINVAL;
+
+	q->corrupt = r->probability;
+	init_crandom(&q->corrupt_cor, r->correlation);
+	return 0;
+}
+
+/* Parse netlink message to set options */
 static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 {
 	struct netem_sched_data *q = qdisc_priv(sch);
@@ -432,13 +464,19 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt)
 			if (ret)
 				return ret;
 		}
+
 		if (tb[TCA_NETEM_REORDER-1]) {
 			ret = get_reorder(sch, tb[TCA_NETEM_REORDER-1]);
 			if (ret)
 				return ret;
 		}
-	}
 
+		if (tb[TCA_NETEM_CORRUPT-1]) {
+			ret = get_corrupt(sch, tb[TCA_NETEM_CORRUPT-1]);
+			if (ret)
+				return ret;
+		}
+	}
 
 	return 0;
 }
@@ -564,6 +602,7 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	struct tc_netem_qopt qopt;
 	struct tc_netem_corr cor;
 	struct tc_netem_reorder reorder;
+	struct tc_netem_corrupt corrupt;
 
 	qopt.latency = q->latency;
 	qopt.jitter = q->jitter;
@@ -582,6 +621,10 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb)
 	reorder.correlation = q->reorder_cor.rho;
 	RTA_PUT(skb, TCA_NETEM_REORDER, sizeof(reorder), &reorder);
 
+	corrupt.probability = q->corrupt;
+	corrupt.correlation = q->corrupt_cor.rho;
+	RTA_PUT(skb, TCA_NETEM_CORRUPT, sizeof(corrupt), &corrupt);
+
 	rta->rta_len = skb->tail - b;
 
 	return skb->len;
-- 
cgit v1.2.3


From 3821af2fe13700cab6fd67367128fa180e43f8b8 Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Wed, 21 Dec 2005 19:30:53 -0800
Subject: [FLS64]: generic version

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/asm-alpha/bitops.h     | 1 +
 include/asm-arm/bitops.h       | 2 ++
 include/asm-arm26/bitops.h     | 1 +
 include/asm-cris/bitops.h      | 1 +
 include/asm-frv/bitops.h       | 1 +
 include/asm-generic/bitops.h   | 1 +
 include/asm-h8300/bitops.h     | 1 +
 include/asm-i386/bitops.h      | 1 +
 include/asm-ia64/bitops.h      | 1 +
 include/asm-m32r/bitops.h      | 1 +
 include/asm-m68k/bitops.h      | 1 +
 include/asm-m68knommu/bitops.h | 1 +
 include/asm-mips/bitops.h      | 2 +-
 include/asm-parisc/bitops.h    | 1 +
 include/asm-powerpc/bitops.h   | 1 +
 include/asm-s390/bitops.h      | 1 +
 include/asm-sh/bitops.h        | 1 +
 include/asm-sh64/bitops.h      | 1 +
 include/asm-sparc/bitops.h     | 1 +
 include/asm-sparc64/bitops.h   | 1 +
 include/asm-v850/bitops.h      | 1 +
 include/asm-x86_64/bitops.h    | 1 +
 include/asm-xtensa/bitops.h    | 1 +
 include/linux/bitops.h         | 9 +++++++++
 24 files changed, 33 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/asm-alpha/bitops.h b/include/asm-alpha/bitops.h
index 578ed3f1a607..302201f1a097 100644
--- a/include/asm-alpha/bitops.h
+++ b/include/asm-alpha/bitops.h
@@ -321,6 +321,7 @@ static inline int fls(int word)
 #else
 #define fls	generic_fls
 #endif
+#define fls64   generic_fls64
 
 /* Compute powers of two for the given integer.  */
 static inline long floor_log2(unsigned long word)
diff --git a/include/asm-arm/bitops.h b/include/asm-arm/bitops.h
index 7399d431edfe..d02de721ecc1 100644
--- a/include/asm-arm/bitops.h
+++ b/include/asm-arm/bitops.h
@@ -332,6 +332,7 @@ static inline unsigned long __ffs(unsigned long word)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 /*
  * ffs: find first bit set. This is defined the same way as
@@ -351,6 +352,7 @@ static inline unsigned long __ffs(unsigned long word)
 #define fls(x) \
 	( __builtin_constant_p(x) ? generic_fls(x) : \
 	  ({ int __r; asm("clz\t%0, %1" : "=r"(__r) : "r"(x) : "cc"); 32-__r; }) )
+#define fls64(x)   generic_fls64(x)
 #define ffs(x) ({ unsigned long __t = (x); fls(__t & -__t); })
 #define __ffs(x) (ffs(x) - 1)
 #define ffz(x) __ffs( ~(x) )
diff --git a/include/asm-arm26/bitops.h b/include/asm-arm26/bitops.h
index 7d062fb2e343..15cc6f2da792 100644
--- a/include/asm-arm26/bitops.h
+++ b/include/asm-arm26/bitops.h
@@ -259,6 +259,7 @@ static inline unsigned long __ffs(unsigned long word)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 /*
  * ffs: find first bit set. This is defined the same way as
diff --git a/include/asm-cris/bitops.h b/include/asm-cris/bitops.h
index 1bddb3f3a289..d3eb0f1e4208 100644
--- a/include/asm-cris/bitops.h
+++ b/include/asm-cris/bitops.h
@@ -240,6 +240,7 @@ static inline int test_bit(int nr, const volatile unsigned long *addr)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 /*
  * hweightN - returns the hamming weight of a N-bit word
diff --git a/include/asm-frv/bitops.h b/include/asm-frv/bitops.h
index b664bd5b6663..02be7b3a8a83 100644
--- a/include/asm-frv/bitops.h
+++ b/include/asm-frv/bitops.h
@@ -228,6 +228,7 @@ found_middle:
 							\
 	bit ? 33 - bit : bit;				\
 })
+#define fls64(x)   generic_fls64(x)
 
 /*
  * Every architecture must define this function. It's the fastest
diff --git a/include/asm-generic/bitops.h b/include/asm-generic/bitops.h
index ce31b739fd80..0e6d9852008c 100644
--- a/include/asm-generic/bitops.h
+++ b/include/asm-generic/bitops.h
@@ -56,6 +56,7 @@ extern __inline__ int test_bit(int nr, const unsigned long * addr)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-h8300/bitops.h b/include/asm-h8300/bitops.h
index 5036f595f8c9..c0411ec9d651 100644
--- a/include/asm-h8300/bitops.h
+++ b/include/asm-h8300/bitops.h
@@ -406,5 +406,6 @@ found_middle:
 #endif /* __KERNEL__ */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #endif /* _H8300_BITOPS_H */
diff --git a/include/asm-i386/bitops.h b/include/asm-i386/bitops.h
index ddf1739dc7fd..4807aa1d2e3d 100644
--- a/include/asm-i386/bitops.h
+++ b/include/asm-i386/bitops.h
@@ -372,6 +372,7 @@ static inline unsigned long ffz(unsigned long word)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-ia64/bitops.h b/include/asm-ia64/bitops.h
index 7232528e2d0c..36d0fb95ea89 100644
--- a/include/asm-ia64/bitops.h
+++ b/include/asm-ia64/bitops.h
@@ -345,6 +345,7 @@ fls (int t)
 	x |= x >> 16;
 	return ia64_popcnt(x);
 }
+#define fls64(x)   generic_fls64(x)
 
 /*
  * ffs: find first bit set. This is defined the same way as the libc and compiler builtin
diff --git a/include/asm-m32r/bitops.h b/include/asm-m32r/bitops.h
index e78443981349..abea2fdd8689 100644
--- a/include/asm-m32r/bitops.h
+++ b/include/asm-m32r/bitops.h
@@ -465,6 +465,7 @@ static __inline__ unsigned long __ffs(unsigned long word)
  * fls: find last bit set.
  */
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-m68k/bitops.h b/include/asm-m68k/bitops.h
index b1bcf7c66516..13f4c0048463 100644
--- a/include/asm-m68k/bitops.h
+++ b/include/asm-m68k/bitops.h
@@ -310,6 +310,7 @@ static inline int fls(int x)
 
 	return 32 - cnt;
 }
+#define fls64(x)   generic_fls64(x)
 
 /*
  * Every architecture must define this function. It's the fastest
diff --git a/include/asm-m68knommu/bitops.h b/include/asm-m68knommu/bitops.h
index c42f88a9b9f9..4058dd086a02 100644
--- a/include/asm-m68knommu/bitops.h
+++ b/include/asm-m68knommu/bitops.h
@@ -499,5 +499,6 @@ found_middle:
  * fls: find last bit set.
  */
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #endif /* _M68KNOMMU_BITOPS_H */
diff --git a/include/asm-mips/bitops.h b/include/asm-mips/bitops.h
index 5496f9064a6a..3b0c8aaf6e8b 100644
--- a/include/asm-mips/bitops.h
+++ b/include/asm-mips/bitops.h
@@ -695,7 +695,7 @@ static inline unsigned long fls(unsigned long word)
 
 	return flz(~word) + 1;
 }
-
+#define fls64(x)   generic_fls64(x)
 
 /*
  * find_next_zero_bit - find the first zero bit in a memory region
diff --git a/include/asm-parisc/bitops.h b/include/asm-parisc/bitops.h
index 55b98c67fd82..15d8c2b51584 100644
--- a/include/asm-parisc/bitops.h
+++ b/include/asm-parisc/bitops.h
@@ -263,6 +263,7 @@ static __inline__ int fls(int x)
 
 	return ret;
 }
+#define fls64(x)   generic_fls64(x)
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
diff --git a/include/asm-powerpc/bitops.h b/include/asm-powerpc/bitops.h
index 5727229b0444..1996eaa8aeae 100644
--- a/include/asm-powerpc/bitops.h
+++ b/include/asm-powerpc/bitops.h
@@ -310,6 +310,7 @@ static __inline__ int fls(unsigned int x)
 	asm ("cntlzw %0,%1" : "=r" (lz) : "r" (x));
 	return 32 - lz;
 }
+#define fls64(x)   generic_fls64(x)
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
diff --git a/include/asm-s390/bitops.h b/include/asm-s390/bitops.h
index b07c578b22ea..61232760cc3b 100644
--- a/include/asm-s390/bitops.h
+++ b/include/asm-s390/bitops.h
@@ -839,6 +839,7 @@ static inline int sched_find_first_bit(unsigned long *b)
  * fls: find last bit set.
  */
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
diff --git a/include/asm-sh/bitops.h b/include/asm-sh/bitops.h
index 5163d1ff2f1b..1c5260860045 100644
--- a/include/asm-sh/bitops.h
+++ b/include/asm-sh/bitops.h
@@ -470,6 +470,7 @@ found_middle:
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-sh64/bitops.h b/include/asm-sh64/bitops.h
index e1ff63e09227..ce9c3ad45fe0 100644
--- a/include/asm-sh64/bitops.h
+++ b/include/asm-sh64/bitops.h
@@ -510,6 +510,7 @@ found_middle:
 
 #define ffs(x)	generic_ffs(x)
 #define fls(x)	generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-sparc/bitops.h b/include/asm-sparc/bitops.h
index bfbd795a0a80..41722b5e45ef 100644
--- a/include/asm-sparc/bitops.h
+++ b/include/asm-sparc/bitops.h
@@ -298,6 +298,7 @@ static inline int ffs(int x)
  * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
  */
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 /*
  * hweightN: returns the hamming weight (i.e. the number
diff --git a/include/asm-sparc64/bitops.h b/include/asm-sparc64/bitops.h
index 6388b8376c50..6efc0162fb09 100644
--- a/include/asm-sparc64/bitops.h
+++ b/include/asm-sparc64/bitops.h
@@ -119,6 +119,7 @@ static inline unsigned long __ffs(unsigned long word)
  */
 
 #define fls(x) generic_fls(x)
+#define fls64(x)   generic_fls64(x)
 
 #ifdef __KERNEL__
 
diff --git a/include/asm-v850/bitops.h b/include/asm-v850/bitops.h
index b91e799763fd..8955d2376ac8 100644
--- a/include/asm-v850/bitops.h
+++ b/include/asm-v850/bitops.h
@@ -276,6 +276,7 @@ found_middle:
 
 #define ffs(x) generic_ffs (x)
 #define fls(x) generic_fls (x)
+#define fls64(x) generic_fls64(x)
 #define __ffs(x) ffs(x)
 
 
diff --git a/include/asm-x86_64/bitops.h b/include/asm-x86_64/bitops.h
index 05a0d374404b..94b52c8ce97f 100644
--- a/include/asm-x86_64/bitops.h
+++ b/include/asm-x86_64/bitops.h
@@ -409,6 +409,7 @@ static __inline__ int ffs(int x)
 
 /* find last set bit */
 #define fls(x) generic_fls(x)
+#define fls64(x) generic_fls64(x)
 
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-xtensa/bitops.h b/include/asm-xtensa/bitops.h
index e76ee889e21d..0a2065f1a372 100644
--- a/include/asm-xtensa/bitops.h
+++ b/include/asm-xtensa/bitops.h
@@ -245,6 +245,7 @@ static __inline__ int fls (unsigned int x)
 {
 	return __cntlz(x);
 }
+#define fls64(x)   generic_fls64(x)
 
 static __inline__ int
 find_next_bit(const unsigned long *addr, int size, int offset)
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 38c2fb7ebe09..6a2a19f14bb2 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -76,6 +76,15 @@ static __inline__ int generic_fls(int x)
  */
 #include <asm/bitops.h>
 
+
+static inline int generic_fls64(__u64 x)
+{
+	__u32 h = x >> 32;
+	if (h)
+		return fls(x) + 32;
+	return fls(x);
+}
+
 static __inline__ int get_bitmask_order(unsigned int count)
 {
 	int order;
-- 
cgit v1.2.3


From 77d76ea310b50a9c8ff15bd290fcb4ed4961adf2 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 22 Dec 2005 12:43:42 -0800
Subject: [NET]: Small cleanup to socket initialization

sock_init can be done as a core_initcall instead of calling
it directly in init/main.c

Also I removed an out of date #ifdef.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  1 -
 include/linux/socket.h |  1 -
 init/main.c            |  4 ----
 net/socket.c           | 10 +++++-----
 4 files changed, 5 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 97f6580ce039..971677178e0c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -32,7 +32,6 @@
 
 #define HAVE_ALLOC_SKB		/* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB	/* Ditto 8)		   */
-#define SLAB_SKB 		/* Slabified skbuffs 	   */
 
 #define CHECKSUM_NONE 0
 #define CHECKSUM_HW 1
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 1739c2d5b95b..9f4019156fd8 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -27,7 +27,6 @@ struct __kernel_sockaddr_storage {
 #include <linux/compiler.h>		/* __user			*/
 
 extern int sysctl_somaxconn;
-extern void sock_init(void);
 #ifdef CONFIG_PROC_FS
 struct seq_file;
 extern void socket_seq_show(struct seq_file *seq);
diff --git a/init/main.c b/init/main.c
index 27f97f9b4636..54aaf561cf66 100644
--- a/init/main.c
+++ b/init/main.c
@@ -47,7 +47,6 @@
 #include <linux/rmap.h>
 #include <linux/mempolicy.h>
 #include <linux/key.h>
-#include <net/sock.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -614,9 +613,6 @@ static void __init do_basic_setup(void)
 	sysctl_init();
 #endif
 
-	/* Networking initialization needs a process context */ 
-	sock_init();
-
 	do_initcalls();
 }
 
diff --git a/net/socket.c b/net/socket.c
index 3145103cdf54..98be7ef3c086 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -2036,7 +2036,7 @@ int sock_unregister(int family)
 	return 0;
 }
 
-void __init sock_init(void)
+static int __init sock_init(void)
 {
 	/*
 	 *	Initialize sock SLAB cache.
@@ -2044,12 +2044,10 @@ void __init sock_init(void)
 	 
 	sk_init();
 
-#ifdef SLAB_SKB
 	/*
 	 *	Initialize skbuff SLAB cache 
 	 */
 	skb_init();
-#endif
 
 	/*
 	 *	Initialize the protocols module. 
@@ -2058,8 +2056,8 @@ void __init sock_init(void)
 	init_inodecache();
 	register_filesystem(&sock_fs_type);
 	sock_mnt = kern_mount(&sock_fs_type);
-	/* The real protocol initialization is performed when
-	 *  do_initcalls is run.  
+
+	/* The real protocol initialization is performed in later initcalls.
 	 */
 
 #ifdef CONFIG_NETFILTER
@@ -2067,6 +2065,8 @@ void __init sock_init(void)
 #endif
 }
 
+core_initcall(sock_init);	/* early initcall */
+
 #ifdef CONFIG_PROC_FS
 void socket_seq_show(struct seq_file *seq)
 {
-- 
cgit v1.2.3


From 90ddc4f0470427df306f308ad03db6b6b21644b8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <dada1@cosmosbay.com>
Date: Thu, 22 Dec 2005 12:49:22 -0800
Subject: [NET]: move struct proto_ops to const

I noticed that some of 'struct proto_ops' used in the kernel may share
a cache line used by locks or other heavily modified data. (default
linker alignement is 32 bytes, and L1_CACHE_LINE is 64 or 128 at
least)

This patch makes sure a 'struct proto_ops' can be declared as const,
so that all cpus can share all parts of it without false sharing.

This is not mandatory : a driver can still use a read/write structure
if it needs to (and eventually a __read_mostly)

I made a global stubstitute to change all existing occurences to make
them const.

This should reduce the possibility of false sharing on SMP, and
speedup some socket system calls.

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h         |  4 ++--
 include/net/inet_common.h   |  4 ++--
 include/net/ipv6.h          |  4 ++--
 include/net/protocol.h      |  2 +-
 net/appletalk/ddp.c         |  4 ++--
 net/atm/pvc.c               |  2 +-
 net/atm/svc.c               |  2 +-
 net/ax25/af_ax25.c          |  4 ++--
 net/bluetooth/bnep/sock.c   |  2 +-
 net/bluetooth/cmtp/sock.c   |  2 +-
 net/bluetooth/hci_sock.c    |  2 +-
 net/bluetooth/hidp/sock.c   |  2 +-
 net/bluetooth/l2cap.c       |  4 ++--
 net/bluetooth/rfcomm/sock.c |  4 ++--
 net/bluetooth/sco.c         |  4 ++--
 net/dccp/proto.c            |  2 +-
 net/decnet/af_decnet.c      |  4 ++--
 net/econet/af_econet.c      |  4 ++--
 net/ipv4/af_inet.c          |  6 +++---
 net/ipv6/af_inet6.c         |  6 +++---
 net/ipx/af_ipx.c            |  4 ++--
 net/irda/af_irda.c          | 16 ++++++++--------
 net/key/af_key.c            |  4 ++--
 net/llc/af_llc.c            |  4 ++--
 net/netlink/af_netlink.c    |  4 ++--
 net/netrom/af_netrom.c      |  4 ++--
 net/packet/af_packet.c      |  8 ++++----
 net/sctp/ipv6.c             |  2 +-
 net/sctp/protocol.c         |  2 +-
 net/sunrpc/svcsock.c        |  2 +-
 net/unix/af_unix.c          |  6 +++---
 net/wanrouter/af_wanpipe.c  |  4 ++--
 net/x25/af_x25.c            |  4 ++--
 33 files changed, 66 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/net.h b/include/linux/net.h
index d6a41e6577f6..28195a2d8ff0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -107,7 +107,7 @@ enum sock_type {
 struct socket {
 	socket_state		state;
 	unsigned long		flags;
-	struct proto_ops	*ops;
+	const struct proto_ops	*ops;
 	struct fasync_struct	*fasync_list;
 	struct file		*file;
 	struct sock		*sk;
@@ -260,7 +260,7 @@ SOCKCALL_WRAP(name, recvmsg, (struct kiocb *iocb, struct socket *sock, struct ms
 SOCKCALL_WRAP(name, mmap, (struct file *file, struct socket *sock, struct vm_area_struct *vma), \
 	      (file, sock, vma)) \
 	      \
-static struct proto_ops name##_ops = {			\
+static const struct proto_ops name##_ops = {			\
 	.family		= fam,				\
 	.owner		= THIS_MODULE,			\
 	.release	= __lock_##name##_release,	\
diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index f943306ce5ff..227adcbdfec8 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -1,8 +1,8 @@
 #ifndef _INET_COMMON_H
 #define _INET_COMMON_H
 
-extern struct proto_ops		inet_stream_ops;
-extern struct proto_ops		inet_dgram_ops;
+extern const struct proto_ops		inet_stream_ops;
+extern const struct proto_ops		inet_dgram_ops;
 
 /*
  *	INET4 prototypes used by INET6
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index e3d5d7bc8837..11a725662c36 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -538,8 +538,8 @@ extern int sysctl_ip6frag_low_thresh;
 extern int sysctl_ip6frag_time;
 extern int sysctl_ip6frag_secret_interval;
 
-extern struct proto_ops inet6_stream_ops;
-extern struct proto_ops inet6_dgram_ops;
+extern const struct proto_ops inet6_stream_ops;
+extern const struct proto_ops inet6_dgram_ops;
 
 extern int ip6_mc_source(int add, int omode, struct sock *sk,
 			 struct group_source_req *pgsr);
diff --git a/include/net/protocol.h b/include/net/protocol.h
index a29cb29647d0..63f7db99c2a6 100644
--- a/include/net/protocol.h
+++ b/include/net/protocol.h
@@ -65,7 +65,7 @@ struct inet_protosw {
 	int		 protocol; /* This is the L4 protocol number.  */
 
 	struct proto	 *prot;
-	struct proto_ops *ops;
+	const struct proto_ops *ops;
   
 	int              capability; /* Which (if any) capability do
 				      * we need to use this socket
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 7982656b9c83..296f186802ff 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -63,7 +63,7 @@
 #include <linux/atalk.h>
 
 struct datalink_proto *ddp_dl, *aarp_dl;
-static struct proto_ops atalk_dgram_ops;
+static const struct proto_ops atalk_dgram_ops;
 
 /**************************************************************************\
 *                                                                          *
@@ -1841,7 +1841,7 @@ static struct net_proto_family atalk_family_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(atalk_dgram_ops) = {
 	.family		= PF_APPLETALK,
 	.owner		= THIS_MODULE,
 	.release	= atalk_release,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2684a92da22b..f2c541774dcd 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -102,7 +102,7 @@ static int pvc_getname(struct socket *sock,struct sockaddr *sockaddr,
 }
 
 
-static struct proto_ops pvc_proto_ops = {
+static const struct proto_ops pvc_proto_ops = {
 	.family =	PF_ATMPVC,
 	.owner =	THIS_MODULE,
 
diff --git a/net/atm/svc.c b/net/atm/svc.c
index d7b266136bf6..3a180cfd7b48 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -613,7 +613,7 @@ static int svc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return error;
 }
 
-static struct proto_ops svc_proto_ops = {
+static const struct proto_ops svc_proto_ops = {
 	.family =	PF_ATMSVC,
 	.owner =	THIS_MODULE,
 
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 1b683f302657..8b5d10aaba05 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -54,7 +54,7 @@
 HLIST_HEAD(ax25_list);
 DEFINE_SPINLOCK(ax25_list_lock);
 
-static struct proto_ops ax25_proto_ops;
+static const struct proto_ops ax25_proto_ops;
 
 static void ax25_free_sock(struct sock *sk)
 {
@@ -1944,7 +1944,7 @@ static struct net_proto_family ax25_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops ax25_proto_ops = {
+static const struct proto_ops ax25_proto_ops = {
 	.family		= PF_AX25,
 	.owner		= THIS_MODULE,
 	.release	= ax25_release,
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index 9778c6acd53b..ccbaf69afc5b 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -146,7 +146,7 @@ static int bnep_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return 0;
 }
 
-static struct proto_ops bnep_sock_ops = {
+static const struct proto_ops bnep_sock_ops = {
 	.family     = PF_BLUETOOTH,
 	.owner      = THIS_MODULE,
 	.release    = bnep_sock_release,
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index beb045bf5714..5e22343b6090 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -137,7 +137,7 @@ static int cmtp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
-static struct proto_ops cmtp_sock_ops = {
+static const struct proto_ops cmtp_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= cmtp_sock_release,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1d6d0a15c099..84e6c93a044a 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -575,7 +575,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, char
 	return 0;
 }
 
-static struct proto_ops hci_sock_ops = {
+static const struct proto_ops hci_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= hci_sock_release,
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index f8986f881431..8f8dd931b294 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -143,7 +143,7 @@ static int hidp_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long
 	return -EINVAL;
 }
 
-static struct proto_ops hidp_sock_ops = {
+static const struct proto_ops hidp_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= hidp_sock_release,
diff --git a/net/bluetooth/l2cap.c b/net/bluetooth/l2cap.c
index 95f33cc7a24e..7f0781e4326f 100644
--- a/net/bluetooth/l2cap.c
+++ b/net/bluetooth/l2cap.c
@@ -57,7 +57,7 @@
 
 #define VERSION "2.8"
 
-static struct proto_ops l2cap_sock_ops;
+static const struct proto_ops l2cap_sock_ops;
 
 static struct bt_sock_list l2cap_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -2161,7 +2161,7 @@ static ssize_t l2cap_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(l2cap, S_IRUGO, l2cap_sysfs_show, NULL);
 
-static struct proto_ops l2cap_sock_ops = {
+static const struct proto_ops l2cap_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= l2cap_sock_release,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 6c34261b232e..757d2dd3b02f 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -58,7 +58,7 @@
 #define BT_DBG(D...)
 #endif
 
-static struct proto_ops rfcomm_sock_ops;
+static const struct proto_ops rfcomm_sock_ops;
 
 static struct bt_sock_list rfcomm_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -907,7 +907,7 @@ static ssize_t rfcomm_sock_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(rfcomm, S_IRUGO, rfcomm_sock_sysfs_show, NULL);
 
-static struct proto_ops rfcomm_sock_ops = {
+static const struct proto_ops rfcomm_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= rfcomm_sock_release,
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 648181430699..6b61323ce23c 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -56,7 +56,7 @@
 
 #define VERSION "0.5"
 
-static struct proto_ops sco_sock_ops;
+static const struct proto_ops sco_sock_ops;
 
 static struct bt_sock_list sco_sk_list = {
 	.lock = RW_LOCK_UNLOCKED
@@ -914,7 +914,7 @@ static ssize_t sco_sysfs_show(struct class *dev, char *buf)
 
 static CLASS_ATTR(sco, S_IRUGO, sco_sysfs_show, NULL);
 
-static struct proto_ops sco_sock_ops = {
+static const struct proto_ops sco_sock_ops = {
 	.family		= PF_BLUETOOTH,
 	.owner		= THIS_MODULE,
 	.release	= sco_sock_release,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 40a4c6899051..e4e629ed9bf7 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -680,7 +680,7 @@ void dccp_shutdown(struct sock *sk, int how)
 
 EXPORT_SYMBOL_GPL(dccp_shutdown);
 
-static struct proto_ops inet_dccp_ops = {
+static const struct proto_ops inet_dccp_ops = {
 	.family		= PF_INET,
 	.owner		= THIS_MODULE,
 	.release	= inet_release,
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index d402e9020c68..65e3baed0251 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -149,7 +149,7 @@ static void dn_keepalive(struct sock *sk);
 #define DN_SK_HASH_MASK (DN_SK_HASH_SIZE - 1)
 
 
-static struct proto_ops dn_proto_ops;
+static const struct proto_ops dn_proto_ops;
 static DEFINE_RWLOCK(dn_hash_lock);
 static struct hlist_head dn_sk_hash[DN_SK_HASH_SIZE];
 static struct hlist_head dn_wild_sk;
@@ -2342,7 +2342,7 @@ static struct net_proto_family	dn_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops dn_proto_ops = {
+static const struct proto_ops dn_proto_ops = {
 	.family =	AF_DECnet,
 	.owner =	THIS_MODULE,
 	.release =	dn_release,
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index 34fdac51df96..ff58f49c8b4a 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -45,7 +45,7 @@
 #include <asm/uaccess.h>
 #include <asm/system.h>
 
-static struct proto_ops econet_ops;
+static const struct proto_ops econet_ops;
 static struct hlist_head econet_sklist;
 static DEFINE_RWLOCK(econet_lock);
 
@@ -698,7 +698,7 @@ static struct net_proto_family econet_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(econet_ops) = {
 	.family =	PF_ECONET,
 	.owner =	THIS_MODULE,
 	.release =	econet_release,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 617e858beff1..4ed8a814c6cb 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -785,7 +785,7 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return err;
 }
 
-struct proto_ops inet_stream_ops = {
+const struct proto_ops inet_stream_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -806,7 +806,7 @@ struct proto_ops inet_stream_ops = {
 	.sendpage =	tcp_sendpage
 };
 
-struct proto_ops inet_dgram_ops = {
+const struct proto_ops inet_dgram_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
@@ -831,7 +831,7 @@ struct proto_ops inet_dgram_ops = {
  * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
  * udp_poll
  */
-static struct proto_ops inet_sockraw_ops = {
+static const struct proto_ops inet_sockraw_ops = {
 	.family =	PF_INET,
 	.owner =	THIS_MODULE,
 	.release =	inet_release,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 70a510ff31ee..7c9f19269f21 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -462,7 +462,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 	return(0);
 }
 
-struct proto_ops inet6_stream_ops = {
+const struct proto_ops inet6_stream_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
@@ -483,7 +483,7 @@ struct proto_ops inet6_stream_ops = {
 	.sendpage =	tcp_sendpage
 };
 
-struct proto_ops inet6_dgram_ops = {
+const struct proto_ops inet6_dgram_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
@@ -511,7 +511,7 @@ static struct net_proto_family inet6_family_ops = {
 };
 
 /* Same as inet6_dgram_ops, sans udp_poll.  */
-static struct proto_ops inet6_sockraw_ops = {
+static const struct proto_ops inet6_sockraw_ops = {
 	.family =	PF_INET6,
 	.owner =	THIS_MODULE,
 	.release =	inet6_release,
diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c
index 34b3bb868409..6c464c11bb09 100644
--- a/net/ipx/af_ipx.c
+++ b/net/ipx/af_ipx.c
@@ -75,7 +75,7 @@ static struct datalink_proto *pEII_datalink;
 static struct datalink_proto *p8023_datalink;
 static struct datalink_proto *pSNAP_datalink;
 
-static struct proto_ops ipx_dgram_ops;
+static const struct proto_ops ipx_dgram_ops;
 
 LIST_HEAD(ipx_interfaces);
 DEFINE_SPINLOCK(ipx_interfaces_lock);
@@ -1901,7 +1901,7 @@ static struct net_proto_family ipx_family_ops = {
 	.owner		= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(ipx_dgram_ops) = {
 	.family		= PF_IPX,
 	.owner		= THIS_MODULE,
 	.release	= ipx_release,
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index f121f7de2032..e57683d424f7 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -62,12 +62,12 @@
 
 static int irda_create(struct socket *sock, int protocol);
 
-static struct proto_ops irda_stream_ops;
-static struct proto_ops irda_seqpacket_ops;
-static struct proto_ops irda_dgram_ops;
+static const struct proto_ops irda_stream_ops;
+static const struct proto_ops irda_seqpacket_ops;
+static const struct proto_ops irda_dgram_ops;
 
 #ifdef CONFIG_IRDA_ULTRA
-static struct proto_ops irda_ultra_ops;
+static const struct proto_ops irda_ultra_ops;
 #define ULTRA_MAX_DATA 382
 #endif /* CONFIG_IRDA_ULTRA */
 
@@ -2464,7 +2464,7 @@ static struct net_proto_family irda_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2485,7 +2485,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_stream_ops) = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2506,7 +2506,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_seqpacket_ops) = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
@@ -2528,7 +2528,7 @@ static struct proto_ops SOCKOPS_WRAPPED(irda_dgram_ops) = {
 };
 
 #ifdef CONFIG_IRDA_ULTRA
-static struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(irda_ultra_ops) = {
 	.family =	PF_IRDA,
 	.owner =	THIS_MODULE,
 	.release =	irda_release,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index d32f7791f1e4..52efd04cbedb 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -113,7 +113,7 @@ static __inline__ void pfkey_unlock_table(void)
 }
 
 
-static struct proto_ops pfkey_ops;
+static const struct proto_ops pfkey_ops;
 
 static void pfkey_insert(struct sock *sk)
 {
@@ -3127,7 +3127,7 @@ out:
 	return err;
 }
 
-static struct proto_ops pfkey_ops = {
+static const struct proto_ops pfkey_ops = {
 	.family		=	PF_KEY,
 	.owner		=	THIS_MODULE,
 	/* Operations that make no sense on pfkey sockets. */
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index b6d3df5c911c..9cf65f9d8902 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -36,7 +36,7 @@
 static u16 llc_ui_sap_last_autoport = LLC_SAP_DYN_START;
 static u16 llc_ui_sap_link_no_max[256];
 static struct sockaddr_llc llc_ui_addrnull;
-static struct proto_ops llc_ui_ops;
+static const struct proto_ops llc_ui_ops;
 
 static int llc_ui_wait_for_conn(struct sock *sk, long timeout);
 static int llc_ui_wait_for_disc(struct sock *sk, long timeout);
@@ -1098,7 +1098,7 @@ static struct net_proto_family llc_ui_family_ops = {
 	.owner	= THIS_MODULE,
 };
 
-static struct proto_ops llc_ui_ops = {
+static const struct proto_ops llc_ui_ops = {
 	.family	     = PF_LLC,
 	.owner       = THIS_MODULE,
 	.release     = llc_ui_release,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 96020d7087e8..7849cac14d3a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -293,7 +293,7 @@ static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len)
 	return 0;
 }
 
-static struct proto_ops netlink_ops;
+static const struct proto_ops netlink_ops;
 
 static int netlink_insert(struct sock *sk, u32 pid)
 {
@@ -1656,7 +1656,7 @@ int netlink_unregister_notifier(struct notifier_block *nb)
 	return notifier_chain_unregister(&netlink_chain, nb);
 }
                 
-static struct proto_ops netlink_ops = {
+static const struct proto_ops netlink_ops = {
 	.family =	PF_NETLINK,
 	.owner =	THIS_MODULE,
 	.release =	netlink_release,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index e5d82d711cae..05b653c05971 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -63,7 +63,7 @@ static unsigned short circuit = 0x101;
 static HLIST_HEAD(nr_list);
 static DEFINE_SPINLOCK(nr_list_lock);
 
-static struct proto_ops nr_proto_ops;
+static const struct proto_ops nr_proto_ops;
 
 /*
  *	Socket removal during an interrupt is now safe.
@@ -1337,7 +1337,7 @@ static struct net_proto_family nr_family_ops = {
 	.owner		=	THIS_MODULE,
 };
 
-static struct proto_ops nr_proto_ops = {
+static const struct proto_ops nr_proto_ops = {
 	.family		=	PF_NETROM,
 	.owner		=	THIS_MODULE,
 	.release	=	nr_release,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 3e2462760413..deda6fdb1e53 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -251,10 +251,10 @@ static void packet_sock_destruct(struct sock *sk)
 }
 
 
-static struct proto_ops packet_ops;
+static const struct proto_ops packet_ops;
 
 #ifdef CONFIG_SOCK_PACKET
-static struct proto_ops packet_ops_spkt;
+static const struct proto_ops packet_ops_spkt;
 
 static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,  struct packet_type *pt, struct net_device *orig_dev)
 {
@@ -1784,7 +1784,7 @@ out:
 
 
 #ifdef CONFIG_SOCK_PACKET
-static struct proto_ops packet_ops_spkt = {
+static const struct proto_ops packet_ops_spkt = {
 	.family =	PF_PACKET,
 	.owner =	THIS_MODULE,
 	.release =	packet_release,
@@ -1806,7 +1806,7 @@ static struct proto_ops packet_ops_spkt = {
 };
 #endif
 
-static struct proto_ops packet_ops = {
+static const struct proto_ops packet_ops = {
 	.family =	PF_PACKET,
 	.owner =	THIS_MODULE,
 	.release =	packet_release,
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index fa3be2b8fb5f..15c05165c905 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -866,7 +866,7 @@ static int sctp_inet6_supported_addrs(const struct sctp_sock *opt,
 	return 2;
 }
 
-static struct proto_ops inet6_seqpacket_ops = {
+static const struct proto_ops inet6_seqpacket_ops = {
 	.family     = PF_INET6,
 	.owner      = THIS_MODULE,
 	.release    = inet6_release,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index f775d78aa59d..d1b0747a5b9d 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -829,7 +829,7 @@ static struct notifier_block sctp_inetaddr_notifier = {
 };
 
 /* Socket operations.  */
-static struct proto_ops inet_seqpacket_ops = {
+static const struct proto_ops inet_seqpacket_ops = {
 	.family      = PF_INET,
 	.owner       = THIS_MODULE,
 	.release     = inet_release,       /* Needs to be wrapped... */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c6a51911e71e..d68eba481291 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -758,7 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk)
 	struct svc_serv	*serv = svsk->sk_server;
 	struct socket	*sock = svsk->sk_sock;
 	struct socket	*newsock;
-	struct proto_ops *ops;
+	const struct proto_ops *ops;
 	struct svc_sock	*newsvsk;
 	int		err, slen;
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 04e850e04e3d..7d3fe6aebcdb 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -473,7 +473,7 @@ static int unix_dgram_connect(struct socket *, struct sockaddr *,
 static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *,
 				  struct msghdr *, size_t);
 
-static struct proto_ops unix_stream_ops = {
+static const struct proto_ops unix_stream_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
@@ -494,7 +494,7 @@ static struct proto_ops unix_stream_ops = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops unix_dgram_ops = {
+static const struct proto_ops unix_dgram_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
@@ -515,7 +515,7 @@ static struct proto_ops unix_dgram_ops = {
 	.sendpage =	sock_no_sendpage,
 };
 
-static struct proto_ops unix_seqpacket_ops = {
+static const struct proto_ops unix_seqpacket_ops = {
 	.family =	PF_UNIX,
 	.owner =	THIS_MODULE,
 	.release =	unix_release,
diff --git a/net/wanrouter/af_wanpipe.c b/net/wanrouter/af_wanpipe.c
index 59fec59b2132..67948bf22dc4 100644
--- a/net/wanrouter/af_wanpipe.c
+++ b/net/wanrouter/af_wanpipe.c
@@ -181,7 +181,7 @@ struct wanpipe_opt
 #endif
 
 static int sk_count;
-extern struct proto_ops wanpipe_ops;
+extern const struct proto_ops wanpipe_ops;
 static unsigned long find_free_critical;
 
 static void wanpipe_unlink_driver(struct sock *sk);
@@ -2546,7 +2546,7 @@ static int wanpipe_connect(struct socket *sock, struct sockaddr *uaddr, int addr
 	return 0;
 }
 
-struct proto_ops wanpipe_ops = {
+const struct proto_ops wanpipe_ops = {
 	.family = 	PF_WANPIPE,
 	.owner =	THIS_MODULE,
 	.release = 	wanpipe_release,
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 020d73cc8414..ca8b3b0b920d 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -64,7 +64,7 @@ int sysctl_x25_ack_holdback_timeout    = X25_DEFAULT_T2;
 HLIST_HEAD(x25_list);
 DEFINE_RWLOCK(x25_list_lock);
 
-static struct proto_ops x25_proto_ops;
+static const struct proto_ops x25_proto_ops;
 
 static struct x25_address null_x25_address = {"               "};
 
@@ -1391,7 +1391,7 @@ static struct net_proto_family x25_family_ops = {
 	.owner	=	THIS_MODULE,
 };
 
-static struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
+static const struct proto_ops SOCKOPS_WRAPPED(x25_proto_ops) = {
 	.family =	AF_X25,
 	.owner =	THIS_MODULE,
 	.release =	x25_release,
-- 
cgit v1.2.3


From 14c850212ed8f8cbb5972ad6b8812e08a0bc901c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@mandriva.com>
Date: Tue, 27 Dec 2005 02:43:12 -0200
Subject: [INET_SOCK]: Move struct inet_sock & helper functions to
 net/inet_sock.h

To help in reducing the number of include dependencies, several files were
touched as they were getting needed headers indirectly for stuff they use.

Thanks also to Alan Menegotto for pointing out that net/dccp/proto.c had
linux/dccp.h include twice.

Signed-off-by: Arnaldo Carvalho de Melo <acme@mandriva.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/ulp/ipoib/ipoib_main.c      |   2 +
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   2 +
 drivers/net/ns83820.c                          |   1 +
 drivers/net/sk98lin/skge.c                     |   1 +
 drivers/net/skge.c                             |   1 +
 drivers/net/tg3.c                              |   1 +
 fs/9p/trans_sock.c                             |   1 +
 fs/nfs/callback.c                              |   3 +
 include/linux/dccp.h                           |   3 +-
 include/linux/ip.h                             | 126 +---------------
 include/linux/ipv6.h                           |   7 +-
 include/linux/udp.h                            |   6 +-
 include/net/atmclip.h                          |   2 +-
 include/net/dst.h                              |   1 +
 include/net/icmp.h                             |   9 +-
 include/net/ieee80211_crypt.h                  |   9 +-
 include/net/inet_connection_sock.h             |   3 +-
 include/net/inet_ecn.h                         |   2 +
 include/net/inet_hashtables.h                  |  21 +--
 include/net/inet_sock.h                        | 193 +++++++++++++++++++++++++
 include/net/inet_timewait_sock.h               |   2 +-
 include/net/ip.h                               |  17 ++-
 include/net/ip_fib.h                           |   2 +
 include/net/ip_vs.h                            |  12 +-
 include/net/ipv6.h                             |   3 +
 include/net/ndisc.h                            |  17 ++-
 include/net/neighbour.h                        |   2 +-
 include/net/pkt_act.h                          |   1 -
 include/net/raw.h                              |   2 +
 include/net/udp.h                              |   4 +-
 include/net/xfrm.h                             |   3 +-
 net/bridge/br_netfilter.c                      |   4 +
 net/bridge/netfilter/ebt_log.c                 |   1 +
 net/core/netpoll.c                             |   1 +
 net/dccp/ccid.h                                |   2 +
 net/dccp/ipv4.c                                |   1 +
 net/dccp/ipv6.c                                |   1 +
 net/dccp/output.c                              |   1 +
 net/dccp/proto.c                               |   3 +-
 net/econet/af_econet.c                         |   1 +
 net/ipv4/af_inet.c                             |   1 +
 net/ipv4/ah4.c                                 |   1 +
 net/ipv4/arp.c                                 |   1 +
 net/ipv4/devinet.c                             |   1 +
 net/ipv4/esp4.c                                |   1 +
 net/ipv4/fib_frontend.c                        |   1 +
 net/ipv4/fib_hash.c                            |   1 +
 net/ipv4/fib_rules.c                           |   1 +
 net/ipv4/fib_semantics.c                       |   2 +
 net/ipv4/icmp.c                                |   1 +
 net/ipv4/igmp.c                                |   2 +
 net/ipv4/ip_input.c                            |   1 +
 net/ipv4/ip_options.c                          |   1 +
 net/ipv4/ip_sockglue.c                         |   1 +
 net/ipv4/ipcomp.c                              |   1 +
 net/ipv4/ipconfig.c                            |   2 +
 net/ipv4/ipmr.c                                |   1 +
 net/ipv4/ipvs/ip_vs_conn.c                     |   2 +
 net/ipv4/ipvs/ip_vs_ctl.c                      |   1 +
 net/ipv4/ipvs/ip_vs_dh.c                       |   2 +
 net/ipv4/ipvs/ip_vs_est.c                      |   3 +
 net/ipv4/ipvs/ip_vs_lblc.c                     |   2 +
 net/ipv4/ipvs/ip_vs_lblcr.c                    |   2 +
 net/ipv4/ipvs/ip_vs_proto_ah.c                 |   2 +
 net/ipv4/ipvs/ip_vs_proto_esp.c                |   2 +
 net/ipv4/ipvs/ip_vs_proto_udp.c                |   3 +
 net/ipv4/ipvs/ip_vs_sh.c                       |   2 +
 net/ipv4/ipvs/ip_vs_sync.c                     |   2 +
 net/ipv4/netfilter/ip_conntrack_amanda.c       |   2 +
 net/ipv4/netfilter/ip_conntrack_proto_gre.c    |   1 +
 net/ipv4/netfilter/ip_conntrack_proto_udp.c    |   1 +
 net/ipv4/netfilter/ip_conntrack_standalone.c   |   1 +
 net/ipv4/netfilter/ip_nat_snmp_basic.c         |   2 +
 net/ipv4/netfilter/ipt_MASQUERADE.c            |   2 +
 net/ipv4/netfilter/ipt_physdev.c               |   1 +
 net/ipv4/proc.c                                |   1 +
 net/ipv4/sysctl_net_ipv4.c                     |   1 +
 net/ipv4/udp.c                                 |   1 +
 net/ipv6/ah6.c                                 |   1 +
 net/ipv6/esp6.c                                |   1 +
 net/ipv6/ipcomp6.c                             |   1 +
 net/ipv6/netfilter/ip6_tables.c                |   1 +
 net/ipv6/netfilter/ip6t_LOG.c                  |   1 +
 net/ipv6/netfilter/ip6t_ah.c                   |   1 +
 net/ipv6/netfilter/ip6t_esp.c                  |   1 +
 net/sched/sch_teql.c                           |   1 +
 net/sctp/protocol.c                            |   1 +
 87 files changed, 355 insertions(+), 184 deletions(-)
 create mode 100644 include/net/inet_sock.h

(limited to 'include/linux')

diff --git a/drivers/infiniband/ulp/ipoib/ipoib_main.c b/drivers/infiniband/ulp/ipoib/ipoib_main.c
index 475d98fa9e26..780009c7eaa6 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_main.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_main.c
@@ -47,6 +47,8 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 
+#include <net/dst.h>
+
 MODULE_AUTHOR("Roland Dreier");
 MODULE_DESCRIPTION("IP-over-InfiniBand net driver");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index ef3ee035bbc8..ed0c2ead8bc1 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -43,6 +43,8 @@
 #include <linux/delay.h>
 #include <linux/completion.h>
 
+#include <net/dst.h>
+
 #include "ipoib.h"
 
 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
diff --git a/drivers/net/ns83820.c b/drivers/net/ns83820.c
index f857ae94d261..b0c3b6ab6263 100644
--- a/drivers/net/ns83820.c
+++ b/drivers/net/ns83820.c
@@ -115,6 +115,7 @@
 #include <linux/ethtool.h>
 #include <linux/timer.h>
 #include <linux/if_vlan.h>
+#include <linux/rtnetlink.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
diff --git a/drivers/net/sk98lin/skge.c b/drivers/net/sk98lin/skge.c
index ae7343934758..e1a2d52cc1fe 100644
--- a/drivers/net/sk98lin/skge.c
+++ b/drivers/net/sk98lin/skge.c
@@ -107,6 +107,7 @@
 
 #include	"h/skversion.h"
 
+#include	<linux/in.h>
 #include	<linux/module.h>
 #include	<linux/moduleparam.h>
 #include	<linux/init.h>
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index 00d683063c01..d8cc3aea032a 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 2fc9893d69e1..59d916ccc810 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -24,6 +24,7 @@
 #include <linux/compiler.h>
 #include <linux/slab.h>
 #include <linux/delay.h>
+#include <linux/in.h>
 #include <linux/init.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
diff --git a/fs/9p/trans_sock.c b/fs/9p/trans_sock.c
index a93c2bf94c33..6a9a75d40f73 100644
--- a/fs/9p/trans_sock.c
+++ b/fs/9p/trans_sock.c
@@ -26,6 +26,7 @@
  */
 
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/net.h>
 #include <linux/ipv6.h>
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index f2ca782aba33..30cae3602867 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -14,6 +14,9 @@
 #include <linux/sunrpc/svc.h>
 #include <linux/sunrpc/svcsock.h>
 #include <linux/nfs_fs.h>
+
+#include <net/inet_sock.h>
+
 #include "nfs4_fs.h"
 #include "callback.h"
 
diff --git a/include/linux/dccp.h b/include/linux/dccp.h
index d0bdb499cf8d..088529f54965 100644
--- a/include/linux/dccp.h
+++ b/include/linux/dccp.h
@@ -192,10 +192,9 @@ enum {
 #include <linux/workqueue.h>
 
 #include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
 #include <net/inet_timewait_sock.h>
-#include <net/sock.h>
 #include <net/tcp_states.h>
-#include <net/tcp.h>
 
 enum dccp_state {
 	DCCP_OPEN	= TCP_ESTABLISHED,
diff --git a/include/linux/ip.h b/include/linux/ip.h
index 6ccc596c19c8..9e2eb9a602eb 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -16,6 +16,7 @@
  */
 #ifndef _LINUX_IP_H
 #define _LINUX_IP_H
+#include <linux/types.h>
 #include <asm/byteorder.h>
 
 #define IPTOS_TOS_MASK		0x1E
@@ -78,131 +79,6 @@
 #define	IPOPT_TS_TSANDADDR	1		/* timestamps and addresses */
 #define	IPOPT_TS_PRESPEC	3		/* specified modules only */
 
-#ifdef __KERNEL__
-#include <linux/config.h>
-#include <linux/types.h>
-#include <net/request_sock.h>
-#include <net/sock.h>
-#include <linux/igmp.h>
-#include <net/flow.h>
-
-struct ip_options {
-  __u32		faddr;				/* Saved first hop address */
-  unsigned char	optlen;
-  unsigned char srr;
-  unsigned char rr;
-  unsigned char ts;
-  unsigned char is_setbyuser:1,			/* Set by setsockopt?			*/
-                is_data:1,			/* Options in __data, rather than skb	*/
-                is_strictroute:1,		/* Strict source route			*/
-                srr_is_hit:1,			/* Packet destination addr was our one	*/
-                is_changed:1,			/* IP checksum more not valid		*/	
-                rr_needaddr:1,			/* Need to record addr of outgoing dev	*/
-                ts_needtime:1,			/* Need to record timestamp		*/
-                ts_needaddr:1;			/* Need to record addr of outgoing dev  */
-  unsigned char router_alert;
-  unsigned char __pad1;
-  unsigned char __pad2;
-  unsigned char __data[0];
-};
-
-#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
-
-struct inet_request_sock {
-	struct request_sock	req;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	u16			inet6_rsk_offset;
-	/* 2 bytes hole, try to pack */
-#endif
-	u32			loc_addr;
-	u32			rmt_addr;
-	u16			rmt_port;
-	u16			snd_wscale : 4, 
-				rcv_wscale : 4, 
-				tstamp_ok  : 1,
-				sack_ok	   : 1,
-				wscale_ok  : 1,
-				ecn_ok	   : 1,
-				acked	   : 1;
-	struct ip_options	*opt;
-};
-
-static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
-{
-	return (struct inet_request_sock *)sk;
-}
-
-struct ipv6_pinfo;
-
-struct inet_sock {
-	/* sk and pinet6 has to be the first two members of inet_sock */
-	struct sock		sk;
-#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
-	struct ipv6_pinfo	*pinet6;
-#endif
-	/* Socket demultiplex comparisons on incoming packets. */
-	__u32			daddr;		/* Foreign IPv4 addr */
-	__u32			rcv_saddr;	/* Bound local IPv4 addr */
-	__u16			dport;		/* Destination port */
-	__u16			num;		/* Local port */
-	__u32			saddr;		/* Sending source */
-	__s16			uc_ttl;		/* Unicast TTL */
-	__u16			cmsg_flags;
-	struct ip_options	*opt;
-	__u16			sport;		/* Source port */
-	__u16			id;		/* ID counter for DF pkts */
-	__u8			tos;		/* TOS */
-	__u8			mc_ttl;		/* Multicasting TTL */
-	__u8			pmtudisc;
-	unsigned		recverr : 1,
-				is_icsk : 1,	/* inet_connection_sock? */
-				freebind : 1,
-				hdrincl : 1,
-				mc_loop : 1;
-	int			mc_index;	/* Multicast device index */
-	__u32			mc_addr;
-	struct ip_mc_socklist	*mc_list;	/* Group array */
-	/*
-	 * Following members are used to retain the infomation to build
-	 * an ip header on each ip fragmentation while the socket is corked.
-	 */
-	struct {
-		unsigned int		flags;
-		unsigned int		fragsize;
-		struct ip_options	*opt;
-		struct rtable		*rt;
-		int			length; /* Total length of all frames */
-		u32			addr;
-		struct flowi		fl;
-	} cork;
-};
-
-#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
-#define IPCORK_ALLFRAG	2	/* always fragment (for ipv6 for now) */
-
-static inline struct inet_sock *inet_sk(const struct sock *sk)
-{
-	return (struct inet_sock *)sk;
-}
-
-static inline void __inet_sk_copy_descendant(struct sock *sk_to,
-					     const struct sock *sk_from,
-					     const int ancestor_size)
-{
-	memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
-	       sk_from->sk_prot->obj_size - ancestor_size);
-}
-#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE))
-static inline void inet_sk_copy_descendant(struct sock *sk_to,
-					   const struct sock *sk_from)
-{
-	__inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
-}
-#endif
-#endif
-
-extern int inet_sk_rebuild_header(struct sock *sk);
-
 struct iphdr {
 #if defined(__LITTLE_ENDIAN_BITFIELD)
 	__u8	ihl:4,
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index a0d04891fe12..93bbed5c6cf4 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -171,12 +171,13 @@ enum {
 };
 
 #ifdef __KERNEL__
-#include <linux/in6.h>          /* struct sockaddr_in6 */
 #include <linux/icmpv6.h>
-#include <net/if_inet6.h>       /* struct ipv6_mc_socklist */
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
+#include <net/if_inet6.h>       /* struct ipv6_mc_socklist */
+#include <net/inet_sock.h>
+
 /* 
    This structure contains results of exthdrs parsing
    as offsets from skb->nh.
@@ -346,8 +347,6 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to,
 #define __ipv6_only_sock(sk)	(inet6_sk(sk)->ipv6only)
 #define ipv6_only_sock(sk)	((sk)->sk_family == PF_INET6 && __ipv6_only_sock(sk))
 
-#include <linux/tcp.h>
-
 struct inet6_timewait_sock {
 	struct in6_addr tw_v6_daddr;
 	struct in6_addr	tw_v6_rcv_saddr;
diff --git a/include/linux/udp.h b/include/linux/udp.h
index b60e0b4a25c4..85a55658831c 100644
--- a/include/linux/udp.h
+++ b/include/linux/udp.h
@@ -35,10 +35,10 @@ struct udphdr {
 #define UDP_ENCAP_ESPINUDP	2 /* draft-ietf-ipsec-udp-encaps-06 */
 
 #ifdef __KERNEL__
-
 #include <linux/config.h>
-#include <net/sock.h>
-#include <linux/ip.h>
+#include <linux/types.h>
+
+#include <net/inet_sock.h>
 
 struct udp_sock {
 	/* inet_sock has to be the first member */
diff --git a/include/net/atmclip.h b/include/net/atmclip.h
index 47048b1d179a..90fcc98e676f 100644
--- a/include/net/atmclip.h
+++ b/include/net/atmclip.h
@@ -7,7 +7,6 @@
 #define _ATMCLIP_H
 
 #include <linux/netdevice.h>
-#include <linux/skbuff.h>
 #include <linux/atm.h>
 #include <linux/atmdev.h>
 #include <linux/atmarp.h>
@@ -18,6 +17,7 @@
 #define CLIP_VCC(vcc) ((struct clip_vcc *) ((vcc)->user_back))
 #define NEIGH2ENTRY(neigh) ((struct atmarp_entry *) (neigh)->primary_key)
 
+struct sk_buff;
 
 struct clip_vcc {
 	struct atm_vcc	*vcc;		/* VCC descriptor */
diff --git a/include/net/dst.h b/include/net/dst.h
index 6c196a5baf24..bee8b84d329d 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -9,6 +9,7 @@
 #define _NET_DST_H
 
 #include <linux/config.h>
+#include <linux/netdevice.h>
 #include <linux/rtnetlink.h>
 #include <linux/rcupdate.h>
 #include <linux/jiffies.h>
diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6cdebeee5f96..e7c3f20fbafc 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -20,12 +20,9 @@
 
 #include <linux/config.h>
 #include <linux/icmp.h>
-#include <linux/skbuff.h>
 
-#include <net/sock.h>
-#include <net/protocol.h>
+#include <net/inet_sock.h>
 #include <net/snmp.h>
-#include <linux/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -38,6 +35,10 @@ DECLARE_SNMP_STAT(struct icmp_mib, icmp_statistics);
 #define ICMP_INC_STATS_BH(field)	SNMP_INC_STATS_BH(icmp_statistics, field)
 #define ICMP_INC_STATS_USER(field) 	SNMP_INC_STATS_USER(icmp_statistics, field)
 
+struct dst_entry;
+struct net_proto_family;
+struct sk_buff;
+
 extern void	icmp_send(struct sk_buff *skb_in,  int type, int code, u32 info);
 extern int	icmp_rcv(struct sk_buff *skb);
 extern int	icmp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/include/net/ieee80211_crypt.h b/include/net/ieee80211_crypt.h
index 225fc751d464..03b766afdc39 100644
--- a/include/net/ieee80211_crypt.h
+++ b/include/net/ieee80211_crypt.h
@@ -23,12 +23,17 @@
 #ifndef IEEE80211_CRYPT_H
 #define IEEE80211_CRYPT_H
 
-#include <linux/skbuff.h>
+#include <linux/types.h>
+#include <linux/list.h>
+#include <asm/atomic.h>
 
 enum {
 	IEEE80211_CRYPTO_TKIP_COUNTERMEASURES = (1 << 0),
 };
 
+struct sk_buff;
+struct module;
+
 struct ieee80211_crypto_ops {
 	const char *name;
 	struct list_head list;
@@ -87,6 +92,8 @@ struct ieee80211_crypt_data {
 	atomic_t refcnt;
 };
 
+struct ieee80211_device;
+
 int ieee80211_register_crypto_ops(struct ieee80211_crypto_ops *ops);
 int ieee80211_unregister_crypto_ops(struct ieee80211_crypto_ops *ops);
 struct ieee80211_crypto_ops *ieee80211_get_crypto_ops(const char *name);
diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index 91888967d3e3..50234fa56a68 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -16,9 +16,10 @@
 #define _INET_CONNECTION_SOCK_H
 
 #include <linux/compiler.h>
-#include <linux/ip.h>
 #include <linux/string.h>
 #include <linux/timer.h>
+
+#include <net/inet_sock.h>
 #include <net/request_sock.h>
 
 #define INET_CSK_DEBUG 1
diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h
index b0c47e2eccf1..d599c6bfbb86 100644
--- a/include/net/inet_ecn.h
+++ b/include/net/inet_ecn.h
@@ -3,6 +3,8 @@
 
 #include <linux/ip.h>
 #include <linux/skbuff.h>
+
+#include <net/inet_sock.h>
 #include <net/dsfield.h>
 
 enum {
diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h
index c83baa79f66e..135d80fd658e 100644
--- a/include/net/inet_hashtables.h
+++ b/include/net/inet_hashtables.h
@@ -26,6 +26,7 @@
 #include <linux/wait.h>
 
 #include <net/inet_connection_sock.h>
+#include <net/inet_sock.h>
 #include <net/route.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
@@ -128,26 +129,6 @@ struct inet_hashinfo {
 	kmem_cache_t			*bind_bucket_cachep;
 };
 
-static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
-			       const __u32 faddr, const __u16 fport)
-{
-	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
-	h ^= h >> 16;
-	h ^= h >> 8;
-	return h;
-}
-
-static inline int inet_sk_ehashfn(const struct sock *sk)
-{
-	const struct inet_sock *inet = inet_sk(sk);
-	const __u32 laddr = inet->rcv_saddr;
-	const __u16 lport = inet->num;
-	const __u32 faddr = inet->daddr;
-	const __u16 fport = inet->dport;
-
-	return inet_ehashfn(laddr, lport, faddr, fport);
-}
-
 static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	struct inet_hashinfo *hashinfo,
 	unsigned int hash)
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
new file mode 100644
index 000000000000..883eb529ef8e
--- /dev/null
+++ b/include/net/inet_sock.h
@@ -0,0 +1,193 @@
+/*
+ * INET		An implementation of the TCP/IP protocol suite for the LINUX
+ *		operating system.  INET is implemented using the  BSD Socket
+ *		interface as the means of communication with the user level.
+ *
+ *		Definitions for inet_sock
+ *
+ * Authors:	Many, reorganised here by
+ * 		Arnaldo Carvalho de Melo <acme@mandriva.com>
+ *
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ */
+#ifndef _INET_SOCK_H
+#define _INET_SOCK_H
+
+#include <linux/config.h>
+
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <net/flow.h>
+#include <net/sock.h>
+#include <net/request_sock.h>
+
+/** struct ip_options - IP Options
+ *
+ * @faddr - Saved first hop address
+ * @is_setbyuser - Set by setsockopt?
+ * @is_data - Options in __data, rather than skb
+ * @is_strictroute - Strict source route
+ * @srr_is_hit - Packet destination addr was our one
+ * @is_changed - IP checksum more not valid
+ * @rr_needaddr - Need to record addr of outgoing dev
+ * @ts_needtime - Need to record timestamp
+ * @ts_needaddr - Need to record addr of outgoing dev
+ */
+struct ip_options {
+	__u32		faddr;
+	unsigned char	optlen;
+	unsigned char	srr;
+	unsigned char	rr;
+	unsigned char	ts;
+	unsigned char	is_setbyuser:1,
+			is_data:1,
+			is_strictroute:1,
+			srr_is_hit:1,
+			is_changed:1,
+			rr_needaddr:1,
+			ts_needtime:1,
+			ts_needaddr:1;
+	unsigned char	router_alert;
+	unsigned char	__pad1;
+	unsigned char	__pad2;
+	unsigned char	__data[0];
+};
+
+#define optlength(opt) (sizeof(struct ip_options) + opt->optlen)
+
+struct inet_request_sock {
+	struct request_sock	req;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	u16			inet6_rsk_offset;
+	/* 2 bytes hole, try to pack */
+#endif
+	u32			loc_addr;
+	u32			rmt_addr;
+	u16			rmt_port;
+	u16			snd_wscale : 4, 
+				rcv_wscale : 4, 
+				tstamp_ok  : 1,
+				sack_ok	   : 1,
+				wscale_ok  : 1,
+				ecn_ok	   : 1,
+				acked	   : 1;
+	struct ip_options	*opt;
+};
+
+static inline struct inet_request_sock *inet_rsk(const struct request_sock *sk)
+{
+	return (struct inet_request_sock *)sk;
+}
+
+struct ip_mc_socklist;
+struct ipv6_pinfo;
+struct rtable;
+
+/** struct inet_sock - representation of INET sockets
+ *
+ * @sk - ancestor class
+ * @pinet6 - pointer to IPv6 control block
+ * @daddr - Foreign IPv4 addr
+ * @rcv_saddr - Bound local IPv4 addr
+ * @dport - Destination port
+ * @num - Local port
+ * @saddr - Sending source
+ * @uc_ttl - Unicast TTL
+ * @sport - Source port
+ * @id - ID counter for DF pkts
+ * @tos - TOS
+ * @mc_ttl - Multicasting TTL
+ * @is_icsk - is this an inet_connection_sock?
+ * @mc_index - Multicast device index
+ * @mc_list - Group array
+ * @cork - info to build ip hdr on each ip frag while socket is corked
+ */
+struct inet_sock {
+	/* sk and pinet6 has to be the first two members of inet_sock */
+	struct sock		sk;
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+	struct ipv6_pinfo	*pinet6;
+#endif
+	/* Socket demultiplex comparisons on incoming packets. */
+	__u32			daddr;
+	__u32			rcv_saddr;
+	__u16			dport;
+	__u16			num;
+	__u32			saddr;
+	__s16			uc_ttl;
+	__u16			cmsg_flags;
+	struct ip_options	*opt;
+	__u16			sport;
+	__u16			id;
+	__u8			tos;
+	__u8			mc_ttl;
+	__u8			pmtudisc;
+	__u8			recverr:1,
+				is_icsk:1,
+				freebind:1,
+				hdrincl:1,
+				mc_loop:1;
+	int			mc_index;
+	__u32			mc_addr;
+	struct ip_mc_socklist	*mc_list;
+	struct {
+		unsigned int		flags;
+		unsigned int		fragsize;
+		struct ip_options	*opt;
+		struct rtable		*rt;
+		int			length; /* Total length of all frames */
+		u32			addr;
+		struct flowi		fl;
+	} cork;
+};
+
+#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
+#define IPCORK_ALLFRAG	2	/* always fragment (for ipv6 for now) */
+
+static inline struct inet_sock *inet_sk(const struct sock *sk)
+{
+	return (struct inet_sock *)sk;
+}
+
+static inline void __inet_sk_copy_descendant(struct sock *sk_to,
+					     const struct sock *sk_from,
+					     const int ancestor_size)
+{
+	memcpy(inet_sk(sk_to) + 1, inet_sk(sk_from) + 1,
+	       sk_from->sk_prot->obj_size - ancestor_size);
+}
+#if !(defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE))
+static inline void inet_sk_copy_descendant(struct sock *sk_to,
+					   const struct sock *sk_from)
+{
+	__inet_sk_copy_descendant(sk_to, sk_from, sizeof(struct inet_sock));
+}
+#endif
+
+extern int inet_sk_rebuild_header(struct sock *sk);
+
+static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport,
+					const __u32 faddr, const __u16 fport)
+{
+	unsigned int h = (laddr ^ lport) ^ (faddr ^ fport);
+	h ^= h >> 16;
+	h ^= h >> 8;
+	return h;
+}
+
+static inline int inet_sk_ehashfn(const struct sock *sk)
+{
+	const struct inet_sock *inet = inet_sk(sk);
+	const __u32 laddr = inet->rcv_saddr;
+	const __u16 lport = inet->num;
+	const __u32 faddr = inet->daddr;
+	const __u16 fport = inet->dport;
+
+	return inet_ehashfn(laddr, lport, faddr, fport);
+}
+
+#endif	/* _INET_SOCK_H */
diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h
index e396a65473d7..1da294c47522 100644
--- a/include/net/inet_timewait_sock.h
+++ b/include/net/inet_timewait_sock.h
@@ -17,13 +17,13 @@
 
 #include <linux/config.h>
 
-#include <linux/ip.h>
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/timer.h>
 #include <linux/types.h>
 #include <linux/workqueue.h>
 
+#include <net/inet_sock.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <net/timewait_sock.h>
diff --git a/include/net/ip.h b/include/net/ip.h
index 4d6294ba038e..f7e7fd728b67 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -24,14 +24,10 @@
 
 #include <linux/config.h>
 #include <linux/types.h>
-#include <linux/socket.h>
 #include <linux/ip.h>
 #include <linux/in.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/in_route.h>
-#include <net/route.h>
-#include <net/arp.h>
+
+#include <net/inet_sock.h>
 #include <net/snmp.h>
 
 struct sock;
@@ -75,6 +71,13 @@ extern rwlock_t ip_ra_lock;
 
 #define IP_FRAG_TIME	(30 * HZ)		/* fragment lifetime	*/
 
+struct msghdr;
+struct net_device;
+struct packet_type;
+struct rtable;
+struct sk_buff;
+struct sockaddr;
+
 extern void		ip_mc_dropsocket(struct sock *);
 extern void		ip_mc_dropdevice(struct net_device *dev);
 extern int		igmp_mc_proc_init(void);
@@ -184,6 +187,8 @@ extern int sysctl_ip_dynaddr;
 extern void ipfrag_init(void);
 
 #ifdef CONFIG_INET
+#include <net/dst.h>
+
 /* The function in 2.2 was invalid, producing wrong result for
  * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
 static inline
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 14de4ebd1211..e000fa2cd5f6 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -238,6 +238,8 @@ extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
 			       struct net_device *dev, u32 *spec_dst, u32 *itag);
 extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
 
+struct rtentry;
+
 /* Exported by fib_semantics.c */
 extern int ip_fib_check_default(u32 gw, struct net_device *dev);
 extern int fib_sync_down(u32 local, struct net_device *dev, int force);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index 3b5559a023a4..7d2674fde19a 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -251,16 +251,15 @@ struct ip_vs_daemon_user {
 #include <linux/config.h>
 #include <linux/list.h>                 /* for struct list_head */
 #include <linux/spinlock.h>             /* for struct rwlock_t */
-#include <linux/skbuff.h>               /* for struct sk_buff */
-#include <linux/ip.h>                   /* for struct iphdr */
 #include <asm/atomic.h>                 /* for struct atomic_t */
-#include <linux/netdevice.h>		/* for struct neighbour */
-#include <net/dst.h>			/* for struct dst_entry */
-#include <net/udp.h>
 #include <linux/compiler.h>
+#include <linux/timer.h>
 
+#include <net/checksum.h>
 
 #ifdef CONFIG_IP_VS_DEBUG
+#include <linux/net.h>
+
 extern int ip_vs_get_debug_level(void);
 #define IP_VS_DBG(level, msg...)			\
     do {						\
@@ -429,8 +428,11 @@ struct ip_vs_stats
 	spinlock_t              lock;           /* spin lock */
 };
 
+struct dst_entry;
+struct iphdr;
 struct ip_vs_conn;
 struct ip_vs_app;
+struct sk_buff;
 
 struct ip_vs_protocol {
 	struct ip_vs_protocol	*next;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 11a725662c36..860bbac4c4ee 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -541,6 +541,9 @@ extern int sysctl_ip6frag_secret_interval;
 extern const struct proto_ops inet6_stream_ops;
 extern const struct proto_ops inet6_dgram_ops;
 
+struct group_source_req;
+struct group_filter;
+
 extern int ip6_mc_source(int add, int omode, struct sock *sk,
 			 struct group_source_req *pgsr);
 extern int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index f85d6e4b7442..bbac87eeb422 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -35,11 +35,20 @@ enum {
 
 #ifdef __KERNEL__
 
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include <linux/config.h>
+#include <linux/compiler.h>
 #include <linux/icmpv6.h>
+#include <linux/in6.h>
+#include <linux/types.h>
+
 #include <net/neighbour.h>
-#include <asm/atomic.h>
+
+struct ctl_table;
+struct file;
+struct inet6_dev;
+struct net_device;
+struct net_proto_family;
+struct sk_buff;
 
 extern struct neigh_table nd_tbl;
 
@@ -108,7 +117,7 @@ extern int			igmp6_event_report(struct sk_buff *skb);
 extern void			igmp6_cleanup(void);
 
 #ifdef CONFIG_SYSCTL
-extern int 			ndisc_ifinfo_sysctl_change(ctl_table *ctl,
+extern int 			ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
 							   int write,
 							   struct file * filp,
 							   void __user *buffer,
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index 34c07731933d..6fa9ae190741 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -49,8 +49,8 @@
 #ifdef __KERNEL__
 
 #include <asm/atomic.h>
-#include <linux/skbuff.h>
 #include <linux/netdevice.h>
+#include <linux/skbuff.h>
 #include <linux/rcupdate.h>
 #include <linux/seq_file.h>
 
diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h
index bd08964b72c0..b225d8472b7e 100644
--- a/include/net/pkt_act.h
+++ b/include/net/pkt_act.h
@@ -15,7 +15,6 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
-#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/rtnetlink.h>
 #include <linux/module.h>
diff --git a/include/net/raw.h b/include/net/raw.h
index f47917469b12..e67b28a0248c 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -19,6 +19,8 @@
 
 #include <linux/config.h>
 
+#include <net/protocol.h>
+
 extern struct proto raw_prot;
 
 extern void 	raw_err(struct sock *, struct sk_buff *, u32 info);
diff --git a/include/net/udp.h b/include/net/udp.h
index 107b9d791a1f..766fba1369ce 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -22,9 +22,8 @@
 #ifndef _UDP_H
 #define _UDP_H
 
-#include <linux/udp.h>
-#include <linux/ip.h>
 #include <linux/list.h>
+#include <net/inet_sock.h>
 #include <net/sock.h>
 #include <net/snmp.h>
 #include <linux/seq_file.h>
@@ -62,6 +61,7 @@ static inline int udp_lport_inuse(u16 num)
 
 extern struct proto udp_prot;
 
+struct sk_buff;
 
 extern void	udp_err(struct sk_buff *, u32);
 
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 487abca3ca6f..07d7b50cdd76 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -2,11 +2,12 @@
 #define _NET_XFRM_H
 
 #include <linux/compiler.h>
+#include <linux/in.h>
 #include <linux/xfrm.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/skbuff.h>
-#include <linux/netdevice.h>
+#include <linux/socket.h>
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <linux/in6.h>
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 23422bd53a5e..223f8270daee 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -26,6 +26,7 @@
 #include <linux/ip.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/if_vlan.h>
 #include <linux/netfilter_bridge.h>
@@ -33,8 +34,11 @@
 #include <linux/netfilter_ipv6.h>
 #include <linux/netfilter_arp.h>
 #include <linux/in_route.h>
+
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/route.h>
+
 #include <asm/uaccess.h>
 #include <asm/checksum.h>
 #include "br_private.h"
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index c436e6c6242b..9f6e0193ae10 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -9,6 +9,7 @@
  *
  */
 
+#include <linux/in.h>
 #include <linux/netfilter_bridge/ebtables.h>
 #include <linux/netfilter_bridge/ebt_log.h>
 #include <linux/netfilter.h>
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 49424a42a2c0..281a632fa6a6 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -13,6 +13,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/string.h>
+#include <linux/if_arp.h>
 #include <linux/inetdevice.h>
 #include <linux/inet.h>
 #include <linux/interrupt.h>
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
index c37eeeaf5c6e..de681c6ad081 100644
--- a/net/dccp/ccid.h
+++ b/net/dccp/ccid.h
@@ -21,6 +21,8 @@
 
 #define CCID_MAX 255
 
+struct tcp_info;
+
 struct ccid {
 	unsigned char	ccid_id;
 	const char	*ccid_name;
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index 99e8afa7ba1e..3f244670764a 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -19,6 +19,7 @@
 
 #include <net/icmp.h>
 #include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
 #include <net/sock.h>
 #include <net/timewait_sock.h>
 #include <net/tcp_states.h>
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 2e194c8f9953..c609dc78f487 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -20,6 +20,7 @@
 #include <net/addrconf.h>
 #include <net/inet_common.h>
 #include <net/inet_hashtables.h>
+#include <net/inet_sock.h>
 #include <net/inet6_connection_sock.h>
 #include <net/inet6_hashtables.h>
 #include <net/ip6_route.h>
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 95a3c2c6a3ce..efd7ffb903a1 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -15,6 +15,7 @@
 #include <linux/kernel.h>
 #include <linux/skbuff.h>
 
+#include <net/inet_sock.h>
 #include <net/sock.h>
 
 #include "ackvec.h"
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e4e629ed9bf7..65b11ea90d85 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,7 +24,7 @@
 #include <net/checksum.h>
 
 #include <net/inet_common.h>
-#include <net/ip.h>
+#include <net/inet_sock.h>
 #include <net/protocol.h>
 #include <net/sock.h>
 #include <net/xfrm.h>
@@ -34,7 +34,6 @@
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/poll.h>
-#include <linux/dccp.h>
 
 #include "ccid.h"
 #include "dccp.h"
diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c
index ff58f49c8b4a..70fb2b88da65 100644
--- a/net/econet/af_econet.c
+++ b/net/econet/af_econet.c
@@ -31,6 +31,7 @@
 #include <linux/if_arp.h>
 #include <linux/wireless.h>
 #include <linux/skbuff.h>
+#include <linux/udp.h>
 #include <net/sock.h>
 #include <net/inet_common.h>
 #include <linux/stat.h>
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 4ed8a814c6cb..36a6306ca5a3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -93,6 +93,7 @@
 #include <linux/smp_lock.h>
 #include <linux/inet.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/ip.h>
 #include <net/protocol.h>
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index 035ad2c9e1ba..aed537fa2c88 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -6,6 +6,7 @@
 #include <linux/crypto.h>
 #include <linux/pfkeyv2.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <asm/scatterlist.h>
 
 
diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
index b425748f02d7..37432088fe6d 100644
--- a/net/ipv4/arp.c
+++ b/net/ipv4/arp.c
@@ -86,6 +86,7 @@
 #include <linux/in.h>
 #include <linux/mm.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/fddidevice.h>
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 04a6fe3e95a2..7b9bb28e2ee9 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -58,6 +58,7 @@
 #endif
 #include <linux/kmod.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/ip_fib.h>
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 1b18ce66e7b7..73bfcae8af9c 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -9,6 +9,7 @@
 #include <linux/pfkeyv2.h>
 #include <linux/random.h>
 #include <net/icmp.h>
+#include <net/protocol.h>
 #include <net/udp.h>
 
 /* decapsulation data for use when post-processing */
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 19b1b984d687..18f5e509281a 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -30,6 +30,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
index 7ea0209cb169..e2890ec8159e 100644
--- a/net/ipv4/fib_hash.c
+++ b/net/ipv4/fib_hash.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 0b298bbc1518..0dd4d06e456d 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -33,6 +33,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 6d2a6ac070e3..ef4724de7350 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -29,6 +29,7 @@
 #include <linux/errno.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/proc_fs.h>
@@ -36,6 +37,7 @@
 #include <linux/netlink.h>
 #include <linux/init.h>
 
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 92e23b2ad4d2..be5a519cd2f8 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -73,6 +73,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/string.h>
 #include <linux/netfilter_ipv4.h>
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 4a195c724f01..34758118c10c 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -91,6 +91,8 @@
 #include <linux/if_arp.h>
 #include <linux/rtnetlink.h>
 #include <linux/times.h>
+
+#include <net/arp.h>
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <net/route.h>
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 473d0f2b2e0d..e45846ae570b 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -128,6 +128,7 @@
 #include <linux/sockios.h>
 #include <linux/in.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index dbe12da8d8b3..d3f6c468faf4 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -22,6 +22,7 @@
 #include <net/sock.h>
 #include <net/ip.h>
 #include <net/icmp.h>
+#include <net/route.h>
 
 /* 
  * Write options to IP header, record destination address to
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index add019c746f8..6986e11d65cc 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -25,6 +25,7 @@
 #include <linux/skbuff.h>
 #include <linux/ip.h>
 #include <linux/icmp.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <net/sock.h>
 #include <net/ip.h>
diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
index fc718df17b40..d64e2ec8da7b 100644
--- a/net/ipv4/ipcomp.c
+++ b/net/ipv4/ipcomp.c
@@ -28,6 +28,7 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 #include <net/ipcomp.h>
+#include <net/protocol.h>
 
 struct ipcomp_tfms {
 	struct list_head list;
diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
index e8674baaa8d9..bb3613ec448c 100644
--- a/net/ipv4/ipconfig.c
+++ b/net/ipv4/ipconfig.c
@@ -42,6 +42,7 @@
 #include <linux/in.h>
 #include <linux/if.h>
 #include <linux/inet.h>
+#include <linux/inetdevice.h>
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/skbuff.h>
@@ -58,6 +59,7 @@
 #include <net/arp.h>
 #include <net/ip.h>
 #include <net/ipconfig.h>
+#include <net/route.h>
 
 #include <asm/uaccess.h>
 #include <net/checksum.h>
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 302b7eb507c9..caa3b7d2e48a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -52,6 +52,7 @@
 #include <net/ip.h>
 #include <net/protocol.h>
 #include <linux/skbuff.h>
+#include <net/route.h>
 #include <net/sock.h>
 #include <net/icmp.h>
 #include <net/udp.h>
diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
index 2a3a8c59c655..d35cea31cb55 100644
--- a/net/ipv4/ipvs/ip_vs_conn.c
+++ b/net/ipv4/ipvs/ip_vs_conn.c
@@ -24,7 +24,9 @@
  *
  */
 
+#include <linux/in.h>
 #include <linux/kernel.h>
+#include <linux/module.h>
 #include <linux/vmalloc.h>
 #include <linux/proc_fs.h>		/* for proc_net_* */
 #include <linux/seq_file.h>
diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c
index 9bdcf31b760e..fe2c39d2a002 100644
--- a/net/ipv4/ipvs/ip_vs_ctl.c
+++ b/net/ipv4/ipvs/ip_vs_ctl.c
@@ -35,6 +35,7 @@
 #include <linux/netfilter_ipv4.h>
 
 #include <net/ip.h>
+#include <net/route.h>
 #include <net/sock.h>
 
 #include <asm/uaccess.h>
diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c
index f3bc320dce93..9fee19c4c617 100644
--- a/net/ipv4/ipvs/ip_vs_dh.c
+++ b/net/ipv4/ipvs/ip_vs_dh.c
@@ -37,8 +37,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_est.c b/net/ipv4/ipvs/ip_vs_est.c
index 67b3e2fc1fa1..e7004741ac73 100644
--- a/net/ipv4/ipvs/ip_vs_est.c
+++ b/net/ipv4/ipvs/ip_vs_est.c
@@ -13,7 +13,10 @@
  * Changes:
  *
  */
+#include <linux/config.h>
 #include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 
 #include <net/ip_vs.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c
index b6dad7e3710c..6e5cb92a5c83 100644
--- a/net/ipv4/ipvs/ip_vs_lblc.c
+++ b/net/ipv4/ipvs/ip_vs_lblc.c
@@ -41,8 +41,10 @@
  * me to write this module.
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c
index 8c78ef76c121..32ba37ba72d8 100644
--- a/net/ipv4/ipvs/ip_vs_lblcr.c
+++ b/net/ipv4/ipvs/ip_vs_lblcr.c
@@ -39,8 +39,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 /* for sysctl */
 #include <linux/fs.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_ah.c b/net/ipv4/ipvs/ip_vs_proto_ah.c
index 453e94a0bbd7..8b0505b09317 100644
--- a/net/ipv4/ipvs/ip_vs_proto_ah.c
+++ b/net/ipv4/ipvs/ip_vs_proto_ah.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_esp.c b/net/ipv4/ipvs/ip_vs_proto_esp.c
index 478e5c7c7e8e..c36ccf057a19 100644
--- a/net/ipv4/ipvs/ip_vs_proto_esp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_esp.c
@@ -12,6 +12,8 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/netfilter.h>
diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c
index 8ae5f2e0aefa..89d9175d8f28 100644
--- a/net/ipv4/ipvs/ip_vs_proto_udp.c
+++ b/net/ipv4/ipvs/ip_vs_proto_udp.c
@@ -15,8 +15,11 @@
  *
  */
 
+#include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/kernel.h>
 #include <linux/netfilter_ipv4.h>
+#include <linux/udp.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c
index 6f7c50e44a39..7775e6cc68be 100644
--- a/net/ipv4/ipvs/ip_vs_sh.c
+++ b/net/ipv4/ipvs/ip_vs_sh.c
@@ -34,8 +34,10 @@
  *
  */
 
+#include <linux/ip.h>
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/skbuff.h>
 
 #include <net/ip_vs.h>
 
diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c
index 2e5ced3d8062..1bca714bda3d 100644
--- a/net/ipv4/ipvs/ip_vs_sync.c
+++ b/net/ipv4/ipvs/ip_vs_sync.c
@@ -21,12 +21,14 @@
 
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/inetdevice.h>
 #include <linux/net.h>
 #include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/skbuff.h>
 #include <linux/in.h>
 #include <linux/igmp.h>                 /* for ip_mc_join_group */
+#include <linux/udp.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
index e52847fa10f5..0366eedb4d70 100644
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ b/net/ipv4/netfilter/ip_conntrack_amanda.c
@@ -18,11 +18,13 @@
  *
  */
 
+#include <linux/in.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netfilter.h>
 #include <linux/ip.h>
 #include <linux/moduleparam.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
index 744abb9d377a..57956dee60c8 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
@@ -31,6 +31,7 @@
 #include <linux/ip.h>
 #include <linux/in.h>
 #include <linux/list.h>
+#include <linux/seq_file.h>
 
 static DEFINE_RWLOCK(ip_ct_gre_lock);
 #define ASSERT_READ_LOCK(x)
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
index f2dcac7c7660..46becbe4fe58 100644
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
@@ -11,6 +11,7 @@
 #include <linux/timer.h>
 #include <linux/netfilter.h>
 #include <linux/in.h>
+#include <linux/ip.h>
 #include <linux/udp.h>
 #include <linux/seq_file.h>
 #include <net/checksum.h>
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
index dd476b191f4b..a88bcc551244 100644
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ b/net/ipv4/netfilter/ip_conntrack_standalone.c
@@ -27,6 +27,7 @@
 #endif
 #include <net/checksum.h>
 #include <net/ip.h>
+#include <net/route.h>
 
 #define ASSERT_READ_LOCK(x)
 #define ASSERT_WRITE_LOCK(x)
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/ip_nat_snmp_basic.c
index 8acb7ed40b47..4f95d477805c 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/ip_nat_snmp_basic.c
@@ -44,6 +44,7 @@
  *
  */
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
@@ -53,6 +54,7 @@
 #include <linux/netfilter_ipv4/ip_conntrack_helper.h>
 #include <linux/netfilter_ipv4/ip_nat_helper.h>
 #include <linux/ip.h>
+#include <linux/udp.h>
 #include <net/checksum.h>
 #include <net/udp.h>
 #include <asm/uaccess.h>
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 275a174c6fe6..27860510ca6d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -11,6 +11,7 @@
 
 #include <linux/config.h>
 #include <linux/types.h>
+#include <linux/inetdevice.h>
 #include <linux/ip.h>
 #include <linux/timer.h>
 #include <linux/module.h>
@@ -18,6 +19,7 @@
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/checksum.h>
+#include <net/route.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/netfilter_ipv4/ip_nat_rule.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/netfilter/ipt_physdev.c b/net/ipv4/netfilter/ipt_physdev.c
index 1a53924041fc..03f554857a4d 100644
--- a/net/ipv4/netfilter/ipt_physdev.c
+++ b/net/ipv4/netfilter/ipt_physdev.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/netdevice.h>
 #include <linux/skbuff.h>
 #include <linux/netfilter_ipv4/ipt_physdev.h>
 #include <linux/netfilter_ipv4/ip_tables.h>
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 0d7dc668db46..39d49dc333a7 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -38,6 +38,7 @@
 #include <net/protocol.h>
 #include <net/tcp.h>
 #include <net/udp.h>
+#include <linux/inetdevice.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <net/sock.h>
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index dbf82955aabe..16984d4a8a06 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -12,6 +12,7 @@
 #include <linux/sysctl.h>
 #include <linux/config.h>
 #include <linux/igmp.h>
+#include <linux/inetdevice.h>
 #include <net/snmp.h>
 #include <net/icmp.h>
 #include <net/ip.h>
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 67c036384e77..223abaa72bc5 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -86,6 +86,7 @@
 #include <linux/module.h>
 #include <linux/socket.h>
 #include <linux/sockios.h>
+#include <linux/igmp.h>
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/timer.h>
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index f3629730eb15..13cc7f895583 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <net/xfrm.h>
 #include <asm/scatterlist.h>
 
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 8bfbe9970793..6de8ee1a5ad9 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -36,6 +36,7 @@
 #include <linux/random.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/icmpv6.h>
 
 static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 55917fb17094..626dd39685f2 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -47,6 +47,7 @@
 #include <linux/rtnetlink.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
+#include <net/protocol.h>
 #include <linux/ipv6.h>
 #include <linux/icmpv6.h>
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index dd80020d8740..ea43ef1d94a7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -15,6 +15,7 @@
  *      - new extension header parser code
  */
 #include <linux/config.h>
+#include <linux/in.h>
 #include <linux/skbuff.h>
 #include <linux/kmod.h>
 #include <linux/vmalloc.h>
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 0cd1d1bd9033..ae4653bfd654 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -13,6 +13,7 @@
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/skbuff.h>
+#include <linux/if_arp.h>
 #include <linux/ip.h>
 #include <linux/spinlock.h>
 #include <linux/icmpv6.h>
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index dde37793d20b..268918d5deea 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 24bc0cde43a1..65937de1b58c 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -9,6 +9,7 @@
 
 #include <linux/module.h>
 #include <linux/skbuff.h>
+#include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/types.h>
 #include <net/checksum.h>
diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c
index 6cf0342706b5..c4a2a8c4c339 100644
--- a/net/sched/sch_teql.c
+++ b/net/sched/sch_teql.c
@@ -22,6 +22,7 @@
 #include <linux/in.h>
 #include <linux/errno.h>
 #include <linux/interrupt.h>
+#include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/inet.h>
 #include <linux/netdevice.h>
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index d1b0747a5b9d..de693b43c8ea 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -54,6 +54,7 @@
 #include <net/protocol.h>
 #include <net/ip.h>
 #include <net/ipv6.h>
+#include <net/route.h>
 #include <net/sctp/sctp.h>
 #include <net/addrconf.h>
 #include <net/inet_common.h>
-- 
cgit v1.2.3


From 17ba15fb6264f27374bc87f4c3f8519b80289d85 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@sunset.davemloft.net>
Date: Tue, 27 Dec 2005 20:57:40 -0800
Subject: [PPPOX]: Fix assignment into const proto_ops.

And actually, with this, the whole pppox layer can basically
be removed and subsumed into pppoe.c, no other pppox sub-protocol
implementation exists and we've had this thing for at least 4
years.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/pppoe.c      |  9 ++++-----
 drivers/net/pppox.c      | 10 +++-------
 include/linux/if_pppox.h |  3 +--
 3 files changed, 8 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
index 71e303b28646..9369f811075d 100644
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -85,7 +85,7 @@ static int pppoe_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 static int pppoe_xmit(struct ppp_channel *chan, struct sk_buff *skb);
 static int __pppoe_xmit(struct sock *sk, struct sk_buff *skb);
 
-static struct proto_ops pppoe_ops;
+static const struct proto_ops pppoe_ops;
 static DEFINE_RWLOCK(pppoe_hash_lock);
 
 static struct ppp_channel_ops pppoe_chan_ops;
@@ -1063,9 +1063,7 @@ static int __init pppoe_proc_init(void)
 static inline int pppoe_proc_init(void) { return 0; }
 #endif /* CONFIG_PROC_FS */
 
-/* ->ioctl are set at pppox_create */
-
-static struct proto_ops pppoe_ops = {
+static const struct proto_ops pppoe_ops = {
     .family		= AF_PPPOX,
     .owner		= THIS_MODULE,
     .release		= pppoe_release,
@@ -1081,7 +1079,8 @@ static struct proto_ops pppoe_ops = {
     .getsockopt		= sock_no_getsockopt,
     .sendmsg		= pppoe_sendmsg,
     .recvmsg		= pppoe_recvmsg,
-    .mmap		= sock_no_mmap
+    .mmap		= sock_no_mmap,
+    .ioctl		= pppox_ioctl,
 };
 
 static struct pppox_proto pppoe_proto = {
diff --git a/drivers/net/pppox.c b/drivers/net/pppox.c
index 0c1e114527fb..9315046b3f55 100644
--- a/drivers/net/pppox.c
+++ b/drivers/net/pppox.c
@@ -68,8 +68,7 @@ EXPORT_SYMBOL(register_pppox_proto);
 EXPORT_SYMBOL(unregister_pppox_proto);
 EXPORT_SYMBOL(pppox_unbind_sock);
 
-static int pppox_ioctl(struct socket* sock, unsigned int cmd, 
-		       unsigned long arg)
+int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
 	struct sock *sk = sock->sk;
 	struct pppox_sock *po = pppox_sk(sk);
@@ -105,6 +104,7 @@ static int pppox_ioctl(struct socket* sock, unsigned int cmd,
 	return rc;
 }
 
+EXPORT_SYMBOL(pppox_ioctl);
 
 static int pppox_create(struct socket *sock, int protocol)
 {
@@ -119,11 +119,7 @@ static int pppox_create(struct socket *sock, int protocol)
 		goto out;
 
 	rc = pppox_protos[protocol]->create(sock);
-	if (!rc) {
-		/* We get to set the ioctl handler. */
-		/* For everything else, pppox is just a shell. */
-		sock->ops->ioctl = pppox_ioctl;
-	}
+
 	module_put(pppox_protos[protocol]->owner);
 out:
 	return rc;
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index e677f73f13dd..4fab3d0a4bce 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -157,8 +157,7 @@ struct pppox_proto {
 extern int register_pppox_proto(int proto_num, struct pppox_proto *pp);
 extern void unregister_pppox_proto(int proto_num);
 extern void pppox_unbind_sock(struct sock *sk);/* delete ppp-channel binding */
-extern int pppox_channel_ioctl(struct ppp_channel *pc, unsigned int cmd,
-			       unsigned long arg);
+extern int pppox_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
 
 /* PPPoX socket states */
 enum {
-- 
cgit v1.2.3


From 4947d3ef8de7b4f42aed6ea9ba689dc8fb45b5a5 Mon Sep 17 00:00:00 2001
From: Benjamin LaHaise <bcrl@kvack.org>
Date: Tue, 3 Jan 2006 14:06:50 -0800
Subject: [NET]: Speed up __alloc_skb()

From: Benjamin LaHaise <bcrl@kvack.org>

In __alloc_skb(), the use of skb_shinfo() which casts a u8 * to the
shared info structure results in gcc being forced to do a reload of the
pointer since it has no information on possible aliasing.  Fix this by
using a pointer to refer to skb_shared_info.

By initializing skb_shared_info sequentially, the write combining buffers
can reduce the number of memory transactions to a single write.  Reorder
the initialization in __alloc_skb() to match the structure definition.
There is also an alignment issue on 64 bit systems with skb_shared_info
by converting nr_frags to a short everything packs up nicely.

Also, pass the slab cache pointer according to the fclone flag instead
of using two almost identical function calls.

This raises bw_unix performance up to a peak of 707KB/s when combined
with the spinlock patch.  It should help other networking protocols, too.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  2 +-
 net/core/skbuff.c      | 27 +++++++++++++--------------
 2 files changed, 14 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 971677178e0c..483cfc47ec34 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -133,7 +133,7 @@ struct skb_frag_struct {
  */
 struct skb_shared_info {
 	atomic_t	dataref;
-	unsigned int	nr_frags;
+	unsigned short	nr_frags;
 	unsigned short	tso_size;
 	unsigned short	tso_segs;
 	unsigned short  ufo_size;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 83fee37de38e..070f91cfde59 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -135,17 +135,13 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 			    int fclone)
 {
+	struct skb_shared_info *shinfo;
 	struct sk_buff *skb;
 	u8 *data;
 
 	/* Get the HEAD */
-	if (fclone)
-		skb = kmem_cache_alloc(skbuff_fclone_cache,
-				       gfp_mask & ~__GFP_DMA);
-	else
-		skb = kmem_cache_alloc(skbuff_head_cache,
-				       gfp_mask & ~__GFP_DMA);
-
+	skb = kmem_cache_alloc(fclone ? skbuff_fclone_cache : skbuff_head_cache,
+				gfp_mask & ~__GFP_DMA);
 	if (!skb)
 		goto out;
 
@@ -162,6 +158,16 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb->data = data;
 	skb->tail = data;
 	skb->end  = data + size;
+	/* make sure we initialize shinfo sequentially */
+	shinfo = skb_shinfo(skb);
+	atomic_set(&shinfo->dataref, 1);
+	shinfo->nr_frags  = 0;
+	shinfo->tso_size = 0;
+	shinfo->tso_segs = 0;
+	shinfo->ufo_size = 0;
+	shinfo->ip6_frag_id = 0;
+	shinfo->frag_list = NULL;
+
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
 		atomic_t *fclone_ref = (atomic_t *) (child + 1);
@@ -171,13 +177,6 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 		child->fclone = SKB_FCLONE_UNAVAILABLE;
 	}
-	atomic_set(&(skb_shinfo(skb)->dataref), 1);
-	skb_shinfo(skb)->nr_frags  = 0;
-	skb_shinfo(skb)->tso_size = 0;
-	skb_shinfo(skb)->tso_segs = 0;
-	skb_shinfo(skb)->frag_list = NULL;
-	skb_shinfo(skb)->ufo_size = 0;
-	skb_shinfo(skb)->ip6_frag_id = 0;
 out:
 	return skb;
 nodata:
-- 
cgit v1.2.3


From 88df8ef59a3eb54b1e2412765ff2736d2376d1ca Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@osdl.org>
Date: Tue, 3 Jan 2006 15:25:45 -0800
Subject: [NET]: Don't exclude broadcast addresses from
 is_multicast_ether_addr()

The check for multicast shouldn't exclude broadcast type addresses.
This reverts the incorrect change done in 2.6.13.

The broadcast address is a multicast address and should be excluded
from being a valid_ether_address for use in bridging or device address.

Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 5f49a30eb6f2..745c988359c0 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -63,10 +63,11 @@ static inline int is_zero_ether_addr(const u8 *addr)
  * @addr: Pointer to a six-byte array containing the Ethernet address
  *
  * Return true if the address is a multicast address.
+ * By definition the broadcast address is also a multicast address.
  */
 static inline int is_multicast_ether_addr(const u8 *addr)
 {
-	return ((addr[0] != 0xff) && (0x01 & addr[0]));
+	return (0x01 & addr[0]);
 }
 
 /**
-- 
cgit v1.2.3


From a00828e9ac62caed7b830d631914d7748817ccd1 Mon Sep 17 00:00:00 2001
From: Pete Zaitcev <zaitcev@redhat.com>
Date: Sat, 22 Oct 2005 20:15:09 -0700
Subject: [PATCH] USB: drivers/usb/storage/libusual

This patch adds a shim driver libusual, which routes devices between
usb-storage and ub according to the common table, based on unusual_devs.h.
The help and example syntax is in Kconfig.

Signed-off-by: Pete Zaitcev <zaitcev@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/block/Kconfig              |   3 +-
 drivers/block/ub.c                 |  23 ++--
 drivers/usb/Makefile               |   1 +
 drivers/usb/storage/Kconfig        |  14 ++
 drivers/usb/storage/Makefile       |   4 +
 drivers/usb/storage/libusual.c     | 266 +++++++++++++++++++++++++++++++++++++
 drivers/usb/storage/protocol.h     |  14 --
 drivers/usb/storage/transport.h    |  31 -----
 drivers/usb/storage/unusual_devs.h |  24 ++++
 drivers/usb/storage/usb.c          | 119 ++++++-----------
 drivers/usb/storage/usb.h          |  31 +----
 include/linux/usb_usual.h          | 123 +++++++++++++++++
 12 files changed, 486 insertions(+), 167 deletions(-)
 create mode 100644 drivers/usb/storage/libusual.c
 create mode 100644 include/linux/usb_usual.h

(limited to 'include/linux')

diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig
index 7b1cd93892be..c4b9d2adfc08 100644
--- a/drivers/block/Kconfig
+++ b/drivers/block/Kconfig
@@ -358,7 +358,8 @@ config BLK_DEV_UB
 	  This driver supports certain USB attached storage devices
 	  such as flash keys.
 
-	  Warning: Enabling this cripples the usb-storage driver.
+	  If you enable this driver, it is recommended to avoid conflicts
+	  with usb-storage by enabling USB_LIBUSUAL.
 
 	  If unsure, say N.
 
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index bfb23d543ff7..06d741d58a68 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -29,6 +29,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/usb.h>
+#include <linux/usb_usual.h>
 #include <linux/blkdev.h>
 #include <linux/devfs_fs_kernel.h>
 #include <linux/timer.h>
@@ -106,16 +107,6 @@
  *                                                                   +--------+
  */
 
-/*
- * Definitions which have to be scattered once we understand the layout better.
- */
-
-/* Transport (despite PR in the name) */
-#define US_PR_BULK	0x50		/* bulk only */
-
-/* Protocol */
-#define US_SC_SCSI	0x06		/* Transparent */
-
 /*
  * This many LUNs per USB device.
  * Every one of them takes a host, see UB_MAX_HOSTS.
@@ -422,13 +413,18 @@ static int ub_probe_lun(struct ub_dev *sc, int lnum);
 
 /*
  */
+#ifdef CONFIG_USB_LIBUSUAL
+
+#define ub_usb_ids  storage_usb_ids
+#else
+
 static struct usb_device_id ub_usb_ids[] = {
-	// { USB_DEVICE_VER(0x0781, 0x0002, 0x0009, 0x0009) },	/* SDDR-31 */
 	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_BULK) },
 	{ }
 };
 
 MODULE_DEVICE_TABLE(usb, ub_usb_ids);
+#endif /* CONFIG_USB_LIBUSUAL */
 
 /*
  * Find me a way to identify "next free minor" for add_disk(),
@@ -2172,6 +2168,9 @@ static int ub_probe(struct usb_interface *intf,
 	int rc;
 	int i;
 
+	if (usb_usual_check_type(dev_id, USB_US_TYPE_UB))
+		return -ENXIO;
+
 	rc = -ENOMEM;
 	if ((sc = kmalloc(sizeof(struct ub_dev), GFP_KERNEL)) == NULL)
 		goto err_core;
@@ -2479,6 +2478,7 @@ static int __init ub_init(void)
 	if ((rc = usb_register(&ub_driver)) != 0)
 		goto err_register;
 
+	usb_usual_set_present(USB_US_TYPE_UB);
 	return 0;
 
 err_register:
@@ -2494,6 +2494,7 @@ static void __exit ub_exit(void)
 
 	devfs_remove(DEVFS_NAME);
 	unregister_blkdev(UB_MAJOR, DRV_NAME);
+	usb_usual_clear_present(USB_US_TYPE_UB);
 }
 
 module_init(ub_init);
diff --git a/drivers/usb/Makefile b/drivers/usb/Makefile
index a50c2bc506f2..3639c3f8d357 100644
--- a/drivers/usb/Makefile
+++ b/drivers/usb/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_USB_MIDI)		+= class/
 obj-$(CONFIG_USB_PRINTER)	+= class/
 
 obj-$(CONFIG_USB_STORAGE)	+= storage/
+obj-$(CONFIG_USB)		+= storage/
 
 obj-$(CONFIG_USB_AIPTEK)	+= input/
 obj-$(CONFIG_USB_ATI_REMOTE)	+= input/
diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index c41d64dbb0f0..bdfcb95d9c12 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -124,3 +124,17 @@ config USB_STORAGE_ONETOUCH
 	  hard drive's as an input device. An action can be associated with
 	  this input in any keybinding software. (e.g. gnome's keyboard short-
 	  cuts)
+
+config USB_LIBUSUAL
+	bool "The shared table of common (or usual) storage devices"
+	depends on USB
+	help
+	  This module contains a table of common (or usual) devices
+	  for usb-storage and ub drivers, and allows to switch binding
+	  of these devices without rebuilding modules.
+
+	  Typical syntax of /etc/modprobe.conf is:
+
+		options libusual bias="ub"
+
+	  If unsure, say N.
diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile
index 44ab8f9978fe..2d416e9028bb 100644
--- a/drivers/usb/storage/Makefile
+++ b/drivers/usb/storage/Makefile
@@ -22,3 +22,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH)	+= onetouch.o
 
 usb-storage-objs :=	scsiglue.o protocol.o transport.o usb.o \
 			initializers.o $(usb-storage-obj-y)
+
+ifneq ($(CONFIG_USB_LIBUSUAL),)
+	obj-$(CONFIG_USB)	+= libusual.o
+endif
diff --git a/drivers/usb/storage/libusual.c b/drivers/usb/storage/libusual.c
new file mode 100644
index 000000000000..61f73d8a2c0f
--- /dev/null
+++ b/drivers/usb/storage/libusual.c
@@ -0,0 +1,266 @@
+/*
+ * libusual
+ *
+ * The libusual contains the table of devices common for ub and usb-storage.
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+#include <linux/usb_usual.h>
+#include <linux/vmalloc.h>
+
+/*
+ */
+#define USU_MOD_FL_THREAD   1	/* Thread is running */
+#define USU_MOD_FL_PRESENT  2	/* The module is loaded */
+
+struct mod_status {
+	unsigned long fls;
+};
+
+static struct mod_status stat[3];
+static DEFINE_SPINLOCK(usu_lock);
+
+/*
+ */
+#define USB_US_DEFAULT_BIAS	USB_US_TYPE_STOR
+
+#define BIAS_NAME_SIZE  (sizeof("usb-storage"))
+static char bias[BIAS_NAME_SIZE];
+static int usb_usual_bias;
+static const char *bias_names[3] = { "none", "usb-storage", "ub" };
+
+static DECLARE_MUTEX_LOCKED(usu_init_notify);
+static DECLARE_COMPLETION(usu_end_notify);
+static atomic_t total_threads = ATOMIC_INIT(0);
+
+static int usu_probe_thread(void *arg);
+static int parse_bias(const char *bias_s);
+
+/*
+ * The table.
+ */
+#define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \
+		    vendorName, productName,useProtocol, useTransport, \
+		    initFunction, flags) \
+{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \
+  .driver_info = (flags)|(USB_US_TYPE_STOR<<24) }
+
+#define USUAL_DEV(useProto, useTrans, useType) \
+{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \
+  .driver_info = ((useType)<<24) }
+
+struct usb_device_id storage_usb_ids [] = {
+#	include "unusual_devs.h"
+	{ } /* Terminating entry */
+};
+
+#undef USUAL_DEV
+#undef UNUSUAL_DEV
+
+MODULE_DEVICE_TABLE(usb, storage_usb_ids);
+EXPORT_SYMBOL_GPL(storage_usb_ids);
+
+/*
+ * @type: the module type as an integer
+ */
+void usb_usual_set_present(int type)
+{
+	struct mod_status *st;
+	unsigned long flags;
+
+	if (type <= 0 || type >= 3)
+		return;
+	st = &stat[type];
+	spin_lock_irqsave(&usu_lock, flags);
+	st->fls |= USU_MOD_FL_PRESENT;
+	spin_unlock_irqrestore(&usu_lock, flags);
+}
+EXPORT_SYMBOL_GPL(usb_usual_set_present);
+
+void usb_usual_clear_present(int type)
+{
+	struct mod_status *st;
+	unsigned long flags;
+
+	if (type <= 0 || type >= 3)
+		return;
+	st = &stat[type];
+	spin_lock_irqsave(&usu_lock, flags);
+	st->fls &= ~USU_MOD_FL_PRESENT;
+	spin_unlock_irqrestore(&usu_lock, flags);
+}
+EXPORT_SYMBOL_GPL(usb_usual_clear_present);
+
+/*
+ * Match the calling driver type against the table.
+ * Returns: 0 if the device matches.
+ */
+int usb_usual_check_type(const struct usb_device_id *id, int caller_type)
+{
+	int id_type = USB_US_TYPE(id->driver_info);
+
+	if (caller_type <= 0 || caller_type >= 3)
+		return -EINVAL;
+
+	/* Drivers grab fixed assignment devices */
+	if (id_type == caller_type)
+		return 0;
+	/* Drivers grab devices biased to them */
+	if (id_type == USB_US_TYPE_NONE && caller_type == usb_usual_bias)
+		return 0;
+	return -ENODEV;
+}
+EXPORT_SYMBOL_GPL(usb_usual_check_type);
+
+/*
+ */
+static int usu_probe(struct usb_interface *intf,
+			 const struct usb_device_id *id)
+{
+	int type;
+	int rc;
+	unsigned long flags;
+
+	type = USB_US_TYPE(id->driver_info);
+	if (type == 0)
+		type = usb_usual_bias;
+
+	spin_lock_irqsave(&usu_lock, flags);
+	if ((stat[type].fls & (USU_MOD_FL_THREAD|USU_MOD_FL_PRESENT)) != 0) {
+		spin_unlock_irqrestore(&usu_lock, flags);
+		return -ENXIO;
+	}
+	stat[type].fls |= USU_MOD_FL_THREAD;
+	spin_unlock_irqrestore(&usu_lock, flags);
+
+	rc = kernel_thread(usu_probe_thread, (void*)type, CLONE_VM);
+	if (rc < 0) {
+		printk(KERN_WARNING "libusual: "
+		    "Unable to start the thread for %s: %d\n",
+		    bias_names[type], rc);
+		spin_lock_irqsave(&usu_lock, flags);
+		stat[type].fls &= ~USU_MOD_FL_THREAD;
+		spin_unlock_irqrestore(&usu_lock, flags);
+		return rc;	/* Not being -ENXIO causes a message printed */
+	}
+	atomic_inc(&total_threads);
+
+	return -ENXIO;
+}
+
+static void usu_disconnect(struct usb_interface *intf)
+{
+	;	/* We should not be here. */
+}
+
+static struct usb_driver usu_driver = {
+	.owner =	THIS_MODULE,
+	.name =		"libusual",
+	.probe =	usu_probe,
+	.disconnect =	usu_disconnect,
+	.id_table =	storage_usb_ids,
+};
+
+/*
+ * A whole new thread for a purpose of request_module seems quite stupid.
+ * The request_module forks once inside again. However, if we attempt
+ * to load a storage module from our own modprobe thread, that module
+ * references our symbols, which cannot be resolved until our module is
+ * initialized. I wish there was a way to wait for the end of initialization.
+ * The module notifier reports MODULE_STATE_COMING only.
+ * So, we wait until module->init ends as the next best thing.
+ */
+static int usu_probe_thread(void *arg)
+{
+	int type = (unsigned long) arg;
+	struct mod_status *st = &stat[type];
+	int rc;
+	unsigned long flags;
+
+	daemonize("libusual_%d", type);	/* "usb-storage" is kinda too long */
+
+	/* A completion does not work here because it's counted. */
+	down(&usu_init_notify);
+	up(&usu_init_notify);
+
+	rc = request_module(bias_names[type]);
+	spin_lock_irqsave(&usu_lock, flags);
+	if (rc == 0 && (st->fls & USU_MOD_FL_PRESENT) == 0) {
+		/*
+		 * This should not happen, but let us keep tabs on it.
+		 */
+		printk(KERN_NOTICE "libusual: "
+		    "modprobe for %s succeeded, but module is not present\n",
+		    bias_names[type]);
+	}
+	st->fls &= ~USU_MOD_FL_THREAD;
+	spin_unlock_irqrestore(&usu_lock, flags);
+
+	complete_and_exit(&usu_end_notify, 0);
+}
+
+/*
+ */
+static int __init usb_usual_init(void)
+{
+	int rc;
+
+	bias[BIAS_NAME_SIZE-1] = 0;
+	usb_usual_bias = parse_bias(bias);
+
+	rc = usb_register(&usu_driver);
+	up(&usu_init_notify);
+	return rc;
+}
+
+static void __exit usb_usual_exit(void)
+{
+	/*
+	 * We do not check for any drivers present, because
+	 * they keep us pinned with symbol references.
+	 */
+
+	usb_deregister(&usu_driver);
+
+	while (atomic_read(&total_threads) > 0) {
+		wait_for_completion(&usu_end_notify);
+		atomic_dec(&total_threads);
+	}
+}
+
+/*
+ * Validate and accept the bias parameter.
+ * Maybe make an sysfs method later. XXX
+ */
+static int parse_bias(const char *bias_s)
+{
+	int i;
+	int bias_n = 0;
+
+	if (bias_s[0] == 0 || bias_s[0] == ' ') {
+		bias_n = USB_US_DEFAULT_BIAS;
+	} else {
+		for (i = 1; i < 3; i++) {
+			if (strcmp(bias_s, bias_names[i]) == 0) {
+				bias_n = i;
+				break;
+			}
+		}
+		if (bias_n == 0) {
+			bias_n = USB_US_DEFAULT_BIAS;
+			printk(KERN_INFO
+			    "libusual: unknown bias \"%s\", using \"%s\"\n",
+			    bias_s, bias_names[bias_n]);
+		}
+	}
+	return bias_n;
+}
+
+module_init(usb_usual_init);
+module_exit(usb_usual_exit);
+
+module_param_string(bias, bias, BIAS_NAME_SIZE,  S_IRUGO|S_IWUSR);
+MODULE_PARM_DESC(bias, "Bias to usb-storage or ub");
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/usb/storage/protocol.h b/drivers/usb/storage/protocol.h
index 02bff01ab09c..845bed4b8031 100644
--- a/drivers/usb/storage/protocol.h
+++ b/drivers/usb/storage/protocol.h
@@ -41,20 +41,6 @@
 #ifndef _PROTOCOL_H_
 #define _PROTOCOL_H_
 
-/* Sub Classes */
-
-#define US_SC_RBC	0x01		/* Typically, flash devices */
-#define US_SC_8020	0x02		/* CD-ROM */
-#define US_SC_QIC	0x03		/* QIC-157 Tapes */
-#define US_SC_UFI	0x04		/* Floppy */
-#define US_SC_8070	0x05		/* Removable media */
-#define US_SC_SCSI	0x06		/* Transparent */
-#define US_SC_ISD200    0x07		/* ISD200 ATA */
-#define US_SC_MIN	US_SC_RBC
-#define US_SC_MAX	US_SC_ISD200
-
-#define US_SC_DEVICE	0xff		/* Use device's value */
-
 /* Protocol handling routines */
 extern void usb_stor_ATAPI_command(struct scsi_cmnd*, struct us_data*);
 extern void usb_stor_qic157_command(struct scsi_cmnd*, struct us_data*);
diff --git a/drivers/usb/storage/transport.h b/drivers/usb/storage/transport.h
index 0a362cc781ad..633a715850a4 100644
--- a/drivers/usb/storage/transport.h
+++ b/drivers/usb/storage/transport.h
@@ -41,39 +41,8 @@
 #ifndef _TRANSPORT_H_
 #define _TRANSPORT_H_
 
-#include <linux/config.h>
 #include <linux/blkdev.h>
 
-/* Protocols */
-
-#define US_PR_CBI	0x00		/* Control/Bulk/Interrupt */
-#define US_PR_CB	0x01		/* Control/Bulk w/o interrupt */
-#define US_PR_BULK	0x50		/* bulk only */
-#ifdef CONFIG_USB_STORAGE_USBAT
-#define US_PR_USBAT	0x80		/* SCM-ATAPI bridge */
-#endif
-#ifdef CONFIG_USB_STORAGE_SDDR09
-#define US_PR_EUSB_SDDR09	0x81	/* SCM-SCSI bridge for SDDR-09 */
-#endif
-#ifdef CONFIG_USB_STORAGE_SDDR55
-#define US_PR_SDDR55	0x82		/* SDDR-55 (made up) */
-#endif
-#define US_PR_DPCM_USB  0xf0		/* Combination CB/SDDR09 */
-
-#ifdef CONFIG_USB_STORAGE_FREECOM
-#define US_PR_FREECOM   0xf1		/* Freecom */
-#endif
-
-#ifdef CONFIG_USB_STORAGE_DATAFAB
-#define US_PR_DATAFAB   0xf2		/* Datafab chipsets */
-#endif
-
-#ifdef CONFIG_USB_STORAGE_JUMPSHOT
-#define US_PR_JUMPSHOT  0xf3		/* Lexar Jumpshot */
-#endif
-
-#define US_PR_DEVICE	0xff		/* Use device's value */
-
 /*
  * Bulk only data structures
  */
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index f5f47a34b168..76904ad11241 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -1134,3 +1134,27 @@ UNUSUAL_DEV(  0x55aa, 0xa103, 0x0000, 0x9999,
 		US_SC_SCSI, US_PR_SDDR55, NULL,
 		US_FL_SINGLE_LUN),
 #endif
+
+/* Control/Bulk transport for all SubClass values */
+USUAL_DEV(US_SC_RBC, US_PR_CB, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8020, US_PR_CB, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_QIC, US_PR_CB, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_UFI, US_PR_CB, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8070, US_PR_CB, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_SCSI, US_PR_CB, USB_US_TYPE_STOR),
+
+/* Control/Bulk/Interrupt transport for all SubClass values */
+USUAL_DEV(US_SC_RBC, US_PR_CBI, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8020, US_PR_CBI, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_QIC, US_PR_CBI, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_UFI, US_PR_CBI, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8070, US_PR_CBI, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_SCSI, US_PR_CBI, USB_US_TYPE_STOR),
+
+/* Bulk-only transport for all SubClass values */
+USUAL_DEV(US_SC_RBC, US_PR_BULK, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8020, US_PR_BULK, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_QIC, US_PR_BULK, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_UFI, US_PR_BULK, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_8070, US_PR_BULK, USB_US_TYPE_STOR),
+USUAL_DEV(US_SC_SCSI, US_PR_BULK, 0),
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 3847ebed2aa4..c8375aa62723 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -112,49 +112,33 @@ static atomic_t total_threads = ATOMIC_INIT(0);
 static DECLARE_COMPLETION(threads_gone);
 
 
-/* The entries in this table, except for final ones here
- * (USB_MASS_STORAGE_CLASS and the empty entry), correspond,
- * line for line with the entries of us_unsuaul_dev_list[].
+/*
+ * The entries in this table correspond, line for line,
+ * with the entries of us_unusual_dev_list[].
  */
+#ifndef CONFIG_USB_LIBUSUAL
 
 #define UNUSUAL_DEV(id_vendor, id_product, bcdDeviceMin, bcdDeviceMax, \
 		    vendorName, productName,useProtocol, useTransport, \
 		    initFunction, flags) \
-{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax) }
+{ USB_DEVICE_VER(id_vendor, id_product, bcdDeviceMin,bcdDeviceMax), \
+  .driver_info = (flags)|(USB_US_TYPE_STOR<<24) }
+
+#define USUAL_DEV(useProto, useTrans, useType) \
+{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, useProto, useTrans), \
+  .driver_info = (USB_US_TYPE_STOR<<24) }
 
 static struct usb_device_id storage_usb_ids [] = {
 
 #	include "unusual_devs.h"
 #undef UNUSUAL_DEV
-	/* Control/Bulk transport for all SubClass values */
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_CB) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_CB) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_CB) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_CB) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_CB) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_CB) },
-
-	/* Control/Bulk/Interrupt transport for all SubClass values */
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_CBI) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_CBI) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_CBI) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_CBI) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_CBI) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_CBI) },
-
-	/* Bulk-only transport for all SubClass values */
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_RBC, US_PR_BULK) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8020, US_PR_BULK) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_QIC, US_PR_BULK) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_UFI, US_PR_BULK) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_8070, US_PR_BULK) },
-	{ USB_INTERFACE_INFO(USB_CLASS_MASS_STORAGE, US_SC_SCSI, US_PR_BULK) },
-
+#undef USUAL_DEV
 	/* Terminating entry */
 	{ }
 };
 
 MODULE_DEVICE_TABLE (usb, storage_usb_ids);
+#endif /* CONFIG_USB_LIBUSUAL */
 
 /* This is the list of devices we recognize, along with their flag data */
 
@@ -167,7 +151,6 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids);
  * are free to use as many characters as you like.
  */
 
-#undef UNUSUAL_DEV
 #define UNUSUAL_DEV(idVendor, idProduct, bcdDeviceMin, bcdDeviceMax, \
 		    vendor_name, product_name, use_protocol, use_transport, \
 		    init_function, Flags) \
@@ -177,53 +160,18 @@ MODULE_DEVICE_TABLE (usb, storage_usb_ids);
 	.useProtocol = use_protocol,	\
 	.useTransport = use_transport,	\
 	.initFunction = init_function,	\
-	.flags = Flags, \
+}
+
+#define USUAL_DEV(use_protocol, use_transport, use_type) \
+{ \
+	.useProtocol = use_protocol,	\
+	.useTransport = use_transport,	\
 }
 
 static struct us_unusual_dev us_unusual_dev_list[] = {
 #	include "unusual_devs.h" 
 #	undef UNUSUAL_DEV
-	/* Control/Bulk transport for all SubClass values */
-	{ .useProtocol = US_SC_RBC,
-	  .useTransport = US_PR_CB},
-	{ .useProtocol = US_SC_8020,
-	  .useTransport = US_PR_CB},
-	{ .useProtocol = US_SC_QIC,
-	  .useTransport = US_PR_CB},
-	{ .useProtocol = US_SC_UFI,
-	  .useTransport = US_PR_CB},
-	{ .useProtocol = US_SC_8070,
-	  .useTransport = US_PR_CB},
-	{ .useProtocol = US_SC_SCSI,
-	  .useTransport = US_PR_CB},
-
-	/* Control/Bulk/Interrupt transport for all SubClass values */
-	{ .useProtocol = US_SC_RBC,
-	  .useTransport = US_PR_CBI},
-	{ .useProtocol = US_SC_8020,
-	  .useTransport = US_PR_CBI},
-	{ .useProtocol = US_SC_QIC,
-	  .useTransport = US_PR_CBI},
-	{ .useProtocol = US_SC_UFI,
-	  .useTransport = US_PR_CBI},
-	{ .useProtocol = US_SC_8070,
-	  .useTransport = US_PR_CBI},
-	{ .useProtocol = US_SC_SCSI,
-	  .useTransport = US_PR_CBI},
-
-	/* Bulk-only transport for all SubClass values */
-	{ .useProtocol = US_SC_RBC,
-	  .useTransport = US_PR_BULK},
-	{ .useProtocol = US_SC_8020,
-	  .useTransport = US_PR_BULK},
-	{ .useProtocol = US_SC_QIC,
-	  .useTransport = US_PR_BULK},
-	{ .useProtocol = US_SC_UFI,
-	  .useTransport = US_PR_BULK},
-	{ .useProtocol = US_SC_8070,
-	  .useTransport = US_PR_BULK},
-	{ .useProtocol = US_SC_SCSI,
-	  .useTransport = US_PR_BULK},
+#	undef USUAL_DEV
 
 	/* Terminating entry */
 	{ NULL }
@@ -484,14 +432,20 @@ static int associate_dev(struct us_data *us, struct usb_interface *intf)
 	return 0;
 }
 
+/* Find an unusual_dev descriptor (always succeeds in the current code) */
+static struct us_unusual_dev *find_unusual(const struct usb_device_id *id)
+{
+	const int id_index = id - storage_usb_ids;
+	return &us_unusual_dev_list[id_index];
+}
+
 /* Get the unusual_devs entries and the string descriptors */
-static void get_device_info(struct us_data *us, int id_index)
+static void get_device_info(struct us_data *us, const struct usb_device_id *id)
 {
 	struct usb_device *dev = us->pusb_dev;
 	struct usb_interface_descriptor *idesc =
 		&us->pusb_intf->cur_altsetting->desc;
-	struct us_unusual_dev *unusual_dev = &us_unusual_dev_list[id_index];
-	struct usb_device_id *id = &storage_usb_ids[id_index];
+	struct us_unusual_dev *unusual_dev = find_unusual(id);
 
 	/* Store the entries */
 	us->unusual_dev = unusual_dev;
@@ -501,7 +455,7 @@ static void get_device_info(struct us_data *us, int id_index)
 	us->protocol = (unusual_dev->useTransport == US_PR_DEVICE) ?
 			idesc->bInterfaceProtocol :
 			unusual_dev->useTransport;
-	us->flags = unusual_dev->flags;
+	us->flags = USB_US_ORIG_FLAGS(id->driver_info);
 
 	/*
 	 * This flag is only needed when we're in high-speed, so let's
@@ -529,7 +483,7 @@ static void get_device_info(struct us_data *us, int id_index)
 		if (unusual_dev->useTransport != US_PR_DEVICE &&
 			us->protocol == idesc->bInterfaceProtocol)
 			msg += 2;
-		if (msg >= 0 && !(unusual_dev->flags & US_FL_NEED_OVERRIDE))
+		if (msg >= 0 && !(us->flags & US_FL_NEED_OVERRIDE))
 			printk(KERN_NOTICE USB_STORAGE "This device "
 				"(%04x,%04x,%04x S %02x P %02x)"
 				" has %s in unusual_devs.h\n"
@@ -921,10 +875,12 @@ static int storage_probe(struct usb_interface *intf,
 {
 	struct Scsi_Host *host;
 	struct us_data *us;
-	const int id_index = id - storage_usb_ids; 
 	int result;
 	struct task_struct *th;
 
+	if (usb_usual_check_type(id, USB_US_TYPE_STOR))
+		return -ENXIO;
+
 	US_DEBUGP("USB Mass Storage device detected\n");
 
 	/*
@@ -957,7 +913,7 @@ static int storage_probe(struct usb_interface *intf,
 	 * of the match from the usb_device_id table, so we can find the
 	 * corresponding entry in the private table.
 	 */
-	get_device_info(us, id_index);
+	get_device_info(us, id);
 
 #ifdef CONFIG_USB_STORAGE_SDDR09
 	if (us->protocol == US_PR_EUSB_SDDR09 ||
@@ -1062,9 +1018,10 @@ static int __init usb_stor_init(void)
 
 	/* register the driver, return usb_register return code if error */
 	retval = usb_register(&usb_storage_driver);
-	if (retval == 0)
+	if (retval == 0) {
 		printk(KERN_INFO "USB Mass Storage support registered.\n");
-
+		usb_usual_set_present(USB_US_TYPE_STOR);
+	}
 	return retval;
 }
 
@@ -1088,6 +1045,8 @@ static void __exit usb_stor_exit(void)
 		wait_for_completion(&threads_gone);
 		atomic_dec(&total_threads);
 	}
+
+	usb_usual_clear_present(USB_US_TYPE_STOR);
 }
 
 module_init(usb_stor_init);
diff --git a/drivers/usb/storage/usb.h b/drivers/usb/storage/usb.h
index 98b09711a739..0cd1eebc4497 100644
--- a/drivers/usb/storage/usb.h
+++ b/drivers/usb/storage/usb.h
@@ -45,6 +45,7 @@
 #define _USB_H_
 
 #include <linux/usb.h>
+#include <linux/usb_usual.h>
 #include <linux/blkdev.h>
 #include <linux/smp_lock.h>
 #include <linux/completion.h>
@@ -63,38 +64,8 @@ struct us_unusual_dev {
 	__u8  useProtocol;
 	__u8  useTransport;
 	int (*initFunction)(struct us_data *);
-	unsigned int flags;
 };
 
-/*
- * Static flag definitions.  We use this roundabout technique so that the
- * proc_info() routine can automatically display a message for each flag.
- */
-#define US_DO_ALL_FLAGS						\
-	US_FLAG(SINGLE_LUN,	0x00000001)			\
-		/* allow access to only LUN 0 */		\
-	US_FLAG(NEED_OVERRIDE,	0x00000002)			\
-		/* unusual_devs entry is necessary */		\
-	US_FLAG(SCM_MULT_TARG,	0x00000004)			\
-		/* supports multiple targets */			\
-	US_FLAG(FIX_INQUIRY,	0x00000008)			\
-		/* INQUIRY response needs faking */		\
-	US_FLAG(FIX_CAPACITY,	0x00000010)			\
-		/* READ CAPACITY response too big */		\
-	US_FLAG(IGNORE_RESIDUE,	0x00000020)			\
-		/* reported residue is wrong */			\
-	US_FLAG(BULK32,		0x00000040)			\
-		/* Uses 32-byte CBW length */			\
-	US_FLAG(NOT_LOCKABLE,	0x00000080)			\
-		/* PREVENT/ALLOW not supported */		\
-	US_FLAG(GO_SLOW,	0x00000100)			\
-		/* Need delay after Command phase */		\
-	US_FLAG(NO_WP_DETECT,	0x00000200)			\
-		/* Don't check for write-protect */		\
-
-#define US_FLAG(name, value)	US_FL_##name = value ,
-enum { US_DO_ALL_FLAGS };
-#undef US_FLAG
 
 /* Dynamic flag definitions: used in set_bit() etc. */
 #define US_FLIDX_URB_ACTIVE	18  /* 0x00040000  current_urb is in use  */
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
new file mode 100644
index 000000000000..f9c058f33712
--- /dev/null
+++ b/include/linux/usb_usual.h
@@ -0,0 +1,123 @@
+/*
+ * Interface to the libusual.
+ *
+ * Copyright (c) 2005 Pete Zaitcev <zaitcev@redhat.com>
+ * Copyright (c) 1999-2002 Matthew Dharm (mdharm-usb@one-eyed-alien.net)
+ * Copyright (c) 1999 Michael Gee (michael@linuxspecific.com)
+ */
+
+#ifndef __LINUX_USB_USUAL_H
+#define __LINUX_USB_USUAL_H
+
+#include <linux/config.h>
+
+/* We should do this for cleanliness... But other usb_foo.h do not do this. */
+/* #include <linux/usb.h> */
+
+/*
+ * The flags field, which we store in usb_device_id.driver_info.
+ * It is compatible with the old usb-storage flags in lower 24 bits.
+ */
+
+/*
+ * Static flag definitions.  We use this roundabout technique so that the
+ * proc_info() routine can automatically display a message for each flag.
+ */
+#define US_DO_ALL_FLAGS						\
+	US_FLAG(SINGLE_LUN,	0x00000001)			\
+		/* allow access to only LUN 0 */		\
+	US_FLAG(NEED_OVERRIDE,	0x00000002)			\
+		/* unusual_devs entry is necessary */		\
+	US_FLAG(SCM_MULT_TARG,	0x00000004)			\
+		/* supports multiple targets */			\
+	US_FLAG(FIX_INQUIRY,	0x00000008)			\
+		/* INQUIRY response needs faking */		\
+	US_FLAG(FIX_CAPACITY,	0x00000010)			\
+		/* READ CAPACITY response too big */		\
+	US_FLAG(IGNORE_RESIDUE,	0x00000020)			\
+		/* reported residue is wrong */			\
+	US_FLAG(BULK32,		0x00000040)			\
+		/* Uses 32-byte CBW length */			\
+	US_FLAG(NOT_LOCKABLE,	0x00000080)			\
+		/* PREVENT/ALLOW not supported */		\
+	US_FLAG(GO_SLOW,	0x00000100)			\
+		/* Need delay after Command phase */		\
+	US_FLAG(NO_WP_DETECT,	0x00000200)			\
+		/* Don't check for write-protect */		\
+
+#define US_FLAG(name, value)	US_FL_##name = value ,
+enum { US_DO_ALL_FLAGS };
+#undef US_FLAG
+
+/*
+ * The bias field for libusual and friends.
+ */
+#define USB_US_TYPE_NONE   0
+#define USB_US_TYPE_STOR   1		/* usb-storage */
+#define USB_US_TYPE_UB     2		/* ub */
+
+#define USB_US_TYPE(flags) 		(((flags) >> 24) & 0xFF)
+#define USB_US_ORIG_FLAGS(flags)	((flags) & 0x00FFFFFF)
+
+/*
+ * This is probably not the best place to keep these constants, conceptually.
+ * But it's the only header included into all places which need them.
+ */
+
+/* Sub Classes */
+
+#define US_SC_RBC	0x01		/* Typically, flash devices */
+#define US_SC_8020	0x02		/* CD-ROM */
+#define US_SC_QIC	0x03		/* QIC-157 Tapes */
+#define US_SC_UFI	0x04		/* Floppy */
+#define US_SC_8070	0x05		/* Removable media */
+#define US_SC_SCSI	0x06		/* Transparent */
+#define US_SC_ISD200    0x07		/* ISD200 ATA */
+#define US_SC_MIN	US_SC_RBC
+#define US_SC_MAX	US_SC_ISD200
+
+#define US_SC_DEVICE	0xff		/* Use device's value */
+
+/* Protocols */
+
+#define US_PR_CBI	0x00		/* Control/Bulk/Interrupt */
+#define US_PR_CB	0x01		/* Control/Bulk w/o interrupt */
+#define US_PR_BULK	0x50		/* bulk only */
+#ifdef CONFIG_USB_STORAGE_USBAT
+#define US_PR_USBAT	0x80		/* SCM-ATAPI bridge */
+#endif
+#ifdef CONFIG_USB_STORAGE_SDDR09
+#define US_PR_EUSB_SDDR09	0x81	/* SCM-SCSI bridge for SDDR-09 */
+#endif
+#ifdef CONFIG_USB_STORAGE_SDDR55
+#define US_PR_SDDR55	0x82		/* SDDR-55 (made up) */
+#endif
+#define US_PR_DPCM_USB  0xf0		/* Combination CB/SDDR09 */
+#ifdef CONFIG_USB_STORAGE_FREECOM
+#define US_PR_FREECOM   0xf1		/* Freecom */
+#endif
+#ifdef CONFIG_USB_STORAGE_DATAFAB
+#define US_PR_DATAFAB   0xf2		/* Datafab chipsets */
+#endif
+#ifdef CONFIG_USB_STORAGE_JUMPSHOT
+#define US_PR_JUMPSHOT  0xf3		/* Lexar Jumpshot */
+#endif
+
+#define US_PR_DEVICE	0xff		/* Use device's value */
+
+/*
+ */
+#ifdef CONFIG_USB_LIBUSUAL
+
+extern struct usb_device_id storage_usb_ids[];
+extern void usb_usual_set_present(int type);
+extern void usb_usual_clear_present(int type);
+extern int usb_usual_check_type(const struct usb_device_id *, int type);
+#else
+
+#define usb_usual_set_present(t)	do { } while(0)
+#define usb_usual_clear_present(t)	do { } while(0)
+#define usb_usual_check_type(id, t)	(0)
+#endif /* CONFIG_USB_LIBUSUAL */
+
+#endif /* __LINUX_USB_USUAL_H */
-- 
cgit v1.2.3


From 733260ff9c45bd4db60f45d17e8560a4a68dff4d Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 16 Nov 2005 13:41:28 -0800
Subject: [PATCH] USB: add dynamic id functionality to USB core

Echo the usb vendor and product id to the "new_id" file in the driver's
sysfs directory, and then that driver will be able to bind to a device
with those ids if it is present.

Example:
	echo 0557 2008 > /sys/bus/usb/drivers/foo_driver/new_id
adds the hex values 0557 and 2008 to the device id table for the foo_driver.

Note, usb-serial drivers do not currently work with this capability yet.
usb-storage also might have some oddities.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c | 218 +++++++++++++++++++++++++++++++++++-----------
 include/linux/usb.h       |   8 ++
 2 files changed, 176 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 921a21be651d..1c0611045379 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -27,6 +27,15 @@
 #include "hcd.h"
 #include "usb.h"
 
+static int usb_match_one_id(struct usb_interface *interface,
+			    const struct usb_device_id *id);
+
+struct usb_dynid {
+	struct list_head node;
+	struct usb_device_id id;
+};
+
+
 static int generic_probe(struct device *dev)
 {
 	return 0;
@@ -58,6 +67,96 @@ struct device_driver usb_generic_driver = {
  * usb device or a usb interface. */
 int usb_generic_driver_data;
 
+#ifdef CONFIG_HOTPLUG
+
+/*
+ * Adds a new dynamic USBdevice ID to this driver,
+ * and cause the driver to probe for all devices again.
+ */
+static ssize_t store_new_id(struct device_driver *driver,
+			    const char *buf, size_t count)
+{
+	struct usb_driver *usb_drv = to_usb_driver(driver);
+	struct usb_dynid *dynid;
+	u32 idVendor = 0;
+	u32 idProduct = 0;
+	int fields = 0;
+
+	fields = sscanf(buf, "%x %x", &idVendor, &idProduct);
+	if (fields < 2)
+		return -EINVAL;
+
+	dynid = kzalloc(sizeof(*dynid), GFP_KERNEL);
+	if (!dynid)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&dynid->node);
+	dynid->id.idVendor = idVendor;
+	dynid->id.idProduct = idProduct;
+	dynid->id.match_flags = USB_DEVICE_ID_MATCH_DEVICE;
+
+	spin_lock(&usb_drv->dynids.lock);
+	list_add_tail(&usb_drv->dynids.list, &dynid->node);
+	spin_unlock(&usb_drv->dynids.lock);
+
+	if (get_driver(driver)) {
+		driver_attach(driver);
+		put_driver(driver);
+	}
+
+	return count;
+}
+static DRIVER_ATTR(new_id, S_IWUSR, NULL, store_new_id);
+
+static int usb_create_newid_file(struct usb_driver *usb_drv)
+{
+	int error = 0;
+
+	if (usb_drv->probe != NULL)
+		error = sysfs_create_file(&usb_drv->driver.kobj,
+					  &driver_attr_new_id.attr);
+	return error;
+}
+
+static void usb_free_dynids(struct usb_driver *usb_drv)
+{
+	struct usb_dynid *dynid, *n;
+
+	spin_lock(&usb_drv->dynids.lock);
+	list_for_each_entry_safe(dynid, n, &usb_drv->dynids.list, node) {
+		list_del(&dynid->node);
+		kfree(dynid);
+	}
+	spin_unlock(&usb_drv->dynids.lock);
+}
+#else
+static inline int usb_create_newid_file(struct usb_driver *usb_drv)
+{
+	return 0;
+}
+
+static inline void usb_free_dynids(struct usb_driver *usb_drv)
+{
+}
+#endif
+
+static const struct usb_device_id *usb_match_dynamic_id(struct usb_interface *intf,
+							struct usb_driver *drv)
+{
+	struct usb_dynid *dynid;
+
+	spin_lock(&drv->dynids.lock);
+	list_for_each_entry(dynid, &drv->dynids.list, node) {
+		if (usb_match_one_id(intf, &dynid->id)) {
+			spin_unlock(&drv->dynids.lock);
+			return &dynid->id;
+		}
+	}
+	spin_unlock(&drv->dynids.lock);
+	return NULL;
+}
+
+
 /* called from driver core with usb_bus_type.subsys writelock */
 static int usb_probe_interface(struct device *dev)
 {
@@ -75,6 +174,8 @@ static int usb_probe_interface(struct device *dev)
 		return -EHOSTUNREACH;
 
 	id = usb_match_id(intf, driver->id_table);
+	if (!id)
+		id = usb_match_dynamic_id(intf, driver);
 	if (id) {
 		dev_dbg(dev, "%s - got id\n", __FUNCTION__);
 
@@ -120,6 +221,64 @@ static int usb_unbind_interface(struct device *dev)
 	return 0;
 }
 
+/* returns 0 if no match, 1 if match */
+static int usb_match_one_id(struct usb_interface *interface,
+			    const struct usb_device_id *id)
+{
+	struct usb_host_interface *intf;
+	struct usb_device *dev;
+
+	/* proc_connectinfo in devio.c may call us with id == NULL. */
+	if (id == NULL)
+		return 0;
+
+	intf = interface->cur_altsetting;
+	dev = interface_to_usbdev(interface);
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) &&
+	    id->idVendor != le16_to_cpu(dev->descriptor.idVendor))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) &&
+	    id->idProduct != le16_to_cpu(dev->descriptor.idProduct))
+		return 0;
+
+	/* No need to test id->bcdDevice_lo != 0, since 0 is never
+	   greater than any unsigned number. */
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) &&
+	    (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice)))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) &&
+	    (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice)))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) &&
+	    (id->bDeviceClass != dev->descriptor.bDeviceClass))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) &&
+	    (id->bDeviceSubClass!= dev->descriptor.bDeviceSubClass))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) &&
+	    (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) &&
+	    (id->bInterfaceClass != intf->desc.bInterfaceClass))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) &&
+	    (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass))
+		return 0;
+
+	if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) &&
+	    (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol))
+		return 0;
+
+	return 1;
+}
 /**
  * usb_match_id - find first usb_device_id matching device or interface
  * @interface: the interface of interest
@@ -184,16 +343,10 @@ static int usb_unbind_interface(struct device *dev)
 const struct usb_device_id *usb_match_id(struct usb_interface *interface,
 					 const struct usb_device_id *id)
 {
-	struct usb_host_interface *intf;
-	struct usb_device *dev;
-
 	/* proc_connectinfo in devio.c may call us with id == NULL. */
 	if (id == NULL)
 		return NULL;
 
-	intf = interface->cur_altsetting;
-	dev = interface_to_usbdev(interface);
-
 	/* It is important to check that id->driver_info is nonzero,
 	   since an entry that is all zeroes except for a nonzero
 	   id->driver_info is the way to create an entry that
@@ -201,50 +354,8 @@ const struct usb_device_id *usb_match_id(struct usb_interface *interface,
 	   device and interface. */
 	for (; id->idVendor || id->bDeviceClass || id->bInterfaceClass ||
 	       id->driver_info; id++) {
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_VENDOR) &&
-		    id->idVendor != le16_to_cpu(dev->descriptor.idVendor))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_PRODUCT) &&
-		    id->idProduct != le16_to_cpu(dev->descriptor.idProduct))
-			continue;
-
-		/* No need to test id->bcdDevice_lo != 0, since 0 is never
-		   greater than any unsigned number. */
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_LO) &&
-		    (id->bcdDevice_lo > le16_to_cpu(dev->descriptor.bcdDevice)))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_HI) &&
-		    (id->bcdDevice_hi < le16_to_cpu(dev->descriptor.bcdDevice)))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_CLASS) &&
-		    (id->bDeviceClass != dev->descriptor.bDeviceClass))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_SUBCLASS) &&
-		    (id->bDeviceSubClass!= dev->descriptor.bDeviceSubClass))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_DEV_PROTOCOL) &&
-		    (id->bDeviceProtocol != dev->descriptor.bDeviceProtocol))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_CLASS) &&
-		    (id->bInterfaceClass != intf->desc.bInterfaceClass))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_SUBCLASS) &&
-		    (id->bInterfaceSubClass != intf->desc.bInterfaceSubClass))
-			continue;
-
-		if ((id->match_flags & USB_DEVICE_ID_MATCH_INT_PROTOCOL) &&
-		    (id->bInterfaceProtocol != intf->desc.bInterfaceProtocol))
-			continue;
-
-		return id;
+		if (usb_match_one_id(interface, id))
+			return id;
 	}
 
 	return NULL;
@@ -268,6 +379,9 @@ int usb_device_match(struct device *dev, struct device_driver *drv)
 	if (id)
 		return 1;
 
+	id = usb_match_dynamic_id(intf, usb_drv);
+	if (id)
+		return 1;
 	return 0;
 }
 
@@ -296,6 +410,8 @@ int usb_register(struct usb_driver *new_driver)
 	new_driver->driver.probe = usb_probe_interface;
 	new_driver->driver.remove = usb_unbind_interface;
 	new_driver->driver.owner = new_driver->owner;
+	spin_lock_init(&new_driver->dynids.lock);
+	INIT_LIST_HEAD(&new_driver->dynids.list);
 
 	usb_lock_all_devices();
 	retval = driver_register(&new_driver->driver);
@@ -305,6 +421,7 @@ int usb_register(struct usb_driver *new_driver)
 		pr_info("%s: registered new driver %s\n",
 			usbcore_name, new_driver->name);
 		usbfs_update_special();
+		usb_create_newid_file(new_driver);
 	} else {
 		printk(KERN_ERR "%s: error %d registering driver %s\n",
 			usbcore_name, retval, new_driver->name);
@@ -330,6 +447,7 @@ void usb_deregister(struct usb_driver *driver)
 	pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name);
 
 	usb_lock_all_devices();
+	usb_free_dynids(driver);
 	driver_unregister(&driver->driver);
 	usb_unlock_all_devices();
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d81b050e5955..0dd96ef78c13 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -529,6 +529,11 @@ static inline int usb_make_path (struct usb_device *dev, char *buf,
 
 /* ----------------------------------------------------------------------- */
 
+struct usb_dynids {
+	spinlock_t lock;
+	struct list_head list;
+};
+
 /**
  * struct usb_driver - identifies USB driver to usbcore
  * @owner: Pointer to the module owner of this driver; initialize
@@ -553,6 +558,8 @@ static inline int usb_make_path (struct usb_device *dev, char *buf,
  * @id_table: USB drivers use ID table to support hotplugging.
  *	Export this with MODULE_DEVICE_TABLE(usb,...).  This must be set
  *	or your driver's probe function will never get called.
+ * @dynids: used internally to hold the list of dynamically added device
+ *	ids for this driver.
  * @driver: the driver model core driver structure.
  *
  * USB drivers must provide a name, probe() and disconnect() methods,
@@ -588,6 +595,7 @@ struct usb_driver {
 
 	const struct usb_device_id *id_table;
 
+	struct usb_dynids dynids;
 	struct device_driver driver;
 };
 #define	to_usb_driver(d) container_of(d, struct usb_driver, driver)
-- 
cgit v1.2.3


From ba9dc657af86d05d2971633e57d1f6f94ed60472 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Wed, 16 Nov 2005 13:41:28 -0800
Subject: [PATCH] USB: allow usb drivers to disable dynamic ids

This lets drivers, like the usb-serial ones, disable the ability to add
ids from sysfs.

The usb-serial drivers are "odd" in that they are really usb-serial bus
drivers, not usb bus drivers, so the dynamic id logic will have to go
into the usb-serial bus core for those drivers to get that ability.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c             | 19 +++++++++++++++++++
 drivers/usb/serial/airprime.c         |  1 +
 drivers/usb/serial/anydata.c          |  1 +
 drivers/usb/serial/belkin_sa.c        |  1 +
 drivers/usb/serial/cp2101.c           |  1 +
 drivers/usb/serial/cyberjack.c        |  1 +
 drivers/usb/serial/cypress_m8.c       |  1 +
 drivers/usb/serial/digi_acceleport.c  |  1 +
 drivers/usb/serial/empeg.c            |  1 +
 drivers/usb/serial/ftdi_sio.c         |  1 +
 drivers/usb/serial/garmin_gps.c       |  1 +
 drivers/usb/serial/generic.c          |  1 +
 drivers/usb/serial/hp4x.c             |  1 +
 drivers/usb/serial/io_edgeport.c      |  1 +
 drivers/usb/serial/io_ti.c            |  1 +
 drivers/usb/serial/ipaq.c             |  1 +
 drivers/usb/serial/ipw.c              |  1 +
 drivers/usb/serial/ir-usb.c           |  1 +
 drivers/usb/serial/keyspan.h          |  1 +
 drivers/usb/serial/keyspan_pda.c      |  1 +
 drivers/usb/serial/kl5kusb105.c       |  1 +
 drivers/usb/serial/kobil_sct.c        |  1 +
 drivers/usb/serial/mct_u232.c         |  1 +
 drivers/usb/serial/omninet.c          |  1 +
 drivers/usb/serial/option.c           |  1 +
 drivers/usb/serial/pl2303.c           |  1 +
 drivers/usb/serial/safe_serial.c      |  1 +
 drivers/usb/serial/ti_usb_3410_5052.c |  1 +
 drivers/usb/serial/usb-serial.c       |  1 +
 drivers/usb/serial/visor.c            |  1 +
 drivers/usb/serial/whiteheat.c        |  1 +
 include/linux/usb.h                   |  3 +++
 32 files changed, 52 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 1c0611045379..5e65bc258e1b 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -112,12 +112,26 @@ static int usb_create_newid_file(struct usb_driver *usb_drv)
 {
 	int error = 0;
 
+	if (usb_drv->no_dynamic_id)
+		goto exit;
+
 	if (usb_drv->probe != NULL)
 		error = sysfs_create_file(&usb_drv->driver.kobj,
 					  &driver_attr_new_id.attr);
+exit:
 	return error;
 }
 
+static void usb_remove_newid_file(struct usb_driver *usb_drv)
+{
+	if (usb_drv->no_dynamic_id)
+		return;
+
+	if (usb_drv->probe != NULL)
+		sysfs_remove_file(&usb_drv->driver.kobj,
+				  &driver_attr_new_id.attr);
+}
+
 static void usb_free_dynids(struct usb_driver *usb_drv)
 {
 	struct usb_dynid *dynid, *n;
@@ -135,6 +149,10 @@ static inline int usb_create_newid_file(struct usb_driver *usb_drv)
 	return 0;
 }
 
+static void usb_remove_newid_file(struct usb_driver *usb_drv)
+{
+}
+
 static inline void usb_free_dynids(struct usb_driver *usb_drv)
 {
 }
@@ -447,6 +465,7 @@ void usb_deregister(struct usb_driver *driver)
 	pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name);
 
 	usb_lock_all_devices();
+	usb_remove_newid_file(driver);
 	usb_free_dynids(driver);
 	driver_unregister(&driver->driver);
 	usb_unlock_all_devices();
diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c
index 1f29d8837327..2ef9945a6c07 100644
--- a/drivers/usb/serial/airprime.c
+++ b/drivers/usb/serial/airprime.c
@@ -28,6 +28,7 @@ static struct usb_driver airprime_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver airprime_device = {
diff --git a/drivers/usb/serial/anydata.c b/drivers/usb/serial/anydata.c
index 18022a74a3dc..7a171e034b59 100644
--- a/drivers/usb/serial/anydata.c
+++ b/drivers/usb/serial/anydata.c
@@ -32,6 +32,7 @@ static struct usb_driver anydata_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static int anydata_open(struct usb_serial_port *port, struct file *filp)
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index 84bc0ee4f061..69039bd9fc5e 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -118,6 +118,7 @@ static struct usb_driver belkin_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 /* All of the device info needed for the serial converters */
diff --git a/drivers/usb/serial/cp2101.c b/drivers/usb/serial/cp2101.c
index c9787001cf2a..813bab37e076 100644
--- a/drivers/usb/serial/cp2101.c
+++ b/drivers/usb/serial/cp2101.c
@@ -72,6 +72,7 @@ static struct usb_driver cp2101_driver = {
 	.probe		= usb_serial_probe,
 	.disconnect	= usb_serial_disconnect,
 	.id_table	= id_table,
+	.no_dynamic_id	= 	1,
 };
 
 static struct usb_serial_driver cp2101_device = {
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index e581e4ae8483..8c10e4004905 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -81,6 +81,7 @@ static struct usb_driver cyberjack_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver cyberjack_device = {
diff --git a/drivers/usb/serial/cypress_m8.c b/drivers/usb/serial/cypress_m8.c
index af9290ed257b..af18355e94cc 100644
--- a/drivers/usb/serial/cypress_m8.c
+++ b/drivers/usb/serial/cypress_m8.c
@@ -112,6 +112,7 @@ static struct usb_driver cypress_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 struct cypress_private {
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index dc74644a603d..c50cec95f49b 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -498,6 +498,7 @@ static struct usb_driver digi_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index 0b0546dcc7b9..e5e40064caf2 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -110,6 +110,7 @@ static struct usb_driver empeg_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver empeg_device = {
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 06e04b442ff1..857fe791d702 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -488,6 +488,7 @@ static struct usb_driver ftdi_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 static char *ftdi_chip_name[] = {
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 35820bda7ae1..198a322286f9 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -227,6 +227,7 @@ static struct usb_driver garmin_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 53a47c31cd0e..c00a440dc421 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -73,6 +73,7 @@ static struct usb_driver generic_driver = {
 	.probe =	generic_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	generic_serial_ids,
+	.no_dynamic_id = 	1,
 };
 #endif
 
diff --git a/drivers/usb/serial/hp4x.c b/drivers/usb/serial/hp4x.c
index 8eadfb705601..e588c3fe632d 100644
--- a/drivers/usb/serial/hp4x.c
+++ b/drivers/usb/serial/hp4x.c
@@ -42,6 +42,7 @@ static struct usb_driver hp49gp_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver hp49gp_device = {
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index dc4c498bd1ed..276bd425a474 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -247,6 +247,7 @@ static struct usb_driver io_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 /* function prototypes for all of our local functions */
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 832b6d6734c0..8b2e4c78abcd 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -221,6 +221,7 @@ static struct usb_driver io_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index d5d066488100..efb568be7015 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -547,6 +547,7 @@ static struct usb_driver ipaq_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	ipaq_id_table,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index 7744b8148bc5..64e2cda2a84a 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -157,6 +157,7 @@ static struct usb_driver usb_ipw_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	usb_ipw_ids,
+	.no_dynamic_id = 	1,
 };
 
 static int debug;
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 19f329e9bdcf..647431c1ccb1 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -130,6 +130,7 @@ static struct usb_driver ir_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 5cfc13b5e56f..4e6f626f6062 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -525,6 +525,7 @@ static struct usb_driver keyspan_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	keyspan_ids_combined,
+	.no_dynamic_id = 	1,
 };
 
 /* usb_device_id table for the pre-firmware download keyspan devices */
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index cd4f48bd83b6..0d1f15268549 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -155,6 +155,7 @@ static struct usb_driver keyspan_pda_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_device_id id_table_std [] = {
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index a8951c0fd020..bd68638b7c35 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -121,6 +121,7 @@ static struct usb_driver kl5kusb105d_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver kl5kusb105d_device = {
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index 9456dd9dd136..4c853afea385 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -102,6 +102,7 @@ static struct usb_driver kobil_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index ca5dbadb9b7e..b0415e7542c4 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -130,6 +130,7 @@ static struct usb_driver mct_u232_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver mct_u232_device = {
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index 3caf97072ac0..b595befb24cf 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -85,6 +85,7 @@ static struct usb_driver omninet_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 7716000045b7..4ee657eaaa0b 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -100,6 +100,7 @@ static struct usb_driver option_driver = {
 	.probe      = usb_serial_probe,
 	.disconnect = usb_serial_disconnect,
 	.id_table   = option_ids,
+	.no_dynamic_id = 	1,
 };
 
 /* The card has three separate interfaces, wich the serial driver
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index 41a45a5025b2..e302a320444c 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -87,6 +87,7 @@ static struct usb_driver pl2303_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 #define SET_LINE_REQUEST_TYPE		0x21
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index c22bdc0c4dfd..f8241c152043 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -165,6 +165,7 @@ static struct usb_driver safe_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table,
+	.no_dynamic_id = 	1,
 };
 
 static __u16 crc10_table[256] = {
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 205dbf7201da..17a1f09483bd 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -253,6 +253,7 @@ static struct usb_driver ti_usb_driver = {
 	.probe			= usb_serial_probe,
 	.disconnect		= usb_serial_disconnect,
 	.id_table		= ti_id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 static struct usb_serial_driver ti_1port_device = {
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 0c4881d18cd5..2ac37b52485a 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -46,6 +46,7 @@ static struct usb_driver usb_serial_driver = {
 	.name =		"usbserial",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
+	.no_dynamic_id = 	1,
 };
 
 /* There is no MODULE_DEVICE_TABLE for usbserial.c.  Instead
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index a473c1c34559..2973f5564c06 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -178,6 +178,7 @@ static struct usb_driver visor_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 /* All of the device info needed for the Handspring Visor, and Palm 4.0 devices */
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 18c3183be769..19c6386bb692 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -132,6 +132,7 @@ static struct usb_driver whiteheat_driver = {
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
 	.id_table =	id_table_combined,
+	.no_dynamic_id = 	1,
 };
 
 /* function prototypes for the Connect Tech WhiteHEAT prerenumeration device */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 0dd96ef78c13..8d5829936bc4 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -561,6 +561,8 @@ struct usb_dynids {
  * @dynids: used internally to hold the list of dynamically added device
  *	ids for this driver.
  * @driver: the driver model core driver structure.
+ * @no_dynamic_id: if set to 1, the USB core will not allow dynamic ids to be
+ *	added to this driver by preventing the sysfs file from being created.
  *
  * USB drivers must provide a name, probe() and disconnect() methods,
  * and an id_table.  Other driver fields are optional.
@@ -597,6 +599,7 @@ struct usb_driver {
 
 	struct usb_dynids dynids;
 	struct device_driver driver;
+	unsigned int no_dynamic_id:1;
 };
 #define	to_usb_driver(d) container_of(d, struct usb_driver, driver)
 
-- 
cgit v1.2.3


From 2143acc6dc79bdbff812f02a7dc5ab9d4fc81fc8 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 21 Nov 2005 14:53:03 -0800
Subject: [PATCH] USB: make registering a usb driver automatically set the
 module owner

This fixes the driver that forgot to set the module owner up.  Now we
can remove the unneeded pointer from the usb driver structure.  The idea
for how to do this was from Al Viro, who did this for the PCI drivers.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c | 9 +++++----
 include/linux/usb.h       | 6 +++++-
 2 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 5e65bc258e1b..bb139f06bcd6 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -404,8 +404,9 @@ int usb_device_match(struct device *dev, struct device_driver *drv)
 }
 
 /**
- * usb_register - register a USB driver
+ * usb_register_driver - register a USB driver
  * @new_driver: USB operations for the driver
+ * @owner: module owner of this driver.
  *
  * Registers a USB driver with the USB core.  The list of unattached
  * interfaces will be rescanned whenever a new driver is added, allowing
@@ -416,7 +417,7 @@ int usb_device_match(struct device *dev, struct device_driver *drv)
  * usb_register_dev() to enable that functionality.  This function no longer
  * takes care of that.
  */
-int usb_register(struct usb_driver *new_driver)
+int usb_register_driver(struct usb_driver *new_driver, struct module *owner)
 {
 	int retval = 0;
 
@@ -427,7 +428,7 @@ int usb_register(struct usb_driver *new_driver)
 	new_driver->driver.bus = &usb_bus_type;
 	new_driver->driver.probe = usb_probe_interface;
 	new_driver->driver.remove = usb_unbind_interface;
-	new_driver->driver.owner = new_driver->owner;
+	new_driver->driver.owner = owner;
 	spin_lock_init(&new_driver->dynids.lock);
 	INIT_LIST_HEAD(&new_driver->dynids.list);
 
@@ -447,7 +448,7 @@ int usb_register(struct usb_driver *new_driver)
 
 	return retval;
 }
-EXPORT_SYMBOL_GPL(usb_register);
+EXPORT_SYMBOL_GPL(usb_register_driver);
 
 /**
  * usb_deregister - unregister a USB driver
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 8d5829936bc4..3d05c63451a8 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -625,7 +625,11 @@ struct usb_class_driver {
  * use these in module_init()/module_exit()
  * and don't forget MODULE_DEVICE_TABLE(usb, ...)
  */
-extern int usb_register(struct usb_driver *);
+int usb_register_driver(struct usb_driver *, struct module *);
+static inline int usb_register(struct usb_driver *driver)
+{
+	return usb_register_driver(driver, THIS_MODULE);
+}
 extern void usb_deregister(struct usb_driver *);
 
 extern int usb_register_dev(struct usb_interface *intf,
-- 
cgit v1.2.3


From 75318d2d7cab77b14c5d3dbd5e69f2680a769e16 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 21 Nov 2005 14:53:03 -0800
Subject: [PATCH] USB: remove .owner field from struct usb_driver

It is no longer needed, so let's remove it, saving a bit of memory.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/block/ub.c                         | 1 -
 drivers/bluetooth/bcm203x.c                | 1 -
 drivers/bluetooth/bfusb.c                  | 1 -
 drivers/bluetooth/bpa10x.c                 | 1 -
 drivers/bluetooth/hci_usb.c                | 1 -
 drivers/char/watchdog/pcwd_usb.c           | 1 -
 drivers/input/joystick/iforce/iforce-usb.c | 1 -
 drivers/isdn/hisax/hfc_usb.c               | 1 -
 drivers/isdn/hisax/st5481_init.c           | 1 -
 drivers/media/dvb/b2c2/flexcop-usb.c       | 1 -
 drivers/media/dvb/cinergyT2/cinergyT2.c    | 1 -
 drivers/media/dvb/dvb-usb/a800.c           | 1 -
 drivers/media/dvb/dvb-usb/cxusb.c          | 1 -
 drivers/media/dvb/dvb-usb/dibusb-mb.c      | 1 -
 drivers/media/dvb/dvb-usb/dibusb-mc.c      | 1 -
 drivers/media/dvb/dvb-usb/digitv.c         | 1 -
 drivers/media/dvb/dvb-usb/dtt200u.c        | 1 -
 drivers/media/dvb/dvb-usb/nova-t-usb2.c    | 1 -
 drivers/media/dvb/dvb-usb/umt-010.c        | 1 -
 drivers/media/dvb/dvb-usb/vp702x.c         | 1 -
 drivers/media/dvb/dvb-usb/vp7045.c         | 1 -
 drivers/media/video/cpia_usb.c             | 1 -
 drivers/media/video/em28xx/em28xx-video.c  | 1 -
 drivers/net/irda/irda-usb.c                | 1 -
 drivers/net/irda/stir4200.c                | 1 -
 drivers/usb/atm/cxacru.c                   | 1 -
 drivers/usb/atm/speedtch.c                 | 1 -
 drivers/usb/atm/ueagle-atm.c               | 1 -
 drivers/usb/atm/xusbatm.c                  | 1 -
 drivers/usb/class/audio.c                  | 1 -
 drivers/usb/class/cdc-acm.c                | 1 -
 drivers/usb/class/usb-midi.c               | 1 -
 drivers/usb/class/usblp.c                  | 1 -
 drivers/usb/core/devio.c                   | 1 -
 drivers/usb/core/hub.c                     | 1 -
 drivers/usb/image/mdc800.c                 | 1 -
 drivers/usb/image/microtek.c               | 1 -
 drivers/usb/input/acecad.c                 | 1 -
 drivers/usb/input/aiptek.c                 | 1 -
 drivers/usb/input/appletouch.c             | 1 -
 drivers/usb/input/ati_remote.c             | 1 -
 drivers/usb/input/hid-core.c               | 1 -
 drivers/usb/input/hiddev.c                 | 1 -
 drivers/usb/input/itmtouch.c               | 1 -
 drivers/usb/input/kbtab.c                  | 1 -
 drivers/usb/input/keyspan_remote.c         | 1 -
 drivers/usb/input/mtouchusb.c              | 1 -
 drivers/usb/input/powermate.c              | 1 -
 drivers/usb/input/touchkitusb.c            | 1 -
 drivers/usb/input/usbkbd.c                 | 1 -
 drivers/usb/input/usbmouse.c               | 1 -
 drivers/usb/input/wacom.c                  | 1 -
 drivers/usb/input/xpad.c                   | 1 -
 drivers/usb/input/yealink.c                | 1 -
 drivers/usb/media/dabusb.c                 | 1 -
 drivers/usb/media/dsbr100.c                | 1 -
 drivers/usb/media/ov511.c                  | 1 -
 drivers/usb/media/pwc/pwc-if.c             | 1 -
 drivers/usb/media/se401.c                  | 1 -
 drivers/usb/media/sn9c102_core.c           | 1 -
 drivers/usb/media/stv680.c                 | 1 -
 drivers/usb/media/vicam.c                  | 1 -
 drivers/usb/media/w9968cf.c                | 1 -
 drivers/usb/misc/auerswald.c               | 1 -
 drivers/usb/misc/cytherm.c                 | 1 -
 drivers/usb/misc/emi26.c                   | 1 -
 drivers/usb/misc/emi62.c                   | 1 -
 drivers/usb/misc/idmouse.c                 | 1 -
 drivers/usb/misc/ldusb.c                   | 1 -
 drivers/usb/misc/legousbtower.c            | 1 -
 drivers/usb/misc/phidgetkit.c              | 1 -
 drivers/usb/misc/phidgetservo.c            | 1 -
 drivers/usb/misc/rio500.c                  | 1 -
 drivers/usb/misc/sisusbvga/sisusb.c        | 1 -
 drivers/usb/misc/usblcd.c                  | 1 -
 drivers/usb/misc/usbled.c                  | 1 -
 drivers/usb/misc/usbtest.c                 | 1 -
 drivers/usb/misc/uss720.c                  | 1 -
 drivers/usb/net/asix.c                     | 1 -
 drivers/usb/net/catc.c                     | 1 -
 drivers/usb/net/cdc_ether.c                | 1 -
 drivers/usb/net/cdc_subset.c               | 1 -
 drivers/usb/net/gl620a.c                   | 1 -
 drivers/usb/net/kaweth.c                   | 1 -
 drivers/usb/net/net1080.c                  | 1 -
 drivers/usb/net/plusb.c                    | 1 -
 drivers/usb/net/rndis_host.c               | 1 -
 drivers/usb/net/rtl8150.c                  | 1 -
 drivers/usb/net/zaurus.c                   | 1 -
 drivers/usb/net/zd1201.c                   | 1 -
 drivers/usb/serial/airprime.c              | 1 -
 drivers/usb/serial/anydata.c               | 1 -
 drivers/usb/serial/belkin_sa.c             | 1 -
 drivers/usb/serial/cp2101.c                | 1 -
 drivers/usb/serial/cyberjack.c             | 1 -
 drivers/usb/serial/digi_acceleport.c       | 1 -
 drivers/usb/serial/empeg.c                 | 1 -
 drivers/usb/serial/garmin_gps.c            | 1 -
 drivers/usb/serial/generic.c               | 1 -
 drivers/usb/serial/hp4x.c                  | 1 -
 drivers/usb/serial/io_edgeport.c           | 1 -
 drivers/usb/serial/io_ti.c                 | 1 -
 drivers/usb/serial/ipaq.c                  | 1 -
 drivers/usb/serial/ipw.c                   | 1 -
 drivers/usb/serial/ir-usb.c                | 1 -
 drivers/usb/serial/keyspan.h               | 1 -
 drivers/usb/serial/keyspan_pda.c           | 1 -
 drivers/usb/serial/kl5kusb105.c            | 1 -
 drivers/usb/serial/kobil_sct.c             | 1 -
 drivers/usb/serial/mct_u232.c              | 1 -
 drivers/usb/serial/omninet.c               | 1 -
 drivers/usb/serial/option.c                | 1 -
 drivers/usb/serial/pl2303.c                | 1 -
 drivers/usb/serial/safe_serial.c           | 1 -
 drivers/usb/serial/ti_usb_3410_5052.c      | 1 -
 drivers/usb/serial/usb-serial.c            | 1 -
 drivers/usb/serial/visor.c                 | 1 -
 drivers/usb/serial/whiteheat.c             | 1 -
 drivers/usb/storage/libusual.c             | 1 -
 drivers/usb/storage/usb.c                  | 1 -
 drivers/usb/usb-skeleton.c                 | 1 -
 drivers/w1/dscore.c                        | 1 -
 include/linux/usb.h                        | 4 ----
 sound/usb/usbaudio.c                       | 1 -
 sound/usb/usx2y/usbusx2y.c                 | 1 -
 125 files changed, 128 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 06d741d58a68..c7a28f5be42f 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -2460,7 +2460,6 @@ static void ub_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver ub_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ub",
 	.probe =	ub_probe,
 	.disconnect =	ub_disconnect,
diff --git a/drivers/bluetooth/bcm203x.c b/drivers/bluetooth/bcm203x.c
index 8e7fb3551775..3e7a067cc087 100644
--- a/drivers/bluetooth/bcm203x.c
+++ b/drivers/bluetooth/bcm203x.c
@@ -275,7 +275,6 @@ static void bcm203x_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver bcm203x_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "bcm203x",
 	.probe		= bcm203x_probe,
 	.disconnect	= bcm203x_disconnect,
diff --git a/drivers/bluetooth/bfusb.c b/drivers/bluetooth/bfusb.c
index 067e27893e4a..8947c8837dac 100644
--- a/drivers/bluetooth/bfusb.c
+++ b/drivers/bluetooth/bfusb.c
@@ -768,7 +768,6 @@ static void bfusb_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver bfusb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "bfusb",
 	.probe		= bfusb_probe,
 	.disconnect	= bfusb_disconnect,
diff --git a/drivers/bluetooth/bpa10x.c b/drivers/bluetooth/bpa10x.c
index 394796315adc..9446960ac742 100644
--- a/drivers/bluetooth/bpa10x.c
+++ b/drivers/bluetooth/bpa10x.c
@@ -619,7 +619,6 @@ static void bpa10x_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver bpa10x_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "bpa10x",
 	.probe		= bpa10x_probe,
 	.disconnect	= bpa10x_disconnect,
diff --git a/drivers/bluetooth/hci_usb.c b/drivers/bluetooth/hci_usb.c
index 057cb2b6e6d1..92382e823285 100644
--- a/drivers/bluetooth/hci_usb.c
+++ b/drivers/bluetooth/hci_usb.c
@@ -1044,7 +1044,6 @@ static void hci_usb_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver hci_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "hci_usb",
 	.probe		= hci_usb_probe,
 	.disconnect	= hci_usb_disconnect,
diff --git a/drivers/char/watchdog/pcwd_usb.c b/drivers/char/watchdog/pcwd_usb.c
index 092e9b133750..1533f56baa42 100644
--- a/drivers/char/watchdog/pcwd_usb.c
+++ b/drivers/char/watchdog/pcwd_usb.c
@@ -151,7 +151,6 @@ static void usb_pcwd_disconnect	(struct usb_interface *interface);
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver usb_pcwd_driver = {
-	.owner =	THIS_MODULE,
 	.name =		DRIVER_NAME,
 	.probe =	usb_pcwd_probe,
 	.disconnect =	usb_pcwd_disconnect,
diff --git a/drivers/input/joystick/iforce/iforce-usb.c b/drivers/input/joystick/iforce/iforce-usb.c
index 64b4a3080985..bc2fce60f9f8 100644
--- a/drivers/input/joystick/iforce/iforce-usb.c
+++ b/drivers/input/joystick/iforce/iforce-usb.c
@@ -235,7 +235,6 @@ static struct usb_device_id iforce_usb_ids [] = {
 MODULE_DEVICE_TABLE (usb, iforce_usb_ids);
 
 struct usb_driver iforce_usb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"iforce",
 	.probe =	iforce_usb_probe,
 	.disconnect =	iforce_usb_disconnect,
diff --git a/drivers/isdn/hisax/hfc_usb.c b/drivers/isdn/hisax/hfc_usb.c
index f8457ef48826..ca5b4a3b683e 100644
--- a/drivers/isdn/hisax/hfc_usb.c
+++ b/drivers/isdn/hisax/hfc_usb.c
@@ -1715,7 +1715,6 @@ hfc_usb_disconnect(struct usb_interface
 /* our driver information structure */
 /************************************/
 static struct usb_driver hfc_drv = {
-	.owner = THIS_MODULE,
 	.name  = "hfc_usb",
 	.id_table = hfcusb_idtab,
 	.probe = hfc_usb_probe,
diff --git a/drivers/isdn/hisax/st5481_init.c b/drivers/isdn/hisax/st5481_init.c
index 8e192a3a3490..99cb0f3d59a1 100644
--- a/drivers/isdn/hisax/st5481_init.c
+++ b/drivers/isdn/hisax/st5481_init.c
@@ -180,7 +180,6 @@ static struct usb_device_id st5481_ids[] = {
 MODULE_DEVICE_TABLE (usb, st5481_ids);
 
 static struct usb_driver st5481_usb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"st5481_usb",
 	.probe =	probe_st5481,
 	.disconnect =	disconnect_st5481,
diff --git a/drivers/media/dvb/b2c2/flexcop-usb.c b/drivers/media/dvb/b2c2/flexcop-usb.c
index 0a78ba3737a5..a6c91db40ad6 100644
--- a/drivers/media/dvb/b2c2/flexcop-usb.c
+++ b/drivers/media/dvb/b2c2/flexcop-usb.c
@@ -544,7 +544,6 @@ static struct usb_device_id flexcop_usb_table [] = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver flexcop_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "b2c2_flexcop_usb",
 	.probe		= flexcop_usb_probe,
 	.disconnect = flexcop_usb_disconnect,
diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index 336fc284fa52..b996fb59b7e4 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -986,7 +986,6 @@ static const struct usb_device_id cinergyt2_table [] __devinitdata = {
 MODULE_DEVICE_TABLE(usb, cinergyt2_table);
 
 static struct usb_driver cinergyt2_driver = {
-	.owner	= THIS_MODULE,
 	.name	= "cinergyT2",
 	.probe	= cinergyt2_probe,
 	.disconnect	= cinergyt2_disconnect,
diff --git a/drivers/media/dvb/dvb-usb/a800.c b/drivers/media/dvb/dvb-usb/a800.c
index 8c7beffb045f..ce44aa6bbb83 100644
--- a/drivers/media/dvb/dvb-usb/a800.c
+++ b/drivers/media/dvb/dvb-usb/a800.c
@@ -144,7 +144,6 @@ static struct dvb_usb_properties a800_properties = {
 };
 
 static struct usb_driver a800_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_a800",
 	.probe		= a800_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/cxusb.c b/drivers/media/dvb/dvb-usb/cxusb.c
index 3fe383f4bb4c..d05fab01cccd 100644
--- a/drivers/media/dvb/dvb-usb/cxusb.c
+++ b/drivers/media/dvb/dvb-usb/cxusb.c
@@ -241,7 +241,6 @@ static struct dvb_usb_properties cxusb_properties = {
 };
 
 static struct usb_driver cxusb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_cxusb",
 	.probe		= cxusb_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/dibusb-mb.c b/drivers/media/dvb/dvb-usb/dibusb-mb.c
index aa271a2496d5..52ac3e5adf5d 100644
--- a/drivers/media/dvb/dvb-usb/dibusb-mb.c
+++ b/drivers/media/dvb/dvb-usb/dibusb-mb.c
@@ -373,7 +373,6 @@ static struct dvb_usb_properties artec_t1_usb2_properties = {
 };
 
 static struct usb_driver dibusb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_dibusb_mb",
 	.probe		= dibusb_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/dibusb-mc.c b/drivers/media/dvb/dvb-usb/dibusb-mc.c
index 6a0912eab396..55802fba3c29 100644
--- a/drivers/media/dvb/dvb-usb/dibusb-mc.c
+++ b/drivers/media/dvb/dvb-usb/dibusb-mc.c
@@ -82,7 +82,6 @@ static struct dvb_usb_properties dibusb_mc_properties = {
 };
 
 static struct usb_driver dibusb_mc_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_dibusb_mc",
 	.probe		= dibusb_mc_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/digitv.c b/drivers/media/dvb/dvb-usb/digitv.c
index f98e306a5759..450417a9e64b 100644
--- a/drivers/media/dvb/dvb-usb/digitv.c
+++ b/drivers/media/dvb/dvb-usb/digitv.c
@@ -233,7 +233,6 @@ static struct dvb_usb_properties digitv_properties = {
 };
 
 static struct usb_driver digitv_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_digitv",
 	.probe		= digitv_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/dtt200u.c b/drivers/media/dvb/dvb-usb/dtt200u.c
index b595476332cd..6e2bac873445 100644
--- a/drivers/media/dvb/dvb-usb/dtt200u.c
+++ b/drivers/media/dvb/dvb-usb/dtt200u.c
@@ -198,7 +198,6 @@ static struct dvb_usb_properties wt220u_properties = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver dtt200u_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_dtt200u",
 	.probe		= dtt200u_usb_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/nova-t-usb2.c b/drivers/media/dvb/dvb-usb/nova-t-usb2.c
index 1841a66427bf..fac48fc7a4ac 100644
--- a/drivers/media/dvb/dvb-usb/nova-t-usb2.c
+++ b/drivers/media/dvb/dvb-usb/nova-t-usb2.c
@@ -202,7 +202,6 @@ static struct dvb_usb_properties nova_t_properties = {
 };
 
 static struct usb_driver nova_t_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_nova_t_usb2",
 	.probe		= nova_t_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/umt-010.c b/drivers/media/dvb/dvb-usb/umt-010.c
index 6fd67657c269..14f1911c79bb 100644
--- a/drivers/media/dvb/dvb-usb/umt-010.c
+++ b/drivers/media/dvb/dvb-usb/umt-010.c
@@ -128,7 +128,6 @@ static struct dvb_usb_properties umt_properties = {
 };
 
 static struct usb_driver umt_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_umt_010",
 	.probe		= umt_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/vp702x.c b/drivers/media/dvb/dvb-usb/vp702x.c
index de13c04e8e64..afa00fdb5ec0 100644
--- a/drivers/media/dvb/dvb-usb/vp702x.c
+++ b/drivers/media/dvb/dvb-usb/vp702x.c
@@ -256,7 +256,6 @@ static struct dvb_usb_properties vp702x_properties = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver vp702x_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb-usb-vp702x",
 	.probe 		= vp702x_usb_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/dvb/dvb-usb/vp7045.c b/drivers/media/dvb/dvb-usb/vp7045.c
index 75765e3a569c..3835235b68df 100644
--- a/drivers/media/dvb/dvb-usb/vp7045.c
+++ b/drivers/media/dvb/dvb-usb/vp7045.c
@@ -253,7 +253,6 @@ static struct dvb_usb_properties vp7045_properties = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver vp7045_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "dvb_usb_vp7045",
 	.probe		= vp7045_usb_probe,
 	.disconnect = dvb_usb_device_exit,
diff --git a/drivers/media/video/cpia_usb.c b/drivers/media/video/cpia_usb.c
index 9774e94d1e7d..1439cb752874 100644
--- a/drivers/media/video/cpia_usb.c
+++ b/drivers/media/video/cpia_usb.c
@@ -582,7 +582,6 @@ MODULE_LICENSE("GPL");
 
 
 static struct usb_driver cpia_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "cpia",
 	.probe		= cpia_probe,
 	.disconnect	= cpia_disconnect,
diff --git a/drivers/media/video/em28xx/em28xx-video.c b/drivers/media/video/em28xx/em28xx-video.c
index 06d76879bde2..3a56120397ae 100644
--- a/drivers/media/video/em28xx/em28xx-video.c
+++ b/drivers/media/video/em28xx/em28xx-video.c
@@ -1884,7 +1884,6 @@ static void em28xx_usb_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver em28xx_usb_driver = {
-	.owner = THIS_MODULE,
 	.name = "em28xx",
 	.probe = em28xx_usb_probe,
 	.disconnect = em28xx_usb_disconnect,
diff --git a/drivers/net/irda/irda-usb.c b/drivers/net/irda/irda-usb.c
index c22c0517883c..fa176ffb4ad5 100644
--- a/drivers/net/irda/irda-usb.c
+++ b/drivers/net/irda/irda-usb.c
@@ -1539,7 +1539,6 @@ static void irda_usb_disconnect(struct usb_interface *intf)
  * USB device callbacks
  */
 static struct usb_driver irda_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "irda-usb",
 	.probe		= irda_usb_probe,
 	.disconnect	= irda_usb_disconnect,
diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c
index 3961a754e920..31867e4b891b 100644
--- a/drivers/net/irda/stir4200.c
+++ b/drivers/net/irda/stir4200.c
@@ -1152,7 +1152,6 @@ static int stir_resume(struct usb_interface *intf)
  * USB device callbacks
  */
 static struct usb_driver irda_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "stir4200",
 	.probe		= stir_probe,
 	.disconnect	= stir_disconnect,
diff --git a/drivers/usb/atm/cxacru.c b/drivers/usb/atm/cxacru.c
index 9d59dc62e6d2..af0a41e7870e 100644
--- a/drivers/usb/atm/cxacru.c
+++ b/drivers/usb/atm/cxacru.c
@@ -853,7 +853,6 @@ static int cxacru_usb_probe(struct usb_interface *intf, const struct usb_device_
 }
 
 static struct usb_driver cxacru_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= cxacru_driver_name,
 	.probe		= cxacru_usb_probe,
 	.disconnect	= usbatm_usb_disconnect,
diff --git a/drivers/usb/atm/speedtch.c b/drivers/usb/atm/speedtch.c
index d0cbbb7f0385..b28336148658 100644
--- a/drivers/usb/atm/speedtch.c
+++ b/drivers/usb/atm/speedtch.c
@@ -659,7 +659,6 @@ MODULE_DEVICE_TABLE(usb, speedtch_usb_ids);
 static int speedtch_usb_probe(struct usb_interface *, const struct usb_device_id *);
 
 static struct usb_driver speedtch_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= speedtch_driver_name,
 	.probe		= speedtch_usb_probe,
 	.disconnect	= usbatm_usb_disconnect,
diff --git a/drivers/usb/atm/ueagle-atm.c b/drivers/usb/atm/ueagle-atm.c
index be08e16df09f..7d2a679989ed 100644
--- a/drivers/usb/atm/ueagle-atm.c
+++ b/drivers/usb/atm/ueagle-atm.c
@@ -1776,7 +1776,6 @@ static const struct usb_device_id uea_ids[] = {
  * USB driver descriptor
  */
 static struct usb_driver uea_driver = {
-	.owner = THIS_MODULE,
 	.name = "ueagle-atm",
 	.id_table = uea_ids,
 	.probe = uea_probe,
diff --git a/drivers/usb/atm/xusbatm.c b/drivers/usb/atm/xusbatm.c
index 7fe7fb484d10..5c76e3aaaa5e 100644
--- a/drivers/usb/atm/xusbatm.c
+++ b/drivers/usb/atm/xusbatm.c
@@ -140,7 +140,6 @@ static int xusbatm_usb_probe(struct usb_interface *intf,
 }
 
 static struct usb_driver xusbatm_usb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= xusbatm_driver_name,
 	.probe		= xusbatm_usb_probe,
 	.disconnect	= usbatm_usb_disconnect,
diff --git a/drivers/usb/class/audio.c b/drivers/usb/class/audio.c
index 50858273f8d3..3ad9ee8b84a9 100644
--- a/drivers/usb/class/audio.c
+++ b/drivers/usb/class/audio.c
@@ -2732,7 +2732,6 @@ static struct usb_device_id usb_audio_ids [] = {
 MODULE_DEVICE_TABLE (usb, usb_audio_ids);
 
 static struct usb_driver usb_audio_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"audio",
 	.probe =	usb_audio_probe,
 	.disconnect =	usb_audio_disconnect,
diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
index 72936dc15ec9..93de121f52a8 100644
--- a/drivers/usb/class/cdc-acm.c
+++ b/drivers/usb/class/cdc-acm.c
@@ -1088,7 +1088,6 @@ static struct usb_device_id acm_ids[] = {
 MODULE_DEVICE_TABLE (usb, acm_ids);
 
 static struct usb_driver acm_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"cdc_acm",
 	.probe =	acm_probe,
 	.disconnect =	acm_disconnect,
diff --git a/drivers/usb/class/usb-midi.c b/drivers/usb/class/usb-midi.c
index 5f8af35e7633..f13f004d311f 100644
--- a/drivers/usb/class/usb-midi.c
+++ b/drivers/usb/class/usb-midi.c
@@ -2027,7 +2027,6 @@ static struct usb_device_id id_table[] = {
 };
 
 static struct usb_driver usb_midi_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"midi",
 	.probe =	usb_midi_probe,
 	.disconnect =	usb_midi_disconnect,
diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c
index 357e75335f17..10406b857ac7 100644
--- a/drivers/usb/class/usblp.c
+++ b/drivers/usb/class/usblp.c
@@ -1186,7 +1186,6 @@ static struct usb_device_id usblp_ids [] = {
 MODULE_DEVICE_TABLE (usb, usblp_ids);
 
 static struct usb_driver usblp_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usblp",
 	.probe =	usblp_probe,
 	.disconnect =	usblp_disconnect,
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index b1d6e9af732d..3a73170e95dd 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -402,7 +402,6 @@ static void driver_disconnect(struct usb_interface *intf)
 }
 
 struct usb_driver usbfs_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbfs",
 	.probe =	driver_probe,
 	.disconnect =	driver_disconnect,
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 5faf7edd73cb..40c6c50c6bd9 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -2865,7 +2865,6 @@ static struct usb_device_id hub_id_table [] = {
 MODULE_DEVICE_TABLE (usb, hub_id_table);
 
 static struct usb_driver hub_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"hub",
 	.probe =	hub_probe,
 	.disconnect =	hub_disconnect,
diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c
index 1d973bcf56aa..049871145d63 100644
--- a/drivers/usb/image/mdc800.c
+++ b/drivers/usb/image/mdc800.c
@@ -962,7 +962,6 @@ MODULE_DEVICE_TABLE (usb, mdc800_table);
  */
 static struct usb_driver mdc800_usb_driver =
 {
-	.owner =	THIS_MODULE,
 	.name =		"mdc800",
 	.probe =	mdc800_usb_probe,
 	.disconnect =	mdc800_usb_disconnect,
diff --git a/drivers/usb/image/microtek.c b/drivers/usb/image/microtek.c
index 950543aa5ac7..458f2acdeb0a 100644
--- a/drivers/usb/image/microtek.c
+++ b/drivers/usb/image/microtek.c
@@ -160,7 +160,6 @@ static void mts_usb_disconnect(struct usb_interface *intf);
 static struct usb_device_id mts_usb_ids [];
 
 static struct usb_driver mts_usb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"microtekX6",
 	.probe =	mts_usb_probe,
 	.disconnect =	mts_usb_disconnect,
diff --git a/drivers/usb/input/acecad.c b/drivers/usb/input/acecad.c
index a32558b4048e..df29b8078b54 100644
--- a/drivers/usb/input/acecad.c
+++ b/drivers/usb/input/acecad.c
@@ -261,7 +261,6 @@ static struct usb_device_id usb_acecad_id_table [] = {
 MODULE_DEVICE_TABLE(usb, usb_acecad_id_table);
 
 static struct usb_driver usb_acecad_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usb_acecad",
 	.probe =	usb_acecad_probe,
 	.disconnect =	usb_acecad_disconnect,
diff --git a/drivers/usb/input/aiptek.c b/drivers/usb/input/aiptek.c
index 0e2505c073db..356284c746a0 100644
--- a/drivers/usb/input/aiptek.c
+++ b/drivers/usb/input/aiptek.c
@@ -2190,7 +2190,6 @@ fail1:	input_free_device(inputdev);
 static void aiptek_disconnect(struct usb_interface *intf);
 
 static struct usb_driver aiptek_driver = {
-	.owner = THIS_MODULE,
 	.name = "aiptek",
 	.probe = aiptek_probe,
 	.disconnect = aiptek_disconnect,
diff --git a/drivers/usb/input/appletouch.c b/drivers/usb/input/appletouch.c
index 15840db092a5..1949b54f41f2 100644
--- a/drivers/usb/input/appletouch.c
+++ b/drivers/usb/input/appletouch.c
@@ -452,7 +452,6 @@ static int atp_resume(struct usb_interface *iface)
 }
 
 static struct usb_driver atp_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "appletouch",
 	.probe		= atp_probe,
 	.disconnect	= atp_disconnect,
diff --git a/drivers/usb/input/ati_remote.c b/drivers/usb/input/ati_remote.c
index 9a2a47db9494..8948e5c3941f 100644
--- a/drivers/usb/input/ati_remote.c
+++ b/drivers/usb/input/ati_remote.c
@@ -295,7 +295,6 @@ static void ati_remote_disconnect	(struct usb_interface *interface);
 
 /* usb specific object to register with the usb subsystem */
 static struct usb_driver ati_remote_driver = {
-	.owner        = THIS_MODULE,
 	.name         = "ati_remote",
 	.probe        = ati_remote_probe,
 	.disconnect   = ati_remote_disconnect,
diff --git a/drivers/usb/input/hid-core.c b/drivers/usb/input/hid-core.c
index a3e44ef1df43..256d7325d4a5 100644
--- a/drivers/usb/input/hid-core.c
+++ b/drivers/usb/input/hid-core.c
@@ -1930,7 +1930,6 @@ static struct usb_device_id hid_usb_ids [] = {
 MODULE_DEVICE_TABLE (usb, hid_usb_ids);
 
 static struct usb_driver hid_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbhid",
 	.probe =	hid_probe,
 	.disconnect =	hid_disconnect,
diff --git a/drivers/usb/input/hiddev.c b/drivers/usb/input/hiddev.c
index 440377c7a0da..4dff8473553d 100644
--- a/drivers/usb/input/hiddev.c
+++ b/drivers/usb/input/hiddev.c
@@ -826,7 +826,6 @@ static int hiddev_usbd_probe(struct usb_interface *intf,
 
 
 static /* const */ struct usb_driver hiddev_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"hiddev",
 	.probe =	hiddev_usbd_probe,
 };
diff --git a/drivers/usb/input/itmtouch.c b/drivers/usb/input/itmtouch.c
index 4a50acb39d29..7618ae5c104f 100644
--- a/drivers/usb/input/itmtouch.c
+++ b/drivers/usb/input/itmtouch.c
@@ -250,7 +250,6 @@ static void itmtouch_disconnect(struct usb_interface *intf)
 MODULE_DEVICE_TABLE(usb, itmtouch_ids);
 
 static struct usb_driver itmtouch_driver = {
-	.owner =        THIS_MODULE,
 	.name =         "itmtouch",
 	.probe =        itmtouch_probe,
 	.disconnect =   itmtouch_disconnect,
diff --git a/drivers/usb/input/kbtab.c b/drivers/usb/input/kbtab.c
index fd48e74e78ed..f6d5cead542b 100644
--- a/drivers/usb/input/kbtab.c
+++ b/drivers/usb/input/kbtab.c
@@ -197,7 +197,6 @@ static void kbtab_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver kbtab_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"kbtab",
 	.probe =	kbtab_probe,
 	.disconnect =	kbtab_disconnect,
diff --git a/drivers/usb/input/keyspan_remote.c b/drivers/usb/input/keyspan_remote.c
index a32cfe51b77d..5ae5201dbf5a 100644
--- a/drivers/usb/input/keyspan_remote.c
+++ b/drivers/usb/input/keyspan_remote.c
@@ -559,7 +559,6 @@ static void keyspan_disconnect(struct usb_interface *interface)
  */
 static struct usb_driver keyspan_driver =
 {
-	.owner =	THIS_MODULE,
 	.name =		"keyspan_remote",
 	.probe =	keyspan_probe,
 	.disconnect =	keyspan_disconnect,
diff --git a/drivers/usb/input/mtouchusb.c b/drivers/usb/input/mtouchusb.c
index 52cc18cd247d..f018953a5485 100644
--- a/drivers/usb/input/mtouchusb.c
+++ b/drivers/usb/input/mtouchusb.c
@@ -310,7 +310,6 @@ static void mtouchusb_disconnect(struct usb_interface *intf)
 MODULE_DEVICE_TABLE(usb, mtouchusb_devices);
 
 static struct usb_driver mtouchusb_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "mtouchusb",
 	.probe		= mtouchusb_probe,
 	.disconnect	= mtouchusb_disconnect,
diff --git a/drivers/usb/input/powermate.c b/drivers/usb/input/powermate.c
index b7476233ef5d..fdf0f788062c 100644
--- a/drivers/usb/input/powermate.c
+++ b/drivers/usb/input/powermate.c
@@ -441,7 +441,6 @@ static struct usb_device_id powermate_devices [] = {
 MODULE_DEVICE_TABLE (usb, powermate_devices);
 
 static struct usb_driver powermate_driver = {
-	.owner =	THIS_MODULE,
         .name =         "powermate",
         .probe =        powermate_probe,
         .disconnect =   powermate_disconnect,
diff --git a/drivers/usb/input/touchkitusb.c b/drivers/usb/input/touchkitusb.c
index 7420c6b84284..75e7c12e7189 100644
--- a/drivers/usb/input/touchkitusb.c
+++ b/drivers/usb/input/touchkitusb.c
@@ -267,7 +267,6 @@ static void touchkit_disconnect(struct usb_interface *intf)
 MODULE_DEVICE_TABLE(usb, touchkit_devices);
 
 static struct usb_driver touchkit_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "touchkitusb",
 	.probe		= touchkit_probe,
 	.disconnect	= touchkit_disconnect,
diff --git a/drivers/usb/input/usbkbd.c b/drivers/usb/input/usbkbd.c
index 226b6f90a907..2f3edc26cb50 100644
--- a/drivers/usb/input/usbkbd.c
+++ b/drivers/usb/input/usbkbd.c
@@ -345,7 +345,6 @@ static struct usb_device_id usb_kbd_id_table [] = {
 MODULE_DEVICE_TABLE (usb, usb_kbd_id_table);
 
 static struct usb_driver usb_kbd_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbkbd",
 	.probe =	usb_kbd_probe,
 	.disconnect =	usb_kbd_disconnect,
diff --git a/drivers/usb/input/usbmouse.c b/drivers/usb/input/usbmouse.c
index 230f6b1b314a..af526135d210 100644
--- a/drivers/usb/input/usbmouse.c
+++ b/drivers/usb/input/usbmouse.c
@@ -226,7 +226,6 @@ static struct usb_device_id usb_mouse_id_table [] = {
 MODULE_DEVICE_TABLE (usb, usb_mouse_id_table);
 
 static struct usb_driver usb_mouse_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "usbmouse",
 	.probe		= usb_mouse_probe,
 	.disconnect	= usb_mouse_disconnect,
diff --git a/drivers/usb/input/wacom.c b/drivers/usb/input/wacom.c
index dc099bbe12bf..48df4cfd5a42 100644
--- a/drivers/usb/input/wacom.c
+++ b/drivers/usb/input/wacom.c
@@ -945,7 +945,6 @@ static void wacom_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver wacom_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"wacom",
 	.probe =	wacom_probe,
 	.disconnect =	wacom_disconnect,
diff --git a/drivers/usb/input/xpad.c b/drivers/usb/input/xpad.c
index 43112f040b6d..e421328615fb 100644
--- a/drivers/usb/input/xpad.c
+++ b/drivers/usb/input/xpad.c
@@ -316,7 +316,6 @@ static void xpad_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver xpad_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "xpad",
 	.probe		= xpad_probe,
 	.disconnect	= xpad_disconnect,
diff --git a/drivers/usb/input/yealink.c b/drivers/usb/input/yealink.c
index f526aebea502..1bfc105ad4d6 100644
--- a/drivers/usb/input/yealink.c
+++ b/drivers/usb/input/yealink.c
@@ -987,7 +987,6 @@ static int usb_probe(struct usb_interface *intf, const struct usb_device_id *id)
 }
 
 static struct usb_driver yealink_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "yealink",
 	.probe		= usb_probe,
 	.disconnect	= usb_disconnect,
diff --git a/drivers/usb/media/dabusb.c b/drivers/usb/media/dabusb.c
index 27b23c55bbc7..18d8eaf408d5 100644
--- a/drivers/usb/media/dabusb.c
+++ b/drivers/usb/media/dabusb.c
@@ -812,7 +812,6 @@ static struct usb_device_id dabusb_ids [] = {
 MODULE_DEVICE_TABLE (usb, dabusb_ids);
 
 static struct usb_driver dabusb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"dabusb",
 	.probe =	dabusb_probe,
 	.disconnect =	dabusb_disconnect,
diff --git a/drivers/usb/media/dsbr100.c b/drivers/usb/media/dsbr100.c
index 7503f5b96f59..6a5700e9d428 100644
--- a/drivers/usb/media/dsbr100.c
+++ b/drivers/usb/media/dsbr100.c
@@ -150,7 +150,6 @@ MODULE_DEVICE_TABLE (usb, usb_dsbr100_device_table);
 
 /* USB subsystem interface */
 static struct usb_driver usb_dsbr100_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"dsbr100",
 	.probe =	usb_dsbr100_probe,
 	.disconnect =	usb_dsbr100_disconnect,
diff --git a/drivers/usb/media/ov511.c b/drivers/usb/media/ov511.c
index 036c485d1d1e..8df4f9de5ee5 100644
--- a/drivers/usb/media/ov511.c
+++ b/drivers/usb/media/ov511.c
@@ -6008,7 +6008,6 @@ ov51x_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver ov511_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ov511",
 	.id_table =	device_table,
 	.probe =	ov51x_probe,
diff --git a/drivers/usb/media/pwc/pwc-if.c b/drivers/usb/media/pwc/pwc-if.c
index 5524fd70210b..09ca6128ac20 100644
--- a/drivers/usb/media/pwc/pwc-if.c
+++ b/drivers/usb/media/pwc/pwc-if.c
@@ -111,7 +111,6 @@ static int usb_pwc_probe(struct usb_interface *intf, const struct usb_device_id
 static void usb_pwc_disconnect(struct usb_interface *intf);
 
 static struct usb_driver pwc_driver = {
-	.owner =		THIS_MODULE,
 	.name =			"Philips webcam",	/* name */
 	.id_table =		pwc_device_table,
 	.probe =		usb_pwc_probe,		/* probe() */
diff --git a/drivers/usb/media/se401.c b/drivers/usb/media/se401.c
index f69e443cd1bc..b2ae29af5940 100644
--- a/drivers/usb/media/se401.c
+++ b/drivers/usb/media/se401.c
@@ -1401,7 +1401,6 @@ static void se401_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver se401_driver = {
-	.owner		= THIS_MODULE,
         .name		= "se401",
         .id_table	= device_table,
 	.probe		= se401_probe,
diff --git a/drivers/usb/media/sn9c102_core.c b/drivers/usb/media/sn9c102_core.c
index b2e66e3b90aa..08723459da86 100644
--- a/drivers/usb/media/sn9c102_core.c
+++ b/drivers/usb/media/sn9c102_core.c
@@ -2711,7 +2711,6 @@ static void sn9c102_usb_disconnect(struct usb_interface* intf)
 
 
 static struct usb_driver sn9c102_usb_driver = {
-	.owner =      THIS_MODULE,
 	.name =       "sn9c102",
 	.id_table =   sn9c102_id_table,
 	.probe =      sn9c102_usb_probe,
diff --git a/drivers/usb/media/stv680.c b/drivers/usb/media/stv680.c
index 0fd0fa9fec21..774038b352cd 100644
--- a/drivers/usb/media/stv680.c
+++ b/drivers/usb/media/stv680.c
@@ -1477,7 +1477,6 @@ static void stv680_disconnect (struct usb_interface *intf)
 }
 
 static struct usb_driver stv680_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"stv680",
 	.probe =	stv680_probe,
 	.disconnect =	stv680_disconnect,
diff --git a/drivers/usb/media/vicam.c b/drivers/usb/media/vicam.c
index 0bc0b1247a6b..1c73155c8d77 100644
--- a/drivers/usb/media/vicam.c
+++ b/drivers/usb/media/vicam.c
@@ -1257,7 +1257,6 @@ static struct usb_device_id vicam_table[] = {
 MODULE_DEVICE_TABLE(usb, vicam_table);
 
 static struct usb_driver vicam_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "vicam",
 	.probe		= vicam_probe,
 	.disconnect	= vicam_disconnect,
diff --git a/drivers/usb/media/w9968cf.c b/drivers/usb/media/w9968cf.c
index 67612c81cb9f..52b90d50febb 100644
--- a/drivers/usb/media/w9968cf.c
+++ b/drivers/usb/media/w9968cf.c
@@ -3668,7 +3668,6 @@ static void w9968cf_usb_disconnect(struct usb_interface* intf)
 
 
 static struct usb_driver w9968cf_usb_driver = {
-	.owner =      THIS_MODULE,
 	.name =       "w9968cf",
 	.id_table =   winbond_id_table,
 	.probe =      w9968cf_usb_probe,
diff --git a/drivers/usb/misc/auerswald.c b/drivers/usb/misc/auerswald.c
index b293db3c28c3..fad387f21891 100644
--- a/drivers/usb/misc/auerswald.c
+++ b/drivers/usb/misc/auerswald.c
@@ -2103,7 +2103,6 @@ MODULE_DEVICE_TABLE (usb, auerswald_ids);
 
 /* Standard usb driver struct */
 static struct usb_driver auerswald_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"auerswald",
 	.probe =	auerswald_probe,
 	.disconnect =	auerswald_disconnect,
diff --git a/drivers/usb/misc/cytherm.c b/drivers/usb/misc/cytherm.c
index b33044d56a1e..6671317b495f 100644
--- a/drivers/usb/misc/cytherm.c
+++ b/drivers/usb/misc/cytherm.c
@@ -50,7 +50,6 @@ static void cytherm_disconnect(struct usb_interface *interface);
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver cytherm_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"cytherm",
 	.probe =	cytherm_probe,
 	.disconnect =	cytherm_disconnect,
diff --git a/drivers/usb/misc/emi26.c b/drivers/usb/misc/emi26.c
index c8155209bf4b..3824df33094e 100644
--- a/drivers/usb/misc/emi26.c
+++ b/drivers/usb/misc/emi26.c
@@ -227,7 +227,6 @@ static void emi26_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver emi26_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "emi26 - firmware loader",
 	.probe		= emi26_probe,
 	.disconnect	= emi26_disconnect,
diff --git a/drivers/usb/misc/emi62.c b/drivers/usb/misc/emi62.c
index 189986af2ac7..52fea2e08db8 100644
--- a/drivers/usb/misc/emi62.c
+++ b/drivers/usb/misc/emi62.c
@@ -266,7 +266,6 @@ static void emi62_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver emi62_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "emi62 - firmware loader",
 	.probe		= emi62_probe,
 	.disconnect	= emi62_disconnect,
diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c
index 1dc3e0f73014..d8cde1017985 100644
--- a/drivers/usb/misc/idmouse.c
+++ b/drivers/usb/misc/idmouse.c
@@ -114,7 +114,6 @@ static struct usb_class_driver idmouse_class = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver idmouse_driver = {
-	.owner = THIS_MODULE,
 	.name = DRIVER_SHORT,
 	.probe = idmouse_probe,
 	.disconnect = idmouse_disconnect,
diff --git a/drivers/usb/misc/ldusb.c b/drivers/usb/misc/ldusb.c
index 7e93ac96490f..981d8a5fbfd9 100644
--- a/drivers/usb/misc/ldusb.c
+++ b/drivers/usb/misc/ldusb.c
@@ -763,7 +763,6 @@ static void ld_usb_disconnect(struct usb_interface *intf)
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver ld_usb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ldusb",
 	.probe =	ld_usb_probe,
 	.disconnect =	ld_usb_disconnect,
diff --git a/drivers/usb/misc/legousbtower.c b/drivers/usb/misc/legousbtower.c
index 2703e205bc8f..1336745b8f55 100644
--- a/drivers/usb/misc/legousbtower.c
+++ b/drivers/usb/misc/legousbtower.c
@@ -282,7 +282,6 @@ static struct usb_class_driver tower_class = {
 
 /* usb specific object needed to register this driver with the usb subsystem */
 static struct usb_driver tower_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"legousbtower",
 	.probe =	tower_probe,
 	.disconnect =	tower_disconnect,
diff --git a/drivers/usb/misc/phidgetkit.c b/drivers/usb/misc/phidgetkit.c
index 067a81486921..605a3c87e05c 100644
--- a/drivers/usb/misc/phidgetkit.c
+++ b/drivers/usb/misc/phidgetkit.c
@@ -555,7 +555,6 @@ static void interfacekit_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver interfacekit_driver = {
-	.owner = THIS_MODULE,
 	.name = "phidgetkit",
 	.probe = interfacekit_probe,
 	.disconnect = interfacekit_disconnect,
diff --git a/drivers/usb/misc/phidgetservo.c b/drivers/usb/misc/phidgetservo.c
index a30d4a6ee824..b3418d2bcc69 100644
--- a/drivers/usb/misc/phidgetservo.c
+++ b/drivers/usb/misc/phidgetservo.c
@@ -306,7 +306,6 @@ servo_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver servo_driver = {
-	.owner = THIS_MODULE,
 	.name = "phidgetservo",
 	.probe = servo_probe,
 	.disconnect = servo_disconnect,
diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c
index 9590dbac5d9a..b9d66074b80c 100644
--- a/drivers/usb/misc/rio500.c
+++ b/drivers/usb/misc/rio500.c
@@ -522,7 +522,6 @@ static struct usb_device_id rio_table [] = {
 MODULE_DEVICE_TABLE (usb, rio_table);
 
 static struct usb_driver rio_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"rio500",
 	.probe =	probe_rio,
 	.disconnect =	disconnect_rio,
diff --git a/drivers/usb/misc/sisusbvga/sisusb.c b/drivers/usb/misc/sisusbvga/sisusb.c
index 41ef2b606751..44350d49ad0a 100644
--- a/drivers/usb/misc/sisusbvga/sisusb.c
+++ b/drivers/usb/misc/sisusbvga/sisusb.c
@@ -3489,7 +3489,6 @@ static struct usb_device_id sisusb_table [] = {
 MODULE_DEVICE_TABLE (usb, sisusb_table);
 
 static struct usb_driver sisusb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"sisusb",
 	.probe =	sisusb_probe,
 	.disconnect =	sisusb_disconnect,
diff --git a/drivers/usb/misc/usblcd.c b/drivers/usb/misc/usblcd.c
index 85f3725334b0..cc3dae3f34e0 100644
--- a/drivers/usb/misc/usblcd.c
+++ b/drivers/usb/misc/usblcd.c
@@ -371,7 +371,6 @@ static void lcd_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver lcd_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usblcd",
 	.probe =	lcd_probe,
 	.disconnect =	lcd_disconnect,
diff --git a/drivers/usb/misc/usbled.c b/drivers/usb/misc/usbled.c
index 3c93921cb6b3..877b081a3a6e 100644
--- a/drivers/usb/misc/usbled.c
+++ b/drivers/usb/misc/usbled.c
@@ -148,7 +148,6 @@ static void led_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver led_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbled",
 	.probe =	led_probe,
 	.disconnect =	led_disconnect,
diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
index 605a2afe34ed..84fa1728f052 100644
--- a/drivers/usb/misc/usbtest.c
+++ b/drivers/usb/misc/usbtest.c
@@ -2134,7 +2134,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver usbtest_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbtest",
 	.id_table =	id_table,
 	.probe =	usbtest_probe,
diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c
index 1cabe7ed91f5..4081990b7d1a 100644
--- a/drivers/usb/misc/uss720.c
+++ b/drivers/usb/misc/uss720.c
@@ -780,7 +780,6 @@ MODULE_DEVICE_TABLE (usb, uss720_table);
 
 
 static struct usb_driver uss720_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"uss720",
 	.probe =	uss720_probe,
 	.disconnect =	uss720_disconnect,
diff --git a/drivers/usb/net/asix.c b/drivers/usb/net/asix.c
index 542120ef1fd2..2faf2f2bdcdd 100644
--- a/drivers/usb/net/asix.c
+++ b/drivers/usb/net/asix.c
@@ -918,7 +918,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver asix_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"asix",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/catc.c b/drivers/usb/net/catc.c
index 37ef365a2472..be5f5e142dd0 100644
--- a/drivers/usb/net/catc.c
+++ b/drivers/usb/net/catc.c
@@ -934,7 +934,6 @@ static struct usb_device_id catc_id_table [] = {
 MODULE_DEVICE_TABLE(usb, catc_id_table);
 
 static struct usb_driver catc_driver = {
-	.owner =	THIS_MODULE,
 	.name =		driver_name,
 	.probe =	catc_probe,
 	.disconnect =	catc_disconnect,
diff --git a/drivers/usb/net/cdc_ether.c b/drivers/usb/net/cdc_ether.c
index c008c981862b..63f1f3ba8e0b 100644
--- a/drivers/usb/net/cdc_ether.c
+++ b/drivers/usb/net/cdc_ether.c
@@ -476,7 +476,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver cdc_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"cdc_ether",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/cdc_subset.c b/drivers/usb/net/cdc_subset.c
index f05cfb83c82d..ec801e8bb1bb 100644
--- a/drivers/usb/net/cdc_subset.c
+++ b/drivers/usb/net/cdc_subset.c
@@ -306,7 +306,6 @@ MODULE_DEVICE_TABLE(usb, products);
 /*-------------------------------------------------------------------------*/
 
 static struct usb_driver cdc_subset_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"cdc_subset",
 	.probe =	usbnet_probe,
 	.suspend =	usbnet_suspend,
diff --git a/drivers/usb/net/gl620a.c b/drivers/usb/net/gl620a.c
index 2455e9a85674..faf1e86be687 100644
--- a/drivers/usb/net/gl620a.c
+++ b/drivers/usb/net/gl620a.c
@@ -377,7 +377,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver gl620a_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"gl620a",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/kaweth.c b/drivers/usb/net/kaweth.c
index b5776518020f..def3bb8e2290 100644
--- a/drivers/usb/net/kaweth.c
+++ b/drivers/usb/net/kaweth.c
@@ -175,7 +175,6 @@ MODULE_DEVICE_TABLE (usb, usb_klsi_table);
  *     kaweth_driver
  ****************************************************************/
 static struct usb_driver kaweth_driver = {
-	.owner =	THIS_MODULE,
 	.name =		driver_name,
 	.probe =	kaweth_probe,
 	.disconnect =	kaweth_disconnect,
diff --git a/drivers/usb/net/net1080.c b/drivers/usb/net/net1080.c
index b3799b1a2b0d..78e6a43b1087 100644
--- a/drivers/usb/net/net1080.c
+++ b/drivers/usb/net/net1080.c
@@ -593,7 +593,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver net1080_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"net1080",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/plusb.c b/drivers/usb/net/plusb.c
index 89856aa0e3b8..4fe863389cb7 100644
--- a/drivers/usb/net/plusb.c
+++ b/drivers/usb/net/plusb.c
@@ -127,7 +127,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver plusb_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"plusb",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/rndis_host.c b/drivers/usb/net/rndis_host.c
index c0ecbab6f6ba..49991ac1bf3b 100644
--- a/drivers/usb/net/rndis_host.c
+++ b/drivers/usb/net/rndis_host.c
@@ -586,7 +586,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver rndis_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"rndis_host",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/rtl8150.c b/drivers/usb/net/rtl8150.c
index 787dd3591d6a..8ca52be23976 100644
--- a/drivers/usb/net/rtl8150.c
+++ b/drivers/usb/net/rtl8150.c
@@ -177,7 +177,6 @@ static int rtl8150_probe(struct usb_interface *intf,
 static const char driver_name [] = "rtl8150";
 
 static struct usb_driver rtl8150_driver = {
-	.owner =	THIS_MODULE,
 	.name =		driver_name,
 	.probe =	rtl8150_probe,
 	.disconnect =	rtl8150_disconnect,
diff --git a/drivers/usb/net/zaurus.c b/drivers/usb/net/zaurus.c
index 680d13957af4..9c5ab251370c 100644
--- a/drivers/usb/net/zaurus.c
+++ b/drivers/usb/net/zaurus.c
@@ -357,7 +357,6 @@ static const struct usb_device_id	products [] = {
 MODULE_DEVICE_TABLE(usb, products);
 
 static struct usb_driver zaurus_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"zaurus",
 	.id_table =	products,
 	.probe =	usbnet_probe,
diff --git a/drivers/usb/net/zd1201.c b/drivers/usb/net/zd1201.c
index 2f52261c7cc1..4d6673adc8fc 100644
--- a/drivers/usb/net/zd1201.c
+++ b/drivers/usb/net/zd1201.c
@@ -1923,7 +1923,6 @@ static int zd1201_resume(struct usb_interface *interface)
 #endif
 
 static struct usb_driver zd1201_usb = {
-	.owner = THIS_MODULE,
 	.name = "zd1201",
 	.probe = zd1201_probe,
 	.disconnect = zd1201_disconnect,
diff --git a/drivers/usb/serial/airprime.c b/drivers/usb/serial/airprime.c
index 2ef9945a6c07..dbf1f063098c 100644
--- a/drivers/usb/serial/airprime.c
+++ b/drivers/usb/serial/airprime.c
@@ -23,7 +23,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 static struct usb_driver airprime_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"airprime",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/anydata.c b/drivers/usb/serial/anydata.c
index 7a171e034b59..343f6f228220 100644
--- a/drivers/usb/serial/anydata.c
+++ b/drivers/usb/serial/anydata.c
@@ -27,7 +27,6 @@ static int buffer_size;
 static int debug;
 
 static struct usb_driver anydata_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"anydata",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/belkin_sa.c b/drivers/usb/serial/belkin_sa.c
index 69039bd9fc5e..4144777ea18b 100644
--- a/drivers/usb/serial/belkin_sa.c
+++ b/drivers/usb/serial/belkin_sa.c
@@ -113,7 +113,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver belkin_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"belkin",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/cp2101.c b/drivers/usb/serial/cp2101.c
index 813bab37e076..da46b351e188 100644
--- a/drivers/usb/serial/cp2101.c
+++ b/drivers/usb/serial/cp2101.c
@@ -67,7 +67,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver cp2101_driver = {
-	.owner		= THIS_MODULE,
 	.name		= "cp2101",
 	.probe		= usb_serial_probe,
 	.disconnect	= usb_serial_disconnect,
diff --git a/drivers/usb/serial/cyberjack.c b/drivers/usb/serial/cyberjack.c
index 8c10e4004905..6d18d4eaba35 100644
--- a/drivers/usb/serial/cyberjack.c
+++ b/drivers/usb/serial/cyberjack.c
@@ -76,7 +76,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver cyberjack_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"cyberjack",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c
index c50cec95f49b..8fc414bd5b24 100644
--- a/drivers/usb/serial/digi_acceleport.c
+++ b/drivers/usb/serial/digi_acceleport.c
@@ -493,7 +493,6 @@ static struct usb_device_id id_table_4 [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver digi_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"digi_acceleport",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/empeg.c b/drivers/usb/serial/empeg.c
index e5e40064caf2..79a766e9ca23 100644
--- a/drivers/usb/serial/empeg.c
+++ b/drivers/usb/serial/empeg.c
@@ -105,7 +105,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver empeg_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"empeg",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/garmin_gps.c b/drivers/usb/serial/garmin_gps.c
index 198a322286f9..452efce72714 100644
--- a/drivers/usb/serial/garmin_gps.c
+++ b/drivers/usb/serial/garmin_gps.c
@@ -222,7 +222,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver garmin_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"garmin_gps",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index c00a440dc421..4ddac620fc0c 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -68,7 +68,6 @@ static int generic_probe(struct usb_interface *interface,
 }
 
 static struct usb_driver generic_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbserial_generic",
 	.probe =	generic_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/hp4x.c b/drivers/usb/serial/hp4x.c
index e588c3fe632d..e9719da2aca1 100644
--- a/drivers/usb/serial/hp4x.c
+++ b/drivers/usb/serial/hp4x.c
@@ -37,7 +37,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 static struct usb_driver hp49gp_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"hp4X",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/io_edgeport.c b/drivers/usb/serial/io_edgeport.c
index 276bd425a474..4e2b599d85a6 100644
--- a/drivers/usb/serial/io_edgeport.c
+++ b/drivers/usb/serial/io_edgeport.c
@@ -242,7 +242,6 @@ static void edge_shutdown		(struct usb_serial *serial);
 #include "io_tables.h"	/* all of the devices that this driver supports */
 
 static struct usb_driver io_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"io_edgeport",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/io_ti.c b/drivers/usb/serial/io_ti.c
index 8b2e4c78abcd..22ad1a5a8f9e 100644
--- a/drivers/usb/serial/io_ti.c
+++ b/drivers/usb/serial/io_ti.c
@@ -216,7 +216,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver io_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"io_ti",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index efb568be7015..06d07cea0b70 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -542,7 +542,6 @@ static struct usb_device_id ipaq_id_table [] = {
 MODULE_DEVICE_TABLE (usb, ipaq_id_table);
 
 static struct usb_driver ipaq_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ipaq",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/ipw.c b/drivers/usb/serial/ipw.c
index 64e2cda2a84a..2dd191f5fe76 100644
--- a/drivers/usb/serial/ipw.c
+++ b/drivers/usb/serial/ipw.c
@@ -152,7 +152,6 @@ static struct usb_device_id usb_ipw_ids[] = {
 MODULE_DEVICE_TABLE(usb, usb_ipw_ids);
 
 static struct usb_driver usb_ipw_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ipwtty",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/ir-usb.c b/drivers/usb/serial/ir-usb.c
index 647431c1ccb1..a59010421444 100644
--- a/drivers/usb/serial/ir-usb.c
+++ b/drivers/usb/serial/ir-usb.c
@@ -125,7 +125,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver ir_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"ir-usb",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/keyspan.h b/drivers/usb/serial/keyspan.h
index 4e6f626f6062..7472ed6bf626 100644
--- a/drivers/usb/serial/keyspan.h
+++ b/drivers/usb/serial/keyspan.h
@@ -520,7 +520,6 @@ static struct usb_device_id keyspan_ids_combined[] = {
 MODULE_DEVICE_TABLE(usb, keyspan_ids_combined);
 
 static struct usb_driver keyspan_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"keyspan",                
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/keyspan_pda.c b/drivers/usb/serial/keyspan_pda.c
index 0d1f15268549..b0441c35f98f 100644
--- a/drivers/usb/serial/keyspan_pda.c
+++ b/drivers/usb/serial/keyspan_pda.c
@@ -150,7 +150,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver keyspan_pda_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"keyspan_pda",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/kl5kusb105.c b/drivers/usb/serial/kl5kusb105.c
index bd68638b7c35..4e2f7dfb58b2 100644
--- a/drivers/usb/serial/kl5kusb105.c
+++ b/drivers/usb/serial/kl5kusb105.c
@@ -116,7 +116,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver kl5kusb105d_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"kl5kusb105d",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/kobil_sct.c b/drivers/usb/serial/kobil_sct.c
index 4c853afea385..d9c21e275130 100644
--- a/drivers/usb/serial/kobil_sct.c
+++ b/drivers/usb/serial/kobil_sct.c
@@ -97,7 +97,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver kobil_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"kobil",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/mct_u232.c b/drivers/usb/serial/mct_u232.c
index b0415e7542c4..b6d6cab9c859 100644
--- a/drivers/usb/serial/mct_u232.c
+++ b/drivers/usb/serial/mct_u232.c
@@ -125,7 +125,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver mct_u232_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"mct_u232",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/omninet.c b/drivers/usb/serial/omninet.c
index b595befb24cf..762d8ff9a1e4 100644
--- a/drivers/usb/serial/omninet.c
+++ b/drivers/usb/serial/omninet.c
@@ -80,7 +80,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver omninet_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"omninet",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
index 4ee657eaaa0b..3fd2405304fd 100644
--- a/drivers/usb/serial/option.c
+++ b/drivers/usb/serial/option.c
@@ -95,7 +95,6 @@ static struct usb_device_id option_ids[] = {
 MODULE_DEVICE_TABLE(usb, option_ids);
 
 static struct usb_driver option_driver = {
-	.owner      = THIS_MODULE,
 	.name       = "option",
 	.probe      = usb_serial_probe,
 	.disconnect = usb_serial_disconnect,
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index e302a320444c..c96ba9fc19e0 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -82,7 +82,6 @@ static struct usb_device_id id_table [] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver pl2303_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"pl2303",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/safe_serial.c b/drivers/usb/serial/safe_serial.c
index f8241c152043..3ea284ce7b8b 100644
--- a/drivers/usb/serial/safe_serial.c
+++ b/drivers/usb/serial/safe_serial.c
@@ -160,7 +160,6 @@ static struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE (usb, id_table);
 
 static struct usb_driver safe_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"safe_serial",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/ti_usb_3410_5052.c b/drivers/usb/serial/ti_usb_3410_5052.c
index 17a1f09483bd..9e53ec75bcfd 100644
--- a/drivers/usb/serial/ti_usb_3410_5052.c
+++ b/drivers/usb/serial/ti_usb_3410_5052.c
@@ -248,7 +248,6 @@ static struct usb_device_id ti_id_table_combined[] = {
 };
 
 static struct usb_driver ti_usb_driver = {
-	.owner			= THIS_MODULE,
 	.name			= "ti_usb_3410_5052",
 	.probe			= usb_serial_probe,
 	.disconnect		= usb_serial_disconnect,
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 2ac37b52485a..12aaf18ff9ea 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -42,7 +42,6 @@
 
 /* Driver structure we register with the USB core */
 static struct usb_driver usb_serial_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usbserial",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/visor.c b/drivers/usb/serial/visor.c
index 2973f5564c06..49b1fbe61f25 100644
--- a/drivers/usb/serial/visor.c
+++ b/drivers/usb/serial/visor.c
@@ -173,7 +173,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver visor_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"visor",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/serial/whiteheat.c b/drivers/usb/serial/whiteheat.c
index 19c6386bb692..a7c3c4734d83 100644
--- a/drivers/usb/serial/whiteheat.c
+++ b/drivers/usb/serial/whiteheat.c
@@ -127,7 +127,6 @@ static struct usb_device_id id_table_combined [] = {
 MODULE_DEVICE_TABLE (usb, id_table_combined);
 
 static struct usb_driver whiteheat_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"whiteheat",
 	.probe =	usb_serial_probe,
 	.disconnect =	usb_serial_disconnect,
diff --git a/drivers/usb/storage/libusual.c b/drivers/usb/storage/libusual.c
index 2680c69a2417..b28151d1b609 100644
--- a/drivers/usb/storage/libusual.c
+++ b/drivers/usb/storage/libusual.c
@@ -153,7 +153,6 @@ static void usu_disconnect(struct usb_interface *intf)
 }
 
 static struct usb_driver usu_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"libusual",
 	.probe =	usu_probe,
 	.disconnect =	usu_disconnect,
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index c8375aa62723..484ed297bed0 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -1000,7 +1000,6 @@ static void storage_disconnect(struct usb_interface *intf)
  ***********************************************************************/
 
 static struct usb_driver usb_storage_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"usb-storage",
 	.probe =	storage_probe,
 	.disconnect =	storage_disconnect,
diff --git a/drivers/usb/usb-skeleton.c b/drivers/usb/usb-skeleton.c
index 6c3a53f8f26c..60c458ebaa2d 100644
--- a/drivers/usb/usb-skeleton.c
+++ b/drivers/usb/usb-skeleton.c
@@ -330,7 +330,6 @@ static void skel_disconnect(struct usb_interface *interface)
 }
 
 static struct usb_driver skel_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"skeleton",
 	.probe =	skel_probe,
 	.disconnect =	skel_disconnect,
diff --git a/drivers/w1/dscore.c b/drivers/w1/dscore.c
index 15fb250451e5..b9146306df49 100644
--- a/drivers/w1/dscore.c
+++ b/drivers/w1/dscore.c
@@ -52,7 +52,6 @@ static int ds_send_control_cmd(struct ds_device *, u16, u16);
 
 
 static struct usb_driver ds_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"DS9490R",
 	.probe =	ds_probe,
 	.disconnect =	ds_disconnect,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3d05c63451a8..2714814ab66c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -536,8 +536,6 @@ struct usb_dynids {
 
 /**
  * struct usb_driver - identifies USB driver to usbcore
- * @owner: Pointer to the module owner of this driver; initialize
- *	it using THIS_MODULE.
  * @name: The driver name should be unique among USB drivers,
  *	and should normally be the same as the module name.
  * @probe: Called to see if the driver is willing to manage a particular
@@ -580,8 +578,6 @@ struct usb_dynids {
  * them as necessary, and blocking until the unlinks complete).
  */
 struct usb_driver {
-	struct module *owner;
-
 	const char *name;
 
 	int (*probe) (struct usb_interface *intf,
diff --git a/sound/usb/usbaudio.c b/sound/usb/usbaudio.c
index 99dae024b640..22f8bb612bff 100644
--- a/sound/usb/usbaudio.c
+++ b/sound/usb/usbaudio.c
@@ -1996,7 +1996,6 @@ static struct usb_device_id usb_audio_ids [] = {
 MODULE_DEVICE_TABLE (usb, usb_audio_ids);
 
 static struct usb_driver usb_audio_driver = {
-	.owner =	THIS_MODULE,
 	.name =		"snd-usb-audio",
 	.probe =	usb_audio_probe,
 	.disconnect =	usb_audio_disconnect,
diff --git a/sound/usb/usx2y/usbusx2y.c b/sound/usb/usx2y/usbusx2y.c
index cf77313c609d..a3967f72ab4e 100644
--- a/sound/usb/usx2y/usbusx2y.c
+++ b/sound/usb/usx2y/usbusx2y.c
@@ -409,7 +409,6 @@ static void snd_usX2Y_disconnect(struct usb_interface *intf)
 
 MODULE_DEVICE_TABLE(usb, snd_usX2Y_usb_id_table);
 static struct usb_driver snd_usX2Y_usb_driver = {
- 	.owner =	THIS_MODULE,
 	.name =		"snd-usb-usx2y",
 	.probe =	snd_usX2Y_probe,
 	.disconnect =	snd_usX2Y_disconnect,
-- 
cgit v1.2.3


From 9ad3d6ccf5eee285e233dbaf186369b8d477a666 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 17 Nov 2005 17:10:32 -0500
Subject: [PATCH] USB: Remove USB private semaphore

This patch (as605) removes the private udev->serialize semaphore,
relying instead on the locking provided by the embedded struct device's
semaphore.  The changes are confined to the core, except that the
usb_trylock_device routine now uses the return convention of
down_trylock rather than down_read_trylock (they return opposite values
for no good reason).

A couple of other associated changes are included as well:

	Now that we aren't concerned about HCDs that avoid using the
	hcd glue layer, usb_disconnect no longer needs to acquire the
	usb_bus_lock -- that can be done by usb_remove_hcd where it
	belongs.

	Devices aren't locked over the same scope of code in
	usb_new_device and hub_port_connect_change as they used to be.
	This shouldn't cause any trouble.

Along with the preceding driver core patch, this needs a lot of testing.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/devices.c  |   4 +-
 drivers/usb/core/devio.c    |   2 -
 drivers/usb/core/driver.c   |   4 --
 drivers/usb/core/hcd.c      |   5 +-
 drivers/usb/core/hub.c      |  48 +++++++------------
 drivers/usb/core/usb.c      | 114 ++++----------------------------------------
 drivers/usb/core/usb.h      |   3 --
 drivers/usb/host/ohci-hub.c |   2 +-
 include/linux/usb.h         |   9 ++--
 9 files changed, 37 insertions(+), 154 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index 83e815d3cd52..55bc563a3256 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -545,10 +545,10 @@ static ssize_t usb_device_dump(char __user **buffer, size_t *nbytes, loff_t *ski
 		struct usb_device *childdev = usbdev->children[chix];
 
 		if (childdev) {
-			down(&childdev->serialize);
+			usb_lock_device(childdev);
 			ret = usb_device_dump(buffer, nbytes, skip_bytes, file_offset, childdev,
 					bus, level + 1, chix, ++cnt);
-			up(&childdev->serialize);
+			usb_unlock_device(childdev);
 			if (ret == -EFAULT)
 				return total_written;
 			total_written += ret;
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 3a73170e95dd..2b68998fe4b3 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -1349,9 +1349,7 @@ static int proc_ioctl(struct dev_state *ps, struct usbdevfs_ioctl *ctl)
 	/* let kernel drivers try to (re)bind to the interface */
 	case USBDEVFS_CONNECT:
 		usb_unlock_device(ps->dev);
-		usb_lock_all_devices();
 		bus_rescan_devices(intf->dev.bus);
-		usb_unlock_all_devices();
 		usb_lock_device(ps->dev);
 		break;
 
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index bb139f06bcd6..076462c8ba2a 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -432,9 +432,7 @@ int usb_register_driver(struct usb_driver *new_driver, struct module *owner)
 	spin_lock_init(&new_driver->dynids.lock);
 	INIT_LIST_HEAD(&new_driver->dynids.list);
 
-	usb_lock_all_devices();
 	retval = driver_register(&new_driver->driver);
-	usb_unlock_all_devices();
 
 	if (!retval) {
 		pr_info("%s: registered new driver %s\n",
@@ -465,11 +463,9 @@ void usb_deregister(struct usb_driver *driver)
 {
 	pr_info("%s: deregistering driver %s\n", usbcore_name, driver->name);
 
-	usb_lock_all_devices();
 	usb_remove_newid_file(driver);
 	usb_free_dynids(driver);
 	driver_unregister(&driver->driver);
-	usb_unlock_all_devices();
 
 	usbfs_update_special();
 }
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index da24c31ee00d..d16a0e8a7d72 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -857,9 +857,7 @@ static int register_root_hub (struct usb_device *usb_dev,
 		return (retval < 0) ? retval : -EMSGSIZE;
 	}
 
-	usb_lock_device (usb_dev);
 	retval = usb_new_device (usb_dev);
-	usb_unlock_device (usb_dev);
 	if (retval) {
 		usb_dev->bus->root_hub = NULL;
 		dev_err (parent_dev, "can't register root hub for %s, %d\n",
@@ -1891,7 +1889,10 @@ void usb_remove_hcd(struct usb_hcd *hcd)
 	spin_lock_irq (&hcd_root_hub_lock);
 	hcd->rh_registered = 0;
 	spin_unlock_irq (&hcd_root_hub_lock);
+
+	down(&usb_bus_list_lock);
 	usb_disconnect(&hcd->self.root_hub);
+	up(&usb_bus_list_lock);
 
 	hcd->poll_rh = 0;
 	del_timer_sync(&hcd->rh_timer);
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 40c6c50c6bd9..dd3bcfb2bcb6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -32,7 +32,7 @@
 #include "hub.h"
 
 /* Protect struct usb_device->state and ->children members
- * Note: Both are also protected by ->serialize, except that ->state can
+ * Note: Both are also protected by ->dev.sem, except that ->state can
  * change to USB_STATE_NOTATTACHED even when the semaphore isn't held. */
 static DEFINE_SPINLOCK(device_state_lock);
 
@@ -975,8 +975,8 @@ static int locktree(struct usb_device *udev)
 			/* when everyone grabs locks top->bottom,
 			 * non-overlapping work may be concurrent
 			 */
-			down(&udev->serialize);
-			up(&hdev->serialize);
+			usb_lock_device(udev);
+			usb_unlock_device(hdev);
 			return t + 1;
 		}
 	}
@@ -1132,16 +1132,10 @@ void usb_disconnect(struct usb_device **pdev)
 	 * this quiesces everyting except pending urbs.
 	 */
 	usb_set_device_state(udev, USB_STATE_NOTATTACHED);
-
-	/* lock the bus list on behalf of HCDs unregistering their root hubs */
-	if (!udev->parent) {
-		down(&usb_bus_list_lock);
-		usb_lock_device(udev);
-	} else
-		down(&udev->serialize);
-
 	dev_info (&udev->dev, "USB disconnect, address %d\n", udev->devnum);
 
+	usb_lock_device(udev);
+
 	/* Free up all the children before we remove this device */
 	for (i = 0; i < USB_MAXCHILDREN; i++) {
 		if (udev->children[i])
@@ -1169,11 +1163,7 @@ void usb_disconnect(struct usb_device **pdev)
 	*pdev = NULL;
 	spin_unlock_irq(&device_state_lock);
 
-	if (!udev->parent) {
-		usb_unlock_device(udev);
-		up(&usb_bus_list_lock);
-	} else
-		up(&udev->serialize);
+	usb_unlock_device(udev);
 
 	device_unregister(&udev->dev);
 }
@@ -1243,8 +1233,8 @@ static inline void show_string(struct usb_device *udev, char *id, char *string)
  *
  * This is called with devices which have been enumerated, but not yet
  * configured.  The device descriptor is available, but not descriptors
- * for any device configuration.  The caller must have locked udev and
- * either the parent hub (if udev is a normal device) or else the
+ * for any device configuration.  The caller must have locked either
+ * the parent hub (if udev is a normal device) or else the
  * usb_bus_list_lock (if udev is a root hub).  The parent's pointer to
  * udev has already been installed, but udev is not yet visible through
  * sysfs or other filesystem code.
@@ -1254,8 +1244,7 @@ static inline void show_string(struct usb_device *udev, char *id, char *string)
  *
  * This call is synchronous, and may not be used in an interrupt context.
  *
- * Only the hub driver should ever call this; root hub registration
- * uses it indirectly.
+ * Only the hub driver or root-hub registrar should ever call this.
  */
 int usb_new_device(struct usb_device *udev)
 {
@@ -1364,6 +1353,8 @@ int usb_new_device(struct usb_device *udev)
 	}
 	usb_create_sysfs_dev_files (udev);
 
+	usb_lock_device(udev);
+
 	/* choose and set the configuration. that registers the interfaces
 	 * with the driver core, and lets usb device drivers bind to them.
 	 */
@@ -1385,6 +1376,8 @@ int usb_new_device(struct usb_device *udev)
 	/* USB device state == configured ... usable */
 	usb_notify_add_device(udev);
 
+	usb_unlock_device(udev);
+
 	return 0;
 
 fail:
@@ -1872,11 +1865,8 @@ int usb_resume_device(struct usb_device *udev)
 	usb_unlock_device(udev);
 
 	/* rebind drivers that had no suspend() */
-	if (status == 0) {
-		usb_lock_all_devices();
+	if (status == 0)
 		bus_rescan_devices(&usb_bus_type);
-		usb_unlock_all_devices();
-	}
 	return status;
 }
 
@@ -1889,14 +1879,14 @@ static int remote_wakeup(struct usb_device *udev)
 	/* don't repeat RESUME sequence if this device
 	 * was already woken up by some other task
 	 */
-	down(&udev->serialize);
+	usb_lock_device(udev);
 	if (udev->state == USB_STATE_SUSPENDED) {
 		dev_dbg(&udev->dev, "RESUME (wakeup)\n");
 		/* TRSMRCY = 10 msec */
 		msleep(10);
 		status = finish_device_resume(udev);
 	}
-	up(&udev->serialize);
+	usb_unlock_device(udev);
 #endif
 	return status;
 }
@@ -1997,7 +1987,7 @@ static int hub_resume(struct usb_interface *intf)
 
 		if (!udev || status < 0)
 			continue;
-		down (&udev->serialize);
+		usb_lock_device(udev);
 		if (portstat & USB_PORT_STAT_SUSPEND)
 			status = hub_port_resume(hub, port1, udev);
 		else {
@@ -2008,7 +1998,7 @@ static int hub_resume(struct usb_interface *intf)
 				hub_port_logical_disconnect(hub, port1);
 			}
 		}
-		up(&udev->serialize);
+		usb_unlock_device(udev);
 	}
 	}
 #endif
@@ -2573,7 +2563,6 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		 * udev becomes globally accessible, although presumably
 		 * no one will look at it until hdev is unlocked.
 		 */
-		down (&udev->serialize);
 		status = 0;
 
 		/* We mustn't add new devices if the parent hub has
@@ -2597,7 +2586,6 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 			}
 		}
 
-		up (&udev->serialize);
 		if (status)
 			goto loop_disable;
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 294e9f127477..fcfda21be499 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -32,7 +32,6 @@
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/smp_lock.h>
-#include <linux/rwsem.h>
 #include <linux/usb.h>
 
 #include <asm/io.h>
@@ -49,8 +48,6 @@ const char *usbcore_name = "usbcore";
 static int nousb;	/* Disable USB when built into kernel image */
 			/* Not honored on modular build */
 
-static DECLARE_RWSEM(usb_all_devices_rwsem);
-
 
 /**
  * usb_ifnum_to_if - get the interface object with a given interface number
@@ -446,8 +443,6 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 	dev->parent = parent;
 	INIT_LIST_HEAD(&dev->filelist);
 
-	init_MUTEX(&dev->serialize);
-
 	return dev;
 }
 
@@ -520,75 +515,20 @@ void usb_put_intf(struct usb_interface *intf)
 
 /*			USB device locking
  *
- * Although locking USB devices should be straightforward, it is
- * complicated by the way the driver-model core works.  When a new USB
- * driver is registered or unregistered, the core will automatically
- * probe or disconnect all matching interfaces on all USB devices while
- * holding the USB subsystem writelock.  There's no good way for us to
- * tell which devices will be used or to lock them beforehand; our only
- * option is to effectively lock all the USB devices.
- *
- * We do that by using a private rw-semaphore, usb_all_devices_rwsem.
- * When locking an individual device you must first acquire the rwsem's
- * readlock.  When a driver is registered or unregistered the writelock
- * must be held.  These actions are encapsulated in the subroutines
- * below, so all a driver needs to do is call usb_lock_device() and
- * usb_unlock_device().
+ * USB devices and interfaces are locked using the semaphore in their
+ * embedded struct device.  The hub driver guarantees that whenever a
+ * device is connected or disconnected, drivers are called with the
+ * USB device locked as well as their particular interface.
  *
  * Complications arise when several devices are to be locked at the same
  * time.  Only hub-aware drivers that are part of usbcore ever have to
- * do this; nobody else needs to worry about it.  The problem is that
- * usb_lock_device() must not be called to lock a second device since it
- * would acquire the rwsem's readlock reentrantly, leading to deadlock if
- * another thread was waiting for the writelock.  The solution is simple:
- *
- *	When locking more than one device, call usb_lock_device()
- *	to lock the first one.  Lock the others by calling
- *	down(&udev->serialize) directly.
- *
- *	When unlocking multiple devices, use up(&udev->serialize)
- *	to unlock all but the last one.  Unlock the last one by
- *	calling usb_unlock_device().
+ * do this; nobody else needs to worry about it.  The rule for locking
+ * is simple:
  *
  *	When locking both a device and its parent, always lock the
  *	the parent first.
  */
 
-/**
- * usb_lock_device - acquire the lock for a usb device structure
- * @udev: device that's being locked
- *
- * Use this routine when you don't hold any other device locks;
- * to acquire nested inner locks call down(&udev->serialize) directly.
- * This is necessary for proper interaction with usb_lock_all_devices().
- */
-void usb_lock_device(struct usb_device *udev)
-{
-	down_read(&usb_all_devices_rwsem);
-	down(&udev->serialize);
-}
-
-/**
- * usb_trylock_device - attempt to acquire the lock for a usb device structure
- * @udev: device that's being locked
- *
- * Don't use this routine if you already hold a device lock;
- * use down_trylock(&udev->serialize) instead.
- * This is necessary for proper interaction with usb_lock_all_devices().
- *
- * Returns 1 if successful, 0 if contention.
- */
-int usb_trylock_device(struct usb_device *udev)
-{
-	if (!down_read_trylock(&usb_all_devices_rwsem))
-		return 0;
-	if (down_trylock(&udev->serialize)) {
-		up_read(&usb_all_devices_rwsem);
-		return 0;
-	}
-	return 1;
-}
-
 /**
  * usb_lock_device_for_reset - cautiously acquire the lock for a
  *	usb device structure
@@ -627,7 +567,7 @@ int usb_lock_device_for_reset(struct usb_device *udev,
 		}
 	}
 
-	while (!usb_trylock_device(udev)) {
+	while (usb_trylock_device(udev) != 0) {
 
 		/* If we can't acquire the lock after waiting one second,
 		 * we're probably deadlocked */
@@ -645,39 +585,6 @@ int usb_lock_device_for_reset(struct usb_device *udev,
 	return 1;
 }
 
-/**
- * usb_unlock_device - release the lock for a usb device structure
- * @udev: device that's being unlocked
- *
- * Use this routine when releasing the only device lock you hold;
- * to release inner nested locks call up(&udev->serialize) directly.
- * This is necessary for proper interaction with usb_lock_all_devices().
- */
-void usb_unlock_device(struct usb_device *udev)
-{
-	up(&udev->serialize);
-	up_read(&usb_all_devices_rwsem);
-}
-
-/**
- * usb_lock_all_devices - acquire the lock for all usb device structures
- *
- * This is necessary when registering a new driver or probing a bus,
- * since the driver-model core may try to use any usb_device.
- */
-void usb_lock_all_devices(void)
-{
-	down_write(&usb_all_devices_rwsem);
-}
-
-/**
- * usb_unlock_all_devices - release the lock for all usb device structures
- */
-void usb_unlock_all_devices(void)
-{
-	up_write(&usb_all_devices_rwsem);
-}
-
 
 static struct usb_device *match_device(struct usb_device *dev,
 				       u16 vendor_id, u16 product_id)
@@ -700,10 +607,10 @@ static struct usb_device *match_device(struct usb_device *dev,
 	/* look through all of the children of this device */
 	for (child = 0; child < dev->maxchild; ++child) {
 		if (dev->children[child]) {
-			down(&dev->children[child]->serialize);
+			usb_lock_device(dev->children[child]);
 			ret_dev = match_device(dev->children[child],
 					       vendor_id, product_id);
-			up(&dev->children[child]->serialize);
+			usb_unlock_device(dev->children[child]);
 			if (ret_dev)
 				goto exit;
 		}
@@ -1300,10 +1207,7 @@ EXPORT_SYMBOL(usb_put_dev);
 EXPORT_SYMBOL(usb_get_dev);
 EXPORT_SYMBOL(usb_hub_tt_clear_buffer);
 
-EXPORT_SYMBOL(usb_lock_device);
-EXPORT_SYMBOL(usb_trylock_device);
 EXPORT_SYMBOL(usb_lock_device_for_reset);
-EXPORT_SYMBOL(usb_unlock_device);
 
 EXPORT_SYMBOL(usb_driver_claim_interface);
 EXPORT_SYMBOL(usb_driver_release_interface);
diff --git a/drivers/usb/core/usb.h b/drivers/usb/core/usb.h
index 98e85fb4d3b7..4647e1ebc68d 100644
--- a/drivers/usb/core/usb.h
+++ b/drivers/usb/core/usb.h
@@ -16,9 +16,6 @@ extern int usb_get_device_descriptor(struct usb_device *dev,
 extern char *usb_cache_string(struct usb_device *udev, int index);
 extern int usb_set_configuration(struct usb_device *dev, int configuration);
 
-extern void usb_lock_all_devices(void);
-extern void usb_unlock_all_devices(void);
-
 extern void usb_kick_khubd(struct usb_device *dev);
 extern void usb_suspend_root_hub(struct usb_device *hdev);
 extern void usb_resume_root_hub(struct usb_device *dev);
diff --git a/drivers/usb/host/ohci-hub.c b/drivers/usb/host/ohci-hub.c
index 72e3b12a1926..4b2226d77b34 100644
--- a/drivers/usb/host/ohci-hub.c
+++ b/drivers/usb/host/ohci-hub.c
@@ -372,7 +372,7 @@ done:
 					& ohci->hc_control)
 				== OHCI_USB_OPER
 			&& time_after (jiffies, ohci->next_statechange)
-			&& usb_trylock_device (hcd->self.root_hub)
+			&& usb_trylock_device (hcd->self.root_hub) == 0
 			) {
 		ohci_vdbg (ohci, "autosuspend\n");
 		(void) ohci_bus_suspend (hcd);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 2714814ab66c..46dc0421d19e 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -329,8 +329,6 @@ struct usb_device {
 	struct usb_tt	*tt; 		/* low/full speed dev, highspeed hub */
 	int		ttport;		/* device port on that tt hub */
 
-	struct semaphore serialize;
-
 	unsigned int toggle[2];		/* one bit for each endpoint
 					 * ([0] = IN, [1] = OUT) */
 
@@ -377,11 +375,12 @@ struct usb_device {
 extern struct usb_device *usb_get_dev(struct usb_device *dev);
 extern void usb_put_dev(struct usb_device *dev);
 
-extern void usb_lock_device(struct usb_device *udev);
-extern int usb_trylock_device(struct usb_device *udev);
+/* USB device locking */
+#define usb_lock_device(udev)		down(&(udev)->dev.sem)
+#define usb_unlock_device(udev)		up(&(udev)->dev.sem)
+#define usb_trylock_device(udev)	down_trylock(&(udev)->dev.sem)
 extern int usb_lock_device_for_reset(struct usb_device *udev,
 		struct usb_interface *iface);
-extern void usb_unlock_device(struct usb_device *udev);
 
 /* USB port reset for device reinitialization */
 extern int usb_reset_device(struct usb_device *dev);
-- 
cgit v1.2.3


From 55c527187c9d78f840b284d596a0b298bc1493af Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 23 Nov 2005 12:03:12 -0500
Subject: [PATCH] USB: Consider power budget when choosing configuration

This patch (as609) changes the way we keep track of power budgeting for
USB hubs and devices, and it updates the choose_configuration routine to
take this information into account.  (This is something we should have
been doing all along.)  A new field in struct usb_device holds the amount
of bus current available from the upstream port, and the usb_hub structure
keeps track of the current available for each downstream port.

Two new rules for configuration selection are added:

	Don't select a self-powered configuration when only bus power
	is available.

	Don't select a configuration requiring more bus power than is
	available.

However the first rule is #if-ed out, because I found that the internal
hub in my HP USB keyboard claims that its only configuration is
self-powered.  The rule would prevent the configuration from being chosen,
leaving the hub & keyboard unconfigured.  Since similar descriptor errors
may turn out to be fairly common, it seemed wise not to include a rule
that would break automatic configuration unnecessarily for such devices.

The second rule may also trigger unnecessarily, although this should be
less common.  More likely it will annoy people by sometimes failing to
accept configurations that should never have been chosen in the first
place.

The patch also changes usbcore's reaction when no configuration is
suitable.  Instead of raising an error and rejecting the device, now
the core will simply leave the device unconfigured.  People can always
work around such problems by installing configurations manually through
sysfs.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c     |   5 +-
 drivers/usb/core/hub.c     | 229 ++++++++++++++++++++++++++++++---------------
 drivers/usb/core/hub.h     |   3 +-
 drivers/usb/core/message.c |   6 ++
 include/linux/usb.h        |   2 +
 5 files changed, 164 insertions(+), 81 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index d16a0e8a7d72..0018bbc4de34 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1825,8 +1825,6 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		retval = -ENOMEM;
 		goto err_allocate_root_hub;
 	}
-	rhdev->speed = (hcd->driver->flags & HCD_USB2) ? USB_SPEED_HIGH :
-			USB_SPEED_FULL;
 
 	/* Although in principle hcd->driver->start() might need to use rhdev,
 	 * none of the current drivers do.
@@ -1844,6 +1842,9 @@ int usb_add_hcd(struct usb_hcd *hcd,
 		dev_dbg(hcd->self.controller, "supports USB remote wakeup\n");
 	hcd->remote_wakeup = hcd->can_wakeup;
 
+	rhdev->speed = (hcd->driver->flags & HCD_USB2) ? USB_SPEED_HIGH :
+			USB_SPEED_FULL;
+	rhdev->bus_mA = min(500u, hcd->power_budget);
 	if ((retval = register_root_hub(rhdev, hcd)) != 0)
 		goto err_register_root_hub;
 
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 895ac829b9cf..b311005ff1a6 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -702,26 +702,40 @@ static int hub_configure(struct usb_hub *hub,
 	 * and battery-powered root hubs (may provide just 8 mA).
 	 */
 	ret = usb_get_status(hdev, USB_RECIP_DEVICE, 0, &hubstatus);
-	if (ret < 0) {
+	if (ret < 2) {
 		message = "can't get hub status";
 		goto fail;
 	}
 	le16_to_cpus(&hubstatus);
 	if (hdev == hdev->bus->root_hub) {
-		struct usb_hcd *hcd =
-				container_of(hdev->bus, struct usb_hcd, self);
-
-		hub->power_budget = min(500u, hcd->power_budget) / 2;
+		if (hdev->bus_mA == 0 || hdev->bus_mA >= 500)
+			hub->mA_per_port = 500;
+		else {
+			hub->mA_per_port = hdev->bus_mA;
+			hub->limited_power = 1;
+		}
 	} else if ((hubstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0) {
 		dev_dbg(hub_dev, "hub controller current requirement: %dmA\n",
 			hub->descriptor->bHubContrCurrent);
-		hub->power_budget = (501 - hub->descriptor->bHubContrCurrent)
-					/ 2;
+		hub->limited_power = 1;
+		if (hdev->maxchild > 0) {
+			int remaining = hdev->bus_mA -
+					hub->descriptor->bHubContrCurrent;
+
+			if (remaining < hdev->maxchild * 100)
+				dev_warn(hub_dev,
+					"insufficient power available "
+					"to use all downstream ports\n");
+			hub->mA_per_port = 100;		/* 7.2.1.1 */
+		}
+	} else {	/* Self-powered external hub */
+		/* FIXME: What about battery-powered external hubs that
+		 * provide less current per port? */
+		hub->mA_per_port = 500;
 	}
-	if (hub->power_budget)
-		dev_dbg(hub_dev, "%dmA bus power budget for children\n",
-			hub->power_budget * 2);
-
+	if (hub->mA_per_port < 500)
+		dev_dbg(hub_dev, "%umA bus power budget for each child\n",
+				hub->mA_per_port);
 
 	ret = hub_hub_status(hub, &hubstatus, &hubchange);
 	if (ret < 0) {
@@ -1136,45 +1150,107 @@ void usb_disconnect(struct usb_device **pdev)
 	device_unregister(&udev->dev);
 }
 
+static inline const char *plural(int n)
+{
+	return (n == 1 ? "" : "s");
+}
+
 static int choose_configuration(struct usb_device *udev)
 {
-	int c, i;
+	int i;
+	u16 devstatus;
+	int bus_powered;
+	int num_configs;
+	struct usb_host_config *c, *best;
+
+	/* If this fails, assume the device is bus-powered */
+	devstatus = 0;
+	usb_get_status(udev, USB_RECIP_DEVICE, 0, &devstatus);
+	le16_to_cpus(&devstatus);
+	bus_powered = ((devstatus & (1 << USB_DEVICE_SELF_POWERED)) == 0);
+	dev_dbg(&udev->dev, "device is %s-powered\n",
+			bus_powered ? "bus" : "self");
+
+	best = NULL;
+	c = udev->config;
+	num_configs = udev->descriptor.bNumConfigurations;
+	for (i = 0; i < num_configs; (i++, c++)) {
+		struct usb_interface_descriptor	*desc =
+				&c->intf_cache[0]->altsetting->desc;
+
+		/*
+		 * HP's USB bus-powered keyboard has only one configuration
+		 * and it claims to be self-powered; other devices may have
+		 * similar errors in their descriptors.  If the next test
+		 * were allowed to execute, such configurations would always
+		 * be rejected and the devices would not work as expected.
+		 */
+#if 0
+		/* Rule out self-powered configs for a bus-powered device */
+		if (bus_powered && (c->desc.bmAttributes &
+					USB_CONFIG_ATT_SELFPOWER))
+			continue;
+#endif
 
-	/* NOTE: this should interact with hub power budgeting */
+		/*
+		 * The next test may not be as effective as it should be.
+		 * Some hubs have errors in their descriptor, claiming
+		 * to be self-powered when they are really bus-powered.
+		 * We will overestimate the amount of current such hubs
+		 * make available for each port.
+		 *
+		 * This is a fairly benign sort of failure.  It won't
+		 * cause us to reject configurations that we should have
+		 * accepted.
+		 */
 
-	c = udev->config[0].desc.bConfigurationValue;
-	if (udev->descriptor.bNumConfigurations != 1) {
-		for (i = 0; i < udev->descriptor.bNumConfigurations; i++) {
-			struct usb_interface_descriptor	*desc;
+		/* Rule out configs that draw too much bus current */
+		if (c->desc.bMaxPower * 2 > udev->bus_mA)
+			continue;
 
-			/* heuristic:  Linux is more likely to have class
-			 * drivers, so avoid vendor-specific interfaces.
-			 */
-			desc = &udev->config[i].intf_cache[0]
-					->altsetting->desc;
-			if (desc->bInterfaceClass == USB_CLASS_VENDOR_SPEC)
-				continue;
-			/* COMM/2/all is CDC ACM, except 0xff is MSFT RNDIS.
-			 * MSFT needs this to be the first config; never use
-			 * it as the default unless Linux has host-side RNDIS.
-			 * A second config would ideally be CDC-Ethernet, but
-			 * may instead be the "vendor specific" CDC subset
-			 * long used by ARM Linux for sa1100 or pxa255.
-			 */
-			if (desc->bInterfaceClass == USB_CLASS_COMM
-					&& desc->bInterfaceSubClass == 2
-					&& desc->bInterfaceProtocol == 0xff) {
-				c = udev->config[1].desc.bConfigurationValue;
-				continue;
-			}
-			c = udev->config[i].desc.bConfigurationValue;
+		/* If the first config's first interface is COMM/2/0xff
+		 * (MSFT RNDIS), rule it out unless Linux has host-side
+		 * RNDIS support. */
+		if (i == 0 && desc->bInterfaceClass == USB_CLASS_COMM
+				&& desc->bInterfaceSubClass == 2
+				&& desc->bInterfaceProtocol == 0xff) {
+#ifndef CONFIG_USB_NET_RNDIS
+			continue;
+#else
+			best = c;
+#endif
+		}
+
+		/* From the remaining configs, choose the first one whose
+		 * first interface is for a non-vendor-specific class.
+		 * Reason: Linux is more likely to have a class driver
+		 * than a vendor-specific driver. */
+		else if (udev->descriptor.bDeviceClass !=
+						USB_CLASS_VENDOR_SPEC &&
+				desc->bInterfaceClass !=
+						USB_CLASS_VENDOR_SPEC) {
+			best = c;
 			break;
 		}
+
+		/* If all the remaining configs are vendor-specific,
+		 * choose the first one. */
+		else if (!best)
+			best = c;
+	}
+
+	if (best) {
+		i = best->desc.bConfigurationValue;
 		dev_info(&udev->dev,
-			"configuration #%d chosen from %d choices\n",
-			c, udev->descriptor.bNumConfigurations);
+			"configuration #%d chosen from %d choice%s\n",
+			i, num_configs, plural(num_configs));
+	} else {
+		i = -1;
+		dev_warn(&udev->dev,
+			"no configuration chosen from %d choice%s\n",
+			num_configs, plural(num_configs));
 	}
-	return c;
+	return i;
 }
 
 #ifdef DEBUG
@@ -1327,17 +1403,13 @@ int usb_new_device(struct usb_device *udev)
 	 * with the driver core, and lets usb device drivers bind to them.
 	 */
 	c = choose_configuration(udev);
-	if (c < 0)
-		dev_warn(&udev->dev,
-				"can't choose an initial configuration\n");
-	else {
+	if (c >= 0) {
 		err = usb_set_configuration(udev, c);
 		if (err) {
 			dev_err(&udev->dev, "can't set config #%d, error %d\n",
 					c, err);
-			usb_remove_sysfs_dev_files(udev);
-			device_del(&udev->dev);
-			goto fail;
+			/* This need not be fatal.  The user can try to
+			 * set other configurations. */
 		}
 	}
 
@@ -1702,7 +1774,7 @@ static int finish_device_resume(struct usb_device *udev)
 	 * and device drivers will know about any resume quirks.
 	 */
 	status = usb_get_status(udev, USB_RECIP_DEVICE, 0, &devstatus);
-	if (status < 0)
+	if (status < 2)
 		dev_dbg(&udev->dev,
 			"gone after usb resume? status %d\n",
 			status);
@@ -1711,7 +1783,7 @@ static int finish_device_resume(struct usb_device *udev)
 		int		(*resume)(struct device *);
 
 		le16_to_cpus(&devstatus);
-		if (devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP)
+		if ((devstatus & (1 << USB_DEVICE_REMOTE_WAKEUP))
 				&& udev->parent) {
 			status = usb_control_msg(udev,
 					usb_sndctrlpipe(udev, 0),
@@ -2374,39 +2446,36 @@ hub_power_remaining (struct usb_hub *hub)
 {
 	struct usb_device *hdev = hub->hdev;
 	int remaining;
-	unsigned i;
+	int port1;
 
-	remaining = hub->power_budget;
-	if (!remaining)		/* self-powered */
+	if (!hub->limited_power)
 		return 0;
 
-	for (i = 0; i < hdev->maxchild; i++) {
-		struct usb_device	*udev = hdev->children[i];
-		int			delta, ceiling;
+	remaining = hdev->bus_mA - hub->descriptor->bHubContrCurrent;
+	for (port1 = 1; port1 <= hdev->maxchild; ++port1) {
+		struct usb_device	*udev = hdev->children[port1 - 1];
+		int			delta;
 
 		if (!udev)
 			continue;
 
-		/* 100mA per-port ceiling, or 8mA for OTG ports */
-		if (i != (udev->bus->otg_port - 1) || hdev->parent)
-			ceiling = 50;
-		else
-			ceiling = 4;
-
+		/* Unconfigured devices may not use more than 100mA,
+		 * or 8mA for OTG ports */
 		if (udev->actconfig)
-			delta = udev->actconfig->desc.bMaxPower;
+			delta = udev->actconfig->desc.bMaxPower * 2;
+		else if (port1 != udev->bus->otg_port || hdev->parent)
+			delta = 100;
 		else
-			delta = ceiling;
-		// dev_dbg(&udev->dev, "budgeted %dmA\n", 2 * delta);
-		if (delta > ceiling)
-			dev_warn(&udev->dev, "%dmA over %dmA budget!\n",
-				2 * (delta - ceiling), 2 * ceiling);
+			delta = 8;
+		if (delta > hub->mA_per_port)
+			dev_warn(&udev->dev, "%dmA is over %umA budget "
+					"for port %d!\n",
+					delta, hub->mA_per_port, port1);
 		remaining -= delta;
 	}
 	if (remaining < 0) {
-		dev_warn(hub->intfdev,
-			"%dmA over power budget!\n",
-			-2 * remaining);
+		dev_warn(hub->intfdev, "%dmA over power budget!\n",
+			- remaining);
 		remaining = 0;
 	}
 	return remaining;
@@ -2501,7 +2570,8 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 
 		usb_set_device_state(udev, USB_STATE_POWERED);
 		udev->speed = USB_SPEED_UNKNOWN;
- 
+ 		udev->bus_mA = hub->mA_per_port;
+
 		/* set the address */
 		choose_address(udev);
 		if (udev->devnum <= 0) {
@@ -2521,16 +2591,16 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		 * on the parent.
 		 */
 		if (udev->descriptor.bDeviceClass == USB_CLASS_HUB
-				&& hub->power_budget) {
+				&& udev->bus_mA <= 100) {
 			u16	devstat;
 
 			status = usb_get_status(udev, USB_RECIP_DEVICE, 0,
 					&devstat);
-			if (status < 0) {
+			if (status < 2) {
 				dev_dbg(&udev->dev, "get status %d ?\n", status);
 				goto loop_disable;
 			}
-			cpu_to_le16s(&devstat);
+			le16_to_cpus(&devstat);
 			if ((devstat & (1 << USB_DEVICE_SELF_POWERED)) == 0) {
 				dev_err(&udev->dev,
 					"can't connect bus-powered hub "
@@ -2583,9 +2653,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 
 		status = hub_power_remaining(hub);
 		if (status)
-			dev_dbg(hub_dev,
-				"%dmA power budget left\n",
-				2 * status);
+			dev_dbg(hub_dev, "%dmA power budget left\n", status);
 
 		return;
 
@@ -2797,6 +2865,11 @@ static void hub_events(void)
 			if (hubchange & HUB_CHANGE_LOCAL_POWER) {
 				dev_dbg (hub_dev, "power change\n");
 				clear_hub_feature(hdev, C_HUB_LOCAL_POWER);
+				if (hubstatus & HUB_STATUS_LOCAL_POWER)
+					/* FIXME: Is this always true? */
+					hub->limited_power = 0;
+				else
+					hub->limited_power = 1;
 			}
 			if (hubchange & HUB_CHANGE_OVERCURRENT) {
 				dev_dbg (hub_dev, "overcurrent change\n");
diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
index bf23f8978024..29d5f45a8456 100644
--- a/drivers/usb/core/hub.h
+++ b/drivers/usb/core/hub.h
@@ -220,8 +220,9 @@ struct usb_hub {
 	struct usb_hub_descriptor *descriptor;	/* class descriptor */
 	struct usb_tt		tt;		/* Transaction Translator */
 
-	u8			power_budget;	/* in 2mA units; or zero */
+	unsigned		mA_per_port;	/* current for each child */
 
+	unsigned		limited_power:1;
 	unsigned		quiescing:1;
 	unsigned		activating:1;
 	unsigned		resume_root_hub:1;
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index fe74f99ca5f4..99ab774d4fdb 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -1387,6 +1387,12 @@ free_interfaces:
 	if (dev->state != USB_STATE_ADDRESS)
 		usb_disable_device (dev, 1);	// Skip ep0
 
+	n = dev->bus_mA - cp->desc.bMaxPower * 2;
+	if (n < 0)
+		dev_warn(&dev->dev, "new config #%d exceeds power "
+				"limit by %dmA\n",
+				configuration, -n);
+
 	if ((ret = usb_control_msg(dev, usb_sndctrlpipe(dev, 0),
 			USB_REQ_SET_CONFIGURATION, 0, configuration, 0,
 			NULL, 0, USB_CTRL_SET_TIMEOUT)) < 0)
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 46dc0421d19e..27575e678a7c 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -347,6 +347,8 @@ struct usb_device {
 
 	char **rawdescriptors;		/* Raw descriptors for each config */
 
+	unsigned short bus_mA;		/* Current available from the bus */
+
 	int have_langid;		/* whether string_langid is valid */
 	int string_langid;		/* language ID for strings */
 
-- 
cgit v1.2.3


From 12c3da346eb81b6a281031f62eda3bca993dff5a Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 23 Nov 2005 12:09:52 -0500
Subject: [PATCH] USB: Store port number in usb_device

This patch (as610) adds a field to struct usb_device to store the device's
port number.  This allows us to remove several loops in the hub driver
(searching for a particular device among all the entries in the parent's
array of children).

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hub.c | 79 ++++++++++++--------------------------------------
 drivers/usb/core/usb.c |  1 +
 include/linux/usb.h    |  1 +
 3 files changed, 20 insertions(+), 61 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index b311005ff1a6..a523c8f20b5d 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -946,24 +946,21 @@ static int locktree(struct usb_device *udev)
 	t = locktree(hdev);
 	if (t < 0)
 		return t;
-	for (t = 0; t < hdev->maxchild; t++) {
-		if (hdev->children[t] == udev) {
-			/* everything is fail-fast once disconnect
-			 * processing starts
-			 */
-			if (udev->state == USB_STATE_NOTATTACHED)
-				break;
 
-			/* when everyone grabs locks top->bottom,
-			 * non-overlapping work may be concurrent
-			 */
-			usb_lock_device(udev);
-			usb_unlock_device(hdev);
-			return t + 1;
-		}
+	/* everything is fail-fast once disconnect
+	 * processing starts
+	 */
+	if (udev->state == USB_STATE_NOTATTACHED) {
+		usb_unlock_device(hdev);
+		return -ENODEV;
 	}
+
+	/* when everyone grabs locks top->bottom,
+	 * non-overlapping work may be concurrent
+	 */
+	usb_lock_device(udev);
 	usb_unlock_device(hdev);
-	return -ENODEV;
+	return udev->portnum;
 }
 
 static void recursively_mark_NOTATTACHED(struct usb_device *udev)
@@ -1335,15 +1332,9 @@ int usb_new_device(struct usb_device *udev)
 					le16_to_cpu(udev->config[0].desc.wTotalLength),
 					USB_DT_OTG, (void **) &desc) == 0) {
 			if (desc->bmAttributes & USB_OTG_HNP) {
-				unsigned		port1;
+				unsigned		port1 = udev->portnum;
 				struct usb_device	*root = udev->parent;
 				
-				for (port1 = 1; port1 <= root->maxchild;
-						port1++) {
-					if (root->children[port1-1] == udev)
-						break;
-				}
-
 				dev_info(&udev->dev,
 					"Dual-Role OTG device on %sHNP port\n",
 					(port1 == bus->otg_port)
@@ -1720,22 +1711,9 @@ static int __usb_suspend_device (struct usb_device *udev, int port1)
 int usb_suspend_device(struct usb_device *udev)
 {
 #ifdef	CONFIG_USB_SUSPEND
-	int	port1;
-
 	if (udev->state == USB_STATE_NOTATTACHED)
 		return -ENODEV;
-	if (!udev->parent)
-		port1 = 0;
-	else {
-		for (port1 = udev->parent->maxchild; port1 > 0; --port1) {
-			if (udev->parent->children[port1-1] == udev)
-				break;
-		}
-		if (port1 == 0)
-			return -ENODEV;
-	}
-
-	return __usb_suspend_device(udev, port1);
+	return __usb_suspend_device(udev, udev->portnum);
 #else
 	/* NOTE:  udev->state unchanged, it's not lying ... */
 	udev->dev.power.power_state = PMSG_SUSPEND;
@@ -1893,20 +1871,10 @@ hub_port_resume(struct usb_hub *hub, int port1, struct usb_device *udev)
  */
 int usb_resume_device(struct usb_device *udev)
 {
-	int	port1, status;
+	int	status;
 
 	if (udev->state == USB_STATE_NOTATTACHED)
 		return -ENODEV;
-	if (!udev->parent)
-		port1 = 0;
-	else {
-		for (port1 = udev->parent->maxchild; port1 > 0; --port1) {
-			if (udev->parent->children[port1-1] == udev)
-				break;
-		}
-		if (port1 == 0)
-			return -ENODEV;
-	}
 
 #ifdef	CONFIG_USB_SUSPEND
 	/* selective resume of one downstream hub-to-device port */
@@ -1915,7 +1883,7 @@ int usb_resume_device(struct usb_device *udev)
 			// NOTE swsusp may bork us, device state being wrong...
 			// NOTE this fails if parent is also suspended...
 			status = hub_port_resume(hdev_to_hub(udev->parent),
-					port1, udev);
+					udev->portnum, udev);
 		} else
 			status = 0;
 	} else
@@ -3029,7 +2997,8 @@ int usb_reset_device(struct usb_device *udev)
 	struct usb_hub			*parent_hub;
 	struct usb_device_descriptor	descriptor = udev->descriptor;
 	struct usb_hub			*hub = NULL;
-	int 				i, ret = 0, port1 = -1;
+	int 				i, ret = 0;
+	int				port1 = udev->portnum;
 
 	if (udev->state == USB_STATE_NOTATTACHED ||
 			udev->state == USB_STATE_SUSPENDED) {
@@ -3043,18 +3012,6 @@ int usb_reset_device(struct usb_device *udev)
 		dev_dbg(&udev->dev, "%s for root hub!\n", __FUNCTION__);
 		return -EISDIR;
 	}
-
-	for (i = 0; i < parent_hdev->maxchild; i++)
-		if (parent_hdev->children[i] == udev) {
-			port1 = i + 1;
-			break;
-		}
-
-	if (port1 < 0) {
-		/* If this ever happens, it's very bad */
-		dev_err(&udev->dev, "Can't locate device's port!\n");
-		return -ENOENT;
-	}
 	parent_hub = hdev_to_hub(parent_hdev);
 
 	/* If we're resetting an active hub, take some special actions */
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index fcfda21be499..39e6b61b898a 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -439,6 +439,7 @@ usb_alloc_dev(struct usb_device *parent, struct usb_bus *bus, unsigned port1)
 		/* hub driver sets up TT records */
 	}
 
+	dev->portnum = port1;
 	dev->bus = bus;
 	dev->parent = parent;
 	INIT_LIST_HEAD(&dev->filelist);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 27575e678a7c..e59d1bd52d4f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -348,6 +348,7 @@ struct usb_device {
 	char **rawdescriptors;		/* Raw descriptors for each config */
 
 	unsigned short bus_mA;		/* Current available from the bus */
+	u8 portnum;			/* Parent port number (origin 1) */
 
 	int have_langid;		/* whether string_langid is valid */
 	int string_langid;		/* language ID for strings */
-- 
cgit v1.2.3


From e80b0fade09ef1ee67b0898d480d4c588f124d5f Mon Sep 17 00:00:00 2001
From: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Date: Sun, 4 Dec 2005 22:02:44 -0800
Subject: [PATCH] USB Storage: add alauda support

This patch adds another usb-storage subdriver, which supports two fairly
old dual-XD/SmartMedia reader-writers (USB1.1 devices).

This driver was written by Daniel Drake <dsd@gentoo.org> -- he notes
that he wrote this driver without specs, however a vendor-supplied GPL
driver for the previous generation of products ("sma03") did prove to be
quite useful, as did the sddr09 driver which also has to deal with
low-level physical block layout on SmartMedia.

The original patch has been reformed by me, as it clashed with the
libusual patches.

We really need to consolidate some of this common SmartMedia code, and
get together with the MTD guys to share it with them as well.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Matthew Dharm <mdharm-usb@one-eyed-alien.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/storage/Kconfig        |    9 +
 drivers/usb/storage/Makefile       |    1 +
 drivers/usb/storage/alauda.c       | 1119 ++++++++++++++++++++++++++++++++++++
 drivers/usb/storage/alauda.h       |  100 ++++
 drivers/usb/storage/unusual_devs.h |   14 +
 drivers/usb/storage/usb.c          |   12 +
 include/linux/usb_usual.h          |    3 +
 7 files changed, 1258 insertions(+)
 create mode 100644 drivers/usb/storage/alauda.c
 create mode 100644 drivers/usb/storage/alauda.h

(limited to 'include/linux')

diff --git a/drivers/usb/storage/Kconfig b/drivers/usb/storage/Kconfig
index bdfcb95d9c12..92be101feba7 100644
--- a/drivers/usb/storage/Kconfig
+++ b/drivers/usb/storage/Kconfig
@@ -112,6 +112,15 @@ config USB_STORAGE_JUMPSHOT
 	  Say Y here to include additional code to support the Lexar Jumpshot
 	  USB CompactFlash reader.
 
+config USB_STORAGE_ALAUDA
+	bool "Olympus MAUSB-10/Fuji DPC-R1 support (EXPERIMENTAL)"
+	depends on USB_STORAGE && EXPERIMENTAL
+	help
+	  Say Y here to include additional code to support the Olympus MAUSB-10
+	  and Fujifilm DPC-R1 USB Card reader/writer devices.
+
+	  These devices are based on the Alauda chip and support support both
+	  XD and SmartMedia cards.
 
 config USB_STORAGE_ONETOUCH
 	bool "Support OneTouch Button on Maxtor Hard Drives (EXPERIMENTAL)"
diff --git a/drivers/usb/storage/Makefile b/drivers/usb/storage/Makefile
index 2d416e9028bb..8cbba22508a4 100644
--- a/drivers/usb/storage/Makefile
+++ b/drivers/usb/storage/Makefile
@@ -18,6 +18,7 @@ usb-storage-obj-$(CONFIG_USB_STORAGE_DPCM)	+= dpcm.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_ISD200)	+= isd200.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_DATAFAB)	+= datafab.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_JUMPSHOT)	+= jumpshot.o
+usb-storage-obj-$(CONFIG_USB_STORAGE_ALAUDA)	+= alauda.o
 usb-storage-obj-$(CONFIG_USB_STORAGE_ONETOUCH)	+= onetouch.o
 
 usb-storage-objs :=	scsiglue.o protocol.o transport.o usb.o \
diff --git a/drivers/usb/storage/alauda.c b/drivers/usb/storage/alauda.c
new file mode 100644
index 000000000000..4d3cbb12b713
--- /dev/null
+++ b/drivers/usb/storage/alauda.c
@@ -0,0 +1,1119 @@
+/*
+ * Driver for Alauda-based card readers
+ *
+ * Current development and maintenance by:
+ *   (c) 2005 Daniel Drake <dsd@gentoo.org>
+ *
+ * The 'Alauda' is a chip manufacturered by RATOC for OEM use.
+ *
+ * Alauda implements a vendor-specific command set to access two media reader
+ * ports (XD, SmartMedia). This driver converts SCSI commands to the commands
+ * which are accepted by these devices.
+ *
+ * The driver was developed through reverse-engineering, with the help of the
+ * sddr09 driver which has many similarities, and with some help from the
+ * (very old) vendor-supplied GPL sma03 driver.
+ *
+ * For protocol info, see http://alauda.sourceforge.net
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <scsi/scsi.h>
+#include <scsi/scsi_cmnd.h>
+#include <scsi/scsi_device.h>
+
+#include "usb.h"
+#include "transport.h"
+#include "protocol.h"
+#include "debug.h"
+#include "alauda.h"
+
+#define short_pack(lsb,msb) ( ((u16)(lsb)) | ( ((u16)(msb))<<8 ) )
+#define LSB_of(s) ((s)&0xFF)
+#define MSB_of(s) ((s)>>8)
+
+#define MEDIA_PORT(us) us->srb->device->lun
+#define MEDIA_INFO(us) ((struct alauda_info *)us->extra)->port[MEDIA_PORT(us)]
+
+#define PBA_LO(pba) ((pba & 0xF) << 5)
+#define PBA_HI(pba) (pba >> 3)
+#define PBA_ZONE(pba) (pba >> 11)
+
+/*
+ * Media handling
+ */
+
+struct alauda_card_info {
+	unsigned char id;		/* id byte */
+	unsigned char chipshift;	/* 1<<cs bytes total capacity */
+	unsigned char pageshift;	/* 1<<ps bytes in a page */
+	unsigned char blockshift;	/* 1<<bs pages per block */
+	unsigned char zoneshift;	/* 1<<zs blocks per zone */
+};
+
+static struct alauda_card_info alauda_card_ids[] = {
+	/* NAND flash */
+	{ 0x6e, 20, 8, 4, 8},	/* 1 MB */
+	{ 0xe8, 20, 8, 4, 8},	/* 1 MB */
+	{ 0xec, 20, 8, 4, 8},	/* 1 MB */
+	{ 0x64, 21, 8, 4, 9}, 	/* 2 MB */
+	{ 0xea, 21, 8, 4, 9},	/* 2 MB */
+	{ 0x6b, 22, 9, 4, 9},	/* 4 MB */
+	{ 0xe3, 22, 9, 4, 9},	/* 4 MB */
+	{ 0xe5, 22, 9, 4, 9},	/* 4 MB */
+	{ 0xe6, 23, 9, 4, 10},	/* 8 MB */
+	{ 0x73, 24, 9, 5, 10},	/* 16 MB */
+	{ 0x75, 25, 9, 5, 10},	/* 32 MB */
+	{ 0x76, 26, 9, 5, 10},	/* 64 MB */
+	{ 0x79, 27, 9, 5, 10},	/* 128 MB */
+	{ 0x71, 28, 9, 5, 10},	/* 256 MB */
+
+	/* MASK ROM */
+	{ 0x5d, 21, 9, 4, 8},	/* 2 MB */
+	{ 0xd5, 22, 9, 4, 9},	/* 4 MB */
+	{ 0xd6, 23, 9, 4, 10},	/* 8 MB */
+	{ 0x57, 24, 9, 4, 11},	/* 16 MB */
+	{ 0x58, 25, 9, 4, 12},	/* 32 MB */
+	{ 0,}
+};
+
+static struct alauda_card_info *alauda_card_find_id(unsigned char id) {
+	int i;
+
+	for (i = 0; alauda_card_ids[i].id != 0; i++)
+		if (alauda_card_ids[i].id == id)
+			return &(alauda_card_ids[i]);
+	return NULL;
+}
+
+/*
+ * ECC computation.
+ */
+
+static unsigned char parity[256];
+static unsigned char ecc2[256];
+
+static void nand_init_ecc(void) {
+	int i, j, a;
+
+	parity[0] = 0;
+	for (i = 1; i < 256; i++)
+		parity[i] = (parity[i&(i-1)] ^ 1);
+
+	for (i = 0; i < 256; i++) {
+		a = 0;
+		for (j = 0; j < 8; j++) {
+			if (i & (1<<j)) {
+				if ((j & 1) == 0)
+					a ^= 0x04;
+				if ((j & 2) == 0)
+					a ^= 0x10;
+				if ((j & 4) == 0)
+					a ^= 0x40;
+			}
+		}
+		ecc2[i] = ~(a ^ (a<<1) ^ (parity[i] ? 0xa8 : 0));
+	}
+}
+
+/* compute 3-byte ecc on 256 bytes */
+static void nand_compute_ecc(unsigned char *data, unsigned char *ecc) {
+	int i, j, a;
+	unsigned char par, bit, bits[8];
+
+	par = 0;
+	for (j = 0; j < 8; j++)
+		bits[j] = 0;
+
+	/* collect 16 checksum bits */
+	for (i = 0; i < 256; i++) {
+		par ^= data[i];
+		bit = parity[data[i]];
+		for (j = 0; j < 8; j++)
+			if ((i & (1<<j)) == 0)
+				bits[j] ^= bit;
+	}
+
+	/* put 4+4+4 = 12 bits in the ecc */
+	a = (bits[3] << 6) + (bits[2] << 4) + (bits[1] << 2) + bits[0];
+	ecc[0] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0));
+
+	a = (bits[7] << 6) + (bits[6] << 4) + (bits[5] << 2) + bits[4];
+	ecc[1] = ~(a ^ (a<<1) ^ (parity[par] ? 0xaa : 0));
+
+	ecc[2] = ecc2[par];
+}
+
+static int nand_compare_ecc(unsigned char *data, unsigned char *ecc) {
+	return (data[0] == ecc[0] && data[1] == ecc[1] && data[2] == ecc[2]);
+}
+
+static void nand_store_ecc(unsigned char *data, unsigned char *ecc) {
+	memcpy(data, ecc, 3);
+}
+
+/*
+ * Alauda driver
+ */
+
+/*
+ * Forget our PBA <---> LBA mappings for a particular port
+ */
+static void alauda_free_maps (struct alauda_media_info *media_info)
+{
+	unsigned int shift = media_info->zoneshift
+		+ media_info->blockshift + media_info->pageshift;
+	unsigned int num_zones = media_info->capacity >> shift;
+	unsigned int i;
+
+	if (media_info->lba_to_pba != NULL)
+		for (i = 0; i < num_zones; i++) {
+			kfree(media_info->lba_to_pba[i]);
+			media_info->lba_to_pba[i] = NULL;
+		}
+
+	if (media_info->pba_to_lba != NULL)
+		for (i = 0; i < num_zones; i++) {
+			kfree(media_info->pba_to_lba[i]);
+			media_info->pba_to_lba[i] = NULL;
+		}
+}
+
+/*
+ * Returns 2 bytes of status data
+ * The first byte describes media status, and second byte describes door status
+ */
+static int alauda_get_media_status(struct us_data *us, unsigned char *data)
+{
+	int rc;
+	unsigned char command;
+
+	if (MEDIA_PORT(us) == ALAUDA_PORT_XD)
+		command = ALAUDA_GET_XD_MEDIA_STATUS;
+	else
+		command = ALAUDA_GET_SM_MEDIA_STATUS;
+
+	rc = usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
+		command, 0xc0, 0, 1, data, 2);
+
+	US_DEBUGP("alauda_get_media_status: Media status %02X %02X\n",
+		data[0], data[1]);
+
+	return rc;
+}
+
+/*
+ * Clears the "media was changed" bit so that we know when it changes again
+ * in the future.
+ */
+static int alauda_ack_media(struct us_data *us)
+{
+	unsigned char command;
+
+	if (MEDIA_PORT(us) == ALAUDA_PORT_XD)
+		command = ALAUDA_ACK_XD_MEDIA_CHANGE;
+	else
+		command = ALAUDA_ACK_SM_MEDIA_CHANGE;
+
+	return usb_stor_ctrl_transfer(us, us->send_ctrl_pipe,
+		command, 0x40, 0, 1, NULL, 0);
+}
+
+/*
+ * Retrieves a 4-byte media signature, which indicates manufacturer, capacity,
+ * and some other details.
+ */
+static int alauda_get_media_signature(struct us_data *us, unsigned char *data)
+{
+	unsigned char command;
+
+	if (MEDIA_PORT(us) == ALAUDA_PORT_XD)
+		command = ALAUDA_GET_XD_MEDIA_SIG;
+	else
+		command = ALAUDA_GET_SM_MEDIA_SIG;
+
+	return usb_stor_ctrl_transfer(us, us->recv_ctrl_pipe,
+		command, 0xc0, 0, 0, data, 4);
+}
+
+/*
+ * Resets the media status (but not the whole device?)
+ */
+static int alauda_reset_media(struct us_data *us)
+{
+	unsigned char *command = us->iobuf;
+
+	memset(command, 0, 9);
+	command[0] = ALAUDA_BULK_CMD;
+	command[1] = ALAUDA_BULK_RESET_MEDIA;
+	command[8] = MEDIA_PORT(us);
+
+	return usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+}
+
+/*
+ * Examines the media and deduces capacity, etc.
+ */
+static int alauda_init_media(struct us_data *us)
+{
+	unsigned char *data = us->iobuf;
+	int ready = 0;
+	struct alauda_card_info *media_info;
+	unsigned int num_zones;
+
+	while (ready == 0) {
+		msleep(20);
+
+		if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD)
+			return USB_STOR_TRANSPORT_ERROR;
+
+		if (data[0] & 0x10)
+			ready = 1;
+	}
+
+	US_DEBUGP("alauda_init_media: We are ready for action!\n");
+
+	if (alauda_ack_media(us) != USB_STOR_XFER_GOOD)
+		return USB_STOR_TRANSPORT_ERROR;
+
+	msleep(10);
+
+	if (alauda_get_media_status(us, data) != USB_STOR_XFER_GOOD)
+		return USB_STOR_TRANSPORT_ERROR;
+
+	if (data[0] != 0x14) {
+		US_DEBUGP("alauda_init_media: Media not ready after ack\n");
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	if (alauda_get_media_signature(us, data) != USB_STOR_XFER_GOOD)
+		return USB_STOR_TRANSPORT_ERROR;
+
+	US_DEBUGP("alauda_init_media: Media signature: %02X %02X %02X %02X\n",
+		data[0], data[1], data[2], data[3]);
+	media_info = alauda_card_find_id(data[1]);
+	if (media_info == NULL) {
+		printk("alauda_init_media: Unrecognised media signature: "
+			"%02X %02X %02X %02X\n",
+			data[0], data[1], data[2], data[3]);
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	MEDIA_INFO(us).capacity = 1 << media_info->chipshift;
+	US_DEBUGP("Found media with capacity: %ldMB\n",
+		MEDIA_INFO(us).capacity >> 20);
+
+	MEDIA_INFO(us).pageshift = media_info->pageshift;
+	MEDIA_INFO(us).blockshift = media_info->blockshift;
+	MEDIA_INFO(us).zoneshift = media_info->zoneshift;
+
+	MEDIA_INFO(us).pagesize = 1 << media_info->pageshift;
+	MEDIA_INFO(us).blocksize = 1 << media_info->blockshift;
+	MEDIA_INFO(us).zonesize = 1 << media_info->zoneshift;
+
+	MEDIA_INFO(us).uzonesize = ((1 << media_info->zoneshift) / 128) * 125;
+	MEDIA_INFO(us).blockmask = MEDIA_INFO(us).blocksize - 1;
+
+	num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift
+		+ MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift);
+	MEDIA_INFO(us).pba_to_lba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
+	MEDIA_INFO(us).lba_to_pba = kcalloc(num_zones, sizeof(u16*), GFP_NOIO);
+
+	if (alauda_reset_media(us) != USB_STOR_XFER_GOOD)
+		return USB_STOR_TRANSPORT_ERROR;
+
+	return USB_STOR_TRANSPORT_GOOD;
+}
+
+/*
+ * Examines the media status and does the right thing when the media has gone,
+ * appeared, or changed.
+ */
+static int alauda_check_media(struct us_data *us)
+{
+	struct alauda_info *info = (struct alauda_info *) us->extra;
+	unsigned char status[2];
+	int rc;
+
+	rc = alauda_get_media_status(us, status);
+
+	/* Check for no media or door open */
+	if ((status[0] & 0x80) || ((status[0] & 0x1F) == 0x10)
+		|| ((status[1] & 0x01) == 0)) {
+		US_DEBUGP("alauda_check_media: No media, or door open\n");
+		alauda_free_maps(&MEDIA_INFO(us));
+		info->sense_key = 0x02;
+		info->sense_asc = 0x3A;
+		info->sense_ascq = 0x00;
+		return USB_STOR_TRANSPORT_FAILED;
+	}
+
+	/* Check for media change */
+	if (status[0] & 0x08) {
+		US_DEBUGP("alauda_check_media: Media change detected\n");
+		alauda_free_maps(&MEDIA_INFO(us));
+		alauda_init_media(us);
+
+		info->sense_key = UNIT_ATTENTION;
+		info->sense_asc = 0x28;
+		info->sense_ascq = 0x00;
+		return USB_STOR_TRANSPORT_FAILED;
+	}
+
+	return USB_STOR_TRANSPORT_GOOD;
+}
+
+/*
+ * Checks the status from the 2nd status register
+ * Returns 3 bytes of status data, only the first is known
+ */
+static int alauda_check_status2(struct us_data *us)
+{
+	int rc;
+	unsigned char command[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_GET_STATUS2,
+		0, 0, 0, 0, 3, 0, MEDIA_PORT(us)
+	};
+	unsigned char data[3];
+
+	rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+		data, 3, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	US_DEBUGP("alauda_check_status2: %02X %02X %02X\n", data[0], data[1], data[2]);
+	if (data[0] & ALAUDA_STATUS_ERROR)
+		return USB_STOR_XFER_ERROR;
+
+	return USB_STOR_XFER_GOOD;
+}
+
+/*
+ * Gets the redundancy data for the first page of a PBA
+ * Returns 16 bytes.
+ */
+static int alauda_get_redu_data(struct us_data *us, u16 pba, unsigned char *data)
+{
+	int rc;
+	unsigned char command[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_GET_REDU_DATA,
+		PBA_HI(pba), PBA_ZONE(pba), 0, PBA_LO(pba), 0, 0, MEDIA_PORT(us)
+	};
+
+	rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+		data, 16, NULL);
+}
+
+/*
+ * Finds the first unused PBA in a zone
+ * Returns the absolute PBA of an unused PBA, or 0 if none found.
+ */
+static u16 alauda_find_unused_pba(struct alauda_media_info *info,
+	unsigned int zone)
+{
+	u16 *pba_to_lba = info->pba_to_lba[zone];
+	unsigned int i;
+
+	for (i = 0; i < info->zonesize; i++)
+		if (pba_to_lba[i] == UNDEF)
+			return (zone << info->zoneshift) + i;
+
+	return 0;
+}
+
+/*
+ * Reads the redundancy data for all PBA's in a zone
+ * Produces lba <--> pba mappings
+ */
+static int alauda_read_map(struct us_data *us, unsigned int zone)
+{
+	unsigned char *data = us->iobuf;
+	int result;
+	int i, j;
+	unsigned int zonesize = MEDIA_INFO(us).zonesize;
+	unsigned int uzonesize = MEDIA_INFO(us).uzonesize;
+	unsigned int lba_offset, lba_real, blocknum;
+	unsigned int zone_base_lba = zone * uzonesize;
+	unsigned int zone_base_pba = zone * zonesize;
+	u16 *lba_to_pba = kcalloc(zonesize, sizeof(u16), GFP_NOIO);
+	u16 *pba_to_lba = kcalloc(zonesize, sizeof(u16), GFP_NOIO);
+	if (lba_to_pba == NULL || pba_to_lba == NULL) {
+		result = USB_STOR_TRANSPORT_ERROR;
+		goto error;
+	}
+
+	US_DEBUGP("alauda_read_map: Mapping blocks for zone %d\n", zone);
+
+	/* 1024 PBA's per zone */
+	for (i = 0; i < zonesize; i++)
+		lba_to_pba[i] = pba_to_lba[i] = UNDEF;
+
+	for (i = 0; i < zonesize; i++) {
+		blocknum = zone_base_pba + i;
+
+		result = alauda_get_redu_data(us, blocknum, data);
+		if (result != USB_STOR_XFER_GOOD) {
+			result = USB_STOR_TRANSPORT_ERROR;
+			goto error;
+		}
+
+		/* special PBAs have control field 0^16 */
+		for (j = 0; j < 16; j++)
+			if (data[j] != 0)
+				goto nonz;
+		pba_to_lba[i] = UNUSABLE;
+		US_DEBUGP("alauda_read_map: PBA %d has no logical mapping\n", blocknum);
+		continue;
+
+	nonz:
+		/* unwritten PBAs have control field FF^16 */
+		for (j = 0; j < 16; j++)
+			if (data[j] != 0xff)
+				goto nonff;
+		continue;
+
+	nonff:
+		/* normal PBAs start with six FFs */
+		if (j < 6) {
+			US_DEBUGP("alauda_read_map: PBA %d has no logical mapping: "
+			       "reserved area = %02X%02X%02X%02X "
+			       "data status %02X block status %02X\n",
+			       blocknum, data[0], data[1], data[2], data[3],
+			       data[4], data[5]);
+			pba_to_lba[i] = UNUSABLE;
+			continue;
+		}
+
+		if ((data[6] >> 4) != 0x01) {
+			US_DEBUGP("alauda_read_map: PBA %d has invalid address "
+			       "field %02X%02X/%02X%02X\n",
+			       blocknum, data[6], data[7], data[11], data[12]);
+			pba_to_lba[i] = UNUSABLE;
+			continue;
+		}
+
+		/* check even parity */
+		if (parity[data[6] ^ data[7]]) {
+			printk("alauda_read_map: Bad parity in LBA for block %d"
+			       " (%02X %02X)\n", i, data[6], data[7]);
+			pba_to_lba[i] = UNUSABLE;
+			continue;
+		}
+
+		lba_offset = short_pack(data[7], data[6]);
+		lba_offset = (lba_offset & 0x07FF) >> 1;
+		lba_real = lba_offset + zone_base_lba;
+
+		/*
+		 * Every 1024 physical blocks ("zone"), the LBA numbers
+		 * go back to zero, but are within a higher block of LBA's.
+		 * Also, there is a maximum of 1000 LBA's per zone.
+		 * In other words, in PBA 1024-2047 you will find LBA 0-999
+		 * which are really LBA 1000-1999. This allows for 24 bad
+		 * or special physical blocks per zone.
+		 */
+
+		if (lba_offset >= uzonesize) {
+			printk("alauda_read_map: Bad low LBA %d for block %d\n",
+			       lba_real, blocknum);
+			continue;
+		}
+
+		if (lba_to_pba[lba_offset] != UNDEF) {
+			printk("alauda_read_map: LBA %d seen for PBA %d and %d\n",
+			       lba_real, lba_to_pba[lba_offset], blocknum);
+			continue;
+		}
+
+		pba_to_lba[i] = lba_real;
+		lba_to_pba[lba_offset] = blocknum;
+		continue;
+	}
+
+	MEDIA_INFO(us).lba_to_pba[zone] = lba_to_pba;
+	MEDIA_INFO(us).pba_to_lba[zone] = pba_to_lba;
+	result = 0;
+	goto out;
+
+error:
+	kfree(lba_to_pba);
+	kfree(pba_to_lba);
+out:
+	return result;
+}
+
+/*
+ * Checks to see whether we have already mapped a certain zone
+ * If we haven't, the map is generated
+ */
+static void alauda_ensure_map_for_zone(struct us_data *us, unsigned int zone)
+{
+	if (MEDIA_INFO(us).lba_to_pba[zone] == NULL
+		|| MEDIA_INFO(us).pba_to_lba[zone] == NULL)
+		alauda_read_map(us, zone);
+}
+
+/*
+ * Erases an entire block
+ */
+static int alauda_erase_block(struct us_data *us, u16 pba)
+{
+	int rc;
+	unsigned char command[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_ERASE_BLOCK, PBA_HI(pba),
+		PBA_ZONE(pba), 0, PBA_LO(pba), 0x02, 0, MEDIA_PORT(us)
+	};
+	unsigned char buf[2];
+
+	US_DEBUGP("alauda_erase_block: Erasing PBA %d\n", pba);
+
+	rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	rc = usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+		buf, 2, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	US_DEBUGP("alauda_erase_block: Erase result: %02X %02X\n",
+		buf[0], buf[1]);
+	return rc;
+}
+
+/*
+ * Reads data from a certain offset page inside a PBA, including interleaved
+ * redundancy data. Returns (pagesize+64)*pages bytes in data.
+ */
+static int alauda_read_block_raw(struct us_data *us, u16 pba,
+		unsigned int page, unsigned int pages, unsigned char *data)
+{
+	int rc;
+	unsigned char command[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_READ_BLOCK, PBA_HI(pba),
+		PBA_ZONE(pba), 0, PBA_LO(pba) + page, pages, 0, MEDIA_PORT(us)
+	};
+
+	US_DEBUGP("alauda_read_block: pba %d page %d count %d\n",
+		pba, page, pages);
+
+	rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	return usb_stor_bulk_transfer_buf(us, us->recv_bulk_pipe,
+		data, (MEDIA_INFO(us).pagesize + 64) * pages, NULL);
+}
+
+/*
+ * Reads data from a certain offset page inside a PBA, excluding redundancy
+ * data. Returns pagesize*pages bytes in data. Note that data must be big enough
+ * to hold (pagesize+64)*pages bytes of data, but you can ignore those 'extra'
+ * trailing bytes outside this function.
+ */
+static int alauda_read_block(struct us_data *us, u16 pba,
+		unsigned int page, unsigned int pages, unsigned char *data)
+{
+	int i, rc;
+	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+
+	rc = alauda_read_block_raw(us, pba, page, pages, data);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	/* Cut out the redundancy data */
+	for (i = 0; i < pages; i++) {
+		int dest_offset = i * pagesize;
+		int src_offset = i * (pagesize + 64);
+		memmove(data + dest_offset, data + src_offset, pagesize);
+	}
+
+	return rc;
+}
+
+/*
+ * Writes an entire block of data and checks status after write.
+ * Redundancy data must be already included in data. Data should be
+ * (pagesize+64)*blocksize bytes in length.
+ */
+static int alauda_write_block(struct us_data *us, u16 pba, unsigned char *data)
+{
+	int rc;
+	struct alauda_info *info = (struct alauda_info *) us->extra;
+	unsigned char command[] = {
+		ALAUDA_BULK_CMD, ALAUDA_BULK_WRITE_BLOCK, PBA_HI(pba),
+		PBA_ZONE(pba), 0, PBA_LO(pba), 32, 0, MEDIA_PORT(us)
+	};
+
+	US_DEBUGP("alauda_write_block: pba %d\n", pba);
+
+	rc = usb_stor_bulk_transfer_buf(us, us->send_bulk_pipe,
+		command, 9, NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	rc = usb_stor_bulk_transfer_buf(us, info->wr_ep, data,
+		(MEDIA_INFO(us).pagesize + 64) * MEDIA_INFO(us).blocksize,
+		NULL);
+	if (rc != USB_STOR_XFER_GOOD)
+		return rc;
+
+	return alauda_check_status2(us);
+}
+
+/*
+ * Write some data to a specific LBA.
+ */
+static int alauda_write_lba(struct us_data *us, u16 lba,
+		 unsigned int page, unsigned int pages,
+		 unsigned char *ptr, unsigned char *blockbuffer)
+{
+	u16 pba, lbap, new_pba;
+	unsigned char *bptr, *cptr, *xptr;
+	unsigned char ecc[3];
+	int i, result;
+	unsigned int uzonesize = MEDIA_INFO(us).uzonesize;
+	unsigned int zonesize = MEDIA_INFO(us).zonesize;
+	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+	unsigned int blocksize = MEDIA_INFO(us).blocksize;
+	unsigned int lba_offset = lba % uzonesize;
+	unsigned int new_pba_offset;
+	unsigned int zone = lba / uzonesize;
+
+	alauda_ensure_map_for_zone(us, zone);
+
+	pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset];
+	if (pba == 1) {
+		/* Maybe it is impossible to write to PBA 1.
+		   Fake success, but don't do anything. */
+		printk("alauda_write_lba: avoid writing to pba 1\n");
+		return USB_STOR_TRANSPORT_GOOD;
+	}
+
+	new_pba = alauda_find_unused_pba(&MEDIA_INFO(us), zone);
+	if (!new_pba) {
+		printk("alauda_write_lba: Out of unused blocks\n");
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	/* read old contents */
+	if (pba != UNDEF) {
+		result = alauda_read_block_raw(us, pba, 0,
+			blocksize, blockbuffer);
+		if (result != USB_STOR_XFER_GOOD)
+			return result;
+	} else {
+		memset(blockbuffer, 0, blocksize * (pagesize + 64));
+	}
+
+	lbap = (lba_offset << 1) | 0x1000;
+	if (parity[MSB_of(lbap) ^ LSB_of(lbap)])
+		lbap ^= 1;
+
+	/* check old contents and fill lba */
+	for (i = 0; i < blocksize; i++) {
+		bptr = blockbuffer + (i * (pagesize + 64));
+		cptr = bptr + pagesize;
+		nand_compute_ecc(bptr, ecc);
+		if (!nand_compare_ecc(cptr+13, ecc)) {
+			US_DEBUGP("Warning: bad ecc in page %d- of pba %d\n",
+				  i, pba);
+			nand_store_ecc(cptr+13, ecc);
+		}
+		nand_compute_ecc(bptr + (pagesize / 2), ecc);
+		if (!nand_compare_ecc(cptr+8, ecc)) {
+			US_DEBUGP("Warning: bad ecc in page %d+ of pba %d\n",
+				  i, pba);
+			nand_store_ecc(cptr+8, ecc);
+		}
+		cptr[6] = cptr[11] = MSB_of(lbap);
+		cptr[7] = cptr[12] = LSB_of(lbap);
+	}
+
+	/* copy in new stuff and compute ECC */
+	xptr = ptr;
+	for (i = page; i < page+pages; i++) {
+		bptr = blockbuffer + (i * (pagesize + 64));
+		cptr = bptr + pagesize;
+		memcpy(bptr, xptr, pagesize);
+		xptr += pagesize;
+		nand_compute_ecc(bptr, ecc);
+		nand_store_ecc(cptr+13, ecc);
+		nand_compute_ecc(bptr + (pagesize / 2), ecc);
+		nand_store_ecc(cptr+8, ecc);
+	}
+
+	result = alauda_write_block(us, new_pba, blockbuffer);
+	if (result != USB_STOR_XFER_GOOD)
+		return result;
+
+	new_pba_offset = new_pba - (zone * zonesize);
+	MEDIA_INFO(us).pba_to_lba[zone][new_pba_offset] = lba;
+	MEDIA_INFO(us).lba_to_pba[zone][lba_offset] = new_pba;
+	US_DEBUGP("alauda_write_lba: Remapped LBA %d to PBA %d\n",
+		lba, new_pba);
+
+	if (pba != UNDEF) {
+		unsigned int pba_offset = pba - (zone * zonesize);
+		result = alauda_erase_block(us, pba);
+		if (result != USB_STOR_XFER_GOOD)
+			return result;
+		MEDIA_INFO(us).pba_to_lba[zone][pba_offset] = UNDEF;
+	}
+
+	return USB_STOR_TRANSPORT_GOOD;
+}
+
+/*
+ * Read data from a specific sector address
+ */
+static int alauda_read_data(struct us_data *us, unsigned long address,
+		unsigned int sectors)
+{
+	unsigned char *buffer;
+	u16 lba, max_lba;
+	unsigned int page, len, index, offset;
+	unsigned int blockshift = MEDIA_INFO(us).blockshift;
+	unsigned int pageshift = MEDIA_INFO(us).pageshift;
+	unsigned int blocksize = MEDIA_INFO(us).blocksize;
+	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+	unsigned int uzonesize = MEDIA_INFO(us).uzonesize;
+	int result;
+
+	/*
+	 * Since we only read in one block at a time, we have to create
+	 * a bounce buffer and move the data a piece at a time between the
+	 * bounce buffer and the actual transfer buffer.
+	 * We make this buffer big enough to hold temporary redundancy data,
+	 * which we use when reading the data blocks.
+	 */
+
+	len = min(sectors, blocksize) * (pagesize + 64);
+	buffer = kmalloc(len, GFP_NOIO);
+	if (buffer == NULL) {
+		printk("alauda_read_data: Out of memory\n");
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	/* Figure out the initial LBA and page */
+	lba = address >> blockshift;
+	page = (address & MEDIA_INFO(us).blockmask);
+	max_lba = MEDIA_INFO(us).capacity >> (blockshift + pageshift);
+
+	result = USB_STOR_TRANSPORT_GOOD;
+	index = offset = 0;
+
+	while (sectors > 0) {
+		unsigned int zone = lba / uzonesize; /* integer division */
+		unsigned int lba_offset = lba - (zone * uzonesize);
+		unsigned int pages;
+		u16 pba;
+		alauda_ensure_map_for_zone(us, zone);
+
+		/* Not overflowing capacity? */
+		if (lba >= max_lba) {
+			US_DEBUGP("Error: Requested lba %u exceeds "
+				  "maximum %u\n", lba, max_lba);
+			result = USB_STOR_TRANSPORT_ERROR;
+			break;
+		}
+
+		/* Find number of pages we can read in this block */
+		pages = min(sectors, blocksize - page);
+		len = pages << pageshift;
+
+		/* Find where this lba lives on disk */
+		pba = MEDIA_INFO(us).lba_to_pba[zone][lba_offset];
+
+		if (pba == UNDEF) {	/* this lba was never written */
+			US_DEBUGP("Read %d zero pages (LBA %d) page %d\n",
+				  pages, lba, page);
+
+			/* This is not really an error. It just means
+			   that the block has never been written.
+			   Instead of returning USB_STOR_TRANSPORT_ERROR
+			   it is better to return all zero data. */
+
+			memset(buffer, 0, len);
+		} else {
+			US_DEBUGP("Read %d pages, from PBA %d"
+				  " (LBA %d) page %d\n",
+				  pages, pba, lba, page);
+
+			result = alauda_read_block(us, pba, page, pages, buffer);
+			if (result != USB_STOR_TRANSPORT_GOOD)
+				break;
+		}
+
+		/* Store the data in the transfer buffer */
+		usb_stor_access_xfer_buf(buffer, len, us->srb,
+				&index, &offset, TO_XFER_BUF);
+
+		page = 0;
+		lba++;
+		sectors -= pages;
+	}
+
+	kfree(buffer);
+	return result;
+}
+
+/*
+ * Write data to a specific sector address
+ */
+static int alauda_write_data(struct us_data *us, unsigned long address,
+		unsigned int sectors)
+{
+	unsigned char *buffer, *blockbuffer;
+	unsigned int page, len, index, offset;
+	unsigned int blockshift = MEDIA_INFO(us).blockshift;
+	unsigned int pageshift = MEDIA_INFO(us).pageshift;
+	unsigned int blocksize = MEDIA_INFO(us).blocksize;
+	unsigned int pagesize = MEDIA_INFO(us).pagesize;
+	u16 lba, max_lba;
+	int result;
+
+	/*
+	 * Since we don't write the user data directly to the device,
+	 * we have to create a bounce buffer and move the data a piece
+	 * at a time between the bounce buffer and the actual transfer buffer.
+	 */
+
+	len = min(sectors, blocksize) * pagesize;
+	buffer = kmalloc(len, GFP_NOIO);
+	if (buffer == NULL) {
+		printk("alauda_write_data: Out of memory\n");
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	/*
+	 * We also need a temporary block buffer, where we read in the old data,
+	 * overwrite parts with the new data, and manipulate the redundancy data
+	 */
+	blockbuffer = kmalloc((pagesize + 64) * blocksize, GFP_NOIO);
+	if (blockbuffer == NULL) {
+		printk("alauda_write_data: Out of memory\n");
+		kfree(buffer);
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+
+	/* Figure out the initial LBA and page */
+	lba = address >> blockshift;
+	page = (address & MEDIA_INFO(us).blockmask);
+	max_lba = MEDIA_INFO(us).capacity >> (pageshift + blockshift);
+
+	result = USB_STOR_TRANSPORT_GOOD;
+	index = offset = 0;
+
+	while (sectors > 0) {
+		/* Write as many sectors as possible in this block */
+		unsigned int pages = min(sectors, blocksize - page);
+		len = pages << pageshift;
+
+		/* Not overflowing capacity? */
+		if (lba >= max_lba) {
+			US_DEBUGP("alauda_write_data: Requested lba %u exceeds "
+				  "maximum %u\n", lba, max_lba);
+			result = USB_STOR_TRANSPORT_ERROR;
+			break;
+		}
+
+		/* Get the data from the transfer buffer */
+		usb_stor_access_xfer_buf(buffer, len, us->srb,
+				&index, &offset, FROM_XFER_BUF);
+
+		result = alauda_write_lba(us, lba, page, pages, buffer,
+			blockbuffer);
+		if (result != USB_STOR_TRANSPORT_GOOD)
+			break;
+
+		page = 0;
+		lba++;
+		sectors -= pages;
+	}
+
+	kfree(buffer);
+	kfree(blockbuffer);
+	return result;
+}
+
+/*
+ * Our interface with the rest of the world
+ */
+
+static void alauda_info_destructor(void *extra)
+{
+	struct alauda_info *info = (struct alauda_info *) extra;
+	int port;
+
+	if (!info)
+		return;
+
+	for (port = 0; port < 2; port++) {
+		struct alauda_media_info *media_info = &info->port[port];
+
+		alauda_free_maps(media_info);
+		kfree(media_info->lba_to_pba);
+		kfree(media_info->pba_to_lba);
+	}
+}
+
+/*
+ * Initialize alauda_info struct and find the data-write endpoint
+ */
+int init_alauda(struct us_data *us)
+{
+	struct alauda_info *info;
+	struct usb_host_interface *altsetting = us->pusb_intf->cur_altsetting;
+	nand_init_ecc();
+
+	us->extra = kzalloc(sizeof(struct alauda_info), GFP_NOIO);
+	if (!us->extra) {
+		US_DEBUGP("init_alauda: Gah! Can't allocate storage for"
+			"alauda info struct!\n");
+		return USB_STOR_TRANSPORT_ERROR;
+	}
+	info = (struct alauda_info *) us->extra;
+	us->extra_destructor = alauda_info_destructor;
+
+	info->wr_ep = usb_sndbulkpipe(us->pusb_dev,
+		altsetting->endpoint[0].desc.bEndpointAddress
+		& USB_ENDPOINT_NUMBER_MASK);
+
+	return USB_STOR_TRANSPORT_GOOD;
+}
+
+int alauda_transport(struct scsi_cmnd *srb, struct us_data *us)
+{
+	int rc;
+	struct alauda_info *info = (struct alauda_info *) us->extra;
+	unsigned char *ptr = us->iobuf;
+	static unsigned char inquiry_response[36] = {
+		0x00, 0x80, 0x00, 0x01, 0x1F, 0x00, 0x00, 0x00
+	};
+
+	if (srb->cmnd[0] == INQUIRY) {
+		US_DEBUGP("alauda_transport: INQUIRY. "
+			"Returning bogus response.\n");
+		memcpy(ptr, inquiry_response, sizeof(inquiry_response));
+		fill_inquiry_response(us, ptr, 36);
+		return USB_STOR_TRANSPORT_GOOD;
+	}
+
+	if (srb->cmnd[0] == TEST_UNIT_READY) {
+		US_DEBUGP("alauda_transport: TEST_UNIT_READY.\n");
+		return alauda_check_media(us);
+	}
+
+	if (srb->cmnd[0] == READ_CAPACITY) {
+		unsigned int num_zones;
+		unsigned long capacity;
+
+		rc = alauda_check_media(us);
+		if (rc != USB_STOR_TRANSPORT_GOOD)
+			return rc;
+
+		num_zones = MEDIA_INFO(us).capacity >> (MEDIA_INFO(us).zoneshift
+			+ MEDIA_INFO(us).blockshift + MEDIA_INFO(us).pageshift);
+
+		capacity = num_zones * MEDIA_INFO(us).uzonesize
+			* MEDIA_INFO(us).blocksize;
+
+		/* Report capacity and page size */
+		((__be32 *) ptr)[0] = cpu_to_be32(capacity - 1);
+		((__be32 *) ptr)[1] = cpu_to_be32(512);
+
+		usb_stor_set_xfer_buf(ptr, 8, srb);
+		return USB_STOR_TRANSPORT_GOOD;
+	}
+
+	if (srb->cmnd[0] == READ_10) {
+		unsigned int page, pages;
+
+		rc = alauda_check_media(us);
+		if (rc != USB_STOR_TRANSPORT_GOOD)
+			return rc;
+
+		page = short_pack(srb->cmnd[3], srb->cmnd[2]);
+		page <<= 16;
+		page |= short_pack(srb->cmnd[5], srb->cmnd[4]);
+		pages = short_pack(srb->cmnd[8], srb->cmnd[7]);
+
+		US_DEBUGP("alauda_transport: READ_10: page %d pagect %d\n",
+			  page, pages);
+
+		return alauda_read_data(us, page, pages);
+	}
+
+	if (srb->cmnd[0] == WRITE_10) {
+		unsigned int page, pages;
+
+		rc = alauda_check_media(us);
+		if (rc != USB_STOR_TRANSPORT_GOOD)
+			return rc;
+
+		page = short_pack(srb->cmnd[3], srb->cmnd[2]);
+		page <<= 16;
+		page |= short_pack(srb->cmnd[5], srb->cmnd[4]);
+		pages = short_pack(srb->cmnd[8], srb->cmnd[7]);
+
+		US_DEBUGP("alauda_transport: WRITE_10: page %d pagect %d\n",
+			  page, pages);
+
+		return alauda_write_data(us, page, pages);
+	}
+
+	if (srb->cmnd[0] == REQUEST_SENSE) {
+		US_DEBUGP("alauda_transport: REQUEST_SENSE.\n");
+
+		memset(ptr, 0, 18);
+		ptr[0] = 0xF0;
+		ptr[2] = info->sense_key;
+		ptr[7] = 11;
+		ptr[12] = info->sense_asc;
+		ptr[13] = info->sense_ascq;
+		usb_stor_set_xfer_buf(ptr, 18, srb);
+
+		return USB_STOR_TRANSPORT_GOOD;
+	}
+
+	if (srb->cmnd[0] == ALLOW_MEDIUM_REMOVAL) {
+		/* sure.  whatever.  not like we can stop the user from popping
+		   the media out of the device (no locking doors, etc) */
+		return USB_STOR_TRANSPORT_GOOD;
+	}
+
+	US_DEBUGP("alauda_transport: Gah! Unknown command: %d (0x%x)\n",
+		srb->cmnd[0], srb->cmnd[0]);
+	info->sense_key = 0x05;
+	info->sense_asc = 0x20;
+	info->sense_ascq = 0x00;
+	return USB_STOR_TRANSPORT_FAILED;
+}
+
diff --git a/drivers/usb/storage/alauda.h b/drivers/usb/storage/alauda.h
new file mode 100644
index 000000000000..a700f87d0803
--- /dev/null
+++ b/drivers/usb/storage/alauda.h
@@ -0,0 +1,100 @@
+/*
+ * Driver for Alauda-based card readers
+ *
+ * Current development and maintenance by:
+ *    (c) 2005 Daniel Drake <dsd@gentoo.org>
+ *
+ * See alauda.c for more explanation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2, or (at your option) any
+ * later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _USB_ALAUDA_H
+#define _USB_ALAUDA_H
+
+/*
+ * Status bytes
+ */
+#define ALAUDA_STATUS_ERROR		0x01
+#define ALAUDA_STATUS_READY		0x40
+
+/*
+ * Control opcodes (for request field)
+ */
+#define ALAUDA_GET_XD_MEDIA_STATUS	0x08
+#define ALAUDA_GET_SM_MEDIA_STATUS	0x98
+#define ALAUDA_ACK_XD_MEDIA_CHANGE	0x0a
+#define ALAUDA_ACK_SM_MEDIA_CHANGE	0x9a
+#define ALAUDA_GET_XD_MEDIA_SIG		0x86
+#define ALAUDA_GET_SM_MEDIA_SIG		0x96
+
+/*
+ * Bulk command identity (byte 0)
+ */
+#define ALAUDA_BULK_CMD			0x40
+
+/*
+ * Bulk opcodes (byte 1)
+ */
+#define ALAUDA_BULK_GET_REDU_DATA	0x85
+#define ALAUDA_BULK_READ_BLOCK		0x94
+#define ALAUDA_BULK_ERASE_BLOCK		0xa3
+#define ALAUDA_BULK_WRITE_BLOCK		0xb4
+#define ALAUDA_BULK_GET_STATUS2		0xb7
+#define ALAUDA_BULK_RESET_MEDIA		0xe0
+
+/*
+ * Port to operate on (byte 8)
+ */
+#define ALAUDA_PORT_XD			0x00
+#define ALAUDA_PORT_SM			0x01
+
+/*
+ * LBA and PBA are unsigned ints. Special values.
+ */
+#define UNDEF    0xffff
+#define SPARE    0xfffe
+#define UNUSABLE 0xfffd
+
+int init_alauda(struct us_data *us);
+int alauda_transport(struct scsi_cmnd *srb, struct us_data *us);
+
+struct alauda_media_info {
+	unsigned long capacity;		/* total media size in bytes */
+	unsigned int pagesize;		/* page size in bytes */
+	unsigned int blocksize;		/* number of pages per block */
+	unsigned int uzonesize;		/* number of usable blocks per zone */
+	unsigned int zonesize;		/* number of blocks per zone */
+	unsigned int blockmask;		/* mask to get page from address */
+
+	unsigned char pageshift;
+	unsigned char blockshift;
+	unsigned char zoneshift;
+
+	u16 **lba_to_pba;		/* logical to physical block map */
+	u16 **pba_to_lba;		/* physical to logical block map */
+};
+
+struct alauda_info {
+	struct alauda_media_info port[2];
+	int wr_ep;			/* endpoint to write data out of */
+
+	unsigned char sense_key;
+	unsigned long sense_asc;	/* additional sense code */
+	unsigned long sense_ascq;	/* additional sense code qualifier */
+};
+
+#endif
+
diff --git a/drivers/usb/storage/unusual_devs.h b/drivers/usb/storage/unusual_devs.h
index be3c06d17533..7a865dd04683 100644
--- a/drivers/usb/storage/unusual_devs.h
+++ b/drivers/usb/storage/unusual_devs.h
@@ -535,6 +535,13 @@ UNUSUAL_DEV(  0x057b, 0x0022, 0x0000, 0x9999,
 		"Silicon Media R/W",
 		US_SC_DEVICE, US_PR_DEVICE, NULL, 0),
 
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+UNUSUAL_DEV(  0x0584, 0x0008, 0x0102, 0x0102,
+		"Fujifilm",
+		"DPC-R1 (Alauda)",
+ 		US_SC_SCSI, US_PR_ALAUDA, init_alauda, 0 ),
+#endif
+
 /* Fabrizio Fellini <fello@libero.it> */
 UNUSUAL_DEV(  0x0595, 0x4343, 0x0000, 0x2210,
 		"Fujifilm",
@@ -784,6 +791,13 @@ UNUSUAL_DEV(  0x07af, 0x0006, 0x0100, 0x0100,
  		US_SC_SCSI, US_PR_DPCM_USB, NULL, 0 ),
 #endif
 
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+UNUSUAL_DEV(  0x07b4, 0x010a, 0x0102, 0x0102,
+		"Olympus",
+		"MAUSB-10 (Alauda)",
+ 		US_SC_SCSI, US_PR_ALAUDA, init_alauda, 0 ),
+#endif
+
 #ifdef CONFIG_USB_STORAGE_DATAFAB
 UNUSUAL_DEV(  0x07c4, 0xa000, 0x0000, 0x0015,
 		"Datafab",
diff --git a/drivers/usb/storage/usb.c b/drivers/usb/storage/usb.c
index 85c8c17b3c0c..dbcf23980ff1 100644
--- a/drivers/usb/storage/usb.c
+++ b/drivers/usb/storage/usb.c
@@ -94,6 +94,9 @@
 #ifdef CONFIG_USB_STORAGE_ONETOUCH
 #include "onetouch.h"
 #endif
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+#include "alauda.h"
+#endif
 
 /* Some informational data */
 MODULE_AUTHOR("Matthew Dharm <mdharm-usb@one-eyed-alien.net>");
@@ -644,6 +647,15 @@ static int get_protocol(struct us_data *us)
 		break;
 #endif
 
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+	case US_PR_ALAUDA:
+		us->transport_name  = "Alauda Control/Bulk";
+		us->transport = alauda_transport;
+		us->transport_reset = usb_stor_Bulk_reset;
+		us->max_lun = 1;
+		break;
+#endif
+
 	default:
 		return -EIO;
 	}
diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h
index f9c058f33712..b2d08984a9f7 100644
--- a/include/linux/usb_usual.h
+++ b/include/linux/usb_usual.h
@@ -102,6 +102,9 @@ enum { US_DO_ALL_FLAGS };
 #ifdef CONFIG_USB_STORAGE_JUMPSHOT
 #define US_PR_JUMPSHOT  0xf3		/* Lexar Jumpshot */
 #endif
+#ifdef CONFIG_USB_STORAGE_ALAUDA
+#define US_PR_ALAUDA    0xf4		/* Alauda chipsets */
+#endif
 
 #define US_PR_DEVICE	0xff		/* Use device's value */
 
-- 
cgit v1.2.3


From 0296b2281352e4794e174b393c37f131502e09f0 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@suse.de>
Date: Fri, 11 Nov 2005 05:33:52 +0100
Subject: [PATCH] remove CONFIG_KOBJECT_UEVENT option

It makes zero sense to have hotplug, but not the netlink
events enabled today. Remove this option and merge the
kobject_uevent.h header into the kobject.h header file.

Signed-off-by: Kay Sievers <kay.sievers@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 MAINTAINERS                    |  6 -----
 drivers/input/input.c          |  1 -
 drivers/s390/crypto/z90main.c  |  1 -
 include/linux/kobject.h        | 35 +++++++++++++++++++++++++-
 include/linux/kobject_uevent.h | 57 ------------------------------------------
 init/Kconfig                   | 19 --------------
 kernel/sysctl.c                |  4 +--
 lib/kobject_uevent.c           | 24 ++++--------------
 8 files changed, 40 insertions(+), 107 deletions(-)
 delete mode 100644 include/linux/kobject_uevent.h

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index 6af683025ae0..b49a4ad3b872 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1476,12 +1476,6 @@ W:	http://nfs.sourceforge.net/
 W:	http://www.cse.unsw.edu.au/~neilb/patches/linux-devel/
 S:	Maintained
 
-KERNEL EVENT LAYER (KOBJECT_UEVENT)
-P:	Robert Love
-M:	rml@novell.com
-L:	linux-kernel@vger.kernel.org
-S:	Maintained
-
 KEXEC
 P:	Eric Biederman
 P:	Randy Dunlap
diff --git a/drivers/input/input.c b/drivers/input/input.c
index bdd2a7fc268d..43b49ccd7dad 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -18,7 +18,6 @@
 #include <linux/random.h>
 #include <linux/major.h>
 #include <linux/proc_fs.h>
-#include <linux/kobject_uevent.h>
 #include <linux/interrupt.h>
 #include <linux/poll.h>
 #include <linux/device.h>
diff --git a/drivers/s390/crypto/z90main.c b/drivers/s390/crypto/z90main.c
index 4010f2bb85af..790fcbb74b43 100644
--- a/drivers/s390/crypto/z90main.c
+++ b/drivers/s390/crypto/z90main.c
@@ -34,7 +34,6 @@
 #include <linux/miscdevice.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
-#include <linux/kobject_uevent.h>
 #include <linux/proc_fs.h>
 #include <linux/syscalls.h>
 #include "z90crypt.h"
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 7f7403aa4a41..baf5251d9f63 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -23,15 +23,31 @@
 #include <linux/spinlock.h>
 #include <linux/rwsem.h>
 #include <linux/kref.h>
-#include <linux/kobject_uevent.h>
 #include <linux/kernel.h>
 #include <asm/atomic.h>
 
 #define KOBJ_NAME_LEN	20
 
+#define HOTPLUG_PATH_LEN	256
+
+/* path to the userspace helper executed on an event */
+extern char hotplug_path[];
+
 /* counter to tag the hotplug event, read only except for the kobject core */
 extern u64 hotplug_seqnum;
 
+/* the actions here must match the proper string in lib/kobject_uevent.c */
+typedef int __bitwise kobject_action_t;
+enum kobject_action {
+	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* add event, for hotplug */
+	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* remove event, for hotplug */
+	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* a sysfs attribute file has changed */
+	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices */
+	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices */
+	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* offline event for hotplug devices */
+	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* online event for hotplug devices */
+};
+
 struct kobject {
 	const char		* k_name;
 	char			name[KOBJ_NAME_LEN];
@@ -243,16 +259,33 @@ extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
 #ifdef CONFIG_HOTPLUG
 void kobject_hotplug(struct kobject *kobj, enum kobject_action action);
+
 int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
 			char *buffer, int buffer_size, int *cur_len,
 			const char *format, ...)
 	__attribute__((format (printf, 7, 8)));
+
+int kobject_uevent(struct kobject *kobj,
+		   enum kobject_action action,
+		   struct attribute *attr);
+int kobject_uevent_atomic(struct kobject *kobj,
+			  enum kobject_action action,
+			  struct attribute *attr);
+
 #else
 static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { }
 static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, 
 				      char *buffer, int buffer_size, int *cur_len, 
 				      const char *format, ...)
 { return 0; }
+int kobject_uevent(struct kobject *kobj,
+		   enum kobject_action action,
+		   struct attribute *attr)
+{ return 0; }
+int kobject_uevent_atomic(struct kobject *kobj,
+			  enum kobject_action action,
+			  struct attribute *attr)
+{ return 0; }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/include/linux/kobject_uevent.h b/include/linux/kobject_uevent.h
deleted file mode 100644
index aa664fe7e561..000000000000
--- a/include/linux/kobject_uevent.h
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * kobject_uevent.h - list of kobject user events that can be generated
- *
- * Copyright (C) 2004 IBM Corp.
- * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
- *
- * This file is released under the GPLv2.
- *
- */
-
-#ifndef _KOBJECT_EVENT_H_
-#define _KOBJECT_EVENT_H_
-
-#define HOTPLUG_PATH_LEN	256
-
-/* path to the hotplug userspace helper executed on an event */
-extern char hotplug_path[];
-
-/*
- * If you add an action here, you must also add the proper string to the
- * lib/kobject_uevent.c file.
- */
-typedef int __bitwise kobject_action_t;
-enum kobject_action {
-	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* add event, for hotplug */
-	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* remove event, for hotplug */
-	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* a sysfs attribute file has changed */
-	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices */
-	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* offline event for hotplug devices */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* online event for hotplug devices */
-};
-
-
-#ifdef CONFIG_KOBJECT_UEVENT
-int kobject_uevent(struct kobject *kobj,
-		   enum kobject_action action,
-		   struct attribute *attr);
-int kobject_uevent_atomic(struct kobject *kobj,
-			  enum kobject_action action,
-			  struct attribute *attr);
-#else
-static inline int kobject_uevent(struct kobject *kobj,
-				 enum kobject_action action,
-				 struct attribute *attr)
-{
-	return 0;
-}
-static inline int kobject_uevent_atomic(struct kobject *kobj,
-				        enum kobject_action action,
-					struct attribute *attr)
-{
-	return 0;
-}
-#endif
-
-#endif
diff --git a/init/Kconfig b/init/Kconfig
index 9fc0759fa942..0de8b7765ae4 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -205,25 +205,6 @@ config HOTPLUG
 	  modules require HOTPLUG functionality, but a module built
 	  outside the kernel tree does. Such modules require Y here.
 
-config KOBJECT_UEVENT
-	bool "Kernel Userspace Events" if EMBEDDED
-	depends on NET
-	default y
-	help
-	  This option enables the kernel userspace event layer, which is a
-	  simple mechanism for kernel-to-user communication over a netlink
-	  socket.
-	  The goal of the kernel userspace events layer is to provide a simple
-	  and efficient events system, that notifies userspace about kobject
-	  state changes. This will enable applications to just listen for
-	  events instead of polling system devices and files.
-	  Hotplug events (kobject addition and removal) are also available on
-	  the netlink socket in addition to the execution of /sbin/hotplug if
-	  CONFIG_HOTPLUG is enabled.
-
-	  Say Y, unless you are building a system requiring minimal memory
-	  consumption.
-
 config IKCONFIG
 	bool "Kernel .config support"
 	---help---
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index b53115b882e1..6a51e25d4466 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -31,6 +31,7 @@
 #include <linux/smp_lock.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
+#include <linux/kobject.h>
 #include <linux/net.h>
 #include <linux/sysrq.h>
 #include <linux/highuid.h>
@@ -83,9 +84,6 @@ static int ngroups_max = NGROUPS_MAX;
 #ifdef CONFIG_KMOD
 extern char modprobe_path[];
 #endif
-#ifdef CONFIG_HOTPLUG
-extern char hotplug_path[];
-#endif
 #ifdef CONFIG_CHR_DEV_SG
 extern int sg_big_buff;
 #endif
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 3ab375411e38..1f90eea7eebc 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -19,14 +19,17 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/string.h>
-#include <linux/kobject_uevent.h>
 #include <linux/kobject.h>
 #include <net/sock.h>
 
 #define BUFFER_SIZE	1024	/* buffer for the hotplug env */
 #define NUM_ENVP	32	/* number of env pointers */
 
-#if defined(CONFIG_KOBJECT_UEVENT) || defined(CONFIG_HOTPLUG)
+#if defined(CONFIG_HOTPLUG)
+char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug";
+u64 hotplug_seqnum;
+static DEFINE_SPINLOCK(sequence_lock);
+
 static char *action_to_string(enum kobject_action action)
 {
 	switch (action) {
@@ -48,9 +51,7 @@ static char *action_to_string(enum kobject_action action)
 		return NULL;
 	}
 }
-#endif
 
-#ifdef CONFIG_KOBJECT_UEVENT
 static struct sock *uevent_sock;
 
 /**
@@ -168,21 +169,6 @@ static int __init kobject_uevent_init(void)
 
 postcore_initcall(kobject_uevent_init);
 
-#else
-static inline int send_uevent(const char *signal, const char *obj,
-			      char **envp, int gfp_mask)
-{
-	return 0;
-}
-
-#endif /* CONFIG_KOBJECT_UEVENT */
-
-
-#ifdef CONFIG_HOTPLUG
-char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug";
-u64 hotplug_seqnum;
-static DEFINE_SPINLOCK(sequence_lock);
-
 /**
  * kobject_hotplug - notify userspace by executing /sbin/hotplug
  *
-- 
cgit v1.2.3


From 033b96fd30db52a710d97b06f87d16fc59fee0f1 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@suse.de>
Date: Fri, 11 Nov 2005 06:09:55 +0100
Subject: [PATCH] remove mount/umount uevents from superblock handling

The names of these events have been confusing from the beginning
on, as they have been more like claim/release events. We needed these
events for noticing HAL if storage devices have been mounted.

Thanks to Al, we have the proper solution now and can poll()
/proc/mounts instead to get notfied about mount tree changes.

Signed-off-by: Kay Sievers <kay.sievers@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/super.c              | 15 +--------------
 include/linux/kobject.h |  6 ++----
 lib/kobject_uevent.c    |  4 ----
 3 files changed, 3 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/super.c b/fs/super.c
index 6689dded3c84..5a347a4f673a 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -665,16 +665,6 @@ static int test_bdev_super(struct super_block *s, void *data)
 	return (void *)s->s_bdev == data;
 }
 
-static void bdev_uevent(struct block_device *bdev, enum kobject_action action)
-{
-	if (bdev->bd_disk) {
-		if (bdev->bd_part)
-			kobject_uevent(&bdev->bd_part->kobj, action, NULL);
-		else
-			kobject_uevent(&bdev->bd_disk->kobj, action, NULL);
-	}
-}
-
 struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data,
 	int (*fill_super)(struct super_block *, void *, int))
@@ -717,10 +707,8 @@ struct super_block *get_sb_bdev(struct file_system_type *fs_type,
 			up_write(&s->s_umount);
 			deactivate_super(s);
 			s = ERR_PTR(error);
-		} else {
+		} else
 			s->s_flags |= MS_ACTIVE;
-			bdev_uevent(bdev, KOBJ_MOUNT);
-		}
 	}
 
 	return s;
@@ -736,7 +724,6 @@ void kill_block_super(struct super_block *sb)
 {
 	struct block_device *bdev = sb->s_bdev;
 
-	bdev_uevent(bdev, KOBJ_UMOUNT);
 	generic_shutdown_super(sb);
 	sync_blockdev(bdev);
 	close_bdev_excl(bdev);
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index baf5251d9f63..e6926b327538 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -42,10 +42,8 @@ enum kobject_action {
 	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* add event, for hotplug */
 	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* remove event, for hotplug */
 	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* a sysfs attribute file has changed */
-	KOBJ_MOUNT	= (__force kobject_action_t) 0x04,	/* mount event for block devices */
-	KOBJ_UMOUNT	= (__force kobject_action_t) 0x05,	/* umount event for block devices */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x06,	/* offline event for hotplug devices */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x07,	/* online event for hotplug devices */
+	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* offline event for hotplug devices */
+	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* online event for hotplug devices */
 };
 
 struct kobject {
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 1f90eea7eebc..845bf67d94ca 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -39,10 +39,6 @@ static char *action_to_string(enum kobject_action action)
 		return "remove";
 	case KOBJ_CHANGE:
 		return "change";
-	case KOBJ_MOUNT:
-		return "mount";
-	case KOBJ_UMOUNT:
-		return "umount";
 	case KOBJ_OFFLINE:
 		return "offline";
 	case KOBJ_ONLINE:
-- 
cgit v1.2.3


From 5f123fbd80f4f788554636f02bf73e40f914e0d6 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@suse.de>
Date: Fri, 11 Nov 2005 14:43:07 +0100
Subject: [PATCH] merge kobject_uevent and kobject_hotplug

The distinction between hotplug and uevent does not make sense these
days, netlink events are the default.

udev depends entirely on netlink uevents. Only during early boot and
in initramfs, /sbin/hotplug is needed. So merge the two functions and
provide only one interface without all the options.

The netlink layer got a nice generic interface with named slots
recently, which is probably a better facility to plug events for
subsystem specific events.
Also the new poll() interface to /proc/mounts is a nicer way to
notify about changes than sending events through the core.
The uevents should only be used for driver core related requests to
userspace now.

Signed-off-by: Kay Sievers <kay.sievers@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/scsi/ipr.c      |   2 +-
 include/linux/kobject.h |  27 ++---
 lib/kobject_uevent.c    | 279 +++++++++++++++++-------------------------------
 3 files changed, 102 insertions(+), 206 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index fa2cb3582cfa..bf44a409ba0d 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2132,7 +2132,7 @@ restart:
 	}
 
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-	kobject_uevent(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE, NULL);
+	kobject_hotplug(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE);
 	LEAVE;
 }
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index e6926b327538..5b08248fba72 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -39,11 +39,11 @@ extern u64 hotplug_seqnum;
 /* the actions here must match the proper string in lib/kobject_uevent.c */
 typedef int __bitwise kobject_action_t;
 enum kobject_action {
-	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* add event, for hotplug */
-	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* remove event, for hotplug */
-	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* a sysfs attribute file has changed */
-	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* offline event for hotplug devices */
-	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* online event for hotplug devices */
+	KOBJ_ADD	= (__force kobject_action_t) 0x01,	/* exclusive to core */
+	KOBJ_REMOVE	= (__force kobject_action_t) 0x02,	/* exclusive to core */
+	KOBJ_CHANGE	= (__force kobject_action_t) 0x03,	/* device state change */
+	KOBJ_OFFLINE	= (__force kobject_action_t) 0x04,	/* device offline */
+	KOBJ_ONLINE	= (__force kobject_action_t) 0x05,	/* device online */
 };
 
 struct kobject {
@@ -262,28 +262,13 @@ int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
 			char *buffer, int buffer_size, int *cur_len,
 			const char *format, ...)
 	__attribute__((format (printf, 7, 8)));
-
-int kobject_uevent(struct kobject *kobj,
-		   enum kobject_action action,
-		   struct attribute *attr);
-int kobject_uevent_atomic(struct kobject *kobj,
-			  enum kobject_action action,
-			  struct attribute *attr);
-
 #else
 static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { }
+
 static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, 
 				      char *buffer, int buffer_size, int *cur_len, 
 				      const char *format, ...)
 { return 0; }
-int kobject_uevent(struct kobject *kobj,
-		   enum kobject_action action,
-		   struct attribute *attr)
-{ return 0; }
-int kobject_uevent_atomic(struct kobject *kobj,
-			  enum kobject_action action,
-			  struct attribute *attr)
-{ return 0; }
 #endif
 
 #endif /* __KERNEL__ */
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 845bf67d94ca..dd061da3aba9 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -29,6 +29,7 @@
 char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug";
 u64 hotplug_seqnum;
 static DEFINE_SPINLOCK(sequence_lock);
+static struct sock *uevent_sock;
 
 static char *action_to_string(enum kobject_action action)
 {
@@ -48,123 +49,6 @@ static char *action_to_string(enum kobject_action action)
 	}
 }
 
-static struct sock *uevent_sock;
-
-/**
- * send_uevent - notify userspace by sending event through netlink socket
- *
- * @signal: signal name
- * @obj: object path (kobject)
- * @envp: possible hotplug environment to pass with the message
- * @gfp_mask:
- */
-static int send_uevent(const char *signal, const char *obj,
-		       char **envp, gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
-	char *pos;
-	int len;
-
-	if (!uevent_sock)
-		return -EIO;
-
-	len = strlen(signal) + 1;
-	len += strlen(obj) + 1;
-
-	/* allocate buffer with the maximum possible message size */
-	skb = alloc_skb(len + BUFFER_SIZE, gfp_mask);
-	if (!skb)
-		return -ENOMEM;
-
-	pos = skb_put(skb, len);
-	sprintf(pos, "%s@%s", signal, obj);
-
-	/* copy the environment key by key to our continuous buffer */
-	if (envp) {
-		int i;
-
-		for (i = 2; envp[i]; i++) {
-			len = strlen(envp[i]) + 1;
-			pos = skb_put(skb, len);
-			strcpy(pos, envp[i]);
-		}
-	}
-
-	NETLINK_CB(skb).dst_group = 1;
-	return netlink_broadcast(uevent_sock, skb, 0, 1, gfp_mask);
-}
-
-static int do_kobject_uevent(struct kobject *kobj, enum kobject_action action, 
-			     struct attribute *attr, gfp_t gfp_mask)
-{
-	char *path;
-	char *attrpath;
-	char *signal;
-	int len;
-	int rc = -ENOMEM;
-
-	path = kobject_get_path(kobj, gfp_mask);
-	if (!path)
-		return -ENOMEM;
-
-	signal = action_to_string(action);
-	if (!signal)
-		return -EINVAL;
-
-	if (attr) {
-		len = strlen(path);
-		len += strlen(attr->name) + 2;
-		attrpath = kmalloc(len, gfp_mask);
-		if (!attrpath)
-			goto exit;
-		sprintf(attrpath, "%s/%s", path, attr->name);
-		rc = send_uevent(signal, attrpath, NULL, gfp_mask);
-		kfree(attrpath);
-	} else
-		rc = send_uevent(signal, path, NULL, gfp_mask);
-
-exit:
-	kfree(path);
-	return rc;
-}
-
-/**
- * kobject_uevent - notify userspace by sending event through netlink socket
- * 
- * @signal: signal name
- * @kobj: struct kobject that the event is happening to
- * @attr: optional struct attribute the event belongs to
- */
-int kobject_uevent(struct kobject *kobj, enum kobject_action action,
-		   struct attribute *attr)
-{
-	return do_kobject_uevent(kobj, action, attr, GFP_KERNEL);
-}
-EXPORT_SYMBOL_GPL(kobject_uevent);
-
-int kobject_uevent_atomic(struct kobject *kobj, enum kobject_action action,
-			  struct attribute *attr)
-{
-	return do_kobject_uevent(kobj, action, attr, GFP_ATOMIC);
-}
-EXPORT_SYMBOL_GPL(kobject_uevent_atomic);
-
-static int __init kobject_uevent_init(void)
-{
-	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
-					    THIS_MODULE);
-
-	if (!uevent_sock) {
-		printk(KERN_ERR
-		       "kobject_uevent: unable to create netlink socket!\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-
-postcore_initcall(kobject_uevent_init);
-
 /**
  * kobject_hotplug - notify userspace by executing /sbin/hotplug
  *
@@ -173,95 +57,84 @@ postcore_initcall(kobject_uevent_init);
  */
 void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 {
-	char *argv [3];
-	char **envp = NULL;
-	char *buffer = NULL;
-	char *seq_buff;
+	char **envp;
+	char *buffer;
 	char *scratch;
+	const char *action_string;
+	const char *devpath = NULL;
+	const char *subsystem;
+	struct kobject *top_kobj;
+	struct kset *kset;
+	struct kset_hotplug_ops *hotplug_ops;
+	u64 seq;
+	char *seq_buff;
 	int i = 0;
 	int retval;
-	char *kobj_path = NULL;
-	const char *name = NULL;
-	char *action_string;
-	u64 seq;
-	struct kobject *top_kobj = kobj;
-	struct kset *kset;
-	static struct kset_hotplug_ops null_hotplug_ops;
-	struct kset_hotplug_ops *hotplug_ops = &null_hotplug_ops;
 
-	/* If this kobj does not belong to a kset,
-	   try to find a parent that does. */
+	pr_debug("%s\n", __FUNCTION__);
+
+	action_string = action_to_string(action);
+	if (!action_string)
+		return;
+
+	/* search the kset we belong to */
+	top_kobj = kobj;
 	if (!top_kobj->kset && top_kobj->parent) {
 		do {
 			top_kobj = top_kobj->parent;
 		} while (!top_kobj->kset && top_kobj->parent);
 	}
-
-	if (top_kobj->kset)
-		kset = top_kobj->kset;
-	else
+	if (!top_kobj->kset)
 		return;
 
-	if (kset->hotplug_ops)
-		hotplug_ops = kset->hotplug_ops;
+	kset = top_kobj->kset;
+	hotplug_ops = kset->hotplug_ops;
 
-	/* If the kset has a filter operation, call it.
-	   Skip the event, if the filter returns zero. */
-	if (hotplug_ops->filter) {
+	/*  skip the event, if the filter returns zero. */
+	if (hotplug_ops && hotplug_ops->filter)
 		if (!hotplug_ops->filter(kset, kobj))
 			return;
-	}
-
-	pr_debug ("%s\n", __FUNCTION__);
-
-	action_string = action_to_string(action);
-	if (!action_string)
-		return;
 
-	envp = kmalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL);
+	/* environment index */
+	envp = kzalloc(NUM_ENVP * sizeof (char *), GFP_KERNEL);
 	if (!envp)
 		return;
-	memset (envp, 0x00, NUM_ENVP * sizeof (char *));
 
+	/* environment values */
 	buffer = kmalloc(BUFFER_SIZE, GFP_KERNEL);
 	if (!buffer)
 		goto exit;
 
-	if (hotplug_ops->name)
-		name = hotplug_ops->name(kset, kobj);
-	if (name == NULL)
-		name = kobject_name(&kset->kobj);
+	/* complete object path */
+	devpath = kobject_get_path(kobj, GFP_KERNEL);
+	if (!devpath)
+		goto exit;
 
-	argv [0] = hotplug_path;
-	argv [1] = (char *)name; /* won't be changed but 'const' has to go */
-	argv [2] = NULL;
+	/* originating subsystem */
+	if (hotplug_ops && hotplug_ops->name)
+		subsystem = hotplug_ops->name(kset, kobj);
+	else
+		subsystem = kobject_name(&kset->kobj);
 
-	/* minimal command environment */
-	envp [i++] = "HOME=/";
-	envp [i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
+	/* event environemnt for helper process only */
+	envp[i++] = "HOME=/";
+	envp[i++] = "PATH=/sbin:/bin:/usr/sbin:/usr/bin";
 
+	/* default keys */
 	scratch = buffer;
-
 	envp [i++] = scratch;
 	scratch += sprintf(scratch, "ACTION=%s", action_string) + 1;
-
-	kobj_path = kobject_get_path(kobj, GFP_KERNEL);
-	if (!kobj_path)
-		goto exit;
-
 	envp [i++] = scratch;
-	scratch += sprintf (scratch, "DEVPATH=%s", kobj_path) + 1;
-
+	scratch += sprintf (scratch, "DEVPATH=%s", devpath) + 1;
 	envp [i++] = scratch;
-	scratch += sprintf(scratch, "SUBSYSTEM=%s", name) + 1;
+	scratch += sprintf(scratch, "SUBSYSTEM=%s", subsystem) + 1;
 
-	/* reserve space for the sequence,
-	 * put the real one in after the hotplug call */
+	/* just reserve the space, overwrite it after kset call has returned */
 	envp[i++] = seq_buff = scratch;
 	scratch += strlen("SEQNUM=18446744073709551616") + 1;
 
-	if (hotplug_ops->hotplug) {
-		/* have the kset specific function add its stuff */
+	/* let the kset specific function add its stuff */
+	if (hotplug_ops && hotplug_ops->hotplug) {
 		retval = hotplug_ops->hotplug (kset, kobj,
 				  &envp[i], NUM_ENVP - i, scratch,
 				  BUFFER_SIZE - (scratch - buffer));
@@ -272,27 +145,49 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 		}
 	}
 
+	/* we will send an event, request a new sequence number */
 	spin_lock(&sequence_lock);
 	seq = ++hotplug_seqnum;
 	spin_unlock(&sequence_lock);
 	sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq);
 
-	pr_debug ("%s: %s %s seq=%llu %s %s %s %s %s\n",
-		  __FUNCTION__, argv[0], argv[1], (unsigned long long)seq,
-		  envp[0], envp[1], envp[2], envp[3], envp[4]);
-
-	send_uevent(action_string, kobj_path, envp, GFP_KERNEL);
+	/* send netlink message */
+	if (uevent_sock) {
+		struct sk_buff *skb;
+		size_t len;
+
+		/* allocate message with the maximum possible size */
+		len = strlen(action_string) + strlen(devpath) + 2;
+		skb = alloc_skb(len + BUFFER_SIZE, GFP_KERNEL);
+		if (skb) {
+			/* add header */
+			scratch = skb_put(skb, len);
+			sprintf(scratch, "%s@%s", action_string, devpath);
+
+			/* copy keys to our continuous event payload buffer */
+			for (i = 2; envp[i]; i++) {
+				len = strlen(envp[i]) + 1;
+				scratch = skb_put(skb, len);
+				strcpy(scratch, envp[i]);
+			}
+
+			NETLINK_CB(skb).dst_group = 1;
+			netlink_broadcast(uevent_sock, skb, 0, 1, GFP_KERNEL);
+		}
+	}
 
-	if (!hotplug_path[0])
-		goto exit;
+	/* call uevent_helper, usually only enabled during early boot */
+	if (hotplug_path[0]) {
+		char *argv [3];
 
-	retval = call_usermodehelper (argv[0], argv, envp, 0);
-	if (retval)
-		pr_debug ("%s - call_usermodehelper returned %d\n",
-			  __FUNCTION__, retval);
+		argv [0] = hotplug_path;
+		argv [1] = (char *)subsystem;
+		argv [2] = NULL;
+		call_usermodehelper (argv[0], argv, envp, 0);
+	}
 
 exit:
-	kfree(kobj_path);
+	kfree(devpath);
 	kfree(buffer);
 	kfree(envp);
 	return;
@@ -350,4 +245,20 @@ int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
 }
 EXPORT_SYMBOL(add_hotplug_env_var);
 
+static int __init kobject_uevent_init(void)
+{
+	uevent_sock = netlink_kernel_create(NETLINK_KOBJECT_UEVENT, 1, NULL,
+					    THIS_MODULE);
+
+	if (!uevent_sock) {
+		printk(KERN_ERR
+		       "kobject_uevent: unable to create netlink socket!\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+postcore_initcall(kobject_uevent_init);
+
 #endif /* CONFIG_HOTPLUG */
-- 
cgit v1.2.3


From 312c004d36ce6c739512bac83b452f4c20ab1f62 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@suse.de>
Date: Wed, 16 Nov 2005 09:00:00 +0100
Subject: [PATCH] driver core: replace "hotplug" by "uevent"

Leave the overloaded "hotplug" word to susbsystems which are handling
real devices. The driver core does not "plug" anything, it just exports
the state to userspace and generates events.

Signed-off-by: Kay Sievers <kay.sievers@suse.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/powerpc/eeh-pci-error-recovery.txt | 31 ++++-----
 arch/powerpc/kernel/vio.c                        |  2 +-
 block/genhd.c                                    | 48 ++++++-------
 drivers/acpi/container.c                         |  8 +--
 drivers/acpi/processor_core.c                    |  8 +--
 drivers/acpi/scan.c                              | 14 ++--
 drivers/base/Kconfig                             |  4 +-
 drivers/base/class.c                             | 66 +++++++++---------
 drivers/base/core.c                              | 42 ++++++------
 drivers/base/cpu.c                               |  4 +-
 drivers/base/firmware_class.c                    | 45 ++++++-------
 drivers/base/memory.c                            | 12 ++--
 drivers/ieee1394/nodemgr.c                       | 20 +++---
 drivers/infiniband/core/sysfs.c                  | 16 ++---
 drivers/input/input.c                            | 14 ++--
 drivers/input/serio/serio.c                      | 22 +++---
 drivers/macintosh/macio_asic.c                   |  4 +-
 drivers/mmc/mmc_sysfs.c                          |  4 +-
 drivers/pci/hotplug.c                            | 44 ++++++------
 drivers/pci/pci-driver.c                         |  6 +-
 drivers/pci/pci.h                                |  4 +-
 drivers/pcmcia/cs.c                              | 10 +--
 drivers/pcmcia/ds.c                              | 50 +++++++-------
 drivers/scsi/ipr.c                               |  2 +-
 drivers/usb/core/usb.c                           | 86 +++++++++++-------------
 drivers/usb/host/hc_crisv10.c                    |  2 +-
 drivers/w1/w1.c                                  | 14 ++--
 fs/partitions/check.c                            |  6 +-
 include/linux/device.h                           | 14 ++--
 include/linux/firmware.h                         |  2 +-
 include/linux/kobject.h                          | 40 ++++++-----
 include/linux/sysctl.h                           |  2 +-
 include/linux/usb.h                              |  2 +-
 kernel/ksysfs.c                                  | 14 ++--
 kernel/sysctl.c                                  |  4 +-
 lib/kobject.c                                    |  4 +-
 lib/kobject_uevent.c                             | 64 +++++++++---------
 net/bluetooth/hci_sysfs.c                        |  4 +-
 net/bridge/br_sysfs_if.c                         |  4 +-
 net/core/net-sysfs.c                             |  8 +--
 40 files changed, 372 insertions(+), 378 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/powerpc/eeh-pci-error-recovery.txt b/Documentation/powerpc/eeh-pci-error-recovery.txt
index e75d7474322c..67a11a36270c 100644
--- a/Documentation/powerpc/eeh-pci-error-recovery.txt
+++ b/Documentation/powerpc/eeh-pci-error-recovery.txt
@@ -115,7 +115,7 @@ Current PPC64 Linux EEH Implementation
 At this time, a generic EEH recovery mechanism has been implemented,
 so that individual device drivers do not need to be modified to support
 EEH recovery.  This generic mechanism piggy-backs on the PCI hotplug
-infrastructure,  and percolates events up through the hotplug/udev
+infrastructure,  and percolates events up through the userspace/udev
 infrastructure.  Followiing is a detailed description of how this is
 accomplished.
 
@@ -172,7 +172,7 @@ A handler for the EEH notifier_block events is implemented in
 drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
 It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
 This last call causes the device driver for the card to be stopped,
-which causes hotplug events to go out to user space. This triggers
+which causes uevents to go out to user space. This triggers
 user-space scripts that might issue commands such as "ifdown eth0"
 for ethernet cards, and so on.  This handler then sleeps for 5 seconds,
 hoping to give the user-space scripts enough time to complete.
@@ -258,29 +258,30 @@ rpa_php_unconfig_pci_adapter() {             // in rpaphp_pci.c
     calls
     pci_destroy_dev (struct pci_dev *) {
       calls
-      device_unregister (&dev->dev) {      // in /drivers/base/core.c
+      device_unregister (&dev->dev) {        // in /drivers/base/core.c
         calls
-        device_del(struct device * dev) {  // in /drivers/base/core.c
+        device_del(struct device * dev) {    // in /drivers/base/core.c
           calls
-          kobject_del() {                  //in /libs/kobject.c
+          kobject_del() {                    //in /libs/kobject.c
             calls
-            kobject_hotplug() {            // in /libs/kobject.c
+            kobject_uevent() {               // in /libs/kobject.c
               calls
-              kset_hotplug() {             // in /lib/kobject.c
+              kset_uevent() {                // in /lib/kobject.c
                 calls
-                kset->hotplug_ops->hotplug() which is really just
+                kset->uevent_ops->uevent()   // which is really just
                 a call to
-                dev_hotplug() {           // in /drivers/base/core.c
+                dev_uevent() {               // in /drivers/base/core.c
                   calls
-                  dev->bus->hotplug() which is really just a call to
-                  pci_hotplug () {      // in drivers/pci/hotplug.c
+                  dev->bus->uevent() which is really just a call to
+                  pci_uevent () {            // in drivers/pci/hotplug.c
                     which prints device name, etc....
                  }
                }
-               then kset_hotplug() calls
-                call_usermodehelper () with
-                   argv[0]=hotplug_path[] which is "/sbin/hotplug"
-             --> event to userspace,
+               then kobject_uevent() sends a netlink uevent to userspace
+               --> userspace uevent
+               (during early boot, nobody listens to netlink events and
+               kobject_uevent() executes uevent_helper[], which runs the
+               event process /sbin/hotplug)
            }
          }
          kobject_del() then calls sysfs_remove_dir(), which would
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
index 71a6addf9f7f..13c41495fe06 100644
--- a/arch/powerpc/kernel/vio.c
+++ b/arch/powerpc/kernel/vio.c
@@ -293,6 +293,6 @@ static int vio_hotplug(struct device *dev, char **envp, int num_envp,
 
 struct bus_type vio_bus_type = {
 	.name = "vio",
-	.hotplug = vio_hotplug,
+	.uevent = vio_hotplug,
 	.match = vio_bus_match,
 };
diff --git a/block/genhd.c b/block/genhd.c
index f04609d553b8..f1ed83f3f083 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -358,7 +358,7 @@ static struct sysfs_ops disk_sysfs_ops = {
 static ssize_t disk_uevent_store(struct gendisk * disk,
 				 const char *buf, size_t count)
 {
-	kobject_hotplug(&disk->kobj, KOBJ_ADD);
+	kobject_uevent(&disk->kobj, KOBJ_ADD);
 	return count;
 }
 static ssize_t disk_dev_read(struct gendisk * disk, char *page)
@@ -455,14 +455,14 @@ static struct kobj_type ktype_block = {
 
 extern struct kobj_type ktype_part;
 
-static int block_hotplug_filter(struct kset *kset, struct kobject *kobj)
+static int block_uevent_filter(struct kset *kset, struct kobject *kobj)
 {
 	struct kobj_type *ktype = get_ktype(kobj);
 
 	return ((ktype == &ktype_block) || (ktype == &ktype_part));
 }
 
-static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+static int block_uevent(struct kset *kset, struct kobject *kobj, char **envp,
 			 int num_envp, char *buffer, int buffer_size)
 {
 	struct kobj_type *ktype = get_ktype(kobj);
@@ -474,40 +474,40 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 
 	if (ktype == &ktype_block) {
 		disk = container_of(kobj, struct gendisk, kobj);
-		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
-				    &length, "MINOR=%u", disk->first_minor);
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "MINOR=%u", disk->first_minor);
 	} else if (ktype == &ktype_part) {
 		disk = container_of(kobj->parent, struct gendisk, kobj);
 		part = container_of(kobj, struct hd_struct, kobj);
-		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
-				    &length, "MINOR=%u",
-				    disk->first_minor + part->partno);
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "MINOR=%u",
+			       disk->first_minor + part->partno);
 	} else
 		return 0;
 
-	add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &length,
-			    "MAJOR=%u", disk->major);
+	add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &length,
+		       "MAJOR=%u", disk->major);
 
 	/* add physical device, backing this device  */
 	physdev = disk->driverfs_dev;
 	if (physdev) {
 		char *path = kobject_get_path(&physdev->kobj, GFP_KERNEL);
 
-		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
-				    &length, "PHYSDEVPATH=%s", path);
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "PHYSDEVPATH=%s", path);
 		kfree(path);
 
 		if (physdev->bus)
-			add_hotplug_env_var(envp, num_envp, &i,
-					    buffer, buffer_size, &length,
-					    "PHYSDEVBUS=%s",
-					    physdev->bus->name);
+			add_uevent_var(envp, num_envp, &i,
+				       buffer, buffer_size, &length,
+				       "PHYSDEVBUS=%s",
+				       physdev->bus->name);
 
 		if (physdev->driver)
-			add_hotplug_env_var(envp, num_envp, &i,
-					    buffer, buffer_size, &length,
-					    "PHYSDEVDRIVER=%s",
-					    physdev->driver->name);
+			add_uevent_var(envp, num_envp, &i,
+				       buffer, buffer_size, &length,
+				       "PHYSDEVDRIVER=%s",
+				       physdev->driver->name);
 	}
 
 	/* terminate, set to next free slot, shrink available space */
@@ -520,13 +520,13 @@ static int block_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 	return 0;
 }
 
-static struct kset_hotplug_ops block_hotplug_ops = {
-	.filter		= block_hotplug_filter,
-	.hotplug	= block_hotplug,
+static struct kset_uevent_ops block_uevent_ops = {
+	.filter		= block_uevent_filter,
+	.uevent		= block_uevent,
 };
 
 /* declare block_subsys. */
-static decl_subsys(block, &ktype_block, &block_hotplug_ops);
+static decl_subsys(block, &ktype_block, &block_uevent_ops);
 
 
 /*
diff --git a/drivers/acpi/container.c b/drivers/acpi/container.c
index 27ec12c1fab0..b69a8cad82b7 100644
--- a/drivers/acpi/container.c
+++ b/drivers/acpi/container.c
@@ -172,21 +172,21 @@ static void container_notify_cb(acpi_handle handle, u32 type, void *context)
 			if (ACPI_FAILURE(status) || !device) {
 				result = container_device_add(&device, handle);
 				if (!result)
-					kobject_hotplug(&device->kobj,
-							KOBJ_ONLINE);
+					kobject_uevent(&device->kobj,
+						       KOBJ_ONLINE);
 				else
 					printk("Failed to add container\n");
 			}
 		} else {
 			if (ACPI_SUCCESS(status)) {
 				/* device exist and this is a remove request */
-				kobject_hotplug(&device->kobj, KOBJ_OFFLINE);
+				kobject_uevent(&device->kobj, KOBJ_OFFLINE);
 			}
 		}
 		break;
 	case ACPI_NOTIFY_EJECT_REQUEST:
 		if (!acpi_bus_get_device(handle, &device) && device) {
-			kobject_hotplug(&device->kobj, KOBJ_OFFLINE);
+			kobject_uevent(&device->kobj, KOBJ_OFFLINE);
 		}
 		break;
 	default:
diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c
index 0c561c571f29..1278aca96fe3 100644
--- a/drivers/acpi/processor_core.c
+++ b/drivers/acpi/processor_core.c
@@ -748,7 +748,7 @@ int acpi_processor_device_add(acpi_handle handle, struct acpi_device **device)
 		return_VALUE(-ENODEV);
 
 	if ((pr->id >= 0) && (pr->id < NR_CPUS)) {
-		kobject_hotplug(&(*device)->kobj, KOBJ_ONLINE);
+		kobject_uevent(&(*device)->kobj, KOBJ_ONLINE);
 	}
 	return_VALUE(0);
 }
@@ -788,13 +788,13 @@ acpi_processor_hotplug_notify(acpi_handle handle, u32 event, void *data)
 		}
 
 		if (pr->id >= 0 && (pr->id < NR_CPUS)) {
-			kobject_hotplug(&device->kobj, KOBJ_OFFLINE);
+			kobject_uevent(&device->kobj, KOBJ_OFFLINE);
 			break;
 		}
 
 		result = acpi_processor_start(device);
 		if ((!result) && ((pr->id >= 0) && (pr->id < NR_CPUS))) {
-			kobject_hotplug(&device->kobj, KOBJ_ONLINE);
+			kobject_uevent(&device->kobj, KOBJ_ONLINE);
 		} else {
 			ACPI_DEBUG_PRINT((ACPI_DB_ERROR,
 					  "Device [%s] failed to start\n",
@@ -818,7 +818,7 @@ acpi_processor_hotplug_notify(acpi_handle handle, u32 event, void *data)
 		}
 
 		if ((pr->id < NR_CPUS) && (cpu_present(pr->id)))
-			kobject_hotplug(&device->kobj, KOBJ_OFFLINE);
+			kobject_uevent(&device->kobj, KOBJ_OFFLINE);
 		break;
 	default:
 		ACPI_DEBUG_PRINT((ACPI_DB_INFO,
diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index 31218e1d2a18..0745d20afb8c 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -78,7 +78,7 @@ static struct kobj_type ktype_acpi_ns = {
 	.release = acpi_device_release,
 };
 
-static int namespace_hotplug(struct kset *kset, struct kobject *kobj,
+static int namespace_uevent(struct kset *kset, struct kobject *kobj,
 			     char **envp, int num_envp, char *buffer,
 			     int buffer_size)
 {
@@ -89,8 +89,8 @@ static int namespace_hotplug(struct kset *kset, struct kobject *kobj,
 	if (!dev->driver)
 		return 0;
 
-	if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len,
-				"PHYSDEVDRIVER=%s", dev->driver->name))
+	if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			   "PHYSDEVDRIVER=%s", dev->driver->name))
 		return -ENOMEM;
 
 	envp[i] = NULL;
@@ -98,8 +98,8 @@ static int namespace_hotplug(struct kset *kset, struct kobject *kobj,
 	return 0;
 }
 
-static struct kset_hotplug_ops namespace_hotplug_ops = {
-	.hotplug = &namespace_hotplug,
+static struct kset_uevent_ops namespace_uevent_ops = {
+	.uevent = &namespace_uevent,
 };
 
 static struct kset acpi_namespace_kset = {
@@ -108,7 +108,7 @@ static struct kset acpi_namespace_kset = {
 		 },
 	.subsys = &acpi_subsys,
 	.ktype = &ktype_acpi_ns,
-	.hotplug_ops = &namespace_hotplug_ops,
+	.uevent_ops = &namespace_uevent_ops,
 };
 
 static void acpi_device_register(struct acpi_device *device,
@@ -347,7 +347,7 @@ static int acpi_bus_get_wakeup_device_flags(struct acpi_device *device)
 }
 
 /* --------------------------------------------------------------------------
-		ACPI hotplug sysfs device file support
+		ACPI sysfs device file support
    -------------------------------------------------------------------------- */
 static ssize_t acpi_eject_store(struct acpi_device *device,
 				const char *buf, size_t count);
diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index 934149c1512b..f0eff3dac58d 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -19,11 +19,11 @@ config PREVENT_FIRMWARE_BUILD
 	  If unsure say Y here.
 
 config FW_LOADER
-	tristate "Hotplug firmware loading support"
+	tristate "Userspace firmware loading support"
 	select HOTPLUG
 	---help---
 	  This option is provided for the case where no in-kernel-tree modules
-	  require hotplug firmware loading support, but a module built outside
+	  require userspace firmware loading support, but a module built outside
 	  the kernel tree does.
 
 config DEBUG_DRIVER
diff --git a/drivers/base/class.c b/drivers/base/class.c
index db65fd0babe9..df7fdabd0730 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -178,7 +178,7 @@ static void class_device_create_release(struct class_device *class_dev)
 }
 
 /* needed to allow these devices to have parent class devices */
-static int class_device_create_hotplug(struct class_device *class_dev,
+static int class_device_create_uevent(struct class_device *class_dev,
 				       char **envp, int num_envp,
 				       char *buffer, int buffer_size)
 {
@@ -331,7 +331,7 @@ static struct kobj_type ktype_class_device = {
 	.release	= class_dev_release,
 };
 
-static int class_hotplug_filter(struct kset *kset, struct kobject *kobj)
+static int class_uevent_filter(struct kset *kset, struct kobject *kobj)
 {
 	struct kobj_type *ktype = get_ktype(kobj);
 
@@ -343,14 +343,14 @@ static int class_hotplug_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static const char *class_hotplug_name(struct kset *kset, struct kobject *kobj)
+static const char *class_uevent_name(struct kset *kset, struct kobject *kobj)
 {
 	struct class_device *class_dev = to_class_dev(kobj);
 
 	return class_dev->class->name;
 }
 
-static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+static int class_uevent(struct kset *kset, struct kobject *kobj, char **envp,
 			 int num_envp, char *buffer, int buffer_size)
 {
 	struct class_device *class_dev = to_class_dev(kobj);
@@ -365,29 +365,29 @@ static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 		struct device *dev = class_dev->dev;
 		char *path = kobject_get_path(&dev->kobj, GFP_KERNEL);
 
-		add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
-				    &length, "PHYSDEVPATH=%s", path);
+		add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			       &length, "PHYSDEVPATH=%s", path);
 		kfree(path);
 
 		if (dev->bus)
-			add_hotplug_env_var(envp, num_envp, &i,
-					    buffer, buffer_size, &length,
-					    "PHYSDEVBUS=%s", dev->bus->name);
+			add_uevent_var(envp, num_envp, &i,
+				       buffer, buffer_size, &length,
+				       "PHYSDEVBUS=%s", dev->bus->name);
 
 		if (dev->driver)
-			add_hotplug_env_var(envp, num_envp, &i,
-					    buffer, buffer_size, &length,
-					    "PHYSDEVDRIVER=%s", dev->driver->name);
+			add_uevent_var(envp, num_envp, &i,
+				       buffer, buffer_size, &length,
+				       "PHYSDEVDRIVER=%s", dev->driver->name);
 	}
 
 	if (MAJOR(class_dev->devt)) {
-		add_hotplug_env_var(envp, num_envp, &i,
-				    buffer, buffer_size, &length,
-				    "MAJOR=%u", MAJOR(class_dev->devt));
+		add_uevent_var(envp, num_envp, &i,
+			       buffer, buffer_size, &length,
+			       "MAJOR=%u", MAJOR(class_dev->devt));
 
-		add_hotplug_env_var(envp, num_envp, &i,
-				    buffer, buffer_size, &length,
-				    "MINOR=%u", MINOR(class_dev->devt));
+		add_uevent_var(envp, num_envp, &i,
+			       buffer, buffer_size, &length,
+			       "MINOR=%u", MINOR(class_dev->devt));
 	}
 
 	/* terminate, set to next free slot, shrink available space */
@@ -397,30 +397,30 @@ static int class_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 	buffer = &buffer[length];
 	buffer_size -= length;
 
-	if (class_dev->hotplug) {
+	if (class_dev->uevent) {
 		/* have the class device specific function add its stuff */
-		retval = class_dev->hotplug(class_dev, envp, num_envp,
+		retval = class_dev->uevent(class_dev, envp, num_envp,
 					    buffer, buffer_size);
 		if (retval)
-			pr_debug("class_dev->hotplug() returned %d\n", retval);
-	} else if (class_dev->class->hotplug) {
+			pr_debug("class_dev->uevent() returned %d\n", retval);
+	} else if (class_dev->class->uevent) {
 		/* have the class specific function add its stuff */
-		retval = class_dev->class->hotplug(class_dev, envp, num_envp,
+		retval = class_dev->class->uevent(class_dev, envp, num_envp,
 						   buffer, buffer_size);
 		if (retval)
-			pr_debug("class->hotplug() returned %d\n", retval);
+			pr_debug("class->uevent() returned %d\n", retval);
 	}
 
 	return retval;
 }
 
-static struct kset_hotplug_ops class_hotplug_ops = {
-	.filter =	class_hotplug_filter,
-	.name =		class_hotplug_name,
-	.hotplug =	class_hotplug,
+static struct kset_uevent_ops class_uevent_ops = {
+	.filter =	class_uevent_filter,
+	.name =		class_uevent_name,
+	.uevent =	class_uevent,
 };
 
-static decl_subsys(class_obj, &ktype_class_device, &class_hotplug_ops);
+static decl_subsys(class_obj, &ktype_class_device, &class_uevent_ops);
 
 
 static int class_device_add_attrs(struct class_device * cd)
@@ -464,7 +464,7 @@ static ssize_t show_dev(struct class_device *class_dev, char *buf)
 static ssize_t store_uevent(struct class_device *class_dev,
 			    const char *buf, size_t count)
 {
-	kobject_hotplug(&class_dev->kobj, KOBJ_ADD);
+	kobject_uevent(&class_dev->kobj, KOBJ_ADD);
 	return count;
 }
 
@@ -559,7 +559,7 @@ int class_device_add(struct class_device *class_dev)
 				  class_name);
 	}
 
-	kobject_hotplug(&class_dev->kobj, KOBJ_ADD);
+	kobject_uevent(&class_dev->kobj, KOBJ_ADD);
 
 	/* notify any interfaces this device is now here */
 	if (parent_class) {
@@ -632,7 +632,7 @@ struct class_device *class_device_create(struct class *cls,
 	class_dev->class = cls;
 	class_dev->parent = parent;
 	class_dev->release = class_device_create_release;
-	class_dev->hotplug = class_device_create_hotplug;
+	class_dev->uevent = class_device_create_uevent;
 
 	va_start(args, fmt);
 	vsnprintf(class_dev->class_id, BUS_ID_SIZE, fmt, args);
@@ -674,7 +674,7 @@ void class_device_del(struct class_device *class_dev)
 		class_device_remove_file(class_dev, class_dev->devt_attr);
 	class_device_remove_attrs(class_dev);
 
-	kobject_hotplug(&class_dev->kobj, KOBJ_REMOVE);
+	kobject_uevent(&class_dev->kobj, KOBJ_REMOVE);
 	kobject_del(&class_dev->kobj);
 
 	class_device_put(parent_device);
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 8615b42b517a..fd8059920dbf 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -90,7 +90,7 @@ static struct kobj_type ktype_device = {
 };
 
 
-static int dev_hotplug_filter(struct kset *kset, struct kobject *kobj)
+static int dev_uevent_filter(struct kset *kset, struct kobject *kobj)
 {
 	struct kobj_type *ktype = get_ktype(kobj);
 
@@ -102,14 +102,14 @@ static int dev_hotplug_filter(struct kset *kset, struct kobject *kobj)
 	return 0;
 }
 
-static const char *dev_hotplug_name(struct kset *kset, struct kobject *kobj)
+static const char *dev_uevent_name(struct kset *kset, struct kobject *kobj)
 {
 	struct device *dev = to_dev(kobj);
 
 	return dev->bus->name;
 }
 
-static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+static int dev_uevent(struct kset *kset, struct kobject *kobj, char **envp,
 			int num_envp, char *buffer, int buffer_size)
 {
 	struct device *dev = to_dev(kobj);
@@ -119,15 +119,15 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 
 	/* add bus name of physical device */
 	if (dev->bus)
-		add_hotplug_env_var(envp, num_envp, &i,
-				    buffer, buffer_size, &length,
-				    "PHYSDEVBUS=%s", dev->bus->name);
+		add_uevent_var(envp, num_envp, &i,
+			       buffer, buffer_size, &length,
+			       "PHYSDEVBUS=%s", dev->bus->name);
 
 	/* add driver name of physical device */
 	if (dev->driver)
-		add_hotplug_env_var(envp, num_envp, &i,
-				    buffer, buffer_size, &length,
-				    "PHYSDEVDRIVER=%s", dev->driver->name);
+		add_uevent_var(envp, num_envp, &i,
+			       buffer, buffer_size, &length,
+			       "PHYSDEVDRIVER=%s", dev->driver->name);
 
 	/* terminate, set to next free slot, shrink available space */
 	envp[i] = NULL;
@@ -136,11 +136,11 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 	buffer = &buffer[length];
 	buffer_size -= length;
 
-	if (dev->bus && dev->bus->hotplug) {
+	if (dev->bus && dev->bus->uevent) {
 		/* have the bus specific function add its stuff */
-		retval = dev->bus->hotplug (dev, envp, num_envp, buffer, buffer_size);
+		retval = dev->bus->uevent(dev, envp, num_envp, buffer, buffer_size);
 			if (retval) {
-			pr_debug ("%s - hotplug() returned %d\n",
+			pr_debug ("%s - uevent() returned %d\n",
 				  __FUNCTION__, retval);
 		}
 	}
@@ -148,16 +148,16 @@ static int dev_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 	return retval;
 }
 
-static struct kset_hotplug_ops device_hotplug_ops = {
-	.filter =	dev_hotplug_filter,
-	.name =		dev_hotplug_name,
-	.hotplug =	dev_hotplug,
+static struct kset_uevent_ops device_uevent_ops = {
+	.filter =	dev_uevent_filter,
+	.name =		dev_uevent_name,
+	.uevent =	dev_uevent,
 };
 
 static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
 			    const char *buf, size_t count)
 {
-	kobject_hotplug(&dev->kobj, KOBJ_ADD);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
 	return count;
 }
 
@@ -165,7 +165,7 @@ static ssize_t store_uevent(struct device *dev, struct device_attribute *attr,
  *	device_subsys - structure to be registered with kobject core.
  */
 
-decl_subsys(devices, &ktype_device, &device_hotplug_ops);
+decl_subsys(devices, &ktype_device, &device_uevent_ops);
 
 
 /**
@@ -274,7 +274,7 @@ int device_add(struct device *dev)
 	dev->uevent_attr.store = store_uevent;
 	device_create_file(dev, &dev->uevent_attr);
 
-	kobject_hotplug(&dev->kobj, KOBJ_ADD);
+	kobject_uevent(&dev->kobj, KOBJ_ADD);
 	if ((error = device_pm_add(dev)))
 		goto PMError;
 	if ((error = bus_add_device(dev)))
@@ -291,7 +291,7 @@ int device_add(struct device *dev)
  BusError:
 	device_pm_remove(dev);
  PMError:
-	kobject_hotplug(&dev->kobj, KOBJ_REMOVE);
+	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
  Error:
 	if (parent)
@@ -374,7 +374,7 @@ void device_del(struct device * dev)
 		platform_notify_remove(dev);
 	bus_remove_device(dev);
 	device_pm_remove(dev);
-	kobject_hotplug(&dev->kobj, KOBJ_REMOVE);
+	kobject_uevent(&dev->kobj, KOBJ_REMOVE);
 	kobject_del(&dev->kobj);
 	if (parent)
 		put_device(parent);
diff --git a/drivers/base/cpu.c b/drivers/base/cpu.c
index a95844790f7b..281d26784d25 100644
--- a/drivers/base/cpu.c
+++ b/drivers/base/cpu.c
@@ -41,14 +41,14 @@ static ssize_t store_online(struct sys_device *dev, const char *buf,
 	case '0':
 		ret = cpu_down(cpu->sysdev.id);
 		if (!ret)
-			kobject_hotplug(&dev->kobj, KOBJ_OFFLINE);
+			kobject_uevent(&dev->kobj, KOBJ_OFFLINE);
 		break;
 	case '1':
 		ret = smp_prepare_cpu(cpu->sysdev.id);
 		if (!ret)
 			ret = cpu_up(cpu->sysdev.id);
 		if (!ret)
-			kobject_hotplug(&dev->kobj, KOBJ_ONLINE);
+			kobject_uevent(&dev->kobj, KOBJ_ONLINE);
 		break;
 	default:
 		ret = -EINVAL;
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 59dacb6552c0..5b3d5e9ddcb6 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -85,17 +85,17 @@ firmware_timeout_store(struct class *class, const char *buf, size_t count)
 static CLASS_ATTR(timeout, 0644, firmware_timeout_show, firmware_timeout_store);
 
 static void  fw_class_dev_release(struct class_device *class_dev);
-int firmware_class_hotplug(struct class_device *dev, char **envp,
+int firmware_class_uevent(struct class_device *dev, char **envp,
 			   int num_envp, char *buffer, int buffer_size);
 
 static struct class firmware_class = {
 	.name		= "firmware",
-	.hotplug	= firmware_class_hotplug,
+	.uevent	= firmware_class_uevent,
 	.release	= fw_class_dev_release,
 };
 
 int
-firmware_class_hotplug(struct class_device *class_dev, char **envp,
+firmware_class_uevent(struct class_device *class_dev, char **envp,
 		       int num_envp, char *buffer, int buffer_size)
 {
 	struct firmware_priv *fw_priv = class_get_devdata(class_dev);
@@ -104,13 +104,12 @@ firmware_class_hotplug(struct class_device *class_dev, char **envp,
 	if (!test_bit(FW_STATUS_READY, &fw_priv->status))
 		return -ENODEV;
 
-	if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len,
-			"FIRMWARE=%s", fw_priv->fw_id))
+	if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			   "FIRMWARE=%s", fw_priv->fw_id))
 		return -ENOMEM;
-	if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size, &len,
-			"TIMEOUT=%i", loading_timeout))
+	if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size, &len,
+			   "TIMEOUT=%i", loading_timeout))
 		return -ENOMEM;
-
 	envp[i] = NULL;
 
 	return 0;
@@ -352,7 +351,7 @@ error_kfree:
 
 static int
 fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p,
-		      const char *fw_name, struct device *device, int hotplug)
+		      const char *fw_name, struct device *device, int uevent)
 {
 	struct class_device *class_dev;
 	struct firmware_priv *fw_priv;
@@ -384,7 +383,7 @@ fw_setup_class_device(struct firmware *fw, struct class_device **class_dev_p,
 		goto error_unreg;
 	}
 
-	if (hotplug)
+	if (uevent)
                 set_bit(FW_STATUS_READY, &fw_priv->status);
         else
                 set_bit(FW_STATUS_READY_NOHOTPLUG, &fw_priv->status);
@@ -399,7 +398,7 @@ out:
 
 static int
 _request_firmware(const struct firmware **firmware_p, const char *name,
-		 struct device *device, int hotplug)
+		 struct device *device, int uevent)
 {
 	struct class_device *class_dev;
 	struct firmware_priv *fw_priv;
@@ -418,19 +417,19 @@ _request_firmware(const struct firmware **firmware_p, const char *name,
 	}
 
 	retval = fw_setup_class_device(firmware, &class_dev, name, device,
-		hotplug);
+				       uevent);
 	if (retval)
 		goto error_kfree_fw;
 
 	fw_priv = class_get_devdata(class_dev);
 
-	if (hotplug) {
+	if (uevent) {
 		if (loading_timeout > 0) {
 			fw_priv->timeout.expires = jiffies + loading_timeout * HZ;
 			add_timer(&fw_priv->timeout);
 		}
 
-		kobject_hotplug(&class_dev->kobj, KOBJ_ADD);
+		kobject_uevent(&class_dev->kobj, KOBJ_ADD);
 		wait_for_completion(&fw_priv->completion);
 		set_bit(FW_STATUS_DONE, &fw_priv->status);
 		del_timer_sync(&fw_priv->timeout);
@@ -456,7 +455,7 @@ out:
 }
 
 /**
- * request_firmware: - request firmware to hotplug and wait for it
+ * request_firmware: - send firmware request and wait for it
  * @firmware_p: pointer to firmware image
  * @name: name of firmware file
  * @device: device for which firmware is being loaded
@@ -466,7 +465,7 @@ out:
  *
  *      Should be called from user context where sleeping is allowed.
  *
- *      @name will be used as $FIRMWARE in the hotplug environment and
+ *      @name will be used as $FIRMWARE in the uevent environment and
  *      should be distinctive enough not to be confused with any other
  *      firmware image for this or any other device.
  **/
@@ -474,8 +473,8 @@ int
 request_firmware(const struct firmware **firmware_p, const char *name,
                  struct device *device)
 {
-        int hotplug = 1;
-        return _request_firmware(firmware_p, name, device, hotplug);
+        int uevent = 1;
+        return _request_firmware(firmware_p, name, device, uevent);
 }
 
 /**
@@ -518,7 +517,7 @@ struct firmware_work {
 	struct device *device;
 	void *context;
 	void (*cont)(const struct firmware *fw, void *context);
-	int hotplug;
+	int uevent;
 };
 
 static int
@@ -533,7 +532,7 @@ request_firmware_work_func(void *arg)
 	}
 	daemonize("%s/%s", "firmware", fw_work->name);
 	ret = _request_firmware(&fw, fw_work->name, fw_work->device,
-		fw_work->hotplug);
+		fw_work->uevent);
 	if (ret < 0)
 		fw_work->cont(NULL, fw_work->context);
 	else {
@@ -548,7 +547,7 @@ request_firmware_work_func(void *arg)
 /**
  * request_firmware_nowait: asynchronous version of request_firmware
  * @module: module requesting the firmware
- * @hotplug: invokes hotplug event to copy the firmware image if this flag
+ * @uevent: sends uevent to copy the firmware image if this flag
  *	is non-zero else the firmware copy must be done manually.
  * @name: name of firmware file
  * @device: device for which firmware is being loaded
@@ -562,7 +561,7 @@ request_firmware_work_func(void *arg)
  **/
 int
 request_firmware_nowait(
-	struct module *module, int hotplug,
+	struct module *module, int uevent,
 	const char *name, struct device *device, void *context,
 	void (*cont)(const struct firmware *fw, void *context))
 {
@@ -583,7 +582,7 @@ request_firmware_nowait(
 		.device = device,
 		.context = context,
 		.cont = cont,
-		.hotplug = hotplug,
+		.uevent = uevent,
 	};
 
 	ret = kernel_thread(request_firmware_work_func, fw_work,
diff --git a/drivers/base/memory.c b/drivers/base/memory.c
index bc3ca6a656b2..7e1d077874df 100644
--- a/drivers/base/memory.c
+++ b/drivers/base/memory.c
@@ -29,12 +29,12 @@ static struct sysdev_class memory_sysdev_class = {
 	set_kset_name(MEMORY_CLASS_NAME),
 };
 
-static char *memory_hotplug_name(struct kset *kset, struct kobject *kobj)
+static const char *memory_uevent_name(struct kset *kset, struct kobject *kobj)
 {
 	return MEMORY_CLASS_NAME;
 }
 
-static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
+static int memory_uevent(struct kset *kset, struct kobject *kobj, char **envp,
 			int num_envp, char *buffer, int buffer_size)
 {
 	int retval = 0;
@@ -42,9 +42,9 @@ static int memory_hotplug(struct kset *kset, struct kobject *kobj, char **envp,
 	return retval;
 }
 
-static struct kset_hotplug_ops memory_hotplug_ops = {
-	.name		= memory_hotplug_name,
-	.hotplug	= memory_hotplug,
+static struct kset_uevent_ops memory_uevent_ops = {
+	.name		= memory_uevent_name,
+	.uevent		= memory_uevent,
 };
 
 static struct notifier_block *memory_chain;
@@ -431,7 +431,7 @@ int __init memory_dev_init(void)
 	unsigned int i;
 	int ret;
 
-	memory_sysdev_class.kset.hotplug_ops = &memory_hotplug_ops;
+	memory_sysdev_class.kset.uevent_ops = &memory_uevent_ops;
 	ret = sysdev_class_register(&memory_sysdev_class);
 
 	/*
diff --git a/drivers/ieee1394/nodemgr.c b/drivers/ieee1394/nodemgr.c
index 0ea37b1bccb2..f2453668acf5 100644
--- a/drivers/ieee1394/nodemgr.c
+++ b/drivers/ieee1394/nodemgr.c
@@ -121,8 +121,8 @@ struct host_info {
 };
 
 static int nodemgr_bus_match(struct device * dev, struct device_driver * drv);
-static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp,
-			   char *buffer, int buffer_size);
+static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp,
+			  char *buffer, int buffer_size);
 static void nodemgr_resume_ne(struct node_entry *ne);
 static void nodemgr_remove_ne(struct node_entry *ne);
 static struct node_entry *find_entry_by_guid(u64 guid);
@@ -162,7 +162,7 @@ static void ud_cls_release(struct class_device *class_dev)
 static struct class nodemgr_ud_class = {
 	.name		= "ieee1394",
 	.release	= ud_cls_release,
-	.hotplug	= nodemgr_hotplug,
+	.uevent		= nodemgr_uevent,
 };
 
 static struct hpsb_highlevel nodemgr_highlevel;
@@ -966,7 +966,7 @@ static struct unit_directory *nodemgr_process_unit_directory
 				if (ud_child == NULL)
 					break;
 				
-				/* inherit unspecified values so hotplug picks it up */
+				/* inherit unspecified values, the driver core picks it up */
 				if ((ud->flags & UNIT_DIRECTORY_MODEL_ID) &&
 				    !(ud_child->flags & UNIT_DIRECTORY_MODEL_ID))
 				{
@@ -1062,8 +1062,8 @@ static void nodemgr_process_root_directory(struct host_info *hi, struct node_ent
 
 #ifdef CONFIG_HOTPLUG
 
-static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp,
-			   char *buffer, int buffer_size)
+static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp,
+			  char *buffer, int buffer_size)
 {
 	struct unit_directory *ud;
 	int i = 0;
@@ -1112,8 +1112,8 @@ do {								\
 
 #else
 
-static int nodemgr_hotplug(struct class_device *cdev, char **envp, int num_envp,
-			   char *buffer, int buffer_size)
+static int nodemgr_uevent(struct class_device *cdev, char **envp, int num_envp,
+			  char *buffer, int buffer_size)
 {
 	return -ENODEV;
 }
@@ -1618,8 +1618,8 @@ static int nodemgr_host_thread(void *__hi)
 
 		/* Scan our nodes to get the bus options and create node
 		 * entries. This does not do the sysfs stuff, since that
-		 * would trigger hotplug callbacks and such, which is a
-		 * bad idea at this point. */
+		 * would trigger uevents and such, which is a bad idea at
+		 * this point. */
 		nodemgr_node_scan(hi, generation);
 
 		/* This actually does the full probe, with sysfs
diff --git a/drivers/infiniband/core/sysfs.c b/drivers/infiniband/core/sysfs.c
index 08648b1a387e..1f1743c5c9a3 100644
--- a/drivers/infiniband/core/sysfs.c
+++ b/drivers/infiniband/core/sysfs.c
@@ -434,24 +434,24 @@ static void ib_device_release(struct class_device *cdev)
 	kfree(dev);
 }
 
-static int ib_device_hotplug(struct class_device *cdev, char **envp,
-			     int num_envp, char *buf, int size)
+static int ib_device_uevent(struct class_device *cdev, char **envp,
+			    int num_envp, char *buf, int size)
 {
 	struct ib_device *dev = container_of(cdev, struct ib_device, class_dev);
 	int i = 0, len = 0;
 
-	if (add_hotplug_env_var(envp, num_envp, &i, buf, size, &len,
-				"NAME=%s", dev->name))
+	if (add_uevent_var(envp, num_envp, &i, buf, size, &len,
+			   "NAME=%s", dev->name))
 		return -ENOMEM;
 
 	/*
-	 * It might be nice to pass the node GUID to hotplug, but
+	 * It might be nice to pass the node GUID with the event, but
 	 * right now the only way to get it is to query the device
 	 * provider, and this can crash during device removal because
 	 * we are will be running after driver removal has started.
 	 * We could add a node_guid field to struct ib_device, or we
-	 * could just let the hotplug script read the node GUID from
-	 * sysfs when devices are added.
+	 * could just let userspace read the node GUID from sysfs when
+	 * devices are added.
 	 */
 
 	envp[i] = NULL;
@@ -653,7 +653,7 @@ static struct class_device_attribute *ib_class_attributes[] = {
 static struct class ib_class = {
 	.name    = "infiniband",
 	.release = ib_device_release,
-	.hotplug = ib_device_hotplug,
+	.uevent = ib_device_uevent,
 };
 
 int ib_device_register_sysfs(struct ib_device *device)
diff --git a/drivers/input/input.c b/drivers/input/input.c
index 43b49ccd7dad..2d37b394e384 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -610,10 +610,10 @@ static void input_dev_release(struct class_device *class_dev)
 }
 
 /*
- * Input hotplugging interface - loading event handlers based on
+ * Input uevent interface - loading event handlers based on
  * device bitfields.
  */
-static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index,
+static int input_add_uevent_bm_var(char **envp, int num_envp, int *cur_index,
 				    char *buffer, int buffer_size, int *cur_len,
 				    const char *name, unsigned long *bitmap, int max)
 {
@@ -638,7 +638,7 @@ static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index,
 
 #define INPUT_ADD_HOTPLUG_VAR(fmt, val...)				\
 	do {								\
-		int err = add_hotplug_env_var(envp, num_envp, &i,	\
+		int err = add_uevent_var(envp, num_envp, &i,	\
 					buffer, buffer_size, &len,	\
 					fmt, val);			\
 		if (err)						\
@@ -647,15 +647,15 @@ static int input_add_hotplug_bm_var(char **envp, int num_envp, int *cur_index,
 
 #define INPUT_ADD_HOTPLUG_BM_VAR(name, bm, max)				\
 	do {								\
-		int err = input_add_hotplug_bm_var(envp, num_envp, &i,	\
+		int err = input_add_uevent_bm_var(envp, num_envp, &i,	\
 					buffer, buffer_size, &len,	\
 					name, bm, max);			\
 		if (err)						\
 			return err;					\
 	} while (0)
 
-static int input_dev_hotplug(struct class_device *cdev, char **envp,
-			     int num_envp, char *buffer, int buffer_size)
+static int input_dev_uevent(struct class_device *cdev, char **envp,
+			    int num_envp, char *buffer, int buffer_size)
 {
 	struct input_dev *dev = to_input_dev(cdev);
 	int i = 0;
@@ -697,7 +697,7 @@ static int input_dev_hotplug(struct class_device *cdev, char **envp,
 struct class input_class = {
 	.name			= "input",
 	.release		= input_dev_release,
-	.hotplug		= input_dev_hotplug,
+	.uevent			= input_dev_uevent,
 };
 
 struct input_dev *input_allocate_device(void)
diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c
index fbb69ef6a77b..8e530cc970e1 100644
--- a/drivers/input/serio/serio.c
+++ b/drivers/input/serio/serio.c
@@ -800,16 +800,16 @@ static int serio_bus_match(struct device *dev, struct device_driver *drv)
 
 #ifdef CONFIG_HOTPLUG
 
-#define SERIO_ADD_HOTPLUG_VAR(fmt, val...)				\
+#define SERIO_ADD_UEVENT_VAR(fmt, val...)				\
 	do {								\
-		int err = add_hotplug_env_var(envp, num_envp, &i,	\
+		int err = add_uevent_var(envp, num_envp, &i,	\
 					buffer, buffer_size, &len,	\
 					fmt, val);			\
 		if (err)						\
 			return err;					\
 	} while (0)
 
-static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
+static int serio_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
 {
 	struct serio *serio;
 	int i = 0;
@@ -820,21 +820,21 @@ static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *bu
 
 	serio = to_serio_port(dev);
 
-	SERIO_ADD_HOTPLUG_VAR("SERIO_TYPE=%02x", serio->id.type);
-	SERIO_ADD_HOTPLUG_VAR("SERIO_PROTO=%02x", serio->id.proto);
-	SERIO_ADD_HOTPLUG_VAR("SERIO_ID=%02x", serio->id.id);
-	SERIO_ADD_HOTPLUG_VAR("SERIO_EXTRA=%02x", serio->id.extra);
-	SERIO_ADD_HOTPLUG_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X",
+	SERIO_ADD_UEVENT_VAR("SERIO_TYPE=%02x", serio->id.type);
+	SERIO_ADD_UEVENT_VAR("SERIO_PROTO=%02x", serio->id.proto);
+	SERIO_ADD_UEVENT_VAR("SERIO_ID=%02x", serio->id.id);
+	SERIO_ADD_UEVENT_VAR("SERIO_EXTRA=%02x", serio->id.extra);
+	SERIO_ADD_UEVENT_VAR("MODALIAS=serio:ty%02Xpr%02Xid%02Xex%02X",
 				serio->id.type, serio->id.proto, serio->id.id, serio->id.extra);
 	envp[i] = NULL;
 
 	return 0;
 }
-#undef SERIO_ADD_HOTPLUG_VAR
+#undef SERIO_ADD_UEVENT_VAR
 
 #else
 
-static int serio_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
+static int serio_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
 {
 	return -ENODEV;
 }
@@ -908,7 +908,7 @@ static int __init serio_init(void)
 	serio_bus.dev_attrs = serio_device_attrs;
 	serio_bus.drv_attrs = serio_driver_attrs;
 	serio_bus.match = serio_bus_match;
-	serio_bus.hotplug = serio_hotplug;
+	serio_bus.uevent = serio_uevent;
 	serio_bus.resume = serio_resume;
 	bus_register(&serio_bus);
 
diff --git a/drivers/macintosh/macio_asic.c b/drivers/macintosh/macio_asic.c
index c34c96d18907..228e1852a836 100644
--- a/drivers/macintosh/macio_asic.c
+++ b/drivers/macintosh/macio_asic.c
@@ -128,7 +128,7 @@ static int macio_device_resume(struct device * dev)
 	return 0;
 }
 
-static int macio_hotplug (struct device *dev, char **envp, int num_envp,
+static int macio_uevent(struct device *dev, char **envp, int num_envp,
                           char *buffer, int buffer_size)
 {
 	struct macio_dev * macio_dev;
@@ -203,7 +203,7 @@ extern struct device_attribute macio_dev_attrs[];
 struct bus_type macio_bus_type = {
        .name	= "macio",
        .match	= macio_bus_match,
-       .hotplug = macio_hotplug,
+       .uevent = macio_uevent,
        .suspend	= macio_device_suspend,
        .resume	= macio_device_resume,
        .dev_attrs = macio_dev_attrs,
diff --git a/drivers/mmc/mmc_sysfs.c b/drivers/mmc/mmc_sysfs.c
index 3f4a66ca9555..ec701667abfc 100644
--- a/drivers/mmc/mmc_sysfs.c
+++ b/drivers/mmc/mmc_sysfs.c
@@ -80,7 +80,7 @@ static int mmc_bus_match(struct device *dev, struct device_driver *drv)
 }
 
 static int
-mmc_bus_hotplug(struct device *dev, char **envp, int num_envp, char *buf,
+mmc_bus_uevent(struct device *dev, char **envp, int num_envp, char *buf,
 		int buf_size)
 {
 	struct mmc_card *card = dev_to_mmc_card(dev);
@@ -140,7 +140,7 @@ static struct bus_type mmc_bus_type = {
 	.name		= "mmc",
 	.dev_attrs	= mmc_dev_attrs,
 	.match		= mmc_bus_match,
-	.hotplug	= mmc_bus_hotplug,
+	.uevent		= mmc_bus_uevent,
 	.suspend	= mmc_bus_suspend,
 	.resume		= mmc_bus_resume,
 };
diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c
index e1743be31909..1c97e7dd130b 100644
--- a/drivers/pci/hotplug.c
+++ b/drivers/pci/hotplug.c
@@ -3,8 +3,8 @@
 #include <linux/module.h>
 #include "pci.h"
 
-int pci_hotplug (struct device *dev, char **envp, int num_envp,
-		 char *buffer, int buffer_size)
+int pci_uevent(struct device *dev, char **envp, int num_envp,
+	       char *buffer, int buffer_size)
 {
 	struct pci_dev *pdev;
 	int i = 0;
@@ -17,34 +17,34 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp,
 	if (!pdev)
 		return -ENODEV;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"PCI_CLASS=%04X", pdev->class))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "PCI_CLASS=%04X", pdev->class))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"PCI_ID=%04X:%04X", pdev->vendor, pdev->device))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "PCI_ID=%04X:%04X", pdev->vendor, pdev->device))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor,
-				pdev->subsystem_device))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "PCI_SUBSYS_ID=%04X:%04X", pdev->subsystem_vendor,
+			   pdev->subsystem_device))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"PCI_SLOT_NAME=%s", pci_name(pdev)))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "PCI_SLOT_NAME=%s", pci_name(pdev)))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x",
-				pdev->vendor, pdev->device,
-				pdev->subsystem_vendor, pdev->subsystem_device,
-				(u8)(pdev->class >> 16), (u8)(pdev->class >> 8),
-				(u8)(pdev->class)))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "MODALIAS=pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02x",
+			   pdev->vendor, pdev->device,
+			   pdev->subsystem_vendor, pdev->subsystem_device,
+			   (u8)(pdev->class >> 16), (u8)(pdev->class >> 8),
+			   (u8)(pdev->class)))
 		return -ENOMEM;
 
 	envp[i] = NULL;
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index a9046d4b8af3..7146b69b812c 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -502,8 +502,8 @@ void pci_dev_put(struct pci_dev *dev)
 }
 
 #ifndef CONFIG_HOTPLUG
-int pci_hotplug (struct device *dev, char **envp, int num_envp,
-		 char *buffer, int buffer_size)
+int pci_uevent(struct device *dev, char **envp, int num_envp,
+	       char *buffer, int buffer_size)
 {
 	return -ENODEV;
 }
@@ -512,7 +512,7 @@ int pci_hotplug (struct device *dev, char **envp, int num_envp,
 struct bus_type pci_bus_type = {
 	.name		= "pci",
 	.match		= pci_bus_match,
-	.hotplug	= pci_hotplug,
+	.uevent		= pci_uevent,
 	.suspend	= pci_device_suspend,
 	.resume		= pci_device_resume,
 	.dev_attrs	= pci_dev_attrs,
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 6527b36c9a61..294849d24590 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -1,7 +1,7 @@
 /* Functions internal to the PCI core code */
 
-extern int pci_hotplug (struct device *dev, char **envp, int num_envp,
-			 char *buffer, int buffer_size);
+extern int pci_uevent(struct device *dev, char **envp, int num_envp,
+		      char *buffer, int buffer_size);
 extern int pci_create_sysfs_dev_files(struct pci_dev *pdev);
 extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
 extern void pci_cleanup_rom(struct pci_dev *dev);
diff --git a/drivers/pcmcia/cs.c b/drivers/pcmcia/cs.c
index a30aa74304a2..7cf09084ef61 100644
--- a/drivers/pcmcia/cs.c
+++ b/drivers/pcmcia/cs.c
@@ -901,14 +901,14 @@ int pcmcia_insert_card(struct pcmcia_socket *skt)
 EXPORT_SYMBOL(pcmcia_insert_card);
 
 
-static int pcmcia_socket_hotplug(struct class_device *dev, char **envp,
-				int num_envp, char *buffer, int buffer_size)
+static int pcmcia_socket_uevent(struct class_device *dev, char **envp,
+			        int num_envp, char *buffer, int buffer_size)
 {
 	struct pcmcia_socket *s = container_of(dev, struct pcmcia_socket, dev);
 	int i = 0, length = 0;
 
-	if (add_hotplug_env_var(envp, num_envp, &i, buffer, buffer_size,
-				&length, "SOCKET_NO=%u", s->sock))
+	if (add_uevent_var(envp, num_envp, &i, buffer, buffer_size,
+			   &length, "SOCKET_NO=%u", s->sock))
 		return -ENOMEM;
 
 	envp[i] = NULL;
@@ -927,7 +927,7 @@ static void pcmcia_release_socket_class(struct class *data)
 
 struct class pcmcia_socket_class = {
 	.name = "pcmcia_socket",
-        .hotplug = pcmcia_socket_hotplug,
+	.uevent = pcmcia_socket_uevent,
 	.release = pcmcia_release_socket,
 	.class_release = pcmcia_release_socket_class,
 };
diff --git a/drivers/pcmcia/ds.c b/drivers/pcmcia/ds.c
index 7f8219f3fd9e..6fb76399547e 100644
--- a/drivers/pcmcia/ds.c
+++ b/drivers/pcmcia/ds.c
@@ -779,8 +779,8 @@ static int pcmcia_bus_match(struct device * dev, struct device_driver * drv) {
 
 #ifdef CONFIG_HOTPLUG
 
-static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp,
-			      char *buffer, int buffer_size)
+static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp,
+			     char *buffer, int buffer_size)
 {
 	struct pcmcia_device *p_dev;
 	int i, length = 0;
@@ -800,31 +800,31 @@ static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp,
 
 	i = 0;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"SOCKET_NO=%u",
-				p_dev->socket->sock))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "SOCKET_NO=%u",
+			   p_dev->socket->sock))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"DEVICE_NO=%02X",
-				p_dev->device_no))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "DEVICE_NO=%02X",
+			   p_dev->device_no))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"MODALIAS=pcmcia:m%04Xc%04Xf%02Xfn%02Xpfn%02X"
-				"pa%08Xpb%08Xpc%08Xpd%08X",
-				p_dev->has_manf_id ? p_dev->manf_id : 0,
-				p_dev->has_card_id ? p_dev->card_id : 0,
-				p_dev->has_func_id ? p_dev->func_id : 0,
-				p_dev->func,
-				p_dev->device_no,
-				hash[0],
-				hash[1],
-				hash[2],
-				hash[3]))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "MODALIAS=pcmcia:m%04Xc%04Xf%02Xfn%02Xpfn%02X"
+			   "pa%08Xpb%08Xpc%08Xpd%08X",
+			   p_dev->has_manf_id ? p_dev->manf_id : 0,
+			   p_dev->has_card_id ? p_dev->card_id : 0,
+			   p_dev->has_func_id ? p_dev->func_id : 0,
+			   p_dev->func,
+			   p_dev->device_no,
+			   hash[0],
+			   hash[1],
+			   hash[2],
+			   hash[3]))
 		return -ENOMEM;
 
 	envp[i] = NULL;
@@ -834,7 +834,7 @@ static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp,
 
 #else
 
-static int pcmcia_bus_hotplug(struct device *dev, char **envp, int num_envp,
+static int pcmcia_bus_uevent(struct device *dev, char **envp, int num_envp,
 			      char *buffer, int buffer_size)
 {
 	return -ENODEV;
@@ -1223,7 +1223,7 @@ static struct class_interface pcmcia_bus_interface = {
 
 struct bus_type pcmcia_bus_type = {
 	.name = "pcmcia",
-	.hotplug = pcmcia_bus_hotplug,
+	.uevent = pcmcia_bus_uevent,
 	.match = pcmcia_bus_match,
 	.dev_attrs = pcmcia_dev_attrs,
 };
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index bf44a409ba0d..07ddf9a38758 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -2132,7 +2132,7 @@ restart:
 	}
 
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, lock_flags);
-	kobject_hotplug(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE);
+	kobject_uevent(&ioa_cfg->host->shost_classdev.kobj, KOBJ_CHANGE);
 	LEAVE;
 }
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index e80ef9467825..af2f0941baac 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -363,8 +363,7 @@ void usb_driver_release_interface(struct usb_driver *driver,
  * Most USB device drivers will use this indirectly, through the usb core,
  * but some layered driver frameworks use it directly.
  * These device tables are exported with MODULE_DEVICE_TABLE, through
- * modutils and "modules.usbmap", to support the driver loading
- * functionality of USB hotplugging.
+ * modutils, to support the driver loading functionality of USB hotplugging.
  *
  * What Matches:
  *
@@ -545,10 +544,7 @@ static int usb_device_match (struct device *dev, struct device_driver *drv)
 #ifdef	CONFIG_HOTPLUG
 
 /*
- * USB hotplugging invokes what /proc/sys/kernel/hotplug says
- * (normally /sbin/hotplug) when USB devices get added or removed.
- *
- * This invokes a user mode policy agent, typically helping to load driver
+ * This sends an uevent to userspace, typically helping to load driver
  * or other modules, configure the device, and more.  Drivers can provide
  * a MODULE_DEVICE_TABLE to help with module loading subtasks.
  *
@@ -557,8 +553,8 @@ static int usb_device_match (struct device *dev, struct device_driver *drv)
  * delays in event delivery.  Use sysfs (and DEVPATH) to make sure the
  * device (and this configuration!) are still present.
  */
-static int usb_hotplug (struct device *dev, char **envp, int num_envp,
-			char *buffer, int buffer_size)
+static int usb_uevent(struct device *dev, char **envp, int num_envp,
+		      char *buffer, int buffer_size)
 {
 	struct usb_interface *intf;
 	struct usb_device *usb_dev;
@@ -570,7 +566,7 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp,
 		return -ENODEV;
 
 	/* driver is often null here; dev_dbg() would oops */
-	pr_debug ("usb %s: hotplug\n", dev->bus_id);
+	pr_debug ("usb %s: uevent\n", dev->bus_id);
 
 	/* Must check driver_data here, as on remove driver is always NULL */
 	if ((dev->driver == &usb_generic_driver) || 
@@ -597,51 +593,51 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp,
 	 *
 	 * FIXME reduce hardwired intelligence here
 	 */
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"DEVICE=/proc/bus/usb/%03d/%03d",
-				usb_dev->bus->busnum, usb_dev->devnum))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "DEVICE=/proc/bus/usb/%03d/%03d",
+			   usb_dev->bus->busnum, usb_dev->devnum))
 		return -ENOMEM;
 #endif
 
 	/* per-device configurations are common */
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"PRODUCT=%x/%x/%x",
-				le16_to_cpu(usb_dev->descriptor.idVendor),
-				le16_to_cpu(usb_dev->descriptor.idProduct),
-				le16_to_cpu(usb_dev->descriptor.bcdDevice)))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "PRODUCT=%x/%x/%x",
+			   le16_to_cpu(usb_dev->descriptor.idVendor),
+			   le16_to_cpu(usb_dev->descriptor.idProduct),
+			   le16_to_cpu(usb_dev->descriptor.bcdDevice)))
 		return -ENOMEM;
 
 	/* class-based driver binding models */
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"TYPE=%d/%d/%d",
-				usb_dev->descriptor.bDeviceClass,
-				usb_dev->descriptor.bDeviceSubClass,
-				usb_dev->descriptor.bDeviceProtocol))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "TYPE=%d/%d/%d",
+			   usb_dev->descriptor.bDeviceClass,
+			   usb_dev->descriptor.bDeviceSubClass,
+			   usb_dev->descriptor.bDeviceProtocol))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"INTERFACE=%d/%d/%d",
-				alt->desc.bInterfaceClass,
-				alt->desc.bInterfaceSubClass,
-				alt->desc.bInterfaceProtocol))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "INTERFACE=%d/%d/%d",
+			   alt->desc.bInterfaceClass,
+			   alt->desc.bInterfaceSubClass,
+			   alt->desc.bInterfaceProtocol))
 		return -ENOMEM;
 
-	if (add_hotplug_env_var(envp, num_envp, &i,
-				buffer, buffer_size, &length,
-				"MODALIAS=usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02X",
-				le16_to_cpu(usb_dev->descriptor.idVendor),
-				le16_to_cpu(usb_dev->descriptor.idProduct),
-				le16_to_cpu(usb_dev->descriptor.bcdDevice),
-				usb_dev->descriptor.bDeviceClass,
-				usb_dev->descriptor.bDeviceSubClass,
-				usb_dev->descriptor.bDeviceProtocol,
-				alt->desc.bInterfaceClass,
-				alt->desc.bInterfaceSubClass,
-				alt->desc.bInterfaceProtocol))
+	if (add_uevent_var(envp, num_envp, &i,
+			   buffer, buffer_size, &length,
+			   "MODALIAS=usb:v%04Xp%04Xd%04Xdc%02Xdsc%02Xdp%02Xic%02Xisc%02Xip%02X",
+			   le16_to_cpu(usb_dev->descriptor.idVendor),
+			   le16_to_cpu(usb_dev->descriptor.idProduct),
+			   le16_to_cpu(usb_dev->descriptor.bcdDevice),
+			   usb_dev->descriptor.bDeviceClass,
+			   usb_dev->descriptor.bDeviceSubClass,
+			   usb_dev->descriptor.bDeviceProtocol,
+			   alt->desc.bInterfaceClass,
+			   alt->desc.bInterfaceSubClass,
+			   alt->desc.bInterfaceProtocol))
 		return -ENOMEM;
 
 	envp[i] = NULL;
@@ -651,7 +647,7 @@ static int usb_hotplug (struct device *dev, char **envp, int num_envp,
 
 #else
 
-static int usb_hotplug (struct device *dev, char **envp,
+static int usb_uevent(struct device *dev, char **envp,
 			int num_envp, char *buffer, int buffer_size)
 {
 	return -ENODEV;
@@ -1491,7 +1487,7 @@ static int usb_generic_resume(struct device *dev)
 struct bus_type usb_bus_type = {
 	.name =		"usb",
 	.match =	usb_device_match,
-	.hotplug =	usb_hotplug,
+	.uevent =	usb_uevent,
 	.suspend =	usb_generic_suspend,
 	.resume =	usb_generic_resume,
 };
diff --git a/drivers/usb/host/hc_crisv10.c b/drivers/usb/host/hc_crisv10.c
index 0eaabeb37ac3..641268d7e6f3 100644
--- a/drivers/usb/host/hc_crisv10.c
+++ b/drivers/usb/host/hc_crisv10.c
@@ -4397,7 +4397,7 @@ static int __init etrax_usb_hc_init(void)
         device_initialize(&fake_device);
         kobject_set_name(&fake_device.kobj, "etrax_usb");
         kobject_add(&fake_device.kobj);
-        kobject_hotplug(&fake_device.kobj, KOBJ_ADD);
+	kobject_uevent(&fake_device.kobj, KOBJ_ADD);
         hc->bus->controller = &fake_device;
 	usb_register_bus(hc->bus);
 
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index 14016b1cd948..024206c4a0e4 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -142,12 +142,12 @@ static struct bin_attribute w1_slave_attr_bin_id = {
 /* Default family */
 static struct w1_family w1_default_family;
 
-static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size);
+static int w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size);
 
 static struct bus_type w1_bus_type = {
 	.name = "w1",
 	.match = w1_master_match,
-	.hotplug = w1_hotplug,
+	.uevent = w1_uevent,
 };
 
 struct device_driver w1_master_driver = {
@@ -361,7 +361,7 @@ void w1_destroy_master_attributes(struct w1_master *master)
 }
 
 #ifdef CONFIG_HOTPLUG
-static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
+static int w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
 {
 	struct w1_master *md = NULL;
 	struct w1_slave *sl = NULL;
@@ -377,7 +377,7 @@ static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffe
 		event_owner = "slave";
 		name = sl->name;
 	} else {
-		dev_dbg(dev, "Unknown hotplug event.\n");
+		dev_dbg(dev, "Unknown event.\n");
 		return -EINVAL;
 	}
 
@@ -386,18 +386,18 @@ static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffe
 	if (dev->driver != &w1_slave_driver || !sl)
 		return 0;
 
-	err = add_hotplug_env_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_FID=%02X", sl->reg_num.family);
+	err = add_uevent_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_FID=%02X", sl->reg_num.family);
 	if (err)
 		return err;
 
-	err = add_hotplug_env_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_SLAVE_ID=%024LX", (u64)sl->reg_num.id);
+	err = add_uevent_var(envp, num_envp, &cur_index, buffer, buffer_size, &cur_len, "W1_SLAVE_ID=%024LX", (u64)sl->reg_num.id);
 	if (err)
 		return err;
 
 	return 0;
 };
 #else
-static int w1_hotplug(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
+static int w1_uevent(struct device *dev, char **envp, int num_envp, char *buffer, int buffer_size)
 {
 	return 0;
 }
diff --git a/fs/partitions/check.c b/fs/partitions/check.c
index 8dc1822a7022..7187a57d51e8 100644
--- a/fs/partitions/check.c
+++ b/fs/partitions/check.c
@@ -226,7 +226,7 @@ static struct sysfs_ops part_sysfs_ops = {
 static ssize_t part_uevent_store(struct hd_struct * p,
 				 const char *page, size_t count)
 {
-	kobject_hotplug(&p->kobj, KOBJ_ADD);
+	kobject_uevent(&p->kobj, KOBJ_ADD);
 	return count;
 }
 static ssize_t part_dev_read(struct hd_struct * p, char *page)
@@ -360,7 +360,7 @@ void register_disk(struct gendisk *disk)
 	if ((err = kobject_add(&disk->kobj)))
 		return;
 	disk_sysfs_symlinks(disk);
-	kobject_hotplug(&disk->kobj, KOBJ_ADD);
+	kobject_uevent(&disk->kobj, KOBJ_ADD);
 
 	/* No minors to use for partitions */
 	if (disk->minors == 1) {
@@ -465,6 +465,6 @@ void del_gendisk(struct gendisk *disk)
 		sysfs_remove_link(&disk->driverfs_dev->kobj, "block");
 		put_device(disk->driverfs_dev);
 	}
-	kobject_hotplug(&disk->kobj, KOBJ_REMOVE);
+	kobject_uevent(&disk->kobj, KOBJ_REMOVE);
 	kobject_del(&disk->kobj);
 }
diff --git a/include/linux/device.h b/include/linux/device.h
index 17cbc6db67b4..0cdee78e5ce1 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -47,8 +47,8 @@ struct bus_type {
 	struct driver_attribute	* drv_attrs;
 
 	int		(*match)(struct device * dev, struct device_driver * drv);
-	int		(*hotplug) (struct device *dev, char **envp, 
-				    int num_envp, char *buffer, int buffer_size);
+	int		(*uevent)(struct device *dev, char **envp,
+				  int num_envp, char *buffer, int buffer_size);
 	int		(*suspend)(struct device * dev, pm_message_t state);
 	int		(*resume)(struct device * dev);
 };
@@ -151,7 +151,7 @@ struct class {
 	struct class_attribute		* class_attrs;
 	struct class_device_attribute	* class_dev_attrs;
 
-	int	(*hotplug)(struct class_device *dev, char **envp, 
+	int	(*uevent)(struct class_device *dev, char **envp,
 			   int num_envp, char *buffer, int buffer_size);
 
 	void	(*release)(struct class_device *dev);
@@ -209,9 +209,9 @@ extern int class_device_create_file(struct class_device *,
  * set, this will be called instead of the class specific release function.
  * Only use this if you want to override the default release function, like
  * when you are nesting class_device structures.
- * @hotplug: pointer to a hotplug function for this struct class_device.  If
- * set, this will be called instead of the class specific hotplug function.
- * Only use this if you want to override the default hotplug function, like
+ * @uevent: pointer to a uevent function for this struct class_device.  If
+ * set, this will be called instead of the class specific uevent function.
+ * Only use this if you want to override the default uevent function, like
  * when you are nesting class_device structures.
  */
 struct class_device {
@@ -227,7 +227,7 @@ struct class_device {
 	struct class_device	*parent;	/* parent of this child device, if there is one */
 
 	void	(*release)(struct class_device *dev);
-	int	(*hotplug)(struct class_device *dev, char **envp,
+	int	(*uevent)(struct class_device *dev, char **envp,
 			   int num_envp, char *buffer, int buffer_size);
 	char	class_id[BUS_ID_SIZE];	/* unique to this class */
 };
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index 2063c0839d4f..2d716080be4a 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -14,7 +14,7 @@ struct device;
 int request_firmware(const struct firmware **fw, const char *name,
 		     struct device *device);
 int request_firmware_nowait(
-	struct module *module, int hotplug,
+	struct module *module, int uevent,
 	const char *name, struct device *device, void *context,
 	void (*cont)(const struct firmware *fw, void *context));
 
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 5b08248fba72..8eb21f2f25e1 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -26,15 +26,14 @@
 #include <linux/kernel.h>
 #include <asm/atomic.h>
 
-#define KOBJ_NAME_LEN	20
-
-#define HOTPLUG_PATH_LEN	256
+#define KOBJ_NAME_LEN			20
+#define UEVENT_HELPER_PATH_LEN		256
 
 /* path to the userspace helper executed on an event */
-extern char hotplug_path[];
+extern char uevent_helper[];
 
-/* counter to tag the hotplug event, read only except for the kobject core */
-extern u64 hotplug_seqnum;
+/* counter to tag the uevent, read only except for the kobject core */
+extern u64 uevent_seqnum;
 
 /* the actions here must match the proper string in lib/kobject_uevent.c */
 typedef int __bitwise kobject_action_t;
@@ -101,15 +100,14 @@ struct kobj_type {
  *	of object; multiple ksets can belong to one subsystem. All 
  *	ksets of a subsystem share the subsystem's lock.
  *
- *      Each kset can support hotplugging; if it does, it will be given
- *      the opportunity to filter out specific kobjects from being
- *      reported, as well as to add its own "data" elements to the
- *      environment being passed to the hotplug helper.
+ *	Each kset can support specific event variables; it can
+ *	supress the event generation or add subsystem specific
+ *	variables carried with the event.
  */
-struct kset_hotplug_ops {
+struct kset_uevent_ops {
 	int (*filter)(struct kset *kset, struct kobject *kobj);
 	const char *(*name)(struct kset *kset, struct kobject *kobj);
-	int (*hotplug)(struct kset *kset, struct kobject *kobj, char **envp,
+	int (*uevent)(struct kset *kset, struct kobject *kobj, char **envp,
 			int num_envp, char *buffer, int buffer_size);
 };
 
@@ -119,7 +117,7 @@ struct kset {
 	struct list_head	list;
 	spinlock_t		list_lock;
 	struct kobject		kobj;
-	struct kset_hotplug_ops	* hotplug_ops;
+	struct kset_uevent_ops	* uevent_ops;
 };
 
 
@@ -167,20 +165,20 @@ struct subsystem {
 	struct rw_semaphore	rwsem;
 };
 
-#define decl_subsys(_name,_type,_hotplug_ops) \
+#define decl_subsys(_name,_type,_uevent_ops) \
 struct subsystem _name##_subsys = { \
 	.kset = { \
 		.kobj = { .name = __stringify(_name) }, \
 		.ktype = _type, \
-		.hotplug_ops =_hotplug_ops, \
+		.uevent_ops =_uevent_ops, \
 	} \
 }
-#define decl_subsys_name(_varname,_name,_type,_hotplug_ops) \
+#define decl_subsys_name(_varname,_name,_type,_uevent_ops) \
 struct subsystem _varname##_subsys = { \
 	.kset = { \
 		.kobj = { .name = __stringify(_name) }, \
 		.ktype = _type, \
-		.hotplug_ops =_hotplug_ops, \
+		.uevent_ops =_uevent_ops, \
 	} \
 }
 
@@ -256,16 +254,16 @@ extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
 extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
 #ifdef CONFIG_HOTPLUG
-void kobject_hotplug(struct kobject *kobj, enum kobject_action action);
+void kobject_uevent(struct kobject *kobj, enum kobject_action action);
 
-int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
+int add_uevent_var(char **envp, int num_envp, int *cur_index,
 			char *buffer, int buffer_size, int *cur_len,
 			const char *format, ...)
 	__attribute__((format (printf, 7, 8)));
 #else
-static inline void kobject_hotplug(struct kobject *kobj, enum kobject_action action) { }
+static inline void kobject_uevent(struct kobject *kobj, enum kobject_action action) { }
 
-static inline int add_hotplug_env_var(char **envp, int num_envp, int *cur_index, 
+static inline int add_uevent_var(char **envp, int num_envp, int *cur_index,
 				      char *buffer, int buffer_size, int *cur_len, 
 				      const char *format, ...)
 { return 0; }
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 4be34ef8c2f7..501564264518 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -124,7 +124,7 @@ enum
 	KERN_OVERFLOWUID=46,	/* int: overflow UID */
 	KERN_OVERFLOWGID=47,	/* int: overflow GID */
 	KERN_SHMPATH=48,	/* string: path to shm fs */
-	KERN_HOTPLUG=49,	/* string: path to hotplug policy agent */
+	KERN_HOTPLUG=49,	/* string: path to uevent helper (deprecated) */
 	KERN_IEEE_EMULATION_WARNINGS=50, /* int: unimplemented ieee instructions */
 	KERN_S390_USER_DEBUG_LOGGING=51,  /* int: dumps of user faults */
 	KERN_CORE_USES_PID=52,		/* int: use core or core.%pid */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index d81b050e5955..7a20997e8071 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -225,7 +225,7 @@ struct usb_interface_cache {
  * Device drivers should not attempt to activate configurations.  The choice
  * of which configuration to install is a policy decision based on such
  * considerations as available power, functionality provided, and the user's
- * desires (expressed through hotplug scripts).  However, drivers can call
+ * desires (expressed through userspace tools).  However, drivers can call
  * usb_reset_configuration() to reinitialize the current configuration and
  * all its interfaces.
  */
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index e975a76a9d5b..bfb4a7a54e22 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -26,23 +26,23 @@ static struct subsys_attribute _name##_attr = \
 /* current uevent sequence number */
 static ssize_t uevent_seqnum_show(struct subsystem *subsys, char *page)
 {
-	return sprintf(page, "%llu\n", (unsigned long long)hotplug_seqnum);
+	return sprintf(page, "%llu\n", (unsigned long long)uevent_seqnum);
 }
 KERNEL_ATTR_RO(uevent_seqnum);
 
 /* uevent helper program, used during early boo */
 static ssize_t uevent_helper_show(struct subsystem *subsys, char *page)
 {
-	return sprintf(page, "%s\n", hotplug_path);
+	return sprintf(page, "%s\n", uevent_helper);
 }
 static ssize_t uevent_helper_store(struct subsystem *subsys, const char *page, size_t count)
 {
-	if (count+1 > HOTPLUG_PATH_LEN)
+	if (count+1 > UEVENT_HELPER_PATH_LEN)
 		return -ENOENT;
-	memcpy(hotplug_path, page, count);
-	hotplug_path[count] = '\0';
-	if (count && hotplug_path[count-1] == '\n')
-		hotplug_path[count-1] = '\0';
+	memcpy(uevent_helper, page, count);
+	uevent_helper[count] = '\0';
+	if (count && uevent_helper[count-1] == '\n')
+		uevent_helper[count-1] = '\0';
 	return count;
 }
 KERNEL_ATTR_RW(uevent_helper);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 6a51e25d4466..345f4a1d533f 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -395,8 +395,8 @@ static ctl_table kern_table[] = {
 	{
 		.ctl_name	= KERN_HOTPLUG,
 		.procname	= "hotplug",
-		.data		= &hotplug_path,
-		.maxlen		= HOTPLUG_PATH_LEN,
+		.data		= &uevent_helper,
+		.maxlen		= UEVENT_HELPER_PATH_LEN,
 		.mode		= 0644,
 		.proc_handler	= &proc_dostring,
 		.strategy	= &sysctl_string,
diff --git a/lib/kobject.c b/lib/kobject.c
index a181abed89f6..7a0e6809490d 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -207,7 +207,7 @@ int kobject_register(struct kobject * kobj)
 			       kobject_name(kobj),error);
 			dump_stack();
 		} else
-			kobject_hotplug(kobj, KOBJ_ADD);
+			kobject_uevent(kobj, KOBJ_ADD);
 	} else
 		error = -EINVAL;
 	return error;
@@ -312,7 +312,7 @@ void kobject_del(struct kobject * kobj)
 void kobject_unregister(struct kobject * kobj)
 {
 	pr_debug("kobject %s: unregistering\n",kobject_name(kobj));
-	kobject_hotplug(kobj, KOBJ_REMOVE);
+	kobject_uevent(kobj, KOBJ_REMOVE);
 	kobject_del(kobj);
 	kobject_put(kobj);
 }
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index dd061da3aba9..01479e5c6d18 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -22,12 +22,12 @@
 #include <linux/kobject.h>
 #include <net/sock.h>
 
-#define BUFFER_SIZE	1024	/* buffer for the hotplug env */
+#define BUFFER_SIZE	1024	/* buffer for the variables */
 #define NUM_ENVP	32	/* number of env pointers */
 
 #if defined(CONFIG_HOTPLUG)
-char hotplug_path[HOTPLUG_PATH_LEN] = "/sbin/hotplug";
-u64 hotplug_seqnum;
+char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
+u64 uevent_seqnum;
 static DEFINE_SPINLOCK(sequence_lock);
 static struct sock *uevent_sock;
 
@@ -50,12 +50,12 @@ static char *action_to_string(enum kobject_action action)
 }
 
 /**
- * kobject_hotplug - notify userspace by executing /sbin/hotplug
+ * kobject_uevent - notify userspace by ending an uevent
  *
- * @action: action that is happening (usually "ADD" or "REMOVE")
+ * @action: action that is happening (usually KOBJ_ADD and KOBJ_REMOVE)
  * @kobj: struct kobject that the action is happening to
  */
-void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
+void kobject_uevent(struct kobject *kobj, enum kobject_action action)
 {
 	char **envp;
 	char *buffer;
@@ -65,7 +65,7 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 	const char *subsystem;
 	struct kobject *top_kobj;
 	struct kset *kset;
-	struct kset_hotplug_ops *hotplug_ops;
+	struct kset_uevent_ops *uevent_ops;
 	u64 seq;
 	char *seq_buff;
 	int i = 0;
@@ -88,11 +88,11 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 		return;
 
 	kset = top_kobj->kset;
-	hotplug_ops = kset->hotplug_ops;
+	uevent_ops = kset->uevent_ops;
 
 	/*  skip the event, if the filter returns zero. */
-	if (hotplug_ops && hotplug_ops->filter)
-		if (!hotplug_ops->filter(kset, kobj))
+	if (uevent_ops && uevent_ops->filter)
+		if (!uevent_ops->filter(kset, kobj))
 			return;
 
 	/* environment index */
@@ -111,8 +111,8 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 		goto exit;
 
 	/* originating subsystem */
-	if (hotplug_ops && hotplug_ops->name)
-		subsystem = hotplug_ops->name(kset, kobj);
+	if (uevent_ops && uevent_ops->name)
+		subsystem = uevent_ops->name(kset, kobj);
 	else
 		subsystem = kobject_name(&kset->kobj);
 
@@ -134,12 +134,12 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 	scratch += strlen("SEQNUM=18446744073709551616") + 1;
 
 	/* let the kset specific function add its stuff */
-	if (hotplug_ops && hotplug_ops->hotplug) {
-		retval = hotplug_ops->hotplug (kset, kobj,
+	if (uevent_ops && uevent_ops->uevent) {
+		retval = uevent_ops->uevent(kset, kobj,
 				  &envp[i], NUM_ENVP - i, scratch,
 				  BUFFER_SIZE - (scratch - buffer));
 		if (retval) {
-			pr_debug ("%s - hotplug() returned %d\n",
+			pr_debug ("%s - uevent() returned %d\n",
 				  __FUNCTION__, retval);
 			goto exit;
 		}
@@ -147,7 +147,7 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 
 	/* we will send an event, request a new sequence number */
 	spin_lock(&sequence_lock);
-	seq = ++hotplug_seqnum;
+	seq = ++uevent_seqnum;
 	spin_unlock(&sequence_lock);
 	sprintf(seq_buff, "SEQNUM=%llu", (unsigned long long)seq);
 
@@ -177,10 +177,10 @@ void kobject_hotplug(struct kobject *kobj, enum kobject_action action)
 	}
 
 	/* call uevent_helper, usually only enabled during early boot */
-	if (hotplug_path[0]) {
+	if (uevent_helper[0]) {
 		char *argv [3];
 
-		argv [0] = hotplug_path;
+		argv [0] = uevent_helper;
 		argv [1] = (char *)subsystem;
 		argv [2] = NULL;
 		call_usermodehelper (argv[0], argv, envp, 0);
@@ -192,39 +192,39 @@ exit:
 	kfree(envp);
 	return;
 }
-EXPORT_SYMBOL(kobject_hotplug);
+EXPORT_SYMBOL_GPL(kobject_uevent);
 
 /**
- * add_hotplug_env_var - helper for creating hotplug environment variables
+ * add_uevent_var - helper for creating event variables
  * @envp: Pointer to table of environment variables, as passed into
- * hotplug() method.
+ * uevent() method.
  * @num_envp: Number of environment variable slots available, as
- * passed into hotplug() method.
+ * passed into uevent() method.
  * @cur_index: Pointer to current index into @envp.  It should be
- * initialized to 0 before the first call to add_hotplug_env_var(),
+ * initialized to 0 before the first call to add_uevent_var(),
  * and will be incremented on success.
  * @buffer: Pointer to buffer for environment variables, as passed
- * into hotplug() method.
- * @buffer_size: Length of @buffer, as passed into hotplug() method.
+ * into uevent() method.
+ * @buffer_size: Length of @buffer, as passed into uevent() method.
  * @cur_len: Pointer to current length of space used in @buffer.
  * Should be initialized to 0 before the first call to
- * add_hotplug_env_var(), and will be incremented on success.
+ * add_uevent_var(), and will be incremented on success.
  * @format: Format for creating environment variable (of the form
  * "XXX=%x") for snprintf().
  *
  * Returns 0 if environment variable was added successfully or -ENOMEM
  * if no space was available.
  */
-int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
-			char *buffer, int buffer_size, int *cur_len,
-			const char *format, ...)
+int add_uevent_var(char **envp, int num_envp, int *cur_index,
+		   char *buffer, int buffer_size, int *cur_len,
+		   const char *format, ...)
 {
 	va_list args;
 
 	/*
 	 * We check against num_envp - 1 to make sure there is at
-	 * least one slot left after we return, since the hotplug
-	 * method needs to set the last slot to NULL.
+	 * least one slot left after we return, since kobject_uevent()
+	 * needs to set the last slot to NULL.
 	 */
 	if (*cur_index >= num_envp - 1)
 		return -ENOMEM;
@@ -243,7 +243,7 @@ int add_hotplug_env_var(char **envp, int num_envp, int *cur_index,
 	(*cur_index)++;
 	return 0;
 }
-EXPORT_SYMBOL(add_hotplug_env_var);
+EXPORT_SYMBOL_GPL(add_uevent_var);
 
 static int __init kobject_uevent_init(void)
 {
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index bd7568ac87fc..0ed38740388c 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -78,7 +78,7 @@ static struct class_device_attribute *bt_attrs[] = {
 };
 
 #ifdef CONFIG_HOTPLUG
-static int bt_hotplug(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
+static int bt_uevent(struct class_device *cdev, char **envp, int num_envp, char *buf, int size)
 {
 	struct hci_dev *hdev = class_get_devdata(cdev);
 	int n, i = 0;
@@ -107,7 +107,7 @@ struct class bt_class = {
 	.name		= "bluetooth",
 	.release	= bt_release,
 #ifdef CONFIG_HOTPLUG
-	.hotplug	= bt_hotplug,
+	.uevent		= bt_uevent,
 #endif
 };
 
diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c
index f6a19d53eaeb..2ebdc23bbe26 100644
--- a/net/bridge/br_sysfs_if.c
+++ b/net/bridge/br_sysfs_if.c
@@ -248,7 +248,7 @@ int br_sysfs_addif(struct net_bridge_port *p)
 	if (err)
 		goto out2;
 
-	kobject_hotplug(&p->kobj, KOBJ_ADD);
+	kobject_uevent(&p->kobj, KOBJ_ADD);
 	return 0;
  out2:
 	kobject_del(&p->kobj);
@@ -260,7 +260,7 @@ void br_sysfs_removeif(struct net_bridge_port *p)
 {
 	pr_debug("br_sysfs_removeif\n");
 	sysfs_remove_link(&p->br->ifobj, p->dev->name);
-	kobject_hotplug(&p->kobj, KOBJ_REMOVE);
+	kobject_uevent(&p->kobj, KOBJ_REMOVE);
 	kobject_del(&p->kobj);
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index e2137f3e489d..198655dd9a77 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -369,14 +369,14 @@ static struct attribute_group wireless_group = {
 #endif
 
 #ifdef CONFIG_HOTPLUG
-static int netdev_hotplug(struct class_device *cd, char **envp,
-			  int num_envp, char *buf, int size)
+static int netdev_uevent(struct class_device *cd, char **envp,
+			 int num_envp, char *buf, int size)
 {
 	struct net_device *dev = to_net_dev(cd);
 	int i = 0;
 	int n;
 
-	/* pass interface in env to hotplug. */
+	/* pass interface to uevent. */
 	envp[i++] = buf;
 	n = snprintf(buf, size, "INTERFACE=%s", dev->name) + 1;
 	buf += n;
@@ -408,7 +408,7 @@ static struct class net_class = {
 	.name = "net",
 	.release = netdev_release,
 #ifdef CONFIG_HOTPLUG
-	.hotplug = netdev_hotplug,
+	.uevent = netdev_uevent,
 #endif
 };
 
-- 
cgit v1.2.3


From f743ca5e10f4145e0b3e6d11b9b46171e16af7ce Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Tue, 22 Nov 2005 23:36:13 -0800
Subject: [PATCH] kobject_uevent CONFIG_NET=n fix

lib/lib.a(kobject_uevent.o)(.text+0x25f): In function `kobject_uevent':
: undefined reference to `__alloc_skb'
lib/lib.a(kobject_uevent.o)(.text+0x2a1): In function `kobject_uevent':
: undefined reference to `skb_over_panic'
lib/lib.a(kobject_uevent.o)(.text+0x31d): In function `kobject_uevent':
: undefined reference to `skb_over_panic'
lib/lib.a(kobject_uevent.o)(.text+0x356): In function `kobject_uevent':
: undefined reference to `netlink_broadcast'
lib/lib.a(kobject_uevent.o)(.init.text+0x9): In function `kobject_uevent_init':
: undefined reference to `netlink_kernel_create'
make: *** [.tmp_vmlinux1] Error 1

Netlink is unconditionally enabled if CONFIG_NET, so that's OK.

kobject_uevent.o is compiled even if !CONFIG_HOTPLUG, which is lazy.

Let's compound the sin.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h | 2 +-
 kernel/ksysfs.c         | 3 +++
 lib/kobject_uevent.c    | 4 +---
 3 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 8eb21f2f25e1..2a8d8da70961 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -253,7 +253,7 @@ struct subsys_attribute {
 extern int subsys_create_file(struct subsystem * , struct subsys_attribute *);
 extern void subsys_remove_file(struct subsystem * , struct subsys_attribute *);
 
-#ifdef CONFIG_HOTPLUG
+#if defined(CONFIG_HOTPLUG) & defined(CONFIG_NET)
 void kobject_uevent(struct kobject *kobj, enum kobject_action action);
 
 int add_uevent_var(char **envp, int num_envp, int *cur_index,
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index bfb4a7a54e22..99af8b05eeaa 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -15,6 +15,9 @@
 #include <linux/module.h>
 #include <linux/init.h>
 
+u64 uevent_seqnum;
+char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
+
 #define KERNEL_ATTR_RO(_name) \
 static struct subsys_attribute _name##_attr = __ATTR_RO(_name)
 
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 01479e5c6d18..f56e27ae9d52 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,9 +25,7 @@
 #define BUFFER_SIZE	1024	/* buffer for the variables */
 #define NUM_ENVP	32	/* number of env pointers */
 
-#if defined(CONFIG_HOTPLUG)
-char uevent_helper[UEVENT_HELPER_PATH_LEN] = "/sbin/hotplug";
-u64 uevent_seqnum;
+#if defined(CONFIG_HOTPLUG) && defined(CONFIG_NET)
 static DEFINE_SPINLOCK(sequence_lock);
 static struct sock *uevent_sock;
 
-- 
cgit v1.2.3


From 1d8f430c15b3a345db990e285742c67c2f52f9a6 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Wed, 7 Dec 2005 21:40:34 +0100
Subject: [PATCH] Input: add modalias support

Here's the patch for modalias support for input classes.  It uses
comma-separated numbers, and doesn't describe all the potential keys (no
module currently cares, and that would make the strings huge).  The
changes to input.h are to move the definitions needed by file2alias
outside __KERNEL__.  I chose not to move those definitions to
mod_devicetable.h, because there are so many that it might break compile
of something else in the kernel.

The rest is fairly straightforward.

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
CC: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/input/input.c    | 39 ++++++++++++++++++++++++
 include/linux/input.h    | 79 +++++++++++++++++++++++++-----------------------
 scripts/mod/file2alias.c | 62 ++++++++++++++++++++++++++++++++++++-
 3 files changed, 141 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/input/input.c b/drivers/input/input.c
index 2d37b394e384..ef5824c8846b 100644
--- a/drivers/input/input.c
+++ b/drivers/input/input.c
@@ -528,10 +528,49 @@ INPUT_DEV_STRING_ATTR_SHOW(name);
 INPUT_DEV_STRING_ATTR_SHOW(phys);
 INPUT_DEV_STRING_ATTR_SHOW(uniq);
 
+static int print_modalias_bits(char *buf, char prefix, unsigned long *arr,
+			       unsigned int min, unsigned int max)
+{
+	int len, i;
+
+	len = sprintf(buf, "%c", prefix);
+	for (i = min; i < max; i++)
+		if (arr[LONG(i)] & BIT(i))
+			len += sprintf(buf+len, "%X,", i);
+	return len;
+}
+
+static ssize_t input_dev_show_modalias(struct class_device *dev, char *buf)
+{
+	struct input_dev *id = to_input_dev(dev);
+	ssize_t len = 0;
+
+	len += sprintf(buf+len, "input:b%04Xv%04Xp%04Xe%04X-",
+		       id->id.bustype,
+		       id->id.vendor,
+		       id->id.product,
+		       id->id.version);
+
+	len += print_modalias_bits(buf+len, 'e', id->evbit, 0, EV_MAX);
+	len += print_modalias_bits(buf+len, 'k', id->keybit,
+				   KEY_MIN_INTERESTING, KEY_MAX);
+	len += print_modalias_bits(buf+len, 'r', id->relbit, 0, REL_MAX);
+	len += print_modalias_bits(buf+len, 'a', id->absbit, 0, ABS_MAX);
+	len += print_modalias_bits(buf+len, 'm', id->mscbit, 0, MSC_MAX);
+	len += print_modalias_bits(buf+len, 'l', id->ledbit, 0, LED_MAX);
+	len += print_modalias_bits(buf+len, 's', id->sndbit, 0, SND_MAX);
+	len += print_modalias_bits(buf+len, 'f', id->ffbit, 0, FF_MAX);
+	len += print_modalias_bits(buf+len, 'w', id->swbit, 0, SW_MAX);
+	len += sprintf(buf+len, "\n");
+	return len;
+}
+static CLASS_DEVICE_ATTR(modalias, S_IRUGO, input_dev_show_modalias, NULL);
+
 static struct attribute *input_dev_attrs[] = {
 	&class_device_attr_name.attr,
 	&class_device_attr_phys.attr,
 	&class_device_attr_uniq.attr,
+	&class_device_attr_modalias.attr,
 	NULL
 };
 
diff --git a/include/linux/input.h b/include/linux/input.h
index 3c5823368ddb..bef08551a33b 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -18,6 +18,7 @@
 #include <sys/ioctl.h>
 #include <asm/types.h>
 #endif
+#include <linux/mod_devicetable.h>
 
 /*
  * The event structure itself
@@ -511,6 +512,8 @@ struct input_absinfo {
 #define KEY_FN_S		0x1e3
 #define KEY_FN_B		0x1e4
 
+/* We avoid low common keys in module aliases so they don't get huge. */
+#define KEY_MIN_INTERESTING	KEY_MUTE
 #define KEY_MAX			0x1ff
 
 /*
@@ -793,6 +796,44 @@ struct ff_effect {
 
 #define FF_MAX		0x7f
 
+struct input_device_id {
+
+	kernel_ulong_t flags;
+
+	struct input_id id;
+
+	kernel_ulong_t evbit[EV_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t keybit[KEY_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t relbit[REL_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t absbit[ABS_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t mscbit[MSC_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t ledbit[LED_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t sndbit[SND_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t ffbit[FF_MAX/BITS_PER_LONG+1];
+	kernel_ulong_t swbit[SW_MAX/BITS_PER_LONG+1];
+
+	kernel_ulong_t driver_info;
+};
+
+/*
+ * Structure for hotplug & device<->driver matching.
+ */
+
+#define INPUT_DEVICE_ID_MATCH_BUS	1
+#define INPUT_DEVICE_ID_MATCH_VENDOR	2
+#define INPUT_DEVICE_ID_MATCH_PRODUCT	4
+#define INPUT_DEVICE_ID_MATCH_VERSION	8
+
+#define INPUT_DEVICE_ID_MATCH_EVBIT	0x010
+#define INPUT_DEVICE_ID_MATCH_KEYBIT	0x020
+#define INPUT_DEVICE_ID_MATCH_RELBIT	0x040
+#define INPUT_DEVICE_ID_MATCH_ABSBIT	0x080
+#define INPUT_DEVICE_ID_MATCH_MSCIT	0x100
+#define INPUT_DEVICE_ID_MATCH_LEDBIT	0x200
+#define INPUT_DEVICE_ID_MATCH_SNDBIT	0x400
+#define INPUT_DEVICE_ID_MATCH_FFBIT	0x800
+#define INPUT_DEVICE_ID_MATCH_SWBIT	0x1000
+
 #ifdef __KERNEL__
 
 /*
@@ -901,49 +942,11 @@ struct input_dev {
 };
 #define to_input_dev(d) container_of(d, struct input_dev, cdev)
 
-/*
- * Structure for hotplug & device<->driver matching.
- */
-
-#define INPUT_DEVICE_ID_MATCH_BUS	1
-#define INPUT_DEVICE_ID_MATCH_VENDOR	2
-#define INPUT_DEVICE_ID_MATCH_PRODUCT	4
-#define INPUT_DEVICE_ID_MATCH_VERSION	8
-
-#define INPUT_DEVICE_ID_MATCH_EVBIT	0x010
-#define INPUT_DEVICE_ID_MATCH_KEYBIT	0x020
-#define INPUT_DEVICE_ID_MATCH_RELBIT	0x040
-#define INPUT_DEVICE_ID_MATCH_ABSBIT	0x080
-#define INPUT_DEVICE_ID_MATCH_MSCIT	0x100
-#define INPUT_DEVICE_ID_MATCH_LEDBIT	0x200
-#define INPUT_DEVICE_ID_MATCH_SNDBIT	0x400
-#define INPUT_DEVICE_ID_MATCH_FFBIT	0x800
-#define INPUT_DEVICE_ID_MATCH_SWBIT	0x1000
-
 #define INPUT_DEVICE_ID_MATCH_DEVICE\
 	(INPUT_DEVICE_ID_MATCH_BUS | INPUT_DEVICE_ID_MATCH_VENDOR | INPUT_DEVICE_ID_MATCH_PRODUCT)
 #define INPUT_DEVICE_ID_MATCH_DEVICE_AND_VERSION\
 	(INPUT_DEVICE_ID_MATCH_DEVICE | INPUT_DEVICE_ID_MATCH_VERSION)
 
-struct input_device_id {
-
-	unsigned long flags;
-
-	struct input_id id;
-
-	unsigned long evbit[NBITS(EV_MAX)];
-	unsigned long keybit[NBITS(KEY_MAX)];
-	unsigned long relbit[NBITS(REL_MAX)];
-	unsigned long absbit[NBITS(ABS_MAX)];
-	unsigned long mscbit[NBITS(MSC_MAX)];
-	unsigned long ledbit[NBITS(LED_MAX)];
-	unsigned long sndbit[NBITS(SND_MAX)];
-	unsigned long ffbit[NBITS(FF_MAX)];
-	unsigned long swbit[NBITS(SW_MAX)];
-
-	unsigned long driver_info;
-};
-
 struct input_handle;
 
 struct input_handler {
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index e3d144a3f10b..e0eedffe565b 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -16,8 +16,10 @@
  * use either stdint.h or inttypes.h for the rest. */
 #if KERNEL_ELFCLASS == ELFCLASS32
 typedef Elf32_Addr	kernel_ulong_t;
+#define BITS_PER_LONG 32
 #else
 typedef Elf64_Addr	kernel_ulong_t;
+#define BITS_PER_LONG 64
 #endif
 #ifdef __sun__
 #include <inttypes.h>
@@ -35,6 +37,7 @@ typedef unsigned char	__u8;
  * even potentially has different endianness and word sizes, since 
  * we handle those differences explicitly below */
 #include "../../include/linux/mod_devicetable.h"
+#include "../../include/linux/input.h"
 
 #define ADD(str, sep, cond, field)                              \
 do {                                                            \
@@ -366,6 +369,61 @@ static int do_i2c_entry(const char *filename, struct i2c_device_id *i2c, char *a
 	return 1;
 }
 
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+
+static void do_input(char *alias,
+		     kernel_ulong_t *arr, unsigned int min, unsigned int max)
+{
+	unsigned int i;
+	for (i = min; i < max; i++) {
+		if (arr[i/BITS_PER_LONG] & (1 << (i%BITS_PER_LONG)))
+			sprintf(alias+strlen(alias), "%X,*", i);
+	}
+}
+
+/* input:b0v0p0e0-eXkXrXaXmXlXsXfXwX where X is comma-separated %02X. */
+static int do_input_entry(const char *filename, struct input_device_id *id,
+			  char *alias)
+{
+	sprintf(alias, "input:");
+
+	ADD(alias, "b", id->flags&INPUT_DEVICE_ID_MATCH_BUS, id->id.bustype);
+	ADD(alias, "v", id->flags&INPUT_DEVICE_ID_MATCH_VENDOR, id->id.vendor);
+	ADD(alias, "p", id->flags&INPUT_DEVICE_ID_MATCH_PRODUCT,
+	    id->id.product);
+	ADD(alias, "e", id->flags&INPUT_DEVICE_ID_MATCH_VERSION,
+	    id->id.version);
+
+	sprintf(alias + strlen(alias), "-e*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_EVBIT)
+		do_input(alias, id->evbit, 0, EV_MAX);
+	sprintf(alias + strlen(alias), "k*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_KEYBIT)
+		do_input(alias, id->keybit, KEY_MIN_INTERESTING, KEY_MAX);
+	sprintf(alias + strlen(alias), "r*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_RELBIT)
+		do_input(alias, id->relbit, 0, REL_MAX);
+	sprintf(alias + strlen(alias), "a*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_ABSBIT)
+		do_input(alias, id->absbit, 0, ABS_MAX);
+	sprintf(alias + strlen(alias), "m*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_MSCIT)
+		do_input(alias, id->mscbit, 0, MSC_MAX);
+	sprintf(alias + strlen(alias), "l*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_LEDBIT)
+		do_input(alias, id->ledbit, 0, LED_MAX);
+	sprintf(alias + strlen(alias), "s*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_SNDBIT)
+		do_input(alias, id->sndbit, 0, SND_MAX);
+	sprintf(alias + strlen(alias), "f*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_FFBIT)
+		do_input(alias, id->ffbit, 0, SND_MAX);
+	sprintf(alias + strlen(alias), "w*");
+	if (id->flags&INPUT_DEVICE_ID_MATCH_SWBIT)
+		do_input(alias, id->swbit, 0, SW_MAX);
+	return 1;
+}
+
 /* Ignore any prefix, eg. v850 prepends _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
@@ -453,7 +511,9 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 	else if (sym_is(symname, "__mod_i2c_device_table"))
 		do_table(symval, sym->st_size, sizeof(struct i2c_device_id),
 			 do_i2c_entry, mod);
-
+	else if (sym_is(symname, "__mod_input_device_table"))
+		do_table(symval, sym->st_size, sizeof(struct input_device_id),
+			 do_input_entry, mod);
 }
 
 /* Now add out buffered information to the generated C source */
-- 
cgit v1.2.3


From e39b84337b8aed3044683a57741a19e5002225b9 Mon Sep 17 00:00:00 2001
From: Rusty Russell <rusty@rustcorp.com.au>
Date: Sat, 10 Dec 2005 22:48:20 +1100
Subject: [PATCH] Input: fix add modalias support build error

Fix build when scripts/mod/file2alias.c includes linux/input.h, which
tries to include /usr/include/linux/mod_devicetable.h:

 In file included from scripts/mod/file2alias.c:40:
 include/linux/input.h:21:35: linux/mod_devicetable.h: No such file or directory
 make[2]: *** [scripts/mod/file2alias.o] Error 1

Signed-off-by: Rusty Russell <rusty@rustcorp.com.au>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/input.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/input.h b/include/linux/input.h
index bef08551a33b..6d4cc3c110d6 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -13,12 +13,12 @@
 #include <linux/time.h>
 #include <linux/list.h>
 #include <linux/device.h>
+#include <linux/mod_devicetable.h>
 #else
 #include <sys/time.h>
 #include <sys/ioctl.h>
 #include <asm/types.h>
 #endif
-#include <linux/mod_devicetable.h>
 
 /*
  * The event structure itself
-- 
cgit v1.2.3


From 93ce3061be212f6280e7ccafa9a7f698a95c6d75 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dtor_core@ameritech.net>
Date: Sat, 10 Dec 2005 01:36:27 -0500
Subject: [PATCH] Driver Core: Add platform_device_del()

Driver core: add platform_device_del function

Having platform_device_del90 allows more straightforward error
handling code in drivers registering platform devices.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 45 +++++++++++++++++++++++++++--------------
 include/linux/platform_device.h |  1 +
 2 files changed, 31 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 1091af1cbb58..95ecfc490d54 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -168,7 +168,7 @@ struct platform_device *platform_device_alloc(const char *name, unsigned int id)
 		pa->pdev.dev.release = platform_device_release;
 	}
 
-	return pa ? &pa->pdev : NULL;	
+	return pa ? &pa->pdev : NULL;
 }
 EXPORT_SYMBOL_GPL(platform_device_alloc);
 
@@ -282,24 +282,13 @@ int platform_device_add(struct platform_device *pdev)
 EXPORT_SYMBOL_GPL(platform_device_add);
 
 /**
- *	platform_device_register - add a platform-level device
- *	@pdev:	platform device we're adding
- *
- */
-int platform_device_register(struct platform_device * pdev)
-{
-	device_initialize(&pdev->dev);
-	return platform_device_add(pdev);
-}
-
-/**
- *	platform_device_unregister - remove a platform-level device
+ *	platform_device_del - remove a platform-level device
  *	@pdev:	platform device we're removing
  *
  *	Note that this function will also release all memory- and port-based
  *	resources owned by the device (@dev->resource).
  */
-void platform_device_unregister(struct platform_device * pdev)
+void platform_device_del(struct platform_device *pdev)
 {
 	int i;
 
@@ -310,9 +299,35 @@ void platform_device_unregister(struct platform_device * pdev)
 				release_resource(r);
 		}
 
-		device_unregister(&pdev->dev);
+		device_del(&pdev->dev);
 	}
 }
+EXPORT_SYMBOL_GPL(platform_device_del);
+
+/**
+ *	platform_device_register - add a platform-level device
+ *	@pdev:	platform device we're adding
+ *
+ */
+int platform_device_register(struct platform_device * pdev)
+{
+	device_initialize(&pdev->dev);
+	return platform_device_add(pdev);
+}
+
+/**
+ *	platform_device_unregister - unregister a platform-level device
+ *	@pdev:	platform device we're unregistering
+ *
+ *	Unregistration is done in 2 steps. Fisrt we release all resources
+ *	and remove it from the sybsystem, then we drop reference count by
+ *	calling platform_device_put().
+ */
+void platform_device_unregister(struct platform_device * pdev)
+{
+	platform_device_del(pdev);
+	platform_device_put(pdev);
+}
 
 /**
  *	platform_device_register_simple
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 17e336f40b47..782090c68932 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -41,6 +41,7 @@ extern struct platform_device *platform_device_alloc(const char *name, unsigned
 extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num);
 extern int platform_device_add_data(struct platform_device *pdev, void *data, size_t size);
 extern int platform_device_add(struct platform_device *pdev);
+extern void platform_device_del(struct platform_device *pdev);
 extern void platform_device_put(struct platform_device *pdev);
 
 struct platform_driver {
-- 
cgit v1.2.3


From c1d10adb4a521de5760112853f42aaeefcec96eb Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Thu, 5 Jan 2006 12:19:05 -0800
Subject: [NETFILTER]: Add ctnetlink port for nf_conntrack

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h  |    3 +
 include/net/netfilter/nf_conntrack.h           |   31 +
 include/net/netfilter/nf_conntrack_helper.h    |    2 +
 include/net/netfilter/nf_conntrack_l3proto.h   |   15 +-
 include/net/netfilter/nf_conntrack_protocol.h  |   26 +-
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c |   47 +
 net/ipv4/netfilter/nf_conntrack_proto_icmp.c   |   97 +-
 net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c |   47 +
 net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c |   77 +-
 net/netfilter/Kconfig                          |    7 +
 net/netfilter/Makefile                         |    3 +
 net/netfilter/nf_conntrack_core.c              |  232 +++-
 net/netfilter/nf_conntrack_netlink.c           | 1642 ++++++++++++++++++++++++
 net/netfilter/nf_conntrack_proto_tcp.c         |   71 +
 net/netfilter/nf_conntrack_proto_udp.c         |   10 +
 net/netfilter/nf_conntrack_standalone.c        |   42 +-
 16 files changed, 2289 insertions(+), 63 deletions(-)
 create mode 100644 net/netfilter/nf_conntrack_netlink.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index 116fcaced909..b8e9a5b6fb1e 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -64,6 +64,9 @@ enum ctattr_l4proto {
 	CTA_PROTO_ICMP_ID,
 	CTA_PROTO_ICMP_TYPE,
 	CTA_PROTO_ICMP_CODE,
+	CTA_PROTO_ICMPV6_ID,
+	CTA_PROTO_ICMPV6_TYPE,
+	CTA_PROTO_ICMPV6_CODE,
 	__CTA_PROTO_MAX
 };
 #define CTA_PROTO_MAX (__CTA_PROTO_MAX - 1)
diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h
index cc4825610795..64b82b74a650 100644
--- a/include/net/netfilter/nf_conntrack.h
+++ b/include/net/netfilter/nf_conntrack.h
@@ -94,6 +94,9 @@ struct nf_conn
 	/* Current number of expected connections */
 	unsigned int expecting;
 
+	/* Unique ID that identifies this conntrack*/
+	unsigned int id;
+
 	/* Helper. if any */
 	struct nf_conntrack_helper *helper;
 
@@ -140,6 +143,9 @@ struct nf_conntrack_expect
 	/* Usage count. */
 	atomic_t use;
 
+	/* Unique ID */
+	unsigned int id;
+
 	/* Flags */
 	unsigned int flags;
 
@@ -190,6 +196,31 @@ static inline void nf_ct_put(struct nf_conn *ct)
 	nf_conntrack_put(&ct->ct_general);
 }
 
+extern struct nf_conntrack_tuple_hash *
+__nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
+		    const struct nf_conn *ignored_conntrack);
+
+extern void nf_conntrack_hash_insert(struct nf_conn *ct);
+
+extern struct nf_conntrack_expect *
+__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple);
+
+extern struct nf_conntrack_expect *
+nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple);
+
+extern void nf_ct_unlink_expect(struct nf_conntrack_expect *exp);
+
+extern void nf_ct_remove_expectations(struct nf_conn *ct);
+
+extern void nf_conntrack_flush(void);
+
+extern struct nf_conntrack_helper *
+nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple);
+extern void nf_ct_helper_put(struct nf_conntrack_helper *helper);
+
+extern struct nf_conntrack_helper *
+__nf_conntrack_helper_find_byname(const char *name);
+
 /* call to create an explicit dependency on nf_conntrack. */
 extern void need_nf_conntrack(void);
 
diff --git a/include/net/netfilter/nf_conntrack_helper.h b/include/net/netfilter/nf_conntrack_helper.h
index 5a66b2a3a623..86ec8174ad02 100644
--- a/include/net/netfilter/nf_conntrack_helper.h
+++ b/include/net/netfilter/nf_conntrack_helper.h
@@ -33,6 +33,8 @@ struct nf_conntrack_helper
 		    unsigned int protoff,
 		    struct nf_conn *ct,
 		    enum ip_conntrack_info conntrackinfo);
+
+	int (*to_nfattr)(struct sk_buff *skb, const struct nf_conn *ct);
 };
 
 extern int nf_conntrack_helper_register(struct nf_conntrack_helper *);
diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h
index 01663e5b33df..67856eb93b43 100644
--- a/include/net/netfilter/nf_conntrack_l3proto.h
+++ b/include/net/netfilter/nf_conntrack_l3proto.h
@@ -14,6 +14,8 @@
 #include <linux/seq_file.h>
 #include <net/netfilter/nf_conntrack.h>
 
+struct nfattr;
+
 struct nf_conntrack_l3proto
 {
 	/* Next pointer. */
@@ -70,6 +72,12 @@ struct nf_conntrack_l3proto
 
 	u_int32_t (*get_features)(const struct nf_conntrack_tuple *tuple);
 
+	int (*tuple_to_nfattr)(struct sk_buff *skb,
+			       const struct nf_conntrack_tuple *t);
+
+	int (*nfattr_to_tuple)(struct nfattr *tb[],
+			       struct nf_conntrack_tuple *t);
+
 	/* Module (if any) which this is connected to. */
 	struct module *me;
 };
@@ -81,11 +89,16 @@ extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto);
 extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto);
 
 static inline struct nf_conntrack_l3proto *
-nf_ct_find_l3proto(u_int16_t l3proto)
+__nf_ct_l3proto_find(u_int16_t l3proto)
 {
 	return nf_ct_l3protos[l3proto];
 }
 
+extern struct nf_conntrack_l3proto *
+nf_ct_l3proto_find_get(u_int16_t l3proto);
+
+extern void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p);
+
 /* Existing built-in protocols */
 extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
 extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6;
diff --git a/include/net/netfilter/nf_conntrack_protocol.h b/include/net/netfilter/nf_conntrack_protocol.h
index b3afda35397a..1f33737fcea5 100644
--- a/include/net/netfilter/nf_conntrack_protocol.h
+++ b/include/net/netfilter/nf_conntrack_protocol.h
@@ -12,6 +12,7 @@
 #include <net/netfilter/nf_conntrack.h>
 
 struct seq_file;
+struct nfattr;
 
 struct nf_conntrack_protocol
 {
@@ -66,6 +67,18 @@ struct nf_conntrack_protocol
 		     enum ip_conntrack_info *ctinfo,
 		     int pf, unsigned int hooknum);
 
+	/* convert protoinfo to nfnetink attributes */
+	int (*to_nfattr)(struct sk_buff *skb, struct nfattr *nfa,
+			 const struct nf_conn *ct);
+
+	/* convert nfnetlink attributes to protoinfo */
+	int (*from_nfattr)(struct nfattr *tb[], struct nf_conn *ct);
+
+	int (*tuple_to_nfattr)(struct sk_buff *skb,
+			       const struct nf_conntrack_tuple *t);
+	int (*nfattr_to_tuple)(struct nfattr *tb[],
+			       struct nf_conntrack_tuple *t);
+
 	/* Module (if any) which this is connected to. */
 	struct module *me;
 };
@@ -80,12 +93,23 @@ extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
 extern struct nf_conntrack_protocol **nf_ct_protos[PF_MAX];
 
 extern struct nf_conntrack_protocol *
-nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol);
+__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol);
+
+extern struct nf_conntrack_protocol *
+nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol);
+
+extern void nf_ct_proto_put(struct nf_conntrack_protocol *p);
 
 /* Protocol registration. */
 extern int nf_conntrack_protocol_register(struct nf_conntrack_protocol *proto);
 extern void nf_conntrack_protocol_unregister(struct nf_conntrack_protocol *proto);
 
+/* Generic netlink helpers */
+extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+				      const struct nf_conntrack_tuple *tuple);
+extern int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+				      struct nf_conntrack_tuple *t);
+
 /* Log invalid packets */
 extern unsigned int nf_ct_log_invalid;
 
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 8202c1c0afad..385867efd481 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -392,6 +392,48 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
 	return -ENOENT;
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
+		&tuple->src.u3.ip);
+	NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
+		&tuple->dst.u3.ip);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+	[CTA_IP_V4_SRC-1]       = sizeof(u_int32_t),
+	[CTA_IP_V4_DST-1]       = sizeof(u_int32_t),
+};
+
+static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+		return -EINVAL;
+
+	t->src.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
+	t->dst.u3.ip =
+		*(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+
+	return 0;
+}
+#endif
+
 static struct nf_sockopt_ops so_getorigdst = {
 	.pf		= PF_INET,
 	.get_optmin	= SO_ORIGINAL_DST,
@@ -408,6 +450,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
 	.print_conntrack = ipv4_print_conntrack,
 	.prepare	 = ipv4_prepare,
 	.get_features	 = ipv4_get_features,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr = ipv4_tuple_to_nfattr,
+	.nfattr_to_tuple = ipv4_nfattr_to_tuple,
+#endif
 	.me		 = THIS_MODULE,
 };
 
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 7ddb5c08f7b8..52dc175be39a 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -50,20 +50,21 @@ static int icmp_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* Add 1; spaces filled with 0. */
+static const u_int8_t invmap[] = {
+	[ICMP_ECHO] = ICMP_ECHOREPLY + 1,
+	[ICMP_ECHOREPLY] = ICMP_ECHO + 1,
+	[ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
+	[ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
+	[ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
+	[ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
+	[ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
+	[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
+};
+
 static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
 			     const struct nf_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[]
-		= { [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
-		    [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
-		    [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
-		    [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
-		    [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
-		    [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
-		    [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
-		    [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1};
-
 	if (orig->dst.u.icmp.type >= sizeof(invmap)
 	    || !invmap[orig->dst.u.icmp.type])
 		return 0;
@@ -120,11 +121,12 @@ static int icmp_packet(struct nf_conn *ct,
 static int icmp_new(struct nf_conn *conntrack,
 		    const struct sk_buff *skb, unsigned int dataoff)
 {
-	static u_int8_t valid_new[]
-		= { [ICMP_ECHO] = 1,
-		    [ICMP_TIMESTAMP] = 1,
-		    [ICMP_INFO_REQUEST] = 1,
-		    [ICMP_ADDRESS] = 1 };
+	static const u_int8_t valid_new[] = {
+		[ICMP_ECHO] = 1,
+		[ICMP_TIMESTAMP] = 1,
+		[ICMP_INFO_REQUEST] = 1,
+		[ICMP_ADDRESS] = 1
+	};
 
 	if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
 	    || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
@@ -168,7 +170,7 @@ icmp_error_message(struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	innerproto = nf_ct_find_proto(PF_INET, inside->ip.protocol);
+	innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
 	dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
@@ -281,6 +283,60 @@ checksum_skipped:
 	return icmp_error_message(skb, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *t)
+{
+	NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMP_ID-1]   = sizeof(u_int16_t)
+};
+
+static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMP_TYPE-1]
+	    || !tb[CTA_PROTO_ICMP_CODE-1]
+	    || !tb[CTA_PROTO_ICMP_ID-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type = 
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
+	tuple->dst.u.icmp.code =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
+	tuple->src.u.icmp.id =
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+
+	if (tuple->dst.u.icmp.type >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type])
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 {
 	.list			= { NULL, NULL },
@@ -295,7 +351,12 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
 	.new			= icmp_new,
 	.error			= icmp_error,
 	.destroy		= NULL,
-	.me			= NULL
+	.me			= NULL,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmp_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmp_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
index 753a3ae8502b..704fbbe74874 100644
--- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c
@@ -401,6 +401,48 @@ static ctl_table nf_ct_net_table[] = {
 };
 #endif
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int ipv6_tuple_to_nfattr(struct sk_buff *skb,
+				const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4,
+		&tuple->src.u3.ip6);
+	NFA_PUT(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4,
+		&tuple->dst.u3.ip6);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_ip[CTA_IP_MAX] = {
+	[CTA_IP_V6_SRC-1]       = sizeof(u_int32_t)*4,
+	[CTA_IP_V6_DST-1]       = sizeof(u_int32_t)*4,
+};
+
+static int ipv6_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_IP_V6_SRC-1] || !tb[CTA_IP_V6_DST-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
+		return -EINVAL;
+
+	memcpy(&t->src.u3.ip6, NFA_DATA(tb[CTA_IP_V6_SRC-1]), 
+	       sizeof(u_int32_t) * 4);
+	memcpy(&t->dst.u3.ip6, NFA_DATA(tb[CTA_IP_V6_DST-1]),
+	       sizeof(u_int32_t) * 4);
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.l3proto		= PF_INET6,
 	.name			= "ipv6",
@@ -409,6 +451,11 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = {
 	.print_tuple		= ipv6_print_tuple,
 	.print_conntrack	= ipv6_print_conntrack,
 	.prepare		= ipv6_prepare,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= ipv6_tuple_to_nfattr,
+	.nfattr_to_tuple	= ipv6_nfattr_to_tuple,
+#endif
 	.get_features		= ipv6_get_features,
 	.me			= THIS_MODULE,
 };
diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
index a7e03cfacd06..09945c333055 100644
--- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c
@@ -57,17 +57,17 @@ static int icmpv6_pkt_to_tuple(const struct sk_buff *skb,
 	return 1;
 }
 
+/* Add 1; spaces filled with 0. */
+static u_int8_t invmap[] = {
+	[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
+	[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
+	[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_QUERY + 1,
+	[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
+};
+
 static int icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple,
 			       const struct nf_conntrack_tuple *orig)
 {
-	/* Add 1; spaces filled with 0. */
-	static u_int8_t invmap[] = {
-		[ICMPV6_ECHO_REQUEST - 128]	= ICMPV6_ECHO_REPLY + 1,
-		[ICMPV6_ECHO_REPLY - 128]	= ICMPV6_ECHO_REQUEST + 1,
-		[ICMPV6_NI_QUERY - 128]		= ICMPV6_NI_QUERY + 1,
-		[ICMPV6_NI_REPLY - 128]		= ICMPV6_NI_REPLY +1
-	};
-
 	int type = orig->dst.u.icmp.type - 128;
 	if (type < 0 || type >= sizeof(invmap) || !invmap[type])
 		return 0;
@@ -185,7 +185,7 @@ icmpv6_error_message(struct sk_buff *skb,
 		return -NF_ACCEPT;
 	}
 
-	inproto = nf_ct_find_proto(PF_INET6, inprotonum);
+	inproto = __nf_ct_proto_find(PF_INET6, inprotonum);
 
 	/* Are they talking about one of our connections? */
 	if (!nf_ct_get_tuple(skb, inip6off, inprotoff, PF_INET6, inprotonum,
@@ -255,6 +255,60 @@ skipped:
 	return icmpv6_error_message(skb, dataoff, ctinfo, hooknum);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+static int icmpv6_tuple_to_nfattr(struct sk_buff *skb,
+				  const struct nf_conntrack_tuple *t)
+{
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_ID, sizeof(u_int16_t),
+		&t->src.u.icmp.id);
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_TYPE, sizeof(u_int8_t),
+		&t->dst.u.icmp.type);
+	NFA_PUT(skb, CTA_PROTO_ICMPV6_CODE, sizeof(u_int8_t),
+		&t->dst.u.icmp.code);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_ICMPV6_TYPE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMPV6_CODE-1] = sizeof(u_int8_t),
+	[CTA_PROTO_ICMPV6_ID-1]   = sizeof(u_int16_t)
+};
+
+static int icmpv6_nfattr_to_tuple(struct nfattr *tb[],
+				struct nf_conntrack_tuple *tuple)
+{
+	if (!tb[CTA_PROTO_ICMPV6_TYPE-1]
+	    || !tb[CTA_PROTO_ICMPV6_CODE-1]
+	    || !tb[CTA_PROTO_ICMPV6_ID-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	tuple->dst.u.icmp.type =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_TYPE-1]);
+	tuple->dst.u.icmp.code =
+			*(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_CODE-1]);
+	tuple->src.u.icmp.id =
+			*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMPV6_ID-1]);
+
+	if (tuple->dst.u.icmp.type < 128
+	    || tuple->dst.u.icmp.type - 128 >= sizeof(invmap)
+	    || !invmap[tuple->dst.u.icmp.type - 128])
+		return -EINVAL;
+
+	return 0;
+}
+#endif
+
 struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
 {
 	.l3proto		= PF_INET6,
@@ -267,6 +321,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_icmpv6 =
 	.packet			= icmpv6_packet,
 	.new			= icmpv6_new,
 	.error			= icmpv6_error,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= icmpv6_tuple_to_nfattr,
+	.nfattr_to_tuple	= icmpv6_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_icmpv6);
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 794c41d19b28..7d55f9cbd853 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -95,4 +95,11 @@ config NF_CONNTRACK_FTP
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NF_CT_NETLINK
+	tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
+	depends on EXPERIMENTAL && NF_CONNTRACK && NETFILTER_NETLINK
+	depends on NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
+	help
+	  This option enables support for a netlink-based userspace interface
+
 endmenu
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 55f019ad2c08..cb2183145c37 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -13,3 +13,6 @@ obj-$(CONFIG_NF_CONNTRACK_FTP) += nf_conntrack_ftp.o
 
 # SCTP protocol connection tracking
 obj-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
+
+# netlink interface for nf_conntrack
+obj-$(CONFIG_NF_CT_NETLINK) += nf_conntrack_netlink.o
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c5b01d732d8..62bb509f05d4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -82,6 +82,8 @@ unsigned int nf_ct_log_invalid;
 static LIST_HEAD(unconfirmed);
 static int nf_conntrack_vmalloc;
 
+static unsigned int nf_conntrack_next_id = 1;
+static unsigned int nf_conntrack_expect_next_id = 1;
 #ifdef CONFIG_NF_CONNTRACK_EVENTS
 struct notifier_block *nf_conntrack_chain;
 struct notifier_block *nf_conntrack_expect_chain;
@@ -184,7 +186,7 @@ DECLARE_MUTEX(nf_ct_cache_mutex);
 
 extern struct nf_conntrack_protocol nf_conntrack_generic_protocol;
 struct nf_conntrack_protocol *
-nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
+__nf_ct_proto_find(u_int16_t l3proto, u_int8_t protocol)
 {
 	if (unlikely(nf_ct_protos[l3proto] == NULL))
 		return &nf_conntrack_generic_protocol;
@@ -192,6 +194,50 @@ nf_ct_find_proto(u_int16_t l3proto, u_int8_t protocol)
 	return nf_ct_protos[l3proto][protocol];
 }
 
+/* this is guaranteed to always return a valid protocol helper, since
+ * it falls back to generic_protocol */
+struct nf_conntrack_protocol *
+nf_ct_proto_find_get(u_int16_t l3proto, u_int8_t protocol)
+{
+	struct nf_conntrack_protocol *p;
+
+	preempt_disable();
+	p = __nf_ct_proto_find(l3proto, protocol);
+	if (p) {
+		if (!try_module_get(p->me))
+			p = &nf_conntrack_generic_protocol;
+	}
+	preempt_enable();
+	
+	return p;
+}
+
+void nf_ct_proto_put(struct nf_conntrack_protocol *p)
+{
+	module_put(p->me);
+}
+
+struct nf_conntrack_l3proto *
+nf_ct_l3proto_find_get(u_int16_t l3proto)
+{
+	struct nf_conntrack_l3proto *p;
+
+	preempt_disable();
+	p = __nf_ct_l3proto_find(l3proto);
+	if (p) {
+		if (!try_module_get(p->me))
+			p = &nf_conntrack_generic_l3proto;
+	}
+	preempt_enable();
+
+	return p;
+}
+
+void nf_ct_l3proto_put(struct nf_conntrack_l3proto *p)
+{
+	module_put(p->me);
+}
+
 static int nf_conntrack_hash_rnd_initted;
 static unsigned int nf_conntrack_hash_rnd;
 
@@ -384,7 +430,7 @@ nf_ct_invert_tuple(struct nf_conntrack_tuple *inverse,
 }
 
 /* nf_conntrack_expect helper functions */
-static void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
+void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 {
 	ASSERT_WRITE_LOCK(&nf_conntrack_lock);
 	NF_CT_ASSERT(!timer_pending(&exp->timeout));
@@ -404,6 +450,33 @@ static void expectation_timed_out(unsigned long ul_expect)
 	nf_conntrack_expect_put(exp);
 }
 
+struct nf_conntrack_expect *
+__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_expect *i;
+	
+	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
+		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
+			atomic_inc(&i->use);
+			return i;
+		}
+	}
+	return NULL;
+}
+
+/* Just find a expectation corresponding to a tuple. */
+struct nf_conntrack_expect *
+nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_expect *i;
+	
+	read_lock_bh(&nf_conntrack_lock);
+	i = __nf_conntrack_expect_find(tuple);
+	read_unlock_bh(&nf_conntrack_lock);
+
+	return i;
+}
+
 /* If an expectation for this connection is found, it gets delete from
  * global list then returned. */
 static struct nf_conntrack_expect *
@@ -432,7 +505,7 @@ find_expectation(const struct nf_conntrack_tuple *tuple)
 }
 
 /* delete all expectations for this conntrack */
-static void remove_expectations(struct nf_conn *ct)
+void nf_ct_remove_expectations(struct nf_conn *ct)
 {
 	struct nf_conntrack_expect *i, *tmp;
 
@@ -462,7 +535,7 @@ clean_from_lists(struct nf_conn *ct)
 	LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
 
 	/* Destroy all pending expectations */
-	remove_expectations(ct);
+	nf_ct_remove_expectations(ct);
 }
 
 static void
@@ -482,12 +555,11 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	/* To make sure we don't get any weird locking issues here:
 	 * destroy_conntrack() MUST NOT be called with a write lock
 	 * to nf_conntrack_lock!!! -HW */
-	l3proto = nf_ct_find_l3proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
+	l3proto = __nf_ct_l3proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num);
 	if (l3proto && l3proto->destroy)
 		l3proto->destroy(ct);
 
-	proto = nf_ct_find_proto(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num,
-				 ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
+	proto = __nf_ct_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
 	if (proto && proto->destroy)
 		proto->destroy(ct);
 
@@ -499,7 +571,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
 	 * except TFTP can create an expectation on the first packet,
 	 * before connection is in the list, so we need to clean here,
 	 * too. */
-	remove_expectations(ct);
+	nf_ct_remove_expectations(ct);
 
 	/* We overload first tuple to link into unconfirmed list. */
 	if (!nf_ct_is_confirmed(ct)) {
@@ -540,7 +612,7 @@ conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i,
 		&& nf_ct_tuple_equal(tuple, &i->tuple);
 }
 
-static struct nf_conntrack_tuple_hash *
+struct nf_conntrack_tuple_hash *
 __nf_conntrack_find(const struct nf_conntrack_tuple *tuple,
 		    const struct nf_conn *ignored_conntrack)
 {
@@ -575,6 +647,29 @@ nf_conntrack_find_get(const struct nf_conntrack_tuple *tuple,
 	return h;
 }
 
+static void __nf_conntrack_hash_insert(struct nf_conn *ct,
+				       unsigned int hash,
+				       unsigned int repl_hash) 
+{
+	ct->id = ++nf_conntrack_next_id;
+	list_prepend(&nf_conntrack_hash[hash],
+		     &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
+	list_prepend(&nf_conntrack_hash[repl_hash],
+		     &ct->tuplehash[IP_CT_DIR_REPLY].list);
+}
+
+void nf_conntrack_hash_insert(struct nf_conn *ct)
+{
+	unsigned int hash, repl_hash;
+
+	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
+
+	write_lock_bh(&nf_conntrack_lock);
+	__nf_conntrack_hash_insert(ct, hash, repl_hash);
+	write_unlock_bh(&nf_conntrack_lock);
+}
+
 /* Confirm a connection given skb; places it in hash table */
 int
 __nf_conntrack_confirm(struct sk_buff **pskb)
@@ -621,10 +716,7 @@ __nf_conntrack_confirm(struct sk_buff **pskb)
 		/* Remove from unconfirmed list */
 		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
 
-		list_prepend(&nf_conntrack_hash[hash],
-			     &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
-		list_prepend(&nf_conntrack_hash[repl_hash],
-			     &ct->tuplehash[IP_CT_DIR_REPLY]);
+		__nf_conntrack_hash_insert(ct, hash, repl_hash);
 		/* Timer relative to confirmation time, not original
 		   setting time, otherwise we'd get timer wrap in
 		   weird delay cases. */
@@ -708,13 +800,41 @@ static inline int helper_cmp(const struct nf_conntrack_helper *i,
 }
 
 static struct nf_conntrack_helper *
-nf_ct_find_helper(const struct nf_conntrack_tuple *tuple)
+__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple)
 {
 	return LIST_FIND(&helpers, helper_cmp,
 			 struct nf_conntrack_helper *,
 			 tuple);
 }
 
+struct nf_conntrack_helper *
+nf_ct_helper_find_get( const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_helper *helper;
+
+	/* need nf_conntrack_lock to assure that helper exists until
+	 * try_module_get() is called */
+	read_lock_bh(&nf_conntrack_lock);
+
+	helper = __nf_ct_helper_find(tuple);
+	if (helper) {
+		/* need to increase module usage count to assure helper will
+		 * not go away while the caller is e.g. busy putting a
+		 * conntrack in the hash that uses the helper */
+		if (!try_module_get(helper->me))
+			helper = NULL;
+	}
+
+	read_unlock_bh(&nf_conntrack_lock);
+
+	return helper;
+}
+
+void nf_ct_helper_put(struct nf_conntrack_helper *helper)
+{
+	module_put(helper->me);
+}
+
 static struct nf_conn *
 __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 		     const struct nf_conntrack_tuple *repl,
@@ -744,7 +864,7 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 	/*  find features needed by this conntrack. */
 	features = l3proto->get_features(orig);
 	read_lock_bh(&nf_conntrack_lock);
-	if (nf_ct_find_helper(repl) != NULL)
+	if (__nf_ct_helper_find(repl) != NULL)
 		features |= NF_CT_F_HELP;
 	read_unlock_bh(&nf_conntrack_lock);
 
@@ -794,7 +914,7 @@ struct nf_conn *nf_conntrack_alloc(const struct nf_conntrack_tuple *orig,
 {
 	struct nf_conntrack_l3proto *l3proto;
 
-	l3proto = nf_ct_find_l3proto(orig->src.l3num);
+	l3proto = __nf_ct_l3proto_find(orig->src.l3num);
 	return __nf_conntrack_alloc(orig, repl, l3proto);
 }
 
@@ -853,7 +973,7 @@ init_conntrack(const struct nf_conntrack_tuple *tuple,
 		nf_conntrack_get(&conntrack->master->ct_general);
 		NF_CT_STAT_INC(expect_new);
 	} else {
-		conntrack->helper = nf_ct_find_helper(&repl_tuple);
+		conntrack->helper = __nf_ct_helper_find(&repl_tuple);
 
 		NF_CT_STAT_INC(new);
         }
@@ -947,13 +1067,13 @@ nf_conntrack_in(int pf, unsigned int hooknum, struct sk_buff **pskb)
 		return NF_ACCEPT;
 	}
 
-	l3proto = nf_ct_find_l3proto((u_int16_t)pf);
+	l3proto = __nf_ct_l3proto_find((u_int16_t)pf);
 	if ((ret = l3proto->prepare(pskb, hooknum, &dataoff, &protonum)) <= 0) {
 		DEBUGP("not prepared to track yet or error occured\n");
 		return -ret;
 	}
 
-	proto = nf_ct_find_proto((u_int16_t)pf, protonum);
+	proto = __nf_ct_proto_find((u_int16_t)pf, protonum);
 
 	/* It may be an special packet, error, unclean...
 	 * inverse of the return code tells to the netfilter
@@ -1002,9 +1122,9 @@ int nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse,
 			 const struct nf_conntrack_tuple *orig)
 {
 	return nf_ct_invert_tuple(inverse, orig,
-				  nf_ct_find_l3proto(orig->src.l3num),
-				  nf_ct_find_proto(orig->src.l3num,
-						   orig->dst.protonum));
+				  __nf_ct_l3proto_find(orig->src.l3num),
+				  __nf_ct_proto_find(orig->src.l3num,
+						     orig->dst.protonum));
 }
 
 /* Would two expected things clash? */
@@ -1096,6 +1216,7 @@ static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
 	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
 	add_timer(&exp->timeout);
 
+	exp->id = ++nf_conntrack_expect_next_id;
 	atomic_inc(&exp->use);
 	NF_CT_STAT_INC(expect_create);
 }
@@ -1176,7 +1297,7 @@ void nf_conntrack_alter_reply(struct nf_conn *conntrack,
 
 	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
 	if (!conntrack->master && conntrack->expecting == 0)
-		conntrack->helper = nf_ct_find_helper(newreply);
+		conntrack->helper = __nf_ct_helper_find(newreply);
 	write_unlock_bh(&nf_conntrack_lock);
 }
 
@@ -1201,6 +1322,19 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
 	return 0;
 }
 
+struct nf_conntrack_helper *
+__nf_conntrack_helper_find_byname(const char *name)
+{
+	struct nf_conntrack_helper *h;
+
+	list_for_each_entry(h, &helpers, list) {
+		if (!strcmp(h->name, name))
+			return h;
+	}
+
+	return NULL;
+}
+
 static inline int unhelp(struct nf_conntrack_tuple_hash *i,
 			 const struct nf_conntrack_helper *me)
 {
@@ -1284,6 +1418,51 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
 		nf_conntrack_event_cache(event, skb);
 }
 
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
+ * in ip_conntrack_core, since we don't want the protocols to autoload
+ * or depend on ctnetlink */
+int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb,
+			       const struct nf_conntrack_tuple *tuple)
+{
+	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
+		&tuple->src.u.tcp.port);
+	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
+		&tuple->dst.u.tcp.port);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_SRC_PORT-1]  = sizeof(u_int16_t),
+	[CTA_PROTO_DST_PORT-1]  = sizeof(u_int16_t)
+};
+
+int nf_ct_port_nfattr_to_tuple(struct nfattr *tb[],
+			       struct nf_conntrack_tuple *t)
+{
+	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
+		return -EINVAL;
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	t->src.u.tcp.port =
+		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
+	t->dst.u.tcp.port =
+		*(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
+
+	return 0;
+}
+#endif
+
 /* Used by ipt_REJECT and ip6t_REJECT. */
 void __nf_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
 {
@@ -1366,6 +1545,11 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
 			   get_order(sizeof(struct list_head) * size));
 }
 
+void nf_conntrack_flush()
+{
+	nf_ct_iterate_cleanup(kill_all, NULL);
+}
+
 /* Mishearing the voices in his head, our hero wonders how he's
    supposed to kill the mall. */
 void nf_conntrack_cleanup(void)
@@ -1379,7 +1563,7 @@ void nf_conntrack_cleanup(void)
 
 	nf_ct_event_cache_flush();
  i_see_dead_people:
-	nf_ct_iterate_cleanup(kill_all, NULL);
+	nf_conntrack_flush();
 	if (atomic_read(&nf_conntrack_count) != 0) {
 		schedule();
 		goto i_see_dead_people;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
new file mode 100644
index 000000000000..4f2e50952a12
--- /dev/null
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -0,0 +1,1642 @@
+/* Connection tracking via netlink socket. Allows for user space
+ * protocol helpers and general trouble making from userspace.
+ *
+ * (C) 2001 by Jay Schulist <jschlst@samba.org>
+ * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
+ * (C) 2003 by Patrick Mchardy <kaber@trash.net>
+ * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
+ *
+ * I've reworked this stuff to use attributes instead of conntrack 
+ * structures. 5.44 am. I need more tea. --pablo 05/07/11.
+ *
+ * Initial connection tracking via netlink development funded and 
+ * generally made possible by Network Robots, Inc. (www.networkrobots.com)
+ *
+ * Further development of this code funded by Astaro AG (http://www.astaro.com)
+ *
+ * This software may be used and distributed according to the terms
+ * of the GNU General Public License, incorporated herein by reference.
+ *
+ * Derived from ip_conntrack_netlink.c: Port by Pablo Neira Ayuso (05/11/14)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/timer.h>
+#include <linux/skbuff.h>
+#include <linux/errno.h>
+#include <linux/netlink.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_protocol.h>
+#include <linux/netfilter_ipv4/ip_nat_protocol.h>
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+MODULE_LICENSE("GPL");
+
+static char __initdata version[] = "0.92";
+
+#if 0
+#define DEBUGP printk
+#else
+#define DEBUGP(format, args...)
+#endif
+
+
+static inline int
+ctnetlink_dump_tuples_proto(struct sk_buff *skb, 
+			    const struct nf_conntrack_tuple *tuple)
+{
+	struct nf_conntrack_protocol *proto;
+	int ret = 0;
+
+	NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
+
+	/* If no protocol helper is found, this function will return the
+	 * generic protocol helper, so proto won't *ever* be NULL */
+	proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+	if (likely(proto->tuple_to_nfattr))
+		ret = proto->tuple_to_nfattr(skb, tuple);
+	
+	nf_ct_proto_put(proto);
+
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_tuples(struct sk_buff *skb, 
+		      const struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *nest_parms;
+	struct nf_conntrack_l3proto *l3proto;
+	int ret = 0;
+	
+	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+	
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
+	if (likely(l3proto->tuple_to_nfattr))
+		ret = l3proto->tuple_to_nfattr(skb, tuple);
+	NFA_NEST_END(skb, nest_parms);
+
+	nf_ct_l3proto_put(l3proto);
+
+	if (unlikely(ret < 0))
+		return ret;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
+	ret = ctnetlink_dump_tuples_proto(skb, tuple);
+	NFA_NEST_END(skb, nest_parms);
+
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_status(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t status = htonl((u_int32_t) ct->status);
+	NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	long timeout_l = ct->timeout.expires - jiffies;
+	u_int32_t timeout;
+
+	if (timeout_l < 0)
+		timeout = 0;
+	else
+		timeout = htonl(timeout_l / HZ);
+	
+	NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nf_conntrack_protocol *proto = nf_ct_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num, ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+	struct nfattr *nest_proto;
+	int ret;
+
+	if (!proto->to_nfattr) {
+		nf_ct_proto_put(proto);
+		return 0;
+	}
+	
+	nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
+
+	ret = proto->to_nfattr(skb, nest_proto, ct);
+
+	nf_ct_proto_put(proto);
+
+	NFA_NEST_END(skb, nest_proto);
+
+	return ret;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	struct nfattr *nest_helper;
+
+	if (!ct->helper)
+		return 0;
+		
+	nest_helper = NFA_NEST(skb, CTA_HELP);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
+
+	if (ct->helper->to_nfattr)
+		ct->helper->to_nfattr(skb, ct);
+
+	NFA_NEST_END(skb, nest_helper);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#ifdef CONFIG_NF_CT_ACCT
+static inline int
+ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct,
+			enum ip_conntrack_dir dir)
+{
+	enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
+	struct nfattr *nest_count = NFA_NEST(skb, type);
+	u_int32_t tmp;
+
+	tmp = htonl(ct->counters[dir].packets);
+	NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
+
+	tmp = htonl(ct->counters[dir].bytes);
+	NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
+
+	NFA_NEST_END(skb, nest_count);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_counters(a, b, c) (0)
+#endif
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+static inline int
+ctnetlink_dump_mark(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t mark = htonl(ct->mark);
+
+	NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+#else
+#define ctnetlink_dump_mark(a, b) (0)
+#endif
+
+static inline int
+ctnetlink_dump_id(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t id = htonl(ct->id);
+	NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+static inline int
+ctnetlink_dump_use(struct sk_buff *skb, const struct nf_conn *ct)
+{
+	u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
+	
+	NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
+	return 0;
+
+nfattr_failure:
+	return -1;
+}
+
+#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
+
+static int
+ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+		    int event, int nowait, 
+		    const struct nf_conn *ct)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nfattr *nest_parms;
+	unsigned char *b;
+
+	b = skb->tail;
+
+	event |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+	nfmsg  = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+	nfmsg->nfgen_family = 
+		ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	nfmsg->version      = NFNETLINK_V0;
+	nfmsg->res_id	    = 0;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+
+	if (ctnetlink_dump_status(skb, ct) < 0 ||
+	    ctnetlink_dump_timeout(skb, ct) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
+	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_helpinfo(skb, ct) < 0 ||
+	    ctnetlink_dump_mark(skb, ct) < 0 ||
+	    ctnetlink_dump_id(skb, ct) < 0 ||
+	    ctnetlink_dump_use(skb, ct) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_conntrack_event(struct notifier_block *this,
+                                     unsigned long events, void *ptr)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nfattr *nest_parms;
+	struct nf_conn *ct = (struct nf_conn *)ptr;
+	struct sk_buff *skb;
+	unsigned int type;
+	unsigned char *b;
+	unsigned int flags = 0, group;
+
+	/* ignore our fake conntrack entry */
+	if (ct == &nf_conntrack_untracked)
+		return NOTIFY_DONE;
+
+	if (events & IPCT_DESTROY) {
+		type = IPCTNL_MSG_CT_DELETE;
+		group = NFNLGRP_CONNTRACK_DESTROY;
+	} else  if (events & (IPCT_NEW | IPCT_RELATED)) {
+		type = IPCTNL_MSG_CT_NEW;
+		flags = NLM_F_CREATE|NLM_F_EXCL;
+		/* dump everything */
+		events = ~0UL;
+		group = NFNLGRP_CONNTRACK_NEW;
+	} else  if (events & (IPCT_STATUS |
+		      IPCT_PROTOINFO |
+		      IPCT_HELPER |
+		      IPCT_HELPINFO |
+		      IPCT_NATINFO)) {
+		type = IPCTNL_MSG_CT_NEW;
+		group = NFNLGRP_CONNTRACK_UPDATE;
+	} else
+		return NOTIFY_DONE;
+	
+  /* FIXME: Check if there are any listeners before, don't hurt performance */
+	
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return NOTIFY_DONE;
+
+	b = skb->tail;
+
+	type |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = flags;
+	nfmsg->nfgen_family = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	nfmsg->version	= NFNETLINK_V0;
+	nfmsg->res_id	= 0;
+
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
+	if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
+		goto nfattr_failure;
+	NFA_NEST_END(skb, nest_parms);
+	
+	/* NAT stuff is now a status flag */
+	if ((events & IPCT_STATUS || events & IPCT_NATINFO)
+	    && ctnetlink_dump_status(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_REFRESH
+	    && ctnetlink_dump_timeout(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_PROTOINFO
+	    && ctnetlink_dump_protoinfo(skb, ct) < 0)
+		goto nfattr_failure;
+	if (events & IPCT_HELPINFO
+	    && ctnetlink_dump_helpinfo(skb, ct) < 0)
+		goto nfattr_failure;
+
+	if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
+	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nfnetlink_send(skb, 0, group, 0);
+	return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+	kfree_skb(skb);
+	return NOTIFY_DONE;
+}
+#endif /* CONFIG_NF_CONNTRACK_EVENTS */
+
+static int ctnetlink_done(struct netlink_callback *cb)
+{
+	DEBUGP("entered %s\n", __FUNCTION__);
+	return 0;
+}
+
+static int
+ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conn *ct = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[1];
+
+	DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, 
+			cb->args[0], *id);
+
+	read_lock_bh(&nf_conntrack_lock);
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
+		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
+			h = (struct nf_conntrack_tuple_hash *) i;
+			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+				continue;
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (ct->id <= *id)
+				continue;
+			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		                        	cb->nlh->nlmsg_seq,
+						IPCTNL_MSG_CT_NEW,
+						1, ct) < 0)
+				goto out;
+			*id = ct->id;
+		}
+	}
+out:	
+	read_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+	return skb->len;
+}
+
+#ifdef CONFIG_NF_CT_ACCT
+static int
+ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conn *ct = NULL;
+	struct nf_conntrack_tuple_hash *h;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[1];
+
+	DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, 
+			cb->args[0], *id);
+
+	write_lock_bh(&nf_conntrack_lock);
+	for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) {
+		list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) {
+			h = (struct nf_conntrack_tuple_hash *) i;
+			if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
+				continue;
+			ct = nf_ct_tuplehash_to_ctrack(h);
+			if (ct->id <= *id)
+				continue;
+			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
+		                        	cb->nlh->nlmsg_seq,
+						IPCTNL_MSG_CT_NEW,
+						1, ct) < 0)
+				goto out;
+			*id = ct->id;
+
+			memset(&ct->counters, 0, sizeof(ct->counters));
+		}
+	}
+out:	
+	write_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
+
+	return skb->len;
+}
+#endif
+
+static inline int
+ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *tb[CTA_IP_MAX];
+	struct nf_conntrack_l3proto *l3proto;
+	int ret = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_IP_MAX, attr);
+
+	l3proto = nf_ct_l3proto_find_get(tuple->src.l3num);
+
+	if (likely(l3proto->nfattr_to_tuple))
+		ret = l3proto->nfattr_to_tuple(tb, tuple);
+
+	nf_ct_l3proto_put(l3proto);
+
+	DEBUGP("leaving\n");
+
+	return ret;
+}
+
+static const size_t cta_min_proto[CTA_PROTO_MAX] = {
+	[CTA_PROTO_NUM-1]	= sizeof(u_int8_t),
+};
+
+static inline int
+ctnetlink_parse_tuple_proto(struct nfattr *attr, 
+			    struct nf_conntrack_tuple *tuple)
+{
+	struct nfattr *tb[CTA_PROTO_MAX];
+	struct nf_conntrack_protocol *proto;
+	int ret = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
+
+	if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+		return -EINVAL;
+
+	if (!tb[CTA_PROTO_NUM-1])
+		return -EINVAL;
+	tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
+
+	proto = nf_ct_proto_find_get(tuple->src.l3num, tuple->dst.protonum);
+
+	if (likely(proto->nfattr_to_tuple))
+		ret = proto->nfattr_to_tuple(tb, tuple);
+
+	nf_ct_proto_put(proto);
+	
+	return ret;
+}
+
+static inline int
+ctnetlink_parse_tuple(struct nfattr *cda[], struct nf_conntrack_tuple *tuple,
+		      enum ctattr_tuple type, u_int8_t l3num)
+{
+	struct nfattr *tb[CTA_TUPLE_MAX];
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	memset(tuple, 0, sizeof(*tuple));
+
+	nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
+
+	if (!tb[CTA_TUPLE_IP-1])
+		return -EINVAL;
+
+	tuple->src.l3num = l3num;
+
+	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
+	if (err < 0)
+		return err;
+
+	if (!tb[CTA_TUPLE_PROTO-1])
+		return -EINVAL;
+
+	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
+	if (err < 0)
+		return err;
+
+	/* orig and expect tuples get DIR_ORIGINAL */
+	if (type == CTA_TUPLE_REPLY)
+		tuple->dst.dir = IP_CT_DIR_REPLY;
+	else
+		tuple->dst.dir = IP_CT_DIR_ORIGINAL;
+
+	NF_CT_DUMP_TUPLE(tuple);
+
+	DEBUGP("leaving\n");
+
+	return 0;
+}
+
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
+	[CTA_PROTONAT_PORT_MIN-1]       = sizeof(u_int16_t),
+	[CTA_PROTONAT_PORT_MAX-1]       = sizeof(u_int16_t),
+};
+
+static int ctnetlink_parse_nat_proto(struct nfattr *attr,
+				     const struct nf_conn *ct,
+				     struct ip_nat_range *range)
+{
+	struct nfattr *tb[CTA_PROTONAT_MAX];
+	struct ip_nat_protocol *npt;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
+
+	if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
+		return -EINVAL;
+
+	npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
+
+	if (!npt->nfattr_to_range) {
+		ip_nat_proto_put(npt);
+		return 0;
+	}
+
+	/* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
+	if (npt->nfattr_to_range(tb, range) > 0)
+		range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
+
+	ip_nat_proto_put(npt);
+
+	DEBUGP("leaving\n");
+	return 0;
+}
+
+static const size_t cta_min_nat[CTA_NAT_MAX] = {
+	[CTA_NAT_MINIP-1]       = sizeof(u_int32_t),
+	[CTA_NAT_MAXIP-1]       = sizeof(u_int32_t),
+};
+
+static inline int
+ctnetlink_parse_nat(struct nfattr *cda[],
+		    const struct nf_conn *ct, struct ip_nat_range *range)
+{
+	struct nfattr *tb[CTA_NAT_MAX];
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	memset(range, 0, sizeof(*range));
+	
+	nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
+
+	if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
+		return -EINVAL;
+
+	if (tb[CTA_NAT_MINIP-1])
+		range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
+
+	if (!tb[CTA_NAT_MAXIP-1])
+		range->max_ip = range->min_ip;
+	else
+		range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
+
+	if (range->min_ip)
+		range->flags |= IP_NAT_RANGE_MAP_IPS;
+
+	if (!tb[CTA_NAT_PROTO-1])
+		return 0;
+
+	err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
+	if (err < 0)
+		return err;
+
+	DEBUGP("leaving\n");
+	return 0;
+}
+#endif
+
+static inline int
+ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
+{
+	struct nfattr *tb[CTA_HELP_MAX];
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
+
+	if (!tb[CTA_HELP_NAME-1])
+		return -EINVAL;
+
+	*helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
+
+	return 0;
+}
+
+static const size_t cta_min[CTA_MAX] = {
+	[CTA_STATUS-1] 		= sizeof(u_int32_t),
+	[CTA_TIMEOUT-1] 	= sizeof(u_int32_t),
+	[CTA_MARK-1]		= sizeof(u_int32_t),
+	[CTA_USE-1]		= sizeof(u_int32_t),
+	[CTA_ID-1]		= sizeof(u_int32_t)
+};
+
+static int
+ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+	else {
+		/* Flush the whole table */
+		nf_conntrack_flush();
+		return 0;
+	}
+
+	if (err < 0)
+		return err;
+
+	h = nf_conntrack_find_get(&tuple, NULL);
+	if (!h) {
+		DEBUGP("tuple not found in conntrack hash\n");
+		return -ENOENT;
+	}
+
+	ct = nf_ct_tuplehash_to_ctrack(h);
+	
+	if (cda[CTA_ID-1]) {
+		u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
+		if (ct->id != id) {
+			nf_ct_put(ct);
+			return -ENOENT;
+		}
+	}	
+	if (del_timer(&ct->timeout))
+		ct->timeout.function((unsigned long)ct);
+
+	nf_ct_put(ct);
+	DEBUGP("leaving\n");
+
+	return 0;
+}
+
+static int
+ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple_hash *h;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conn *ct;
+	struct sk_buff *skb2 = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		u32 rlen;
+
+		if (nfmsg->nfgen_family != AF_INET)
+			return -EAFNOSUPPORT;
+
+		if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
+					IPCTNL_MSG_CT_GET_CTRZERO) {
+#ifdef CONFIG_NF_CT_ACCT
+			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+						ctnetlink_dump_table_w,
+						ctnetlink_done)) != 0)
+				return -EINVAL;
+#else
+			return -ENOTSUPP;
+#endif
+		} else {
+			if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+		      		                        ctnetlink_dump_table,
+		                                	ctnetlink_done)) != 0)
+			return -EINVAL;
+		}
+
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return 0;
+	}
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG, u3);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY, u3);
+	else
+		return -EINVAL;
+
+	if (err < 0)
+		return err;
+
+	h = nf_conntrack_find_get(&tuple, NULL);
+	if (!h) {
+		DEBUGP("tuple not found in conntrack hash");
+		return -ENOENT;
+	}
+	DEBUGP("tuple found\n");
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	err = -ENOMEM;
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2) {
+		nf_ct_put(ct);
+		return -ENOMEM;
+	}
+	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
+
+	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 
+				  IPCTNL_MSG_CT_NEW, 1, ct);
+	nf_ct_put(ct);
+	if (err <= 0)
+		goto free;
+
+	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+	if (err < 0)
+		goto out;
+
+	DEBUGP("leaving\n");
+	return 0;
+
+free:
+	kfree_skb(skb2);
+out:
+	return err;
+}
+
+static inline int
+ctnetlink_change_status(struct nf_conn *ct, struct nfattr *cda[])
+{
+	unsigned long d;
+	unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
+	d = ct->status ^ status;
+
+	if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
+		/* unchangeable */
+		return -EINVAL;
+	
+	if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
+		/* SEEN_REPLY bit can only be set */
+		return -EINVAL;
+
+	
+	if (d & IPS_ASSURED && !(status & IPS_ASSURED))
+		/* ASSURED bit can only be set */
+		return -EINVAL;
+
+	if (cda[CTA_NAT-1]) {
+#ifndef CONFIG_IP_NF_NAT_NEEDED
+		return -EINVAL;
+#else
+		unsigned int hooknum;
+		struct ip_nat_range range;
+
+		if (ctnetlink_parse_nat(cda, ct, &range) < 0)
+			return -EINVAL;
+
+		DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", 
+		       NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
+		       htons(range.min.all), htons(range.max.all));
+		
+		/* This is tricky but it works. ip_nat_setup_info needs the
+		 * hook number as parameter, so let's do the correct 
+		 * conversion and run away */
+		if (status & IPS_SRC_NAT_DONE)
+			hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
+		else if (status & IPS_DST_NAT_DONE)
+			hooknum = NF_IP_PRE_ROUTING;  /* IP_NAT_MANIP_DST */
+		else 
+			return -EINVAL; /* Missing NAT flags */
+
+		DEBUGP("NAT status: %lu\n", 
+		       status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+		
+		if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
+			return -EEXIST;
+		ip_nat_setup_info(ct, &range, hooknum);
+
+                DEBUGP("NAT status after setup_info: %lu\n",
+                       ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
+#endif
+	}
+
+	/* Be careful here, modifying NAT bits can screw up things,
+	 * so don't let users modify them directly if they don't pass
+	 * ip_nat_range. */
+	ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
+	return 0;
+}
+
+
+static inline int
+ctnetlink_change_helper(struct nf_conn *ct, struct nfattr *cda[])
+{
+	struct nf_conntrack_helper *helper;
+	char *helpname;
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	/* don't change helper of sibling connections */
+	if (ct->master)
+		return -EINVAL;
+
+	err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
+	if (err < 0)
+		return err;
+
+	helper = __nf_conntrack_helper_find_byname(helpname);
+	if (!helper) {
+		if (!strcmp(helpname, ""))
+			helper = NULL;
+		else
+			return -EINVAL;
+	}
+
+	if (ct->helper) {
+		if (!helper) {
+			/* we had a helper before ... */
+			nf_ct_remove_expectations(ct);
+			ct->helper = NULL;
+		} else {
+			/* need to zero data of old helper */
+			memset(&ct->help, 0, sizeof(ct->help));
+		}
+	}
+	
+	ct->helper = helper;
+
+	return 0;
+}
+
+static inline int
+ctnetlink_change_timeout(struct nf_conn *ct, struct nfattr *cda[])
+{
+	u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+	
+	if (!del_timer(&ct->timeout))
+		return -ETIME;
+
+	ct->timeout.expires = jiffies + timeout * HZ;
+	add_timer(&ct->timeout);
+
+	return 0;
+}
+
+static inline int
+ctnetlink_change_protoinfo(struct nf_conn *ct, struct nfattr *cda[])
+{
+	struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
+	struct nf_conntrack_protocol *proto;
+	u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
+	u_int16_t l3num = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num;
+	int err = 0;
+
+	nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
+
+	proto = nf_ct_proto_find_get(l3num, npt);
+
+	if (proto->from_nfattr)
+		err = proto->from_nfattr(tb, ct);
+	nf_ct_proto_put(proto); 
+
+	return err;
+}
+
+static int
+ctnetlink_change_conntrack(struct nf_conn *ct, struct nfattr *cda[])
+{
+	int err;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (cda[CTA_HELP-1]) {
+		err = ctnetlink_change_helper(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
+	if (cda[CTA_TIMEOUT-1]) {
+		err = ctnetlink_change_timeout(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
+	if (cda[CTA_STATUS-1]) {
+		err = ctnetlink_change_status(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
+	if (cda[CTA_PROTOINFO-1]) {
+		err = ctnetlink_change_protoinfo(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
+	DEBUGP("all done\n");
+	return 0;
+}
+
+static int
+ctnetlink_create_conntrack(struct nfattr *cda[], 
+			   struct nf_conntrack_tuple *otuple,
+			   struct nf_conntrack_tuple *rtuple)
+{
+	struct nf_conn *ct;
+	int err = -EINVAL;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	ct = nf_conntrack_alloc(otuple, rtuple);
+	if (ct == NULL || IS_ERR(ct))
+		return -ENOMEM;	
+
+	if (!cda[CTA_TIMEOUT-1])
+		goto err;
+	ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
+
+	ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
+	ct->status |= IPS_CONFIRMED;
+
+	err = ctnetlink_change_status(ct, cda);
+	if (err < 0)
+		goto err;
+
+	if (cda[CTA_PROTOINFO-1]) {
+		err = ctnetlink_change_protoinfo(ct, cda);
+		if (err < 0)
+			return err;
+	}
+
+#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
+	if (cda[CTA_MARK-1])
+		ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
+#endif
+
+	ct->helper = nf_ct_helper_find_get(rtuple);
+
+	add_timer(&ct->timeout);
+	nf_conntrack_hash_insert(ct);
+
+	if (ct->helper)
+		nf_ct_helper_put(ct->helper);
+
+	DEBUGP("conntrack with id %u inserted\n", ct->id);
+	return 0;
+
+err:	
+	nf_conntrack_free(ct);
+	return err;
+}
+
+static int 
+ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, 
+			struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple otuple, rtuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_MAX, cta_min))
+		return -EINVAL;
+
+	if (cda[CTA_TUPLE_ORIG-1]) {
+		err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG, u3);
+		if (err < 0)
+			return err;
+	}
+
+	if (cda[CTA_TUPLE_REPLY-1]) {
+		err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY, u3);
+		if (err < 0)
+			return err;
+	}
+
+	write_lock_bh(&nf_conntrack_lock);
+	if (cda[CTA_TUPLE_ORIG-1])
+		h = __nf_conntrack_find(&otuple, NULL);
+	else if (cda[CTA_TUPLE_REPLY-1])
+		h = __nf_conntrack_find(&rtuple, NULL);
+
+	if (h == NULL) {
+		write_unlock_bh(&nf_conntrack_lock);
+		DEBUGP("no such conntrack, create new\n");
+		err = -ENOENT;
+		if (nlh->nlmsg_flags & NLM_F_CREATE)
+			err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
+		return err;
+	}
+	/* implicit 'else' */
+
+	/* we only allow nat config for new conntracks */
+	if (cda[CTA_NAT-1]) {
+		err = -EINVAL;
+		goto out_unlock;
+	}
+
+	/* We manipulate the conntrack inside the global conntrack table lock,
+	 * so there's no need to increase the refcount */
+	DEBUGP("conntrack found\n");
+	err = -EEXIST;
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+		err = ctnetlink_change_conntrack(nf_ct_tuplehash_to_ctrack(h), cda);
+
+out_unlock:
+	write_unlock_bh(&nf_conntrack_lock);
+	return err;
+}
+
+/*********************************************************************** 
+ * EXPECT 
+ ***********************************************************************/ 
+
+static inline int
+ctnetlink_exp_dump_tuple(struct sk_buff *skb,
+			 const struct nf_conntrack_tuple *tuple,
+			 enum ctattr_expect type)
+{
+	struct nfattr *nest_parms = NFA_NEST(skb, type);
+	
+	if (ctnetlink_dump_tuples(skb, tuple) < 0)
+		goto nfattr_failure;
+
+	NFA_NEST_END(skb, nest_parms);
+
+	return 0;
+
+nfattr_failure:
+	return -1;
+}			
+
+static inline int
+ctnetlink_exp_dump_expect(struct sk_buff *skb,
+                          const struct nf_conntrack_expect *exp)
+{
+	struct nf_conn *master = exp->master;
+	u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
+	u_int32_t id = htonl(exp->id);
+
+	if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
+		goto nfattr_failure;
+	if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
+		goto nfattr_failure;
+	if (ctnetlink_exp_dump_tuple(skb,
+				 &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+				 CTA_EXPECT_MASTER) < 0)
+		goto nfattr_failure;
+	
+	NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
+	NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
+
+	return 0;
+	
+nfattr_failure:
+	return -1;
+}
+
+static int
+ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
+		    int event, 
+		    int nowait, 
+		    const struct nf_conntrack_expect *exp)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	unsigned char *b;
+
+	b = skb->tail;
+
+	event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
+	nlh    = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
+	nfmsg  = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = (nowait && pid) ? NLM_F_MULTI : 0;
+	nfmsg->nfgen_family = exp->tuple.src.l3num;
+	nfmsg->version	    = NFNETLINK_V0;
+	nfmsg->res_id	    = 0;
+
+	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	return skb->len;
+
+nlmsg_failure:
+nfattr_failure:
+	skb_trim(skb, b - skb->data);
+	return -1;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static int ctnetlink_expect_event(struct notifier_block *this,
+				  unsigned long events, void *ptr)
+{
+	struct nlmsghdr *nlh;
+	struct nfgenmsg *nfmsg;
+	struct nf_conntrack_expect *exp = (struct nf_conntrack_expect *)ptr;
+	struct sk_buff *skb;
+	unsigned int type;
+	unsigned char *b;
+	int flags = 0;
+
+	if (events & IPEXP_NEW) {
+		type = IPCTNL_MSG_EXP_NEW;
+		flags = NLM_F_CREATE|NLM_F_EXCL;
+	} else
+		return NOTIFY_DONE;
+
+	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
+	if (!skb)
+		return NOTIFY_DONE;
+
+	b = skb->tail;
+
+	type |= NFNL_SUBSYS_CTNETLINK << 8;
+	nlh   = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
+	nfmsg = NLMSG_DATA(nlh);
+
+	nlh->nlmsg_flags    = flags;
+	nfmsg->nfgen_family = exp->tuple.src.l3num;
+	nfmsg->version	    = NFNETLINK_V0;
+	nfmsg->res_id	    = 0;
+
+	if (ctnetlink_exp_dump_expect(skb, exp) < 0)
+		goto nfattr_failure;
+
+	nlh->nlmsg_len = skb->tail - b;
+	nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
+	return NOTIFY_DONE;
+
+nlmsg_failure:
+nfattr_failure:
+	kfree_skb(skb);
+	return NOTIFY_DONE;
+}
+#endif
+
+static int
+ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct nf_conntrack_expect *exp = NULL;
+	struct list_head *i;
+	u_int32_t *id = (u_int32_t *) &cb->args[0];
+
+	DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
+
+	read_lock_bh(&nf_conntrack_lock);
+	list_for_each_prev(i, &nf_conntrack_expect_list) {
+		exp = (struct nf_conntrack_expect *) i;
+		if (exp->id <= *id)
+			continue;
+		if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
+					    cb->nlh->nlmsg_seq,
+					    IPCTNL_MSG_EXP_NEW,
+					    1, exp) < 0)
+			goto out;
+		*id = exp->id;
+	}
+out:	
+	read_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving, last id=%llu\n", *id);
+
+	return skb->len;
+}
+
+static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
+	[CTA_EXPECT_TIMEOUT-1]          = sizeof(u_int32_t),
+	[CTA_EXPECT_ID-1]               = sizeof(u_int32_t)
+};
+
+static int
+ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, 
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_expect *exp;
+	struct sk_buff *skb2;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (nlh->nlmsg_flags & NLM_F_DUMP) {
+		u32 rlen;
+
+		if (nfmsg->nfgen_family != AF_INET)
+			return -EAFNOSUPPORT;
+
+		if ((*errp = netlink_dump_start(ctnl, skb, nlh,
+		    				ctnetlink_exp_dump_table,
+						ctnetlink_done)) != 0)
+			return -EINVAL;
+		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
+		if (rlen > skb->len)
+			rlen = skb->len;
+		skb_pull(skb, rlen);
+		return 0;
+	}
+
+	if (cda[CTA_EXPECT_MASTER-1])
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);
+	else
+		return -EINVAL;
+
+	if (err < 0)
+		return err;
+
+	exp = nf_conntrack_expect_find(&tuple);
+	if (!exp)
+		return -ENOENT;
+
+	if (cda[CTA_EXPECT_ID-1]) {
+		u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+		if (exp->id != ntohl(id)) {
+			nf_conntrack_expect_put(exp);
+			return -ENOENT;
+		}
+	}	
+
+	err = -ENOMEM;
+	skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb2)
+		goto out;
+	NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
+	
+	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, 
+				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
+				      1, exp);
+	if (err <= 0)
+		goto free;
+
+	nf_conntrack_expect_put(exp);
+
+	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
+
+free:
+	kfree_skb(skb2);
+out:
+	nf_conntrack_expect_put(exp);
+	return err;
+}
+
+static int
+ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, 
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_expect *exp, *tmp;
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_helper *h;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err;
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (cda[CTA_EXPECT_TUPLE-1]) {
+		/* delete a single expect by tuple */
+		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+		if (err < 0)
+			return err;
+
+		/* bump usage count to 2 */
+		exp = nf_conntrack_expect_find(&tuple);
+		if (!exp)
+			return -ENOENT;
+
+		if (cda[CTA_EXPECT_ID-1]) {
+			u_int32_t id = 
+				*(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
+			if (exp->id != ntohl(id)) {
+				nf_conntrack_expect_put(exp);
+				return -ENOENT;
+			}
+		}
+
+		/* after list removal, usage count == 1 */
+		nf_conntrack_unexpect_related(exp);
+		/* have to put what we 'get' above. 
+		 * after this line usage count == 0 */
+		nf_conntrack_expect_put(exp);
+	} else if (cda[CTA_EXPECT_HELP_NAME-1]) {
+		char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
+
+		/* delete all expectations for this helper */
+		write_lock_bh(&nf_conntrack_lock);
+		h = __nf_conntrack_helper_find_byname(name);
+		if (!h) {
+			write_unlock_bh(&nf_conntrack_lock);
+			return -EINVAL;
+		}
+		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
+					 list) {
+			if (exp->master->helper == h 
+			    && del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect(exp);
+				nf_conntrack_expect_put(exp);
+			}
+		}
+		write_unlock_bh(&nf_conntrack_lock);
+	} else {
+		/* This basically means we have to flush everything*/
+		write_lock_bh(&nf_conntrack_lock);
+		list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list,
+					 list) {
+			if (del_timer(&exp->timeout)) {
+				nf_ct_unlink_expect(exp);
+				nf_conntrack_expect_put(exp);
+			}
+		}
+		write_unlock_bh(&nf_conntrack_lock);
+	}
+
+	return 0;
+}
+static int
+ctnetlink_change_expect(struct nf_conntrack_expect *x, struct nfattr *cda[])
+{
+	return -EOPNOTSUPP;
+}
+
+static int
+ctnetlink_create_expect(struct nfattr *cda[], u_int8_t u3)
+{
+	struct nf_conntrack_tuple tuple, mask, master_tuple;
+	struct nf_conntrack_tuple_hash *h = NULL;
+	struct nf_conntrack_expect *exp;
+	struct nf_conn *ct;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);
+
+	/* caller guarantees that those three CTA_EXPECT_* exist */
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK, u3);
+	if (err < 0)
+		return err;
+	err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER, u3);
+	if (err < 0)
+		return err;
+
+	/* Look for master conntrack of this expectation */
+	h = nf_conntrack_find_get(&master_tuple, NULL);
+	if (!h)
+		return -ENOENT;
+	ct = nf_ct_tuplehash_to_ctrack(h);
+
+	if (!ct->helper) {
+		/* such conntrack hasn't got any helper, abort */
+		err = -EINVAL;
+		goto out;
+	}
+
+	exp = nf_conntrack_expect_alloc(ct);
+	if (!exp) {
+		err = -ENOMEM;
+		goto out;
+	}
+	
+	exp->expectfn = NULL;
+	exp->flags = 0;
+	exp->master = ct;
+	memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple));
+	memcpy(&exp->mask, &mask, sizeof(struct nf_conntrack_tuple));
+
+	err = nf_conntrack_expect_related(exp);
+	nf_conntrack_expect_put(exp);
+
+out:	
+	nf_ct_put(nf_ct_tuplehash_to_ctrack(h));
+	return err;
+}
+
+static int
+ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
+		     struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
+{
+	struct nf_conntrack_tuple tuple;
+	struct nf_conntrack_expect *exp;
+	struct nfgenmsg *nfmsg = NLMSG_DATA(nlh);
+	u_int8_t u3 = nfmsg->nfgen_family;
+	int err = 0;
+
+	DEBUGP("entered %s\n", __FUNCTION__);	
+
+	if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
+		return -EINVAL;
+
+	if (!cda[CTA_EXPECT_TUPLE-1]
+	    || !cda[CTA_EXPECT_MASK-1]
+	    || !cda[CTA_EXPECT_MASTER-1])
+		return -EINVAL;
+
+	err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3);
+	if (err < 0)
+		return err;
+
+	write_lock_bh(&nf_conntrack_lock);
+	exp = __nf_conntrack_expect_find(&tuple);
+
+	if (!exp) {
+		write_unlock_bh(&nf_conntrack_lock);
+		err = -ENOENT;
+		if (nlh->nlmsg_flags & NLM_F_CREATE)
+			err = ctnetlink_create_expect(cda, u3);
+		return err;
+	}
+
+	err = -EEXIST;
+	if (!(nlh->nlmsg_flags & NLM_F_EXCL))
+		err = ctnetlink_change_expect(exp, cda);
+	write_unlock_bh(&nf_conntrack_lock);
+
+	DEBUGP("leaving\n");
+	
+	return err;
+}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+static struct notifier_block ctnl_notifier = {
+	.notifier_call	= ctnetlink_conntrack_event,
+};
+
+static struct notifier_block ctnl_notifier_exp = {
+	.notifier_call	= ctnetlink_expect_event,
+};
+#endif
+
+static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
+	[IPCTNL_MSG_CT_NEW]		= { .call = ctnetlink_new_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_GET] 		= { .call = ctnetlink_get_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_DELETE]  	= { .call = ctnetlink_del_conntrack,
+					    .attr_count = CTA_MAX, },
+	[IPCTNL_MSG_CT_GET_CTRZERO] 	= { .call = ctnetlink_get_conntrack,
+					    .attr_count = CTA_MAX, },
+};
+
+static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
+	[IPCTNL_MSG_EXP_GET]		= { .call = ctnetlink_get_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+	[IPCTNL_MSG_EXP_NEW]		= { .call = ctnetlink_new_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+	[IPCTNL_MSG_EXP_DELETE]		= { .call = ctnetlink_del_expect,
+					    .attr_count = CTA_EXPECT_MAX, },
+};
+
+static struct nfnetlink_subsystem ctnl_subsys = {
+	.name				= "conntrack",
+	.subsys_id			= NFNL_SUBSYS_CTNETLINK,
+	.cb_count			= IPCTNL_MSG_MAX,
+	.cb				= ctnl_cb,
+};
+
+static struct nfnetlink_subsystem ctnl_exp_subsys = {
+	.name				= "conntrack_expect",
+	.subsys_id			= NFNL_SUBSYS_CTNETLINK_EXP,
+	.cb_count			= IPCTNL_MSG_EXP_MAX,
+	.cb				= ctnl_exp_cb,
+};
+
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
+
+static int __init ctnetlink_init(void)
+{
+	int ret;
+
+	printk("ctnetlink v%s: registering with nfnetlink.\n", version);
+	ret = nfnetlink_subsys_register(&ctnl_subsys);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register with nfnetlink.\n");
+		goto err_out;
+	}
+
+	ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
+		goto err_unreg_subsys;
+	}
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	ret = nf_conntrack_register_notifier(&ctnl_notifier);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot register notifier.\n");
+		goto err_unreg_exp_subsys;
+	}
+
+	ret = nf_conntrack_expect_register_notifier(&ctnl_notifier_exp);
+	if (ret < 0) {
+		printk("ctnetlink_init: cannot expect register notifier.\n");
+		goto err_unreg_notifier;
+	}
+#endif
+
+	return 0;
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+err_unreg_notifier:
+	nf_conntrack_unregister_notifier(&ctnl_notifier);
+err_unreg_exp_subsys:
+	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+#endif
+err_unreg_subsys:
+	nfnetlink_subsys_unregister(&ctnl_subsys);
+err_out:
+	return ret;
+}
+
+static void __exit ctnetlink_exit(void)
+{
+	printk("ctnetlink: unregistering from nfnetlink.\n");
+
+#ifdef CONFIG_NF_CONNTRACK_EVENTS
+	nf_conntrack_unregister_notifier(&ctnl_notifier_exp);
+	nf_conntrack_unregister_notifier(&ctnl_notifier);
+#endif
+
+	nfnetlink_subsys_unregister(&ctnl_exp_subsys);
+	nfnetlink_subsys_unregister(&ctnl_subsys);
+	return;
+}
+
+module_init(ctnetlink_init);
+module_exit(ctnetlink_exit);
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 6035633d8225..6167137a5cb5 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -1147,6 +1147,63 @@ static int tcp_new(struct nf_conn *conntrack,
 		receiver->td_scale);
 	return 1;
 }
+
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+
+static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
+			 const struct nf_conn *ct)
+{
+	struct nfattr *nest_parms;
+	
+	read_lock_bh(&tcp_lock);
+	nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
+	NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
+		&ct->proto.tcp.state);
+	read_unlock_bh(&tcp_lock);
+
+	NFA_NEST_END(skb, nest_parms);
+
+	return 0;
+
+nfattr_failure:
+	read_unlock_bh(&tcp_lock);
+	return -1;
+}
+
+static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
+	[CTA_PROTOINFO_TCP_STATE-1]	= sizeof(u_int8_t),
+};
+
+static int nfattr_to_tcp(struct nfattr *cda[], struct nf_conn *ct)
+{
+	struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
+	struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
+
+	/* updates could not contain anything about the private
+	 * protocol info, in that case skip the parsing */
+	if (!attr)
+		return 0;
+
+        nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
+
+	if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
+		return -EINVAL;
+
+	if (!tb[CTA_PROTOINFO_TCP_STATE-1])
+		return -EINVAL;
+
+	write_lock_bh(&tcp_lock);
+	ct->proto.tcp.state = 
+		*(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
+	write_unlock_bh(&tcp_lock);
+
+	return 0;
+}
+#endif
   
 struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
 {
@@ -1160,6 +1217,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp4 =
 	.packet 		= tcp_packet,
 	.new 			= tcp_new,
 	.error			= tcp_error4,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nfattr		= tcp_to_nfattr,
+	.from_nfattr		= nfattr_to_tcp,
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
@@ -1174,6 +1238,13 @@ struct nf_conntrack_protocol nf_conntrack_protocol_tcp6 =
 	.packet 		= tcp_packet,
 	.new 			= tcp_new,
 	.error			= tcp_error6,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.to_nfattr		= tcp_to_nfattr,
+	.from_nfattr		= nfattr_to_tcp,
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_tcp4);
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3cae7ce420dd..1a592a556182 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -196,6 +196,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp4 =
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error4,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
@@ -210,6 +215,11 @@ struct nf_conntrack_protocol nf_conntrack_protocol_udp6 =
 	.packet			= udp_packet,
 	.new			= udp_new,
 	.error			= udp_error6,
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+	.tuple_to_nfattr	= nf_ct_port_tuple_to_nfattr,
+	.nfattr_to_tuple	= nf_ct_port_nfattr_to_tuple,
+#endif
 };
 
 EXPORT_SYMBOL(nf_conntrack_protocol_udp4);
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 5af381f9fe3d..d17e42b28c79 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -161,14 +161,14 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (NF_CT_DIRECTION(hash))
 		return 0;
 
-	l3proto = nf_ct_find_l3proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				     .tuple.src.l3num);
+	l3proto = __nf_ct_l3proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				       .tuple.src.l3num);
 
 	NF_CT_ASSERT(l3proto);
-	proto = nf_ct_find_proto(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				 .tuple.src.l3num,
-				 conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
-				 .tuple.dst.protonum);
+	proto = __nf_ct_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				   .tuple.src.l3num,
+				   conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
+				   .tuple.dst.protonum);
 	NF_CT_ASSERT(proto);
 
 	if (seq_printf(s, "%-8s %u %-8s %u %ld ",
@@ -307,9 +307,9 @@ static int exp_seq_show(struct seq_file *s, void *v)
 		   expect->tuple.src.l3num,
 		   expect->tuple.dst.protonum);
 	print_tuple(s, &expect->tuple,
-		    nf_ct_find_l3proto(expect->tuple.src.l3num),
-		    nf_ct_find_proto(expect->tuple.src.l3num,
-				     expect->tuple.dst.protonum));
+		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
+		    __nf_ct_proto_find(expect->tuple.src.l3num,
+				       expect->tuple.dst.protonum));
 	return seq_putc(s, '\n');
 }
 
@@ -847,7 +847,11 @@ EXPORT_SYMBOL(nf_conntrack_helper_unregister);
 EXPORT_SYMBOL(nf_ct_iterate_cleanup);
 EXPORT_SYMBOL(__nf_ct_refresh_acct);
 EXPORT_SYMBOL(nf_ct_protos);
-EXPORT_SYMBOL(nf_ct_find_proto);
+EXPORT_SYMBOL(__nf_ct_proto_find);
+EXPORT_SYMBOL(nf_ct_proto_find_get);
+EXPORT_SYMBOL(nf_ct_proto_put);
+EXPORT_SYMBOL(nf_ct_l3proto_find_get);
+EXPORT_SYMBOL(nf_ct_l3proto_put);
 EXPORT_SYMBOL(nf_ct_l3protos);
 EXPORT_SYMBOL(nf_conntrack_expect_alloc);
 EXPORT_SYMBOL(nf_conntrack_expect_put);
@@ -867,3 +871,21 @@ EXPORT_SYMBOL(nf_ct_get_tuple);
 EXPORT_SYMBOL(nf_ct_invert_tuple);
 EXPORT_SYMBOL(nf_conntrack_in);
 EXPORT_SYMBOL(__nf_conntrack_attach);
+EXPORT_SYMBOL(nf_conntrack_alloc);
+EXPORT_SYMBOL(nf_conntrack_free);
+EXPORT_SYMBOL(nf_conntrack_flush);
+EXPORT_SYMBOL(nf_ct_remove_expectations);
+EXPORT_SYMBOL(nf_ct_helper_find_get);
+EXPORT_SYMBOL(nf_ct_helper_put);
+EXPORT_SYMBOL(__nf_conntrack_helper_find_byname);
+EXPORT_SYMBOL(__nf_conntrack_find);
+EXPORT_SYMBOL(nf_ct_unlink_expect);
+EXPORT_SYMBOL(nf_conntrack_hash_insert);
+EXPORT_SYMBOL(__nf_conntrack_expect_find);
+EXPORT_SYMBOL(nf_conntrack_expect_find);
+EXPORT_SYMBOL(nf_conntrack_expect_list);
+#if defined(CONFIG_NF_CT_NETLINK) || \
+    defined(CONFIG_NF_CT_NETLINK_MODULE)
+EXPORT_SYMBOL(nf_ct_port_tuple_to_nfattr);
+EXPORT_SYMBOL(nf_ct_port_nfattr_to_tuple);
+#endif
-- 
cgit v1.2.3


From a9b305c4e56f97d6a2ae4f21691bc13797498caf Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 5 Jan 2006 12:20:02 -0800
Subject: [NETFILTER]: ctnetlink: Fix dumping of helper name

Properly dump the helper name instead of internal kernel data.
Based on patch by Marcus Sundberg <marcus@ingate.com>.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter/nfnetlink_conntrack.h | 2 --
 net/ipv4/netfilter/ip_conntrack_netlink.c     | 2 +-
 2 files changed, 1 insertion(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h
index b8e9a5b6fb1e..668ec946c8e2 100644
--- a/include/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/linux/netfilter/nfnetlink_conntrack.h
@@ -131,6 +131,4 @@ enum ctattr_help {
 };
 #define CTA_HELP_MAX (__CTA_HELP_MAX - 1)
 
-#define CTA_HELP_MAXNAMESIZE	32
-
 #endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
index 703f2d2e3464..c9ebbe0d2d9c 100644
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ b/net/ipv4/netfilter/ip_conntrack_netlink.c
@@ -161,7 +161,7 @@ ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
 		return 0;
 		
 	nest_helper = NFA_NEST(skb, CTA_HELP);
-	NFA_PUT(skb, CTA_HELP_NAME, CTA_HELP_MAXNAMESIZE, &ct->helper->name);
+	NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
 
 	if (ct->helper->to_nfattr)
 		ct->helper->to_nfattr(skb, ct);
-- 
cgit v1.2.3


From b777e0ce7437a0e788e2aeb42aca9af2cce1f2e1 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 5 Jan 2006 12:21:16 -0800
Subject: [NETFILTER]: make ipv6_find_hdr() find transport protocol header

The original ipv6_find_hdr() finds the specified header in IPv6 packets.
This makes it possible to get transport header so that we can kill similar
loop in ip6_match_packet().

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h |   2 +-
 net/ipv6/netfilter/ip6_tables.c           | 106 +++++++++++-------------------
 net/ipv6/netfilter/ip6t_ah.c              |   2 +-
 net/ipv6/netfilter/ip6t_dst.c             |   4 +-
 net/ipv6/netfilter/ip6t_esp.c             |   2 +-
 net/ipv6/netfilter/ip6t_frag.c            |   2 +-
 net/ipv6/netfilter/ip6t_hbh.c             |   4 +-
 net/ipv6/netfilter/ip6t_rt.c              |   2 +-
 8 files changed, 49 insertions(+), 75 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index 2efc046d9e94..a291cb76ef18 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -474,7 +474,7 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb,
 extern int ip6t_ext_hdr(u8 nexthdr);
 /* find specified header and get offset to it */
 extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
-			 u8 target);
+			 int target, unsigned short *fragoff);
 
 #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1))
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index ea43ef1d94a7..13b1a525b92c 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -205,69 +205,21 @@ ip6_packet_match(const struct sk_buff *skb,
 
 	/* look for the desired protocol header */
 	if((ip6info->flags & IP6T_F_PROTO)) {
-		u_int8_t currenthdr = ipv6->nexthdr;
-		struct ipv6_opt_hdr _hdr, *hp;
-		u_int16_t ptr;		/* Header offset in skb */
-		u_int16_t hdrlen;	/* Header */
-		u_int16_t _fragoff = 0, *fp = NULL;
-
-		ptr = IPV6_HDR_LEN;
-
-		while (ip6t_ext_hdr(currenthdr)) {
-	                /* Is there enough space for the next ext header? */
-	                if (skb->len - ptr < IPV6_OPTHDR_LEN)
-	                        return 0;
-
-			/* NONE or ESP: there isn't protocol part */
-			/* If we want to count these packets in '-p all',
-			 * we will change the return 0 to 1*/
-			if ((currenthdr == IPPROTO_NONE) || 
-				(currenthdr == IPPROTO_ESP))
-				break;
+		int protohdr;
+		unsigned short _frag_off;
 
-			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-			BUG_ON(hp == NULL);
-
-			/* Size calculation */
-	                if (currenthdr == IPPROTO_FRAGMENT) {
-				fp = skb_header_pointer(skb,
-						   ptr+offsetof(struct frag_hdr,
-								frag_off),
-						   sizeof(_fragoff),
-						   &_fragoff);
-				if (fp == NULL)
-					return 0;
-
-				_fragoff = ntohs(*fp) & ~0x7;
-	                        hdrlen = 8;
-	                } else if (currenthdr == IPPROTO_AH)
-	                        hdrlen = (hp->hdrlen+2)<<2;
-	                else
-	                        hdrlen = ipv6_optlen(hp);
-
-			currenthdr = hp->nexthdr;
-	                ptr += hdrlen;
-			/* ptr is too large */
-	                if ( ptr > skb->len ) 
-				return 0;
-			if (_fragoff) {
-				if (ip6t_ext_hdr(currenthdr))
-					return 0;
-				break;
-			}
-		}
-
-		*protoff = ptr;
-		*fragoff = _fragoff;
+		protohdr = ipv6_find_hdr(skb, protoff, -1, &_frag_off);
+		if (protohdr < 0)
+			return 0;
 
-		/* currenthdr contains the protocol header */
+		*fragoff = _frag_off;
 
 		dprintf("Packet protocol %hi ?= %s%hi.\n",
-				currenthdr, 
+				protohdr, 
 				ip6info->invflags & IP6T_INV_PROTO ? "!":"",
 				ip6info->proto);
 
-		if (ip6info->proto == currenthdr) {
+		if (ip6info->proto == protohdr) {
 			if(ip6info->invflags & IP6T_INV_PROTO) {
 				return 0;
 			}
@@ -2098,26 +2050,39 @@ static void __exit fini(void)
 }
 
 /*
- * find specified header up to transport protocol header.
- * If found target header, the offset to the header is set to *offset
- * and return 0. otherwise, return -1.
+ * find the offset to specified header or the protocol number of last header
+ * if target < 0. "last header" is transport protocol header, ESP, or
+ * "No next header".
+ *
+ * If target header is found, its offset is set in *offset and return protocol
+ * number. Otherwise, return -1.
+ *
+ * Note that non-1st fragment is special case that "the protocol number
+ * of last header" is "next header" field in Fragment header. In this case,
+ * *offset is meaningless and fragment offset is stored in *fragoff if fragoff
+ * isn't NULL.
  *
- * Notes: - non-1st Fragment Header isn't skipped.
- *	  - ESP header isn't skipped.
- *	  - The target header may be trancated.
  */
-int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
+int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
+		  int target, unsigned short *fragoff)
 {
 	unsigned int start = (u8*)(skb->nh.ipv6h + 1) - skb->data;
 	u8 nexthdr = skb->nh.ipv6h->nexthdr;
 	unsigned int len = skb->len - start;
 
+	if (fragoff)
+		*fragoff = 0;
+
 	while (nexthdr != target) {
 		struct ipv6_opt_hdr _hdr, *hp;
 		unsigned int hdrlen;
 
-		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE)
+		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) {
+			if (target < 0)
+				break;
 			return -1;
+		}
+
 		hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr);
 		if (hp == NULL)
 			return -1;
@@ -2131,8 +2096,17 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 			if (fp == NULL)
 				return -1;
 
-			if (ntohs(*fp) & ~0x7)
+			_frag_off = ntohs(*fp) & ~0x7;
+			if (_frag_off) {
+				if (target < 0 &&
+				    ((!ipv6_ext_hdr(hp->nexthdr)) ||
+				     nexthdr == NEXTHDR_NONE)) {
+					if (fragoff)
+						*fragoff = _frag_off;
+					return hp->nexthdr;
+				}
 				return -1;
+			}
 			hdrlen = 8;
 		} else if (nexthdr == NEXTHDR_AUTH)
 			hdrlen = (hp->hdrlen + 2) << 2; 
@@ -2145,7 +2119,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, u8 target)
 	}
 
 	*offset = start;
-	return 0;
+	return nexthdr;
 }
 
 EXPORT_SYMBOL(ip6t_register_table);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 268918d5deea..f5c1a7ff4a1f 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -54,7 +54,7 @@ match(const struct sk_buff *skb,
 	unsigned int ptr;
 	unsigned int hdrlen = 0;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL) < 0)
 		return 0;
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
diff --git a/net/ipv6/netfilter/ip6t_dst.c b/net/ipv6/netfilter/ip6t_dst.c
index c450a635e54b..48cf5f9efc95 100644
--- a/net/ipv6/netfilter/ip6t_dst.c
+++ b/net/ipv6/netfilter/ip6t_dst.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
        unsigned int optlen;
        
 #if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
 #else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
 #endif
 		return 0;
 
diff --git a/net/ipv6/netfilter/ip6t_esp.c b/net/ipv6/netfilter/ip6t_esp.c
index 65937de1b58c..e1828f6d0a40 100644
--- a/net/ipv6/netfilter/ip6t_esp.c
+++ b/net/ipv6/netfilter/ip6t_esp.c
@@ -56,7 +56,7 @@ match(const struct sk_buff *skb,
 	/* Make sure this isn't an evil packet */
 	/*DEBUGP("ipv6_esp entered \n");*/
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ESP, NULL) < 0)
 		return 0;
 
 	eh = skb_header_pointer(skb, ptr, sizeof(_esp), &_esp);
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 085d5f8eea29..d1549b268669 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -52,7 +52,7 @@ match(const struct sk_buff *skb,
        const struct ip6t_frag *fraginfo = matchinfo;
        unsigned int ptr;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL) < 0)
 		return 0;
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index 1d09485111d0..e3bc8e2700e7 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -71,9 +71,9 @@ match(const struct sk_buff *skb,
        unsigned int optlen;
        
 #if HOPBYHOP
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0)
 #else
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0)
 #endif
 		return 0;
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index beb2fd5cebbb..c1e770e45543 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -58,7 +58,7 @@ match(const struct sk_buff *skb,
        unsigned int ret = 0;
        struct in6_addr *ap, _addr;
 
-	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING) < 0)
+	if (ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL) < 0)
 		return 0;
 
        rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
-- 
cgit v1.2.3


From 22dea562bb56dbc3430c8f23f60ccd38527b1f5a Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 5 Jan 2006 12:21:34 -0800
Subject: [NETFILTER]: Export ip6_masked_addrcmp, don't pass IPv6 addresses on
 stack

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h |  4 ++++
 net/ipv6/netfilter/ip6_tables.c           | 18 ++++++++++--------
 2 files changed, 14 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index a291cb76ef18..c163ba31aab7 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -476,6 +476,10 @@ extern int ip6t_ext_hdr(u8 nexthdr);
 extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 			 int target, unsigned short *fragoff);
 
+extern int ip6_masked_addrcmp(const struct in6_addr *addr1,
+			      const struct in6_addr *mask,
+			      const struct in6_addr *addr2);
+
 #define IP6T_ALIGN(s) (((s) + (__alignof__(struct ip6t_entry)-1)) & ~(__alignof__(struct ip6t_entry)-1))
 
 #endif /*__KERNEL__*/
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 13b1a525b92c..925b42d48347 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -119,13 +119,14 @@ static LIST_HEAD(ip6t_tables);
 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
 #endif
 
-static int ip6_masked_addrcmp(struct in6_addr addr1, struct in6_addr mask,
-			      struct in6_addr addr2)
+int
+ip6_masked_addrcmp(const struct in6_addr *addr1, const struct in6_addr *mask,
+                   const struct in6_addr *addr2)
 {
 	int i;
 	for( i = 0; i < 16; i++){
-		if((addr1.s6_addr[i] & mask.s6_addr[i]) != 
-		   (addr2.s6_addr[i] & mask.s6_addr[i]))
+		if((addr1->s6_addr[i] & mask->s6_addr[i]) != 
+		   (addr2->s6_addr[i] & mask->s6_addr[i]))
 			return 1;
 	}
 	return 0;
@@ -159,10 +160,10 @@ ip6_packet_match(const struct sk_buff *skb,
 
 #define FWINV(bool,invflg) ((bool) ^ !!(ip6info->invflags & invflg))
 
-	if (FWINV(ip6_masked_addrcmp(ipv6->saddr,ip6info->smsk,ip6info->src),
-		  IP6T_INV_SRCIP)
-	    || FWINV(ip6_masked_addrcmp(ipv6->daddr,ip6info->dmsk,ip6info->dst),
-		     IP6T_INV_DSTIP)) {
+	if (FWINV(ip6_masked_addrcmp(&ipv6->saddr, &ip6info->smsk,
+	                             &ip6info->src), IP6T_INV_SRCIP)
+	    || FWINV(ip6_masked_addrcmp(&ipv6->daddr, &ip6info->dmsk,
+	                                &ip6info->dst), IP6T_INV_DSTIP)) {
 		dprintf("Source or dest mismatch.\n");
 /*
 		dprintf("SRC: %u. Mask: %u. Target: %u.%s\n", ip->saddr,
@@ -2131,6 +2132,7 @@ EXPORT_SYMBOL(ip6t_register_target);
 EXPORT_SYMBOL(ip6t_unregister_target);
 EXPORT_SYMBOL(ip6t_ext_hdr);
 EXPORT_SYMBOL(ipv6_find_hdr);
+EXPORT_SYMBOL(ip6_masked_addrcmp);
 
 module_init(init);
 module_exit(fini);
-- 
cgit v1.2.3


From ff179c8cf5caa17bf3d407edbb5872aa2eee6900 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 20:24:59 +0100
Subject: [PATCH] i2c: Drop i2c_driver.flags, 1 of 3

The I2C_DF_DUMMY flag is gone since 2.5.70, it's about time to
drop all ifdef'd out references thereto.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/media/video/tvmixer.c | 4 ----
 include/linux/i2c.h           | 5 -----
 2 files changed, 9 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c
index 8318bd1aad00..5897e5d4d3d2 100644
--- a/drivers/media/video/tvmixer.c
+++ b/drivers/media/video/tvmixer.c
@@ -232,12 +232,8 @@ static struct i2c_driver driver = {
 #endif
 	.name            = "tv card mixer driver",
 	.id              = I2C_DRIVERID_TVMIXER,
-#ifdef I2C_DF_DUMMY
-	.flags           = I2C_DF_DUMMY,
-#else
 	.flags           = I2C_DF_NOTIFY,
 	.detach_adapter  = tvmixer_adapters,
-#endif
 	.attach_adapter  = tvmixer_adapters,
 	.detach_client   = tvmixer_clients,
 };
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 5e19a7ba69b2..0316ba1294ca 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -252,11 +252,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 
 /*flags for the driver struct: */
 #define I2C_DF_NOTIFY	0x01		/* notify on bus (de/a)ttaches 	*/
-#if 0
-/* this flag is gone -- there is a (optional) driver->detach_adapter
- * callback now which can be used instead */
-# define I2C_DF_DUMMY	0x02
-#endif
 
 /*flags for the client struct: */
 #define I2C_CLIENT_ALLOW_USE		0x01	/* Client allows access */
-- 
cgit v1.2.3


From 8a9947552d43b0d20d5fa23ac0ba435d526be454 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 20:28:06 +0100
Subject: [PATCH] i2c: Drop i2c_driver.flags, 2 of 3

Just about every i2c chip driver sets the I2C_DF_NOTIFY flag, so we
can simply make it the default and drop the flag. If any driver really
doesn't want to be notified when i2c adapters are added, that driver
can simply omit to set .attach_adapter. This approach is also more
robust as it prevents accidental NULL pointer dereferences.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/i2c/porting-clients              | 3 +++
 Documentation/i2c/writing-clients              | 5 -----
 arch/arm/mach-pxa/akita-ioexp.c                | 1 -
 drivers/acorn/char/pcf8583.c                   | 1 -
 drivers/hwmon/adm1021.c                        | 1 -
 drivers/hwmon/adm1025.c                        | 1 -
 drivers/hwmon/adm1026.c                        | 1 -
 drivers/hwmon/adm1031.c                        | 1 -
 drivers/hwmon/adm9240.c                        | 1 -
 drivers/hwmon/asb100.c                         | 1 -
 drivers/hwmon/atxp1.c                          | 1 -
 drivers/hwmon/ds1621.c                         | 1 -
 drivers/hwmon/fscher.c                         | 1 -
 drivers/hwmon/fscpos.c                         | 1 -
 drivers/hwmon/gl518sm.c                        | 1 -
 drivers/hwmon/gl520sm.c                        | 1 -
 drivers/hwmon/it87.c                           | 1 -
 drivers/hwmon/lm63.c                           | 1 -
 drivers/hwmon/lm75.c                           | 1 -
 drivers/hwmon/lm77.c                           | 1 -
 drivers/hwmon/lm78.c                           | 1 -
 drivers/hwmon/lm80.c                           | 1 -
 drivers/hwmon/lm83.c                           | 1 -
 drivers/hwmon/lm85.c                           | 1 -
 drivers/hwmon/lm87.c                           | 1 -
 drivers/hwmon/lm90.c                           | 1 -
 drivers/hwmon/lm92.c                           | 1 -
 drivers/hwmon/max1619.c                        | 1 -
 drivers/hwmon/w83781d.c                        | 1 -
 drivers/hwmon/w83792d.c                        | 1 -
 drivers/hwmon/w83l785ts.c                      | 1 -
 drivers/i2c/chips/ds1337.c                     | 1 -
 drivers/i2c/chips/ds1374.c                     | 1 -
 drivers/i2c/chips/eeprom.c                     | 1 -
 drivers/i2c/chips/isp1301_omap.c               | 1 -
 drivers/i2c/chips/m41t00.c                     | 1 -
 drivers/i2c/chips/max6875.c                    | 1 -
 drivers/i2c/chips/pca9539.c                    | 1 -
 drivers/i2c/chips/pcf8574.c                    | 1 -
 drivers/i2c/chips/pcf8591.c                    | 1 -
 drivers/i2c/chips/rtc8564.c                    | 1 -
 drivers/i2c/chips/tps65010.c                   | 1 -
 drivers/i2c/chips/x1205.c                      | 1 -
 drivers/i2c/i2c-core.c                         | 4 ++--
 drivers/i2c/i2c-dev.c                          | 1 -
 drivers/macintosh/therm_adt746x.c              | 1 -
 drivers/macintosh/therm_pm72.c                 | 1 -
 drivers/macintosh/therm_windtunnel.c           | 1 -
 drivers/macintosh/windfarm_lm75_sensor.c       | 1 -
 drivers/media/video/adv7170.c                  | 1 -
 drivers/media/video/adv7175.c                  | 1 -
 drivers/media/video/bt819.c                    | 1 -
 drivers/media/video/bt832.c                    | 1 -
 drivers/media/video/bt856.c                    | 1 -
 drivers/media/video/cs53l32a.c                 | 1 -
 drivers/media/video/cx25840/cx25840-core.c     | 1 -
 drivers/media/video/indycam.c                  | 1 -
 drivers/media/video/ir-kbd-i2c.c               | 1 -
 drivers/media/video/msp3400.c                  | 1 -
 drivers/media/video/ovcamchip/ovcamchip_core.c | 1 -
 drivers/media/video/saa5246a.c                 | 1 -
 drivers/media/video/saa5249.c                  | 1 -
 drivers/media/video/saa6588.c                  | 1 -
 drivers/media/video/saa7110.c                  | 1 -
 drivers/media/video/saa7111.c                  | 1 -
 drivers/media/video/saa7114.c                  | 1 -
 drivers/media/video/saa7115.c                  | 1 -
 drivers/media/video/saa711x.c                  | 1 -
 drivers/media/video/saa7127.c                  | 1 -
 drivers/media/video/saa7134/saa6752hs.c        | 1 -
 drivers/media/video/saa7185.c                  | 1 -
 drivers/media/video/saa7191.c                  | 1 -
 drivers/media/video/tda7432.c                  | 1 -
 drivers/media/video/tda9840.c                  | 1 -
 drivers/media/video/tda9875.c                  | 1 -
 drivers/media/video/tda9887.c                  | 1 -
 drivers/media/video/tea6415c.c                 | 1 -
 drivers/media/video/tea6420.c                  | 1 -
 drivers/media/video/tuner-3036.c               | 1 -
 drivers/media/video/tuner-core.c               | 1 -
 drivers/media/video/tvaudio.c                  | 1 -
 drivers/media/video/tveeprom.c                 | 1 -
 drivers/media/video/tvmixer.c                  | 1 -
 drivers/media/video/tvp5150.c                  | 1 -
 drivers/media/video/vpx3220.c                  | 1 -
 drivers/media/video/wm8775.c                   | 1 -
 drivers/video/matrox/matroxfb_maven.c          | 1 -
 include/linux/i2c.h                            | 1 -
 sound/oss/dmasound/dac3550a.c                  | 1 -
 sound/oss/dmasound/tas_common.c                | 1 -
 sound/ppc/keywest.c                            | 1 -
 91 files changed, 5 insertions(+), 95 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients
index 184fac2377aa..64c610bf2fbc 100644
--- a/Documentation/i2c/porting-clients
+++ b/Documentation/i2c/porting-clients
@@ -109,6 +109,9 @@ Technical changes:
   there is a MODULE_LICENSE() line, at the bottom of the file
   (after MODULE_AUTHOR() and MODULE_DESCRIPTION(), in this order).
 
+* [Driver] The flags field of the i2c_driver structure is gone.
+  I2C_DF_NOTIFY is now the default behavior.
+
 Coding policy:
 
 * [Copyright] Use (C), not (c), for copyright.
diff --git a/Documentation/i2c/writing-clients b/Documentation/i2c/writing-clients
index d19993cc0604..59d2c169cd61 100644
--- a/Documentation/i2c/writing-clients
+++ b/Documentation/i2c/writing-clients
@@ -27,7 +27,6 @@ address.
 static struct i2c_driver foo_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "Foo version 2.3 driver",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= &foo_attach_adapter,
 	.detach_client	= &foo_detach_client,
 	.command	= &foo_command /* may be NULL */
@@ -36,10 +35,6 @@ static struct i2c_driver foo_driver = {
 The name field must match the driver name, including the case. It must not
 contain spaces, and may be up to 31 characters long.
 
-Don't worry about the flags field; just put I2C_DF_NOTIFY into it. This
-means that your driver will be notified when new adapters are found.
-This is almost always what you want.
-
 All other fields are for call-back functions which will be explained 
 below.
 
diff --git a/arch/arm/mach-pxa/akita-ioexp.c b/arch/arm/mach-pxa/akita-ioexp.c
index f6d73cc01f78..440ebb3c3db1 100644
--- a/arch/arm/mach-pxa/akita-ioexp.c
+++ b/arch/arm/mach-pxa/akita-ioexp.c
@@ -127,7 +127,6 @@ static struct i2c_driver max7310_i2c_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "akita-max7310",
 	.id		= I2C_DRIVERID_AKITAIOEXP,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= max7310_attach_adapter,
 	.detach_client	= max7310_detach_client,
 };
diff --git a/drivers/acorn/char/pcf8583.c b/drivers/acorn/char/pcf8583.c
index e26f007a1417..befc9469b4f2 100644
--- a/drivers/acorn/char/pcf8583.c
+++ b/drivers/acorn/char/pcf8583.c
@@ -259,7 +259,6 @@ pcf8583_command(struct i2c_client *client, unsigned int cmd, void *arg)
 static struct i2c_driver pcf8583_driver = {
 	.name		= "PCF8583",
 	.id		= I2C_DRIVERID_PCF8583,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= pcf8583_probe,
 	.detach_client	= pcf8583_detach,
 	.command	= pcf8583_command
diff --git a/drivers/hwmon/adm1021.c b/drivers/hwmon/adm1021.c
index 8102876c7c3f..5e6df4b7b857 100644
--- a/drivers/hwmon/adm1021.c
+++ b/drivers/hwmon/adm1021.c
@@ -129,7 +129,6 @@ static struct i2c_driver adm1021_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "adm1021",
 	.id		= I2C_DRIVERID_ADM1021,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= adm1021_attach_adapter,
 	.detach_client	= adm1021_detach_client,
 };
diff --git a/drivers/hwmon/adm1025.c b/drivers/hwmon/adm1025.c
index bf67860e6a20..2be48a7a90b8 100644
--- a/drivers/hwmon/adm1025.c
+++ b/drivers/hwmon/adm1025.c
@@ -121,7 +121,6 @@ static struct i2c_driver adm1025_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "adm1025",
 	.id		= I2C_DRIVERID_ADM1025,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= adm1025_attach_adapter,
 	.detach_client	= adm1025_detach_client,
 };
diff --git a/drivers/hwmon/adm1026.c b/drivers/hwmon/adm1026.c
index 5036b17a39cd..5416db809f97 100644
--- a/drivers/hwmon/adm1026.c
+++ b/drivers/hwmon/adm1026.c
@@ -310,7 +310,6 @@ static void adm1026_init_client(struct i2c_client *client);
 static struct i2c_driver adm1026_driver = {
 	.owner          = THIS_MODULE,
 	.name           = "adm1026",
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = adm1026_attach_adapter,
 	.detach_client  = adm1026_detach_client,
 };
diff --git a/drivers/hwmon/adm1031.c b/drivers/hwmon/adm1031.c
index 7c545d5eee45..1e24428090c4 100644
--- a/drivers/hwmon/adm1031.c
+++ b/drivers/hwmon/adm1031.c
@@ -107,7 +107,6 @@ static struct adm1031_data *adm1031_update_device(struct device *dev);
 static struct i2c_driver adm1031_driver = {
 	.owner = THIS_MODULE,
 	.name = "adm1031",
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = adm1031_attach_adapter,
 	.detach_client = adm1031_detach_client,
 };
diff --git a/drivers/hwmon/adm9240.c b/drivers/hwmon/adm9240.c
index 11dc95f8a17e..287733fe2c0d 100644
--- a/drivers/hwmon/adm9240.c
+++ b/drivers/hwmon/adm9240.c
@@ -140,7 +140,6 @@ static struct i2c_driver adm9240_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "adm9240",
 	.id		= I2C_DRIVERID_ADM9240,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= adm9240_attach_adapter,
 	.detach_client	= adm9240_detach_client,
 };
diff --git a/drivers/hwmon/asb100.c b/drivers/hwmon/asb100.c
index 52c469722a65..7227f800bef9 100644
--- a/drivers/hwmon/asb100.c
+++ b/drivers/hwmon/asb100.c
@@ -220,7 +220,6 @@ static struct i2c_driver asb100_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "asb100",
 	.id		= I2C_DRIVERID_ASB100,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= asb100_attach_adapter,
 	.detach_client	= asb100_detach_client,
 };
diff --git a/drivers/hwmon/atxp1.c b/drivers/hwmon/atxp1.c
index 53324f56404e..a60a9f20281b 100644
--- a/drivers/hwmon/atxp1.c
+++ b/drivers/hwmon/atxp1.c
@@ -52,7 +52,6 @@ static int atxp1_detect(struct i2c_adapter *adapter, int address, int kind);
 static struct i2c_driver atxp1_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "atxp1",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = atxp1_attach_adapter,
 	.detach_client	= atxp1_detach_client,
 };
diff --git a/drivers/hwmon/ds1621.c b/drivers/hwmon/ds1621.c
index 34f71b7c7f37..0096eb37c663 100644
--- a/drivers/hwmon/ds1621.c
+++ b/drivers/hwmon/ds1621.c
@@ -92,7 +92,6 @@ static struct i2c_driver ds1621_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "ds1621",
 	.id		= I2C_DRIVERID_DS1621,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= ds1621_attach_adapter,
 	.detach_client	= ds1621_detach_client,
 };
diff --git a/drivers/hwmon/fscher.c b/drivers/hwmon/fscher.c
index a02e1c34c757..f56ca06dbf88 100644
--- a/drivers/hwmon/fscher.c
+++ b/drivers/hwmon/fscher.c
@@ -121,7 +121,6 @@ static struct i2c_driver fscher_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "fscher",
 	.id		= I2C_DRIVERID_FSCHER,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= fscher_attach_adapter,
 	.detach_client	= fscher_detach_client,
 };
diff --git a/drivers/hwmon/fscpos.c b/drivers/hwmon/fscpos.c
index 64e4edc64f8d..701dffc2ceed 100644
--- a/drivers/hwmon/fscpos.c
+++ b/drivers/hwmon/fscpos.c
@@ -103,7 +103,6 @@ static struct i2c_driver fscpos_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "fscpos",
 	.id		= I2C_DRIVERID_FSCPOS,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= fscpos_attach_adapter,
 	.detach_client	= fscpos_detach_client,
 };
diff --git a/drivers/hwmon/gl518sm.c b/drivers/hwmon/gl518sm.c
index 2f178dbe3d87..5788bbb77d8d 100644
--- a/drivers/hwmon/gl518sm.c
+++ b/drivers/hwmon/gl518sm.c
@@ -154,7 +154,6 @@ static struct i2c_driver gl518_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "gl518sm",
 	.id		= I2C_DRIVERID_GL518,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= gl518_attach_adapter,
 	.detach_client	= gl518_detach_client,
 };
diff --git a/drivers/hwmon/gl520sm.c b/drivers/hwmon/gl520sm.c
index c39ba1239426..b3998165193d 100644
--- a/drivers/hwmon/gl520sm.c
+++ b/drivers/hwmon/gl520sm.c
@@ -112,7 +112,6 @@ static struct i2c_driver gl520_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "gl520sm",
 	.id		= I2C_DRIVERID_GL520,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= gl520_attach_adapter,
 	.detach_client	= gl520_detach_client,
 };
diff --git a/drivers/hwmon/it87.c b/drivers/hwmon/it87.c
index a61f5d00f10a..d5f0d92378c5 100644
--- a/drivers/hwmon/it87.c
+++ b/drivers/hwmon/it87.c
@@ -237,7 +237,6 @@ static struct i2c_driver it87_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "it87",
 	.id		= I2C_DRIVERID_IT87,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= it87_attach_adapter,
 	.detach_client	= it87_detach_client,
 };
diff --git a/drivers/hwmon/lm63.c b/drivers/hwmon/lm63.c
index 954ec2497249..c2dd4ac8042d 100644
--- a/drivers/hwmon/lm63.c
+++ b/drivers/hwmon/lm63.c
@@ -141,7 +141,6 @@ static void lm63_init_client(struct i2c_client *client);
 static struct i2c_driver lm63_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm63",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm63_attach_adapter,
 	.detach_client	= lm63_detach_client,
 };
diff --git a/drivers/hwmon/lm75.c b/drivers/hwmon/lm75.c
index d70f4c8fc1e6..0bcbd6515139 100644
--- a/drivers/hwmon/lm75.c
+++ b/drivers/hwmon/lm75.c
@@ -69,7 +69,6 @@ static struct i2c_driver lm75_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm75",
 	.id		= I2C_DRIVERID_LM75,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm75_attach_adapter,
 	.detach_client	= lm75_detach_client,
 };
diff --git a/drivers/hwmon/lm77.c b/drivers/hwmon/lm77.c
index 9380fda7dcd1..6a524e92432e 100644
--- a/drivers/hwmon/lm77.c
+++ b/drivers/hwmon/lm77.c
@@ -76,7 +76,6 @@ static struct lm77_data *lm77_update_device(struct device *dev);
 static struct i2c_driver lm77_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm77",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = lm77_attach_adapter,
 	.detach_client	= lm77_detach_client,
 };
diff --git a/drivers/hwmon/lm78.c b/drivers/hwmon/lm78.c
index 78cdd506439f..18448f83a6b0 100644
--- a/drivers/hwmon/lm78.c
+++ b/drivers/hwmon/lm78.c
@@ -167,7 +167,6 @@ static struct i2c_driver lm78_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm78",
 	.id		= I2C_DRIVERID_LM78,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm78_attach_adapter,
 	.detach_client	= lm78_detach_client,
 };
diff --git a/drivers/hwmon/lm80.c b/drivers/hwmon/lm80.c
index c359fdea211e..02303fa0c464 100644
--- a/drivers/hwmon/lm80.c
+++ b/drivers/hwmon/lm80.c
@@ -146,7 +146,6 @@ static struct i2c_driver lm80_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm80",
 	.id		= I2C_DRIVERID_LM80,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm80_attach_adapter,
 	.detach_client	= lm80_detach_client,
 };
diff --git a/drivers/hwmon/lm83.c b/drivers/hwmon/lm83.c
index 9a70611a9f69..96cb34ea2490 100644
--- a/drivers/hwmon/lm83.c
+++ b/drivers/hwmon/lm83.c
@@ -127,7 +127,6 @@ static struct i2c_driver lm83_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm83",
 	.id		= I2C_DRIVERID_LM83,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm83_attach_adapter,
 	.detach_client	= lm83_detach_client,
 };
diff --git a/drivers/hwmon/lm85.c b/drivers/hwmon/lm85.c
index 3f5544a40f3c..131ecab094ac 100644
--- a/drivers/hwmon/lm85.c
+++ b/drivers/hwmon/lm85.c
@@ -383,7 +383,6 @@ static struct i2c_driver lm85_driver = {
 	.owner          = THIS_MODULE,
 	.name           = "lm85",
 	.id             = I2C_DRIVERID_LM85,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = lm85_attach_adapter,
 	.detach_client  = lm85_detach_client,
 };
diff --git a/drivers/hwmon/lm87.c b/drivers/hwmon/lm87.c
index eeec18177861..26fd0b33beaf 100644
--- a/drivers/hwmon/lm87.c
+++ b/drivers/hwmon/lm87.c
@@ -164,7 +164,6 @@ static struct i2c_driver lm87_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm87",
 	.id		= I2C_DRIVERID_LM87,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm87_attach_adapter,
 	.detach_client	= lm87_detach_client,
 };
diff --git a/drivers/hwmon/lm90.c b/drivers/hwmon/lm90.c
index 83cf2e1b09f5..011923b7091d 100644
--- a/drivers/hwmon/lm90.c
+++ b/drivers/hwmon/lm90.c
@@ -189,7 +189,6 @@ static struct i2c_driver lm90_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm90",
 	.id		= I2C_DRIVERID_LM90,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm90_attach_adapter,
 	.detach_client	= lm90_detach_client,
 };
diff --git a/drivers/hwmon/lm92.c b/drivers/hwmon/lm92.c
index 7a4b3701ed1a..2005a9ee61fb 100644
--- a/drivers/hwmon/lm92.c
+++ b/drivers/hwmon/lm92.c
@@ -413,7 +413,6 @@ static struct i2c_driver lm92_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "lm92",
 	.id		= I2C_DRIVERID_LM92,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= lm92_attach_adapter,
 	.detach_client	= lm92_detach_client,
 };
diff --git a/drivers/hwmon/max1619.c b/drivers/hwmon/max1619.c
index 69e7e125683b..d5aebef126d5 100644
--- a/drivers/hwmon/max1619.c
+++ b/drivers/hwmon/max1619.c
@@ -92,7 +92,6 @@ static struct max1619_data *max1619_update_device(struct device *dev);
 static struct i2c_driver max1619_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "max1619",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= max1619_attach_adapter,
 	.detach_client	= max1619_detach_client,
 };
diff --git a/drivers/hwmon/w83781d.c b/drivers/hwmon/w83781d.c
index ffdb3a03e2b5..a78929f2b3d8 100644
--- a/drivers/hwmon/w83781d.c
+++ b/drivers/hwmon/w83781d.c
@@ -272,7 +272,6 @@ static struct i2c_driver w83781d_driver = {
 	.owner = THIS_MODULE,
 	.name = "w83781d",
 	.id = I2C_DRIVERID_W83781D,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = w83781d_attach_adapter,
 	.detach_client = w83781d_detach_client,
 };
diff --git a/drivers/hwmon/w83792d.c b/drivers/hwmon/w83792d.c
index 1ba072630361..6824243d90d7 100644
--- a/drivers/hwmon/w83792d.c
+++ b/drivers/hwmon/w83792d.c
@@ -319,7 +319,6 @@ static void w83792d_init_client(struct i2c_client *client);
 static struct i2c_driver w83792d_driver = {
 	.owner = THIS_MODULE,
 	.name = "w83792d",
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = w83792d_attach_adapter,
 	.detach_client = w83792d_detach_client,
 };
diff --git a/drivers/hwmon/w83l785ts.c b/drivers/hwmon/w83l785ts.c
index f495b6378668..35172fb455d0 100644
--- a/drivers/hwmon/w83l785ts.c
+++ b/drivers/hwmon/w83l785ts.c
@@ -95,7 +95,6 @@ static struct i2c_driver w83l785ts_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "w83l785ts",
 	.id		= I2C_DRIVERID_W83L785TS,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= w83l785ts_attach_adapter,
 	.detach_client	= w83l785ts_detach_client,
 };
diff --git a/drivers/i2c/chips/ds1337.c b/drivers/i2c/chips/ds1337.c
index 18228957606c..65146cbc8390 100644
--- a/drivers/i2c/chips/ds1337.c
+++ b/drivers/i2c/chips/ds1337.c
@@ -54,7 +54,6 @@ static int ds1337_command(struct i2c_client *client, unsigned int cmd,
 static struct i2c_driver ds1337_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "ds1337",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= ds1337_attach_adapter,
 	.detach_client	= ds1337_detach_client,
 	.command	= ds1337_command,
diff --git a/drivers/i2c/chips/ds1374.c b/drivers/i2c/chips/ds1374.c
index da488b735abf..5a270d60b699 100644
--- a/drivers/i2c/chips/ds1374.c
+++ b/drivers/i2c/chips/ds1374.c
@@ -235,7 +235,6 @@ static struct i2c_driver ds1374_driver = {
 	.owner = THIS_MODULE,
 	.name = DS1374_DRV_NAME,
 	.id = I2C_DRIVERID_DS1374,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = ds1374_attach,
 	.detach_client = ds1374_detach,
 };
diff --git a/drivers/i2c/chips/eeprom.c b/drivers/i2c/chips/eeprom.c
index 4baf573fa04f..9bb1f8b3f38c 100644
--- a/drivers/i2c/chips/eeprom.c
+++ b/drivers/i2c/chips/eeprom.c
@@ -71,7 +71,6 @@ static struct i2c_driver eeprom_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "eeprom",
 	.id		= I2C_DRIVERID_EEPROM,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= eeprom_attach_adapter,
 	.detach_client	= eeprom_detach_client,
 };
diff --git a/drivers/i2c/chips/isp1301_omap.c b/drivers/i2c/chips/isp1301_omap.c
index d2a100d77839..4f472ba66a02 100644
--- a/drivers/i2c/chips/isp1301_omap.c
+++ b/drivers/i2c/chips/isp1301_omap.c
@@ -1636,7 +1636,6 @@ static struct i2c_driver isp1301_driver = {
 	.name		= "isp1301_omap",
 	.id		= 1301,		/* FIXME "official", i2c-ids.h */
 	.class		= I2C_CLASS_HWMON,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= isp1301_scan_bus,
 	.detach_client	= isp1301_detach_client,
 };
diff --git a/drivers/i2c/chips/m41t00.c b/drivers/i2c/chips/m41t00.c
index 3df309ae44a6..13e67836b348 100644
--- a/drivers/i2c/chips/m41t00.c
+++ b/drivers/i2c/chips/m41t00.c
@@ -214,7 +214,6 @@ static struct i2c_driver m41t00_driver = {
 	.owner		= THIS_MODULE,
 	.name		= M41T00_DRV_NAME,
 	.id		= I2C_DRIVERID_STM41T00,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= m41t00_attach,
 	.detach_client	= m41t00_detach,
 };
diff --git a/drivers/i2c/chips/max6875.c b/drivers/i2c/chips/max6875.c
index b376a006883c..7e61019e72dd 100644
--- a/drivers/i2c/chips/max6875.c
+++ b/drivers/i2c/chips/max6875.c
@@ -69,7 +69,6 @@ static int max6875_detach_client(struct i2c_client *client);
 static struct i2c_driver max6875_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "max6875",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= max6875_attach_adapter,
 	.detach_client	= max6875_detach_client,
 };
diff --git a/drivers/i2c/chips/pca9539.c b/drivers/i2c/chips/pca9539.c
index 59a930346229..26feb7a4f942 100644
--- a/drivers/i2c/chips/pca9539.c
+++ b/drivers/i2c/chips/pca9539.c
@@ -40,7 +40,6 @@ static int pca9539_detach_client(struct i2c_client *client);
 static struct i2c_driver pca9539_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "pca9539",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= pca9539_attach_adapter,
 	.detach_client	= pca9539_detach_client,
 };
diff --git a/drivers/i2c/chips/pcf8574.c b/drivers/i2c/chips/pcf8574.c
index c323c2de236c..2fae640cf329 100644
--- a/drivers/i2c/chips/pcf8574.c
+++ b/drivers/i2c/chips/pcf8574.c
@@ -68,7 +68,6 @@ static struct i2c_driver pcf8574_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "pcf8574",
 	.id		= I2C_DRIVERID_PCF8574,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= pcf8574_attach_adapter,
 	.detach_client	= pcf8574_detach_client,
 };
diff --git a/drivers/i2c/chips/pcf8591.c b/drivers/i2c/chips/pcf8591.c
index ce420a67560b..8750f71278e1 100644
--- a/drivers/i2c/chips/pcf8591.c
+++ b/drivers/i2c/chips/pcf8591.c
@@ -91,7 +91,6 @@ static struct i2c_driver pcf8591_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "pcf8591",
 	.id		= I2C_DRIVERID_PCF8591,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= pcf8591_attach_adapter,
 	.detach_client	= pcf8591_detach_client,
 };
diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c
index 26e498d921da..e586f75dd024 100644
--- a/drivers/i2c/chips/rtc8564.c
+++ b/drivers/i2c/chips/rtc8564.c
@@ -362,7 +362,6 @@ static struct i2c_driver rtc8564_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "RTC8564",
 	.id		= I2C_DRIVERID_RTC8564,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = rtc8564_probe,
 	.detach_client	= rtc8564_detach,
 	.command	= rtc8564_command
diff --git a/drivers/i2c/chips/tps65010.c b/drivers/i2c/chips/tps65010.c
index 280dd7a45db6..439bf6ceb789 100644
--- a/drivers/i2c/chips/tps65010.c
+++ b/drivers/i2c/chips/tps65010.c
@@ -639,7 +639,6 @@ static int __init tps65010_scan_bus(struct i2c_adapter *bus)
 static struct i2c_driver tps65010_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "tps65010",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= tps65010_scan_bus,
 	.detach_client	= __exit_p(tps65010_detach_client),
 };
diff --git a/drivers/i2c/chips/x1205.c b/drivers/i2c/chips/x1205.c
index 7da366cdc18c..c5ff2cee15ae 100644
--- a/drivers/i2c/chips/x1205.c
+++ b/drivers/i2c/chips/x1205.c
@@ -107,7 +107,6 @@ static int x1205_command(struct i2c_client *client, unsigned int cmd,
 static struct i2c_driver x1205_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "x1205",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = &x1205_attach,
 	.detach_client	= &x1205_detach,
 };
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 82ea1b7ec914..ad68ac00d910 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -197,7 +197,7 @@ int i2c_add_adapter(struct i2c_adapter *adap)
 	/* inform drivers of new adapters */
 	list_for_each(item,&drivers) {
 		driver = list_entry(item, struct i2c_driver, list);
-		if (driver->flags & I2C_DF_NOTIFY)
+		if (driver->attach_adapter)
 			/* We ignore the return code; if it fails, too bad */
 			driver->attach_adapter(adap);
 	}
@@ -309,7 +309,7 @@ int i2c_add_driver(struct i2c_driver *driver)
 	pr_debug("i2c-core: driver [%s] registered\n", driver->name);
 
 	/* now look for instances of driver on our adapters */
-	if (driver->flags & I2C_DF_NOTIFY) {
+	if (driver->attach_adapter) {
 		list_for_each(item,&adapters) {
 			adapter = list_entry(item, struct i2c_adapter, list);
 			driver->attach_adapter(adapter);
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 8af0bd1424d2..9da51eb37c06 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -484,7 +484,6 @@ static struct i2c_driver i2cdev_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "dev_driver",
 	.id		= I2C_DRIVERID_I2CDEV,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= i2cdev_attach_adapter,
 	.detach_adapter	= i2cdev_detach_adapter,
 	.detach_client	= i2cdev_detach_client,
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index f38696622eb4..f62c16fab42b 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -173,7 +173,6 @@ detach_thermostat(struct i2c_adapter *adapter)
 static struct i2c_driver thermostat_driver = {  
 	.owner		= THIS_MODULE,
 	.name		= "therm_adt746x",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= attach_thermostat,
 	.detach_adapter	= detach_thermostat,
 };
diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
index 190878eef990..df00c960fc5a 100644
--- a/drivers/macintosh/therm_pm72.c
+++ b/drivers/macintosh/therm_pm72.c
@@ -285,7 +285,6 @@ static struct i2c_driver therm_pm72_driver =
 {
 	.owner		= THIS_MODULE,
 	.name		= "therm_pm72",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= therm_pm72_attach,
 	.detach_adapter	= therm_pm72_detach,
 };
diff --git a/drivers/macintosh/therm_windtunnel.c b/drivers/macintosh/therm_windtunnel.c
index 6aaa1df1a64e..f3bae0d00ed2 100644
--- a/drivers/macintosh/therm_windtunnel.c
+++ b/drivers/macintosh/therm_windtunnel.c
@@ -357,7 +357,6 @@ static struct i2c_driver g4fan_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "therm_windtunnel",
 	.id		= I2C_DRIVERID_G4FAN,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = do_attach,
 	.detach_client	= do_detach,
 };
diff --git a/drivers/macintosh/windfarm_lm75_sensor.c b/drivers/macintosh/windfarm_lm75_sensor.c
index a0a41ad0f2b5..2392789ccf32 100644
--- a/drivers/macintosh/windfarm_lm75_sensor.c
+++ b/drivers/macintosh/windfarm_lm75_sensor.c
@@ -49,7 +49,6 @@ static int wf_lm75_detach(struct i2c_client *client);
 static struct i2c_driver wf_lm75_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "wf_lm75",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= wf_lm75_attach,
 	.detach_client	= wf_lm75_detach,
 };
diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c
index 1ca2b67aedfb..c4f2265167a2 100644
--- a/drivers/media/video/adv7170.c
+++ b/drivers/media/video/adv7170.c
@@ -502,7 +502,6 @@ static struct i2c_driver i2c_driver_adv7170 = {
 	.name = "adv7170",	/* name */
 
 	.id = I2C_DRIVERID_ADV7170,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = adv7170_attach_adapter,
 	.detach_client = adv7170_detach_client,
diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c
index 173bca1e0295..4fc08b17d4d0 100644
--- a/drivers/media/video/adv7175.c
+++ b/drivers/media/video/adv7175.c
@@ -552,7 +552,6 @@ static struct i2c_driver i2c_driver_adv7175 = {
 	.name = "adv7175",	/* name */
 
 	.id = I2C_DRIVERID_ADV7175,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = adv7175_attach_adapter,
 	.detach_client = adv7175_detach_client,
diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c
index 3ee0afca76a7..7bba69793b78 100644
--- a/drivers/media/video/bt819.c
+++ b/drivers/media/video/bt819.c
@@ -627,7 +627,6 @@ static struct i2c_driver i2c_driver_bt819 = {
 	.name = "bt819",
 
 	.id = I2C_DRIVERID_BT819,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = bt819_attach_adapter,
 	.detach_client = bt819_detach_client,
diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c
index 3ca1d768bfd3..0ba8652357e2 100644
--- a/drivers/media/video/bt832.c
+++ b/drivers/media/video/bt832.c
@@ -233,7 +233,6 @@ static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "i2c bt832 driver",
 	.id             = -1, /* FIXME */
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = bt832_probe,
 	.detach_client  = bt832_detach,
 	.command        = bt832_command,
diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c
index 8eb871d0e85b..4c9acd1e2c70 100644
--- a/drivers/media/video/bt856.c
+++ b/drivers/media/video/bt856.c
@@ -409,7 +409,6 @@ static struct i2c_driver i2c_driver_bt856 = {
 	.name = "bt856",
 
 	.id = I2C_DRIVERID_BT856,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = bt856_attach_adapter,
 	.detach_client = bt856_detach_client,
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index 780b352ec119..fce5d89b7b15 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -218,7 +218,6 @@ static int cs53l32a_detach(struct i2c_client *client)
 static struct i2c_driver i2c_driver = {
 	.name = "cs53l32a",
 	.id = I2C_DRIVERID_CS53L32A,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = cs53l32a_probe,
 	.detach_client = cs53l32a_detach,
 	.command = cs53l32a_command,
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index 5b93723a1768..c66bc147ee71 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -847,7 +847,6 @@ static struct i2c_driver i2c_driver_cx25840 = {
 	.name = "cx25840",
 
 	.id = I2C_DRIVERID_CX25840,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = cx25840_attach_adapter,
 	.detach_client = cx25840_detach_client,
diff --git a/drivers/media/video/indycam.c b/drivers/media/video/indycam.c
index deeef125eb92..3eba514cdb29 100644
--- a/drivers/media/video/indycam.c
+++ b/drivers/media/video/indycam.c
@@ -454,7 +454,6 @@ static struct i2c_driver i2c_driver_indycam = {
 	.owner		= THIS_MODULE,
 	.name		= "indycam",
 	.id		= I2C_DRIVERID_INDYCAM,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = indycam_probe,
 	.detach_client	= indycam_detach,
 	.command	= indycam_command,
diff --git a/drivers/media/video/ir-kbd-i2c.c b/drivers/media/video/ir-kbd-i2c.c
index 740e543311af..2e2f78a4afc8 100644
--- a/drivers/media/video/ir-kbd-i2c.c
+++ b/drivers/media/video/ir-kbd-i2c.c
@@ -280,7 +280,6 @@ static int ir_probe(struct i2c_adapter *adap);
 static struct i2c_driver driver = {
 	.name           = "ir remote kbd driver",
 	.id             = I2C_DRIVERID_INFRARED,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = ir_probe,
 	.detach_client  = ir_detach,
 };
diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c
index d86f8e92e534..46328fb6fe80 100644
--- a/drivers/media/video/msp3400.c
+++ b/drivers/media/video/msp3400.c
@@ -1564,7 +1564,6 @@ static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "msp3400",
 	.id             = I2C_DRIVERID_MSP3400,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = msp_probe,
 	.detach_client  = msp_detach,
 	.command        = msp_command,
diff --git a/drivers/media/video/ovcamchip/ovcamchip_core.c b/drivers/media/video/ovcamchip/ovcamchip_core.c
index 2de34ebf0673..390d0d6c7838 100644
--- a/drivers/media/video/ovcamchip/ovcamchip_core.c
+++ b/drivers/media/video/ovcamchip/ovcamchip_core.c
@@ -414,7 +414,6 @@ static struct i2c_driver driver = {
 	.name =			"ovcamchip",
 	.id =			I2C_DRIVERID_OVCAMCHIP,
 	.class =		I2C_CLASS_CAM_DIGITAL,
-	.flags =		I2C_DF_NOTIFY,
 	.attach_adapter =	ovcamchip_attach,
 	.detach_client =	ovcamchip_detach,
 	.command =		ovcamchip_command,
diff --git a/drivers/media/video/saa5246a.c b/drivers/media/video/saa5246a.c
index b8054da31ffd..9bf686989aab 100644
--- a/drivers/media/video/saa5246a.c
+++ b/drivers/media/video/saa5246a.c
@@ -166,7 +166,6 @@ static struct i2c_driver i2c_driver_videotext =
 	.owner 		= THIS_MODULE,
 	.name 		= IF_NAME,		/* name */
 	.id 		= I2C_DRIVERID_SAA5249, /* in i2c.h */
-	.flags 		= I2C_DF_NOTIFY,
 	.attach_adapter = saa5246a_probe,
 	.detach_client  = saa5246a_detach,
 	.command 	= saa5246a_command
diff --git a/drivers/media/video/saa5249.c b/drivers/media/video/saa5249.c
index 7ffa2e9a9bf3..811e86396030 100644
--- a/drivers/media/video/saa5249.c
+++ b/drivers/media/video/saa5249.c
@@ -239,7 +239,6 @@ static struct i2c_driver i2c_driver_videotext =
 	.owner 		= THIS_MODULE,
 	.name 		= IF_NAME,		/* name */
 	.id 		= I2C_DRIVERID_SAA5249, /* in i2c.h */
-	.flags 		= I2C_DF_NOTIFY,
 	.attach_adapter = saa5249_probe,
 	.detach_client  = saa5249_detach,
 	.command 	= saa5249_command
diff --git a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c
index 923322503e8f..18a0b7143e8b 100644
--- a/drivers/media/video/saa6588.c
+++ b/drivers/media/video/saa6588.c
@@ -498,7 +498,6 @@ static struct i2c_driver driver = {
 	.owner = THIS_MODULE,
 	.name = "i2c saa6588 driver",
 	.id = -1,		/* FIXME */
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = saa6588_probe,
 	.detach_client = saa6588_detach,
 	.command = saa6588_command,
diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c
index e116bdbed310..f266b35ceb35 100644
--- a/drivers/media/video/saa7110.c
+++ b/drivers/media/video/saa7110.c
@@ -591,7 +591,6 @@ static struct i2c_driver i2c_driver_saa7110 = {
 	.name = "saa7110",
 
 	.id = I2C_DRIVERID_SAA7110,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = saa7110_attach_adapter,
 	.detach_client = saa7110_detach_client,
diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c
index fe8a5e453969..687beaf11adc 100644
--- a/drivers/media/video/saa7111.c
+++ b/drivers/media/video/saa7111.c
@@ -594,7 +594,6 @@ static struct i2c_driver i2c_driver_saa7111 = {
 	.name = "saa7111",
 
 	.id = I2C_DRIVERID_SAA7111A,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = saa7111_attach_adapter,
 	.detach_client = saa7111_detach_client,
diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c
index d9f50e2f7b92..4748cf0598c0 100644
--- a/drivers/media/video/saa7114.c
+++ b/drivers/media/video/saa7114.c
@@ -1208,7 +1208,6 @@ static struct i2c_driver i2c_driver_saa7114 = {
 	.name = "saa7114",
 
 	.id = I2C_DRIVERID_SAA7114,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = saa7114_attach_adapter,
 	.detach_client = saa7114_detach_client,
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index e717e30d8187..b1079de938b7 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -1356,7 +1356,6 @@ static int saa7115_detach(struct i2c_client *client)
 static struct i2c_driver i2c_driver_saa7115 = {
 	.name = "saa7115",
 	.id = I2C_DRIVERID_SAA711X,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = saa7115_probe,
 	.detach_client = saa7115_detach,
 	.command = saa7115_command,
diff --git a/drivers/media/video/saa711x.c b/drivers/media/video/saa711x.c
index 31f7b950b01c..734a70919080 100644
--- a/drivers/media/video/saa711x.c
+++ b/drivers/media/video/saa711x.c
@@ -569,7 +569,6 @@ static struct i2c_driver i2c_driver_saa711x = {
 	.name = "saa711x",
 
 	.id = I2C_DRIVERID_SAA711X,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = saa711x_attach_adapter,
 	.detach_client = saa711x_detach_client,
diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c
index c36f014f1fdf..a2fab9837507 100644
--- a/drivers/media/video/saa7127.c
+++ b/drivers/media/video/saa7127.c
@@ -821,7 +821,6 @@ static int saa7127_detach(struct i2c_client *client)
 static struct i2c_driver i2c_driver_saa7127 = {
 	.name = "saa7127",
 	.id = I2C_DRIVERID_SAA7127,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = saa7127_probe,
 	.detach_client = saa7127_detach,
 	.command = saa7127_command,
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index a61d24f588f7..6fc298e0a03a 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -600,7 +600,6 @@ static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "i2c saa6752hs MPEG encoder",
 	.id             = I2C_DRIVERID_SAA6752HS,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = saa6752hs_probe,
 	.detach_client  = saa6752hs_detach,
 	.command        = saa6752hs_command,
diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c
index 132aa7943c16..e24aa16f2d8c 100644
--- a/drivers/media/video/saa7185.c
+++ b/drivers/media/video/saa7185.c
@@ -491,7 +491,6 @@ static struct i2c_driver i2c_driver_saa7185 = {
 	.name = "saa7185",	/* name */
 
 	.id = I2C_DRIVERID_SAA7185B,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = saa7185_attach_adapter,
 	.detach_client = saa7185_detach_client,
diff --git a/drivers/media/video/saa7191.c b/drivers/media/video/saa7191.c
index cbca896e8cfa..6be98fc0fe24 100644
--- a/drivers/media/video/saa7191.c
+++ b/drivers/media/video/saa7191.c
@@ -791,7 +791,6 @@ static struct i2c_driver i2c_driver_saa7191 = {
 	.owner		= THIS_MODULE,
 	.name		= "saa7191",
 	.id		= I2C_DRIVERID_SAA7191,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter = saa7191_probe,
 	.detach_client	= saa7191_detach,
 	.command	= saa7191_command
diff --git a/drivers/media/video/tda7432.c b/drivers/media/video/tda7432.c
index d32737dd2142..239a58666a12 100644
--- a/drivers/media/video/tda7432.c
+++ b/drivers/media/video/tda7432.c
@@ -504,7 +504,6 @@ static struct i2c_driver driver = {
 	.owner           = THIS_MODULE,
 	.name            = "i2c tda7432 driver",
 	.id              = I2C_DRIVERID_TDA7432,
-	.flags           = I2C_DF_NOTIFY,
 	.attach_adapter  = tda7432_probe,
 	.detach_client   = tda7432_detach,
 	.command         = tda7432_command,
diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c
index 1794686612c6..f29fb507075d 100644
--- a/drivers/media/video/tda9840.c
+++ b/drivers/media/video/tda9840.c
@@ -224,7 +224,6 @@ static struct i2c_driver driver = {
 	.owner	= THIS_MODULE,
 	.name	= "tda9840",
 	.id	= I2C_DRIVERID_TDA9840,
-	.flags	= I2C_DF_NOTIFY,
 	.attach_adapter	= attach,
 	.detach_client	= detach,
 	.command	= command,
diff --git a/drivers/media/video/tda9875.c b/drivers/media/video/tda9875.c
index a5e37dc91f39..d053b6445502 100644
--- a/drivers/media/video/tda9875.c
+++ b/drivers/media/video/tda9875.c
@@ -375,7 +375,6 @@ static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "i2c tda9875 driver",
 	.id             = I2C_DRIVERID_TDA9875,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = tda9875_probe,
 	.detach_client  = tda9875_detach,
 	.command        = tda9875_command,
diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
index 2f2414e90e8b..049b44e0767b 100644
--- a/drivers/media/video/tda9887.c
+++ b/drivers/media/video/tda9887.c
@@ -822,7 +822,6 @@ static struct i2c_driver driver = {
 	.owner          = THIS_MODULE,
 	.name           = "i2c tda9887 driver",
 	.id             = -1, /* FIXME */
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = tda9887_probe,
 	.detach_client  = tda9887_detach,
 	.command        = tda9887_command,
diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c
index ee3688348b66..96d88ce60c98 100644
--- a/drivers/media/video/tea6415c.c
+++ b/drivers/media/video/tea6415c.c
@@ -193,7 +193,6 @@ static struct i2c_driver driver = {
 	.owner	= THIS_MODULE,
 	.name 	= "tea6415c",
 	.id 	= I2C_DRIVERID_TEA6415C,
-	.flags 	= I2C_DF_NOTIFY,
 	.attach_adapter	= attach,
 	.detach_client	= detach,
 	.command	= command,
diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c
index 17975c19da5e..fd417de95847 100644
--- a/drivers/media/video/tea6420.c
+++ b/drivers/media/video/tea6420.c
@@ -170,7 +170,6 @@ static struct i2c_driver driver = {
 	.owner	= THIS_MODULE,
 	.name	= "tea6420",
 	.id	= I2C_DRIVERID_TEA6420,
-	.flags	= I2C_DF_NOTIFY,
 	.attach_adapter	= attach,
 	.detach_client	= detach,
 	.command	= command,
diff --git a/drivers/media/video/tuner-3036.c b/drivers/media/video/tuner-3036.c
index 79203595b9c1..3505cec2e65a 100644
--- a/drivers/media/video/tuner-3036.c
+++ b/drivers/media/video/tuner-3036.c
@@ -178,7 +178,6 @@ i2c_driver_tuner =
 	.owner		=	THIS_MODULE,
 	.name		=	"sab3036",
 	.id		=	I2C_DRIVERID_SAB3036,
-        .flags		=	I2C_DF_NOTIFY,
 	.attach_adapter =	tuner_probe,
 	.detach_client  =	tuner_detach,
 	.command	=	tuner_command
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index e58abdfcaab8..3c75121f6383 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -745,7 +745,6 @@ static struct i2c_driver driver = {
 	.owner = THIS_MODULE,
 	.name = "tuner",
 	.id = I2C_DRIVERID_TUNER,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = tuner_probe,
 	.detach_client = tuner_detach,
 	.command = tuner_command,
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index 5b20e8177cad..3565f35be7a1 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -1705,7 +1705,6 @@ static struct i2c_driver driver = {
 	.owner           = THIS_MODULE,
 	.name            = "generic i2c audio driver",
 	.id              = I2C_DRIVERID_TVAUDIO,
-	.flags           = I2C_DF_NOTIFY,
 	.attach_adapter  = chip_probe,
 	.detach_client   = chip_detach,
 	.command         = chip_command,
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c
index 5ac235365dd8..195bc51d4576 100644
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -782,7 +782,6 @@ static struct i2c_driver i2c_driver_tveeprom = {
 	.owner          = THIS_MODULE,
 	.name           = "tveeprom",
 	.id             = I2C_DRIVERID_TVEEPROM,
-	.flags          = I2C_DF_NOTIFY,
 	.attach_adapter = tveeprom_attach_adapter,
 	.detach_client  = tveeprom_detach_client,
 	.command        = tveeprom_command,
diff --git a/drivers/media/video/tvmixer.c b/drivers/media/video/tvmixer.c
index 5897e5d4d3d2..936e01d2c785 100644
--- a/drivers/media/video/tvmixer.c
+++ b/drivers/media/video/tvmixer.c
@@ -232,7 +232,6 @@ static struct i2c_driver driver = {
 #endif
 	.name            = "tv card mixer driver",
 	.id              = I2C_DRIVERID_TVMIXER,
-	.flags           = I2C_DF_NOTIFY,
 	.detach_adapter  = tvmixer_adapters,
 	.attach_adapter  = tvmixer_adapters,
 	.detach_client   = tvmixer_clients,
diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index 97431e26d229..4f3ee2091611 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -806,7 +806,6 @@ static struct i2c_driver driver = {
 
 	/* FIXME */
 	.id = I2C_DRIVERID_SAA7110,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = tvp5150_attach_adapter,
 	.detach_client = tvp5150_detach_client,
diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c
index 137b58f2c666..c66d28505bcd 100644
--- a/drivers/media/video/vpx3220.c
+++ b/drivers/media/video/vpx3220.c
@@ -726,7 +726,6 @@ static struct i2c_driver vpx3220_i2c_driver = {
 	.name = "vpx3220",
 
 	.id = I2C_DRIVERID_VPX3220,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = vpx3220_attach_adapter,
 	.detach_client = vpx3220_detach_client,
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index a6936ad74fcf..7b07717a3c67 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -236,7 +236,6 @@ static struct i2c_driver i2c_driver = {
 	.name = "wm8775",
 
 	.id = I2C_DRIVERID_WM8775,
-	.flags = I2C_DF_NOTIFY,
 
 	.attach_adapter = wm8775_probe,
 	.detach_client = wm8775_detach,
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index ad60bbb16cdf..78994c5fe932 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -1296,7 +1296,6 @@ static struct i2c_driver maven_driver={
 	.owner		= THIS_MODULE,
 	.name		= "maven",
 	.id		= I2C_DRIVERID_MGATVO,
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= maven_attach_adapter,
 	.detach_client	= maven_detach_client,
 	.command	= maven_command,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 0316ba1294ca..99399fadf13f 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -251,7 +251,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 }
 
 /*flags for the driver struct: */
-#define I2C_DF_NOTIFY	0x01		/* notify on bus (de/a)ttaches 	*/
 
 /*flags for the client struct: */
 #define I2C_CLIENT_ALLOW_USE		0x01	/* Client allows access */
diff --git a/sound/oss/dmasound/dac3550a.c b/sound/oss/dmasound/dac3550a.c
index 533895eba0eb..3402a663d07f 100644
--- a/sound/oss/dmasound/dac3550a.c
+++ b/sound/oss/dmasound/dac3550a.c
@@ -44,7 +44,6 @@ struct i2c_driver daca_driver = {
 	.owner			= THIS_MODULE,
 	.name			= "DAC3550A driver  V " DACA_VERSION,
 	.id			= I2C_DRIVERID_DACA,
-	.flags			= I2C_DF_NOTIFY,
 	.attach_adapter		= daca_attach_adapter,
 	.detach_client		= daca_detach_client,
 };
diff --git a/sound/oss/dmasound/tas_common.c b/sound/oss/dmasound/tas_common.c
index d36a1fe2fcf3..7e3d517af4b9 100644
--- a/sound/oss/dmasound/tas_common.c
+++ b/sound/oss/dmasound/tas_common.c
@@ -49,7 +49,6 @@ static int tas_detach_client(struct i2c_client *);
 struct i2c_driver tas_driver = {
 	.owner		= THIS_MODULE,
 	.name		= "tas",
-	.flags		= I2C_DF_NOTIFY,
 	.attach_adapter	= tas_attach_adapter,
 	.detach_client	= tas_detach_client,
 };
diff --git a/sound/ppc/keywest.c b/sound/ppc/keywest.c
index 097fbcfc5d45..fd8e2e6062f6 100644
--- a/sound/ppc/keywest.c
+++ b/sound/ppc/keywest.c
@@ -43,7 +43,6 @@ static int keywest_detach_client(struct i2c_client *client);
 struct i2c_driver keywest_driver = {  
 	.name = "PMac Keywest Audio",
 	.id = I2C_DRIVERID_KEYWEST,
-	.flags = I2C_DF_NOTIFY,
 	.attach_adapter = &keywest_attach_adapter,
 	.detach_client = &keywest_detach_client,
 };
-- 
cgit v1.2.3


From 5d7b851dcced3611e4a4432308618b1ed1a9fc31 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 27 Nov 2005 08:57:10 +0100
Subject: [PATCH] i2c: Drop i2c_driver.flags, 3 of 3

The flags member of the i2c_driver structure is no more used. Drop it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 99399fadf13f..3c16a8fb95f4 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -112,7 +112,6 @@ struct i2c_driver {
 	char name[32];
 	int id;
 	unsigned int class;
-	unsigned int flags;		/* div., see below		*/
 
 	/* Notifies the driver that a new bus has appeared. This routine
 	 * can be used by the driver to test if the bus meets its conditions
@@ -250,8 +249,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 	dev_set_drvdata (&dev->dev, data);
 }
 
-/*flags for the driver struct: */
-
 /*flags for the client struct: */
 #define I2C_CLIENT_ALLOW_USE		0x01	/* Client allows access */
 #define I2C_CLIENT_ALLOW_MULTIPLE_USE 	0x02  	/* Allow multiple access-locks */
-- 
cgit v1.2.3


From cb748fb20186d4b345c68a7f580429f379fdd268 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 20:58:35 +0100
Subject: [PATCH] i2c: Rework client usage count, 1 of 3

No i2c client uses the I2C_CLIENT_ALLOW_MULTIPLE_USE flag, drop it.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/i2c/i2c-core.c | 4 +---
 include/linux/i2c.h    | 2 --
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index ad68ac00d910..2f0bc9529376 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -500,9 +500,7 @@ int i2c_use_client(struct i2c_client *client)
 		return ret;
 
 	if (client->flags & I2C_CLIENT_ALLOW_USE) {
-		if (client->flags & I2C_CLIENT_ALLOW_MULTIPLE_USE)
-			client->usage_count++;
-		else if (client->usage_count > 0) 
+		if (client->usage_count > 0)
 			goto busy;
 		else 
 			client->usage_count++;
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 3c16a8fb95f4..4487c5189747 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -251,8 +251,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 
 /*flags for the client struct: */
 #define I2C_CLIENT_ALLOW_USE		0x01	/* Client allows access */
-#define I2C_CLIENT_ALLOW_MULTIPLE_USE 	0x02  	/* Allow multiple access-locks */
-						/* on an i2c_client */
 #define I2C_CLIENT_PEC  0x04			/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN	0x10			/* we have a ten bit chip address	*/
 						/* Must equal I2C_M_TEN below */
-- 
cgit v1.2.3


From cde7859bda0d1124392b44e50aa11df99707e1d9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 21:00:54 +0100
Subject: [PATCH] i2c: Rework client usage count, 2 of 3

Make I2C_CLIENT_ALLOW_USE the default for all i2c clients. It doesn't
hurt if the usage count is actually never used for any given driver,
and allows for nice code simplifications in i2c-core.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/arm/mach-pxa/akita-ioexp.c            |  1 -
 drivers/i2c/chips/rtc8564.c                |  1 -
 drivers/i2c/i2c-core.c                     | 28 ++++++++++------------------
 drivers/media/video/adv7170.c              |  1 -
 drivers/media/video/adv7175.c              |  1 -
 drivers/media/video/bt819.c                |  1 -
 drivers/media/video/bt832.c                |  1 -
 drivers/media/video/bt856.c                |  1 -
 drivers/media/video/cs53l32a.c             |  1 -
 drivers/media/video/cx25840/cx25840-core.c |  1 -
 drivers/media/video/em28xx/em28xx-i2c.c    |  1 -
 drivers/media/video/msp3400.c              |  1 -
 drivers/media/video/saa6588.c              |  1 -
 drivers/media/video/saa7110.c              |  1 -
 drivers/media/video/saa7111.c              |  1 -
 drivers/media/video/saa7114.c              |  1 -
 drivers/media/video/saa7115.c              |  1 -
 drivers/media/video/saa711x.c              |  1 -
 drivers/media/video/saa7127.c              |  1 -
 drivers/media/video/saa7134/saa6752hs.c    |  1 -
 drivers/media/video/saa7185.c              |  1 -
 drivers/media/video/tda9887.c              |  1 -
 drivers/media/video/tuner-core.c           |  1 -
 drivers/media/video/tvaudio.c              |  1 -
 drivers/media/video/tveeprom.c             |  1 -
 drivers/media/video/tvp5150.c              |  1 -
 drivers/media/video/vpx3220.c              |  1 -
 drivers/media/video/wm8775.c               |  1 -
 include/linux/i2c.h                        |  1 -
 29 files changed, 10 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pxa/akita-ioexp.c b/arch/arm/mach-pxa/akita-ioexp.c
index 440ebb3c3db1..b6bff550c8e9 100644
--- a/arch/arm/mach-pxa/akita-ioexp.c
+++ b/arch/arm/mach-pxa/akita-ioexp.c
@@ -133,7 +133,6 @@ static struct i2c_driver max7310_i2c_driver = {
 
 static struct i2c_client max7310_template = {
 	name:   "akita-max7310",
-	flags:  I2C_CLIENT_ALLOW_USE,
 	driver: &max7310_i2c_driver,
 };
 
diff --git a/drivers/i2c/chips/rtc8564.c b/drivers/i2c/chips/rtc8564.c
index e586f75dd024..07494d394381 100644
--- a/drivers/i2c/chips/rtc8564.c
+++ b/drivers/i2c/chips/rtc8564.c
@@ -155,7 +155,6 @@ static int rtc8564_attach(struct i2c_adapter *adap, int addr, int kind)
 
 	strlcpy(new_client->name, "RTC8564", I2C_NAME_SIZE);
 	i2c_set_clientdata(new_client, d);
-	new_client->flags = I2C_CLIENT_ALLOW_USE;
 	new_client->addr = addr;
 	new_client->adapter = adap;
 	new_client->driver = &rtc8564_driver;
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 2f0bc9529376..d16b4998c4c2 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -419,8 +419,7 @@ int i2c_attach_client(struct i2c_client *client)
 		}
 	}
 
-	if (client->flags & I2C_CLIENT_ALLOW_USE)
-		client->usage_count = 0;
+	client->usage_count = 0;
 
 	client->dev.parent = &client->adapter->dev;
 	client->dev.driver = &client->driver->driver;
@@ -443,8 +442,7 @@ int i2c_detach_client(struct i2c_client *client)
 	struct i2c_adapter *adapter = client->adapter;
 	int res = 0;
 	
-	if ((client->flags & I2C_CLIENT_ALLOW_USE)
-	 && (client->usage_count > 0)) {
+	if (client->usage_count > 0) {
 		dev_warn(&client->dev, "Client [%s] still busy, "
 			 "can't detach\n", client->name);
 		return -EBUSY;
@@ -499,12 +497,9 @@ int i2c_use_client(struct i2c_client *client)
 	if (ret)
 		return ret;
 
-	if (client->flags & I2C_CLIENT_ALLOW_USE) {
-		if (client->usage_count > 0)
-			goto busy;
-		else 
-			client->usage_count++;
-	}
+	if (client->usage_count > 0)
+		goto busy;
+	client->usage_count++;
 
 	return 0;
  busy:
@@ -514,16 +509,13 @@ int i2c_use_client(struct i2c_client *client)
 
 int i2c_release_client(struct i2c_client *client)
 {
-	if(client->flags & I2C_CLIENT_ALLOW_USE) {
-		if(client->usage_count>0)
-			client->usage_count--;
-		else {
-			pr_debug("i2c-core: %s used one too many times\n",
-				__FUNCTION__);
-			return -EPERM;
-		}
+	if (!client->usage_count) {
+		pr_debug("i2c-core: %s used one too many times\n",
+			 __FUNCTION__);
+		return -EPERM;
 	}
 	
+	client->usage_count--;
 	i2c_dec_use_client(client);
 	
 	return 0;
diff --git a/drivers/media/video/adv7170.c b/drivers/media/video/adv7170.c
index c4f2265167a2..622b1619a7e3 100644
--- a/drivers/media/video/adv7170.c
+++ b/drivers/media/video/adv7170.c
@@ -420,7 +420,6 @@ adv7170_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_adv7170;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	if ((client->addr == I2C_ADV7170 >> 1) ||
 	    (client->addr == (I2C_ADV7170 >> 1) + 1)) {
 		dname = adv7170_name;
diff --git a/drivers/media/video/adv7175.c b/drivers/media/video/adv7175.c
index 4fc08b17d4d0..d4859b445715 100644
--- a/drivers/media/video/adv7175.c
+++ b/drivers/media/video/adv7175.c
@@ -470,7 +470,6 @@ adv7175_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_adv7175;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	if ((client->addr == I2C_ADV7175 >> 1) ||
 	    (client->addr == (I2C_ADV7175 >> 1) + 1)) {
 		dname = adv7175_name;
diff --git a/drivers/media/video/bt819.c b/drivers/media/video/bt819.c
index 7bba69793b78..741e59af0991 100644
--- a/drivers/media/video/bt819.c
+++ b/drivers/media/video/bt819.c
@@ -535,7 +535,6 @@ bt819_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_bt819;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 
 	decoder = kmalloc(sizeof(struct bt819), GFP_KERNEL);
 	if (decoder == NULL) {
diff --git a/drivers/media/video/bt832.c b/drivers/media/video/bt832.c
index 0ba8652357e2..4ed13860b523 100644
--- a/drivers/media/video/bt832.c
+++ b/drivers/media/video/bt832.c
@@ -240,7 +240,6 @@ static struct i2c_driver driver = {
 static struct i2c_client client_template =
 {
 	.name       = "bt832",
-	.flags      = I2C_CLIENT_ALLOW_USE,
 	.driver     = &driver,
 };
 
diff --git a/drivers/media/video/bt856.c b/drivers/media/video/bt856.c
index 4c9acd1e2c70..d4bba8efac69 100644
--- a/drivers/media/video/bt856.c
+++ b/drivers/media/video/bt856.c
@@ -323,7 +323,6 @@ bt856_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_bt856;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "bt856", sizeof(I2C_NAME(client)));
 
 	encoder = kmalloc(sizeof(struct bt856), GFP_KERNEL);
diff --git a/drivers/media/video/cs53l32a.c b/drivers/media/video/cs53l32a.c
index fce5d89b7b15..f442ce3ba74b 100644
--- a/drivers/media/video/cs53l32a.c
+++ b/drivers/media/video/cs53l32a.c
@@ -154,7 +154,6 @@ static int cs53l32a_attach(struct i2c_adapter *adapter, int address, int kind)
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name) - 1, "cs53l32a");
 
 	cs53l32a_info("chip found @ 0x%x (%s)\n", address << 1, adapter->name);
diff --git a/drivers/media/video/cx25840/cx25840-core.c b/drivers/media/video/cx25840/cx25840-core.c
index c66bc147ee71..0b278abe879d 100644
--- a/drivers/media/video/cx25840/cx25840-core.c
+++ b/drivers/media/video/cx25840/cx25840-core.c
@@ -773,7 +773,6 @@ static int cx25840_detect_client(struct i2c_adapter *adapter, int address,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_cx25840;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name) - 1, "cx25840");
 
 	cx25840_dbg("detecting cx25840 client on address 0x%x\n", address << 1);
diff --git a/drivers/media/video/em28xx/em28xx-i2c.c b/drivers/media/video/em28xx/em28xx-i2c.c
index 7f5603054f02..d14bcf4ceaea 100644
--- a/drivers/media/video/em28xx/em28xx-i2c.c
+++ b/drivers/media/video/em28xx/em28xx-i2c.c
@@ -497,7 +497,6 @@ static struct i2c_adapter em28xx_adap_template = {
 
 static struct i2c_client em28xx_client_template = {
 	.name = "em28xx internal",
-	.flags = I2C_CLIENT_ALLOW_USE,
 };
 
 /* ----------------------------------------------------------- */
diff --git a/drivers/media/video/msp3400.c b/drivers/media/video/msp3400.c
index 46328fb6fe80..c5e8ad3aac4a 100644
--- a/drivers/media/video/msp3400.c
+++ b/drivers/media/video/msp3400.c
@@ -1576,7 +1576,6 @@ static struct i2c_driver driver = {
 static struct i2c_client client_template =
 {
 	.name      = "(unset)",
-	.flags     = I2C_CLIENT_ALLOW_USE,
 	.driver    = &driver,
 };
 
diff --git a/drivers/media/video/saa6588.c b/drivers/media/video/saa6588.c
index 18a0b7143e8b..3d4076ca67c3 100644
--- a/drivers/media/video/saa6588.c
+++ b/drivers/media/video/saa6588.c
@@ -505,7 +505,6 @@ static struct i2c_driver driver = {
 
 static struct i2c_client client_template = {
 	.name = "saa6588",
-	.flags = I2C_CLIENT_ALLOW_USE,
 	.driver = &driver,
 };
 
diff --git a/drivers/media/video/saa7110.c b/drivers/media/video/saa7110.c
index f266b35ceb35..8affa63c8b24 100644
--- a/drivers/media/video/saa7110.c
+++ b/drivers/media/video/saa7110.c
@@ -501,7 +501,6 @@ saa7110_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7110;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "saa7110", sizeof(I2C_NAME(client)));
 
 	decoder = kmalloc(sizeof(struct saa7110), GFP_KERNEL);
diff --git a/drivers/media/video/saa7111.c b/drivers/media/video/saa7111.c
index 687beaf11adc..2b2204564514 100644
--- a/drivers/media/video/saa7111.c
+++ b/drivers/media/video/saa7111.c
@@ -518,7 +518,6 @@ saa7111_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7111;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "saa7111", sizeof(I2C_NAME(client)));
 
 	decoder = kmalloc(sizeof(struct saa7111), GFP_KERNEL);
diff --git a/drivers/media/video/saa7114.c b/drivers/media/video/saa7114.c
index 4748cf0598c0..285f6c7a8f71 100644
--- a/drivers/media/video/saa7114.c
+++ b/drivers/media/video/saa7114.c
@@ -859,7 +859,6 @@ saa7114_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7114;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "saa7114", sizeof(I2C_NAME(client)));
 
 	decoder = kmalloc(sizeof(struct saa7114), GFP_KERNEL);
diff --git a/drivers/media/video/saa7115.c b/drivers/media/video/saa7115.c
index b1079de938b7..79aadd2d408f 100644
--- a/drivers/media/video/saa7115.c
+++ b/drivers/media/video/saa7115.c
@@ -1270,7 +1270,6 @@ static int saa7115_attach(struct i2c_adapter *adapter, int address, int kind)
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7115;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name) - 1, "saa7115");
 
 	saa7115_dbg("detecting saa7115 client on address 0x%x\n", address << 1);
diff --git a/drivers/media/video/saa711x.c b/drivers/media/video/saa711x.c
index 734a70919080..44bfc047704c 100644
--- a/drivers/media/video/saa711x.c
+++ b/drivers/media/video/saa711x.c
@@ -494,7 +494,6 @@ saa711x_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa711x;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "saa711x", sizeof(I2C_NAME(client)));
 	decoder = kmalloc(sizeof(struct saa711x), GFP_KERNEL);
 	if (decoder == NULL) {
diff --git a/drivers/media/video/saa7127.c b/drivers/media/video/saa7127.c
index a2fab9837507..1f4b41599445 100644
--- a/drivers/media/video/saa7127.c
+++ b/drivers/media/video/saa7127.c
@@ -719,7 +719,6 @@ static int saa7127_attach(struct i2c_adapter *adapter, int address, int kind)
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7127;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name) - 1, "saa7127");
 
 	saa7127_dbg("detecting saa7127 client on address 0x%x\n", address << 1);
diff --git a/drivers/media/video/saa7134/saa6752hs.c b/drivers/media/video/saa7134/saa6752hs.c
index 6fc298e0a03a..68206060c92d 100644
--- a/drivers/media/video/saa7134/saa6752hs.c
+++ b/drivers/media/video/saa7134/saa6752hs.c
@@ -608,7 +608,6 @@ static struct i2c_driver driver = {
 static struct i2c_client client_template =
 {
 	.name       = "saa6752hs",
-	.flags      = I2C_CLIENT_ALLOW_USE,
 	.driver     = &driver,
 };
 
diff --git a/drivers/media/video/saa7185.c b/drivers/media/video/saa7185.c
index e24aa16f2d8c..9f37585d3f73 100644
--- a/drivers/media/video/saa7185.c
+++ b/drivers/media/video/saa7185.c
@@ -415,7 +415,6 @@ saa7185_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_saa7185;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	strlcpy(I2C_NAME(client), "saa7185", sizeof(I2C_NAME(client)));
 
 	encoder = kmalloc(sizeof(struct saa7185), GFP_KERNEL);
diff --git a/drivers/media/video/tda9887.c b/drivers/media/video/tda9887.c
index 049b44e0767b..324f61bf714e 100644
--- a/drivers/media/video/tda9887.c
+++ b/drivers/media/video/tda9887.c
@@ -833,7 +833,6 @@ static struct i2c_driver driver = {
 static struct i2c_client client_template =
 {
 	.name      = "tda9887",
-	.flags     = I2C_CLIENT_ALLOW_USE,
 	.driver    = &driver,
 };
 
diff --git a/drivers/media/video/tuner-core.c b/drivers/media/video/tuner-core.c
index 3c75121f6383..6328f0954e70 100644
--- a/drivers/media/video/tuner-core.c
+++ b/drivers/media/video/tuner-core.c
@@ -755,7 +755,6 @@ static struct i2c_driver driver = {
 };
 static struct i2c_client client_template = {
 	.name = "(tuner unset)",
-	.flags = I2C_CLIENT_ALLOW_USE,
 	.driver = &driver,
 };
 
diff --git a/drivers/media/video/tvaudio.c b/drivers/media/video/tvaudio.c
index 3565f35be7a1..4f1f339283e0 100644
--- a/drivers/media/video/tvaudio.c
+++ b/drivers/media/video/tvaudio.c
@@ -1713,7 +1713,6 @@ static struct i2c_driver driver = {
 static struct i2c_client client_template =
 {
 	.name       = "(unset)",
-	.flags      = I2C_CLIENT_ALLOW_USE,
 	.driver     = &driver,
 };
 
diff --git a/drivers/media/video/tveeprom.c b/drivers/media/video/tveeprom.c
index 195bc51d4576..d833b651073a 100644
--- a/drivers/media/video/tveeprom.c
+++ b/drivers/media/video/tveeprom.c
@@ -751,7 +751,6 @@ tveeprom_detect_client(struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver_tveeprom;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name), "tveeprom");
 	i2c_attach_client(client);
 
diff --git a/drivers/media/video/tvp5150.c b/drivers/media/video/tvp5150.c
index 4f3ee2091611..3734554fc73b 100644
--- a/drivers/media/video/tvp5150.c
+++ b/drivers/media/video/tvp5150.c
@@ -714,7 +714,6 @@ static struct i2c_driver driver;
 
 static struct i2c_client client_template = {
 	.name = "(unset)",
-	.flags = I2C_CLIENT_ALLOW_USE,
 	.driver = &driver,
 };
 
diff --git a/drivers/media/video/vpx3220.c b/drivers/media/video/vpx3220.c
index c66d28505bcd..54bc888c3891 100644
--- a/drivers/media/video/vpx3220.c
+++ b/drivers/media/video/vpx3220.c
@@ -631,7 +631,6 @@ vpx3220_detect_client (struct i2c_adapter *adapter,
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &vpx3220_i2c_driver;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 
 	/* Check for manufacture ID and part number */
 	if (kind < 0) {
diff --git a/drivers/media/video/wm8775.c b/drivers/media/video/wm8775.c
index 7b07717a3c67..527c2591749a 100644
--- a/drivers/media/video/wm8775.c
+++ b/drivers/media/video/wm8775.c
@@ -168,7 +168,6 @@ static int wm8775_attach(struct i2c_adapter *adapter, int address, int kind)
 	client->addr = address;
 	client->adapter = adapter;
 	client->driver = &i2c_driver;
-	client->flags = I2C_CLIENT_ALLOW_USE;
 	snprintf(client->name, sizeof(client->name) - 1, "wm8775");
 
 	wm8775_info("chip found @ 0x%x (%s)\n", address << 1, adapter->name);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 4487c5189747..8b4d4695de0e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -250,7 +250,6 @@ static inline void i2c_set_adapdata (struct i2c_adapter *dev, void *data)
 }
 
 /*flags for the client struct: */
-#define I2C_CLIENT_ALLOW_USE		0x01	/* Client allows access */
 #define I2C_CLIENT_PEC  0x04			/* Use Packet Error Checking */
 #define I2C_CLIENT_TEN	0x10			/* we have a ten bit chip address	*/
 						/* Must equal I2C_M_TEN below */
-- 
cgit v1.2.3


From cf02df770228350254251fde520007a2709db785 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 21:03:41 +0100
Subject: [PATCH] i2c: Rework client usage count, 3 of 3

Do not limit the usage count of i2c clients to 1. In other words,
change the client usage count behavior from the old I2C_CLIENT_ALLOW_USE
to the old I2C_CLIENT_ALLOW_MULTIPLE_USE. The rationale is that no
driver actually needs the limiting behavior, and the unlimiting
behavior is slightly easier to implement.

Update the documentation to reflect this change.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/i2c/porting-clients | 1 +
 drivers/i2c/i2c-core.c            | 5 -----
 include/linux/i2c.h               | 4 +---
 3 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/porting-clients b/Documentation/i2c/porting-clients
index 64c610bf2fbc..6b07f23039d2 100644
--- a/Documentation/i2c/porting-clients
+++ b/Documentation/i2c/porting-clients
@@ -92,6 +92,7 @@ Technical changes:
   Drop client->id.
   Drop any 24RF08 corruption prevention you find, as this is now done
   at the i2c-core level, and doing it twice voids it.
+  Don't add I2C_CLIENT_ALLOW_USE to client->flags, it's the default now.
 
 * [Init] Limits must not be set by the driver (can be done later in
   user-space). Chip should not be reset default (although a module
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index d16b4998c4c2..a1c5dff85431 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -497,14 +497,9 @@ int i2c_use_client(struct i2c_client *client)
 	if (ret)
 		return ret;
 
-	if (client->usage_count > 0)
-		goto busy;
 	client->usage_count++;
 
 	return 0;
- busy:
-	i2c_dec_use_client(client);
-	return -EBUSY;
 }
 
 int i2c_release_client(struct i2c_client *client)
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 8b4d4695de0e..85c517a9b05b 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -307,9 +307,7 @@ extern struct i2c_client *i2c_get_client(int driver_id, int adapter_id,
    extern struct i2c_client *i2c_get_client(int,int,struct i2c_client *);
    to make sure that client-struct is valid and that it is okay to access
    the i2c-client. 
-   returns -EACCES if client doesn't allow use (default)
-   returns -EBUSY if client doesn't allow multiple use (default) and 
-   usage_count >0 */
+   returns -ENODEV if client has gone in the meantime */
 extern int i2c_use_client(struct i2c_client *);
 extern int i2c_release_client(struct i2c_client *);
 
-- 
cgit v1.2.3


From 482c788ded0aa9710722eaf9cf60886d3b923218 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sat, 26 Nov 2005 21:06:08 +0100
Subject: [PATCH] i2c: i2c_get_client is gone

The i2c_get_client function doesn't exist anymore, so we shouldn't
have a definition for it in i2c.h.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c.h | 13 ++-----------
 1 file changed, 2 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 85c517a9b05b..a9cea62fd486 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -296,17 +296,8 @@ extern int i2c_del_driver(struct i2c_driver *);
 extern int i2c_attach_client(struct i2c_client *);
 extern int i2c_detach_client(struct i2c_client *);
 
-/* New function: This is to get an i2c_client-struct for controlling the 
-   client either by using i2c_control-function or having the 
-   client-module export functions that can be used with the i2c_client
-   -struct. */
-extern struct i2c_client *i2c_get_client(int driver_id, int adapter_id, 
-					struct i2c_client *prev);
-
-/* Should be used with new function
-   extern struct i2c_client *i2c_get_client(int,int,struct i2c_client *);
-   to make sure that client-struct is valid and that it is okay to access
-   the i2c-client. 
+/* Should be used to make sure that client-struct is valid and that it
+   is okay to access the i2c-client.
    returns -ENODEV if client has gone in the meantime */
 extern int i2c_use_client(struct i2c_client *);
 extern int i2c_release_client(struct i2c_client *);
-- 
cgit v1.2.3


From 35d8b2e6b8e86b0d5126f36613b5202d4eb978b6 Mon Sep 17 00:00:00 2001
From: Laurent Riffard <laurent.riffard@free.fr>
Date: Sat, 26 Nov 2005 20:34:05 +0100
Subject: [PATCH] i2c: Drop i2c_driver.{owner,name}, 1 of 11

We should use the i2c_driver.driver's .name and .owner fields
instead of the i2c_driver's ones.

This patch updates the core of the i2c drivers: it removes .name and
.owner fields from the struct i2c_device and modify various
functions to use struct device fields instead.

Signed-off-by: Laurent Riffard <laurent.riffard@free.fr>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/i2c/busses/i2c-isa.c |  4 +---
 drivers/i2c/i2c-core.c       | 22 +++++++++++-----------
 drivers/i2c/i2c-dev.c        |  6 ++++--
 include/linux/i2c.h          |  5 +++--
 4 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/busses/i2c-isa.c b/drivers/i2c/busses/i2c-isa.c
index 03672c9ca409..9f93fb85d06e 100644
--- a/drivers/i2c/busses/i2c-isa.c
+++ b/drivers/i2c/busses/i2c-isa.c
@@ -92,8 +92,6 @@ int i2c_isa_add_driver(struct i2c_driver *driver)
 	int res;
 
 	/* Add the driver to the list of i2c drivers in the driver core */
-	driver->driver.name = driver->name;
-	driver->driver.owner = driver->owner;
 	driver->driver.bus = &i2c_bus_type;
 	driver->driver.probe = i2c_isa_device_probe;
 	driver->driver.remove = i2c_isa_device_remove;
@@ -124,7 +122,7 @@ int i2c_isa_del_driver(struct i2c_driver *driver)
 		if ((res = driver->detach_client(client))) {
 			dev_err(&isa_adapter.dev, "Failed, driver "
 				"%s not unregistered!\n",
-				driver->name);
+				driver->driver.name);
 			return res;
 		}
 	}
diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index a1c5dff85431..4ce5f0f32fba 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -235,7 +235,8 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 		if (driver->detach_adapter)
 			if ((res = driver->detach_adapter(adap))) {
 				dev_err(&adap->dev, "detach_adapter failed "
-					"for driver [%s]\n", driver->name);
+					"for driver [%s]\n",
+					driver->driver.name);
 				goto out_unlock;
 			}
 	}
@@ -295,8 +296,6 @@ int i2c_add_driver(struct i2c_driver *driver)
 	down(&core_lists);
 
 	/* add the driver to the list of i2c drivers in the driver core */
-	driver->driver.owner = driver->owner;
-	driver->driver.name = driver->name;
 	driver->driver.bus = &i2c_bus_type;
 	driver->driver.probe = i2c_device_probe;
 	driver->driver.remove = i2c_device_remove;
@@ -306,7 +305,7 @@ int i2c_add_driver(struct i2c_driver *driver)
 		goto out_unlock;
 	
 	list_add_tail(&driver->list,&drivers);
-	pr_debug("i2c-core: driver [%s] registered\n", driver->name);
+	pr_debug("i2c-core: driver [%s] registered\n", driver->driver.name);
 
 	/* now look for instances of driver on our adapters */
 	if (driver->attach_adapter) {
@@ -344,7 +343,8 @@ int i2c_del_driver(struct i2c_driver *driver)
 		if (driver->detach_adapter) {
 			if ((res = driver->detach_adapter(adap))) {
 				dev_err(&adap->dev, "detach_adapter failed "
-					"for driver [%s]\n", driver->name);
+					"for driver [%s]\n",
+					driver->driver.name);
 				goto out_unlock;
 			}
 		} else {
@@ -368,7 +368,7 @@ int i2c_del_driver(struct i2c_driver *driver)
 
 	driver_unregister(&driver->driver);
 	list_del(&driver->list);
-	pr_debug("i2c-core: driver [%s] unregistered\n", driver->name);
+	pr_debug("i2c-core: driver [%s] unregistered\n", driver->driver.name);
 
  out_unlock:
 	up(&core_lists);
@@ -473,10 +473,10 @@ int i2c_detach_client(struct i2c_client *client)
 static int i2c_inc_use_client(struct i2c_client *client)
 {
 
-	if (!try_module_get(client->driver->owner))
+	if (!try_module_get(client->driver->driver.owner))
 		return -ENODEV;
 	if (!try_module_get(client->adapter->owner)) {
-		module_put(client->driver->owner);
+		module_put(client->driver->driver.owner);
 		return -ENODEV;
 	}
 
@@ -485,7 +485,7 @@ static int i2c_inc_use_client(struct i2c_client *client)
 
 static void i2c_dec_use_client(struct i2c_client *client)
 {
-	module_put(client->driver->owner);
+	module_put(client->driver->driver.owner);
 	module_put(client->adapter->owner);
 }
 
@@ -524,14 +524,14 @@ void i2c_clients_command(struct i2c_adapter *adap, unsigned int cmd, void *arg)
 	down(&adap->clist_lock);
 	list_for_each(item,&adap->clients) {
 		client = list_entry(item, struct i2c_client, list);
-		if (!try_module_get(client->driver->owner))
+		if (!try_module_get(client->driver->driver.owner))
 			continue;
 		if (NULL != client->driver->command) {
 			up(&adap->clist_lock);
 			client->driver->command(client,cmd,arg);
 			down(&adap->clist_lock);
 		}
-		module_put(client->driver->owner);
+		module_put(client->driver->driver.owner);
        }
        up(&adap->clist_lock);
 }
diff --git a/drivers/i2c/i2c-dev.c b/drivers/i2c/i2c-dev.c
index 9da51eb37c06..9715217a0343 100644
--- a/drivers/i2c/i2c-dev.c
+++ b/drivers/i2c/i2c-dev.c
@@ -481,8 +481,10 @@ static int i2cdev_command(struct i2c_client *client, unsigned int cmd,
 }
 
 static struct i2c_driver i2cdev_driver = {
-	.owner		= THIS_MODULE,
-	.name		= "dev_driver",
+	.driver = {
+		.owner	= THIS_MODULE,
+		.name	= "dev_driver",
+	},
 	.id		= I2C_DRIVERID_I2CDEV,
 	.attach_adapter	= i2cdev_attach_adapter,
 	.detach_adapter	= i2cdev_detach_adapter,
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index a9cea62fd486..75aa18e865da 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -105,11 +105,12 @@ extern s32 i2c_smbus_read_i2c_block_data(struct i2c_client * client,
  * A driver is capable of handling one or more physical devices present on
  * I2C adapters. This information is used to inform the driver of adapter
  * events.
+ *
+ * The driver.owner field should be set to the module owner of this driver.
+ * The driver.name field should be set to the name of this driver.
  */
 
 struct i2c_driver {
-	struct module *owner;
-	char name[32];
 	int id;
 	unsigned int class;
 
-- 
cgit v1.2.3


From de59cf9ed44f64991e52a9de6094729537f0420c Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Tue, 6 Dec 2005 15:33:15 -0800
Subject: [PATCH] I2C: Make i2c_add_driver automatically set the proper module
 owner

This prevents i2c drivers from messing up and forgetting to set the
module owner of their driver.  It also reduces the size of their drivers
by one line :)

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/i2c-core.c | 5 +++--
 include/linux/i2c.h    | 7 ++++++-
 2 files changed, 9 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 4ce5f0f32fba..c23443ee1b33 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -287,7 +287,7 @@ int i2c_del_adapter(struct i2c_adapter *adap)
  * chips.
  */
 
-int i2c_add_driver(struct i2c_driver *driver)
+int i2c_register_driver(struct module *owner, struct i2c_driver *driver)
 {
 	struct list_head   *item;
 	struct i2c_adapter *adapter;
@@ -296,6 +296,7 @@ int i2c_add_driver(struct i2c_driver *driver)
 	down(&core_lists);
 
 	/* add the driver to the list of i2c drivers in the driver core */
+	driver->driver.owner = owner;
 	driver->driver.bus = &i2c_bus_type;
 	driver->driver.probe = i2c_device_probe;
 	driver->driver.remove = i2c_device_remove;
@@ -319,6 +320,7 @@ int i2c_add_driver(struct i2c_driver *driver)
 	up(&core_lists);
 	return res;
 }
+EXPORT_SYMBOL(i2c_register_driver);
 
 int i2c_del_driver(struct i2c_driver *driver)
 {
@@ -1132,7 +1134,6 @@ EXPORT_SYMBOL_GPL(i2c_bus_type);
 
 EXPORT_SYMBOL(i2c_add_adapter);
 EXPORT_SYMBOL(i2c_del_adapter);
-EXPORT_SYMBOL(i2c_add_driver);
 EXPORT_SYMBOL(i2c_del_driver);
 EXPORT_SYMBOL(i2c_attach_client);
 EXPORT_SYMBOL(i2c_detach_client);
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 75aa18e865da..7863a59bd598 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -291,9 +291,14 @@ struct i2c_client_address_data {
 extern int i2c_add_adapter(struct i2c_adapter *);
 extern int i2c_del_adapter(struct i2c_adapter *);
 
-extern int i2c_add_driver(struct i2c_driver *);
+extern int i2c_register_driver(struct module *, struct i2c_driver *);
 extern int i2c_del_driver(struct i2c_driver *);
 
+static inline int i2c_add_driver(struct i2c_driver *driver)
+{
+	return i2c_register_driver(THIS_MODULE, driver);
+}
+
 extern int i2c_attach_client(struct i2c_client *);
 extern int i2c_detach_client(struct i2c_client *);
 
-- 
cgit v1.2.3


From 734a12a36676ad3b9418fa5a829c518afec02b8a Mon Sep 17 00:00:00 2001
From: Rudolf Marek <r.marek@sh.cvut.cz>
Date: Sun, 18 Dec 2005 16:36:52 +0100
Subject: [PATCH] hwmon: add VRM/VID support for some VIA CPUs

This patch adds the VIA CENTAUR CPUs to detection table.
Table was updated to treat future Intel x86 CPUs as VRD10.
Stepping field was added, because some VIA CPUs have
different VRM specs across stepping. I changed the vrm type
to u8 because all drivers use u8 anyway.

Signed-off-by: Rudolf Marek <r.marek@sh.cvut.cz>
Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/hwmon/hwmon-vid.c | 69 ++++++++++++++++++++++++++++-------------------
 include/linux/hwmon-vid.h |  6 ++---
 2 files changed, 45 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/hwmon/hwmon-vid.c b/drivers/hwmon/hwmon-vid.c
index 312769ad4dab..e497274916ce 100644
--- a/drivers/hwmon/hwmon-vid.c
+++ b/drivers/hwmon/hwmon-vid.c
@@ -49,20 +49,22 @@
         . . . .
        11110  =  0.800 V
        11111  =  0.000 V (off)
+
+    The 17 specification is in fact Intel Mobile Voltage Positioning -
+    (IMVP-II). You can find more information in the datasheet of Max1718
+    http://www.maxim-ic.com/quick_view2.cfm/qv_pk/2452
+
 */
 
 /* vrm is the VRM/VRD document version multiplied by 10.
    val is the 4-, 5- or 6-bit VID code.
    Returned value is in mV to avoid floating point in the kernel. */
-int vid_from_reg(int val, int vrm)
+int vid_from_reg(int val, u8 vrm)
 {
 	int vid;
 
 	switch(vrm) {
 
-	case  0:
-		return 0;
-
 	case 100:               /* VRD 10.0 */
 		if((val & 0x1f) == 0x1f)
 			return 0;
@@ -91,10 +93,16 @@ int vid_from_reg(int val, int vrm)
 	case 84:		/* VRM 8.4 */
 		val &= 0x0f;
 				/* fall through */
-	default:		/* VRM 8.2 */
+	case 82:		/* VRM 8.2 */
 		return(val == 0x1f ? 0 :
 		       val & 0x10  ? 5100 - (val) * 100 :
 		                     2050 - (val) * 50);
+	case 17:		/* Intel IMVP-II */
+		return(val & 0x10 ? 975 - (val & 0xF) * 25 :
+				    1750 - val * 50);
+	default:		/* report 0 for unknown */
+		printk(KERN_INFO "hwmon-vid: requested unknown VRM version\n");
+		return 0;
 	}
 }
 
@@ -108,30 +116,36 @@ struct vrm_model {
 	u8 vendor;
 	u8 eff_family;
 	u8 eff_model;
-	int vrm_type;
+	u8 eff_stepping;
+	u8 vrm_type;
 };
 
 #define ANY 0xFF
 
 #ifdef CONFIG_X86
 
+/* the stepping parameter is highest acceptable stepping for current line */
+
 static struct vrm_model vrm_models[] = {
-	{X86_VENDOR_AMD, 0x6, ANY, 90},		/* Athlon Duron etc */
-	{X86_VENDOR_AMD, 0xF, ANY, 24},		/* Athlon 64, Opteron */
-	{X86_VENDOR_INTEL, 0x6, 0x9, 85},	/* 0.13um too */
-	{X86_VENDOR_INTEL, 0x6, 0xB, 85},	/* Tualatin */
-	{X86_VENDOR_INTEL, 0x6, ANY, 82},	/* any P6 */
-	{X86_VENDOR_INTEL, 0x7, ANY, 0},	/* Itanium */
-	{X86_VENDOR_INTEL, 0xF, 0x0, 90},	/* P4 */
-	{X86_VENDOR_INTEL, 0xF, 0x1, 90},	/* P4 Willamette */
-	{X86_VENDOR_INTEL, 0xF, 0x2, 90},	/* P4 Northwood */
-	{X86_VENDOR_INTEL, 0xF, 0x3, 100},	/* P4 Prescott */
-	{X86_VENDOR_INTEL, 0xF, 0x4, 100},	/* P4 Prescott */
-	{X86_VENDOR_INTEL, 0x10,ANY, 0},	/* Itanium 2 */
-	{X86_VENDOR_UNKNOWN, ANY, ANY, 0}	/* stop here */
+	{X86_VENDOR_AMD, 0x6, ANY, ANY, 90},		/* Athlon Duron etc */
+	{X86_VENDOR_AMD, 0xF, ANY, ANY, 24},		/* Athlon 64, Opteron and above VRM 24 */
+	{X86_VENDOR_INTEL, 0x6, 0x9, ANY, 85},		/* 0.13um too */
+	{X86_VENDOR_INTEL, 0x6, 0xB, ANY, 85},		/* Tualatin */
+	{X86_VENDOR_INTEL, 0x6, ANY, ANY, 82},		/* any P6 */
+	{X86_VENDOR_INTEL, 0x7, ANY, ANY, 0},		/* Itanium */
+	{X86_VENDOR_INTEL, 0xF, 0x0, ANY, 90},		/* P4 */
+	{X86_VENDOR_INTEL, 0xF, 0x1, ANY, 90},		/* P4 Willamette */
+	{X86_VENDOR_INTEL, 0xF, 0x2, ANY, 90},		/* P4 Northwood */
+	{X86_VENDOR_INTEL, 0xF, ANY, ANY, 100},		/* Prescott and above assume VRD 10 */
+	{X86_VENDOR_INTEL, 0x10, ANY, ANY, 0},		/* Itanium 2 */
+	{X86_VENDOR_CENTAUR, 0x6, 0x7, ANY, 85},	/* Eden ESP/Ezra */
+	{X86_VENDOR_CENTAUR, 0x6, 0x8, 0x7, 85},	/* Ezra T */
+	{X86_VENDOR_CENTAUR, 0x6, 0x9, 0x7, 85},	/* Nemiah */
+	{X86_VENDOR_CENTAUR, 0x6, 0x9, ANY, 17},	/* C3-M */
+	{X86_VENDOR_UNKNOWN, ANY, ANY, ANY, 0}		/* stop here */
 };
 
-static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor)
+static u8 find_vrm(u8 eff_family, u8 eff_model, u8 eff_stepping, u8 vendor)
 {
 	int i = 0;
 
@@ -139,7 +153,8 @@ static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor)
 		if (vrm_models[i].vendor==vendor)
 			if ((vrm_models[i].eff_family==eff_family)
 			 && ((vrm_models[i].eff_model==eff_model) ||
-			     (vrm_models[i].eff_model==ANY)))
+			     (vrm_models[i].eff_model==ANY)) &&
+			     (eff_stepping <= vrm_models[i].eff_stepping))
 				return vrm_models[i].vrm_type;
 		i++;
 	}
@@ -147,12 +162,11 @@ static int find_vrm(u8 eff_family, u8 eff_model, u8 vendor)
 	return 0;
 }
 
-int vid_which_vrm(void)
+u8 vid_which_vrm(void)
 {
 	struct cpuinfo_x86 *c = cpu_data;
 	u32 eax;
-	u8 eff_family, eff_model;
-	int vrm_ret;
+	u8 eff_family, eff_model, eff_stepping, vrm_ret;
 
 	if (c->x86 < 6)		/* Any CPU with family lower than 6 */
 		return 0;	/* doesn't have VID and/or CPUID */
@@ -160,20 +174,21 @@ int vid_which_vrm(void)
 	eax = cpuid_eax(1);
 	eff_family = ((eax & 0x00000F00)>>8);
 	eff_model  = ((eax & 0x000000F0)>>4);
+	eff_stepping = eax & 0xF;
 	if (eff_family == 0xF) {	/* use extended model & family */
 		eff_family += ((eax & 0x00F00000)>>20);
 		eff_model += ((eax & 0x000F0000)>>16)<<4;
 	}
-	vrm_ret = find_vrm(eff_family,eff_model,c->x86_vendor);
+	vrm_ret = find_vrm(eff_family, eff_model, eff_stepping, c->x86_vendor);
 	if (vrm_ret == 0)
 		printk(KERN_INFO "hwmon-vid: Unknown VRM version of your "
 		       "x86 CPU\n");
 	return vrm_ret;
 }
 
-/* and now something completely different for the non-x86 world */
+/* and now for something completely different for the non-x86 world */
 #else
-int vid_which_vrm(void)
+u8 vid_which_vrm(void)
 {
 	printk(KERN_INFO "hwmon-vid: Unknown VRM version of your CPU\n");
 	return 0;
diff --git a/include/linux/hwmon-vid.h b/include/linux/hwmon-vid.h
index cd4b7a042b86..f346e4d5381c 100644
--- a/include/linux/hwmon-vid.h
+++ b/include/linux/hwmon-vid.h
@@ -23,14 +23,14 @@
 #ifndef _LINUX_HWMON_VID_H
 #define _LINUX_HWMON_VID_H
 
-int vid_from_reg(int val, int vrm);
-int vid_which_vrm(void);
+int vid_from_reg(int val, u8 vrm);
+u8 vid_which_vrm(void);
 
 /* vrm is the VRM/VRD document version multiplied by 10.
    val is in mV to avoid floating point in the kernel.
    Returned value is the 4-, 5- or 6-bit VID code.
    Note that only VRM 9.x is supported for now. */
-static inline int vid_to_reg(int val, int vrm)
+static inline int vid_to_reg(int val, u8 vrm)
 {
 	switch (vrm) {
 	case 91:		/* VRM 9.1 */
-- 
cgit v1.2.3


From 04b4b8434a92b9ef127985113c0bd961957778b7 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 18 Dec 2005 16:42:35 +0100
Subject: [PATCH] i2c: driver ID list cleanups

Cleanups to i2c driver ID list:
* Remove mostly bogus comments about driver ID ranges.
* Drop experimental driver IDs, as the concept is pretty broken.
* Drop now unused IDs of non-I2C (ISA) drivers.
* Drop a few more IDs which are no more used.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/i2c-id.h | 20 --------------------
 1 file changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c-id.h b/include/linux/i2c-id.h
index 006c81ef4d50..fb46f8d56999 100644
--- a/include/linux/i2c-id.h
+++ b/include/linux/i2c-id.h
@@ -25,12 +25,6 @@
 
 /*
  * ---- Driver types -----------------------------------------------------
- *       device id name + number        function description, i2c address(es)
- *
- *  Range 1000-1999 range is defined in sensors/sensors.h
- *  Range 0x100 - 0x1ff is for V4L2 Common Components
- *  Range 0xf000 - 0xffff is reserved for local experimentation, and should
- *        never be used in official drivers
  */
 
 #define I2C_DRIVERID_MSP3400	 1
@@ -110,13 +104,7 @@
 #define I2C_DRIVERID_AKITAIOEXP	74	/* IO Expander on Sharp SL-C1000 */
 #define I2C_DRIVERID_INFRARED	75	/* I2C InfraRed on Video boards */
 
-#define I2C_DRIVERID_EXP0	0xF0	/* experimental use id's	*/
-#define I2C_DRIVERID_EXP1	0xF1
-#define I2C_DRIVERID_EXP2	0xF2
-#define I2C_DRIVERID_EXP3	0xF3
-
 #define I2C_DRIVERID_I2CDEV	900
-#define I2C_DRIVERID_I2CPROC	901
 #define I2C_DRIVERID_ARP        902    /* SMBus ARP Client              */
 #define I2C_DRIVERID_ALERT      903    /* SMBus Alert Responder Client  */
 
@@ -131,15 +119,12 @@
 #define I2C_DRIVERID_ADM1021 1008
 #define I2C_DRIVERID_ADM9240 1009
 #define I2C_DRIVERID_LTC1710 1010
-#define I2C_DRIVERID_SIS5595 1011
 #define I2C_DRIVERID_ICSPLL 1012
 #define I2C_DRIVERID_BT869 1013
 #define I2C_DRIVERID_MAXILIFE 1014
 #define I2C_DRIVERID_MATORB 1015
 #define I2C_DRIVERID_GL520 1016
 #define I2C_DRIVERID_THMC50 1017
-#define I2C_DRIVERID_DDCMON 1018
-#define I2C_DRIVERID_VIA686A 1019
 #define I2C_DRIVERID_ADM1025 1020
 #define I2C_DRIVERID_LM87 1021
 #define I2C_DRIVERID_PCF8574 1022
@@ -151,21 +136,16 @@
 #define I2C_DRIVERID_FSCPOS 1028
 #define I2C_DRIVERID_FSCSCY 1029
 #define I2C_DRIVERID_PCF8591 1030
-#define I2C_DRIVERID_SMSC47M1 1031
-#define I2C_DRIVERID_VT1211 1032
 #define I2C_DRIVERID_LM92 1033
-#define I2C_DRIVERID_VT8231 1034
 #define I2C_DRIVERID_SMARTBATT 1035
 #define I2C_DRIVERID_BMCSENSORS 1036
 #define I2C_DRIVERID_FS451 1037
-#define I2C_DRIVERID_W83627HF 1038
 #define I2C_DRIVERID_LM85 1039
 #define I2C_DRIVERID_LM83 1040
 #define I2C_DRIVERID_LM90 1042
 #define I2C_DRIVERID_ASB100 1043
 #define I2C_DRIVERID_FSCHER 1046
 #define I2C_DRIVERID_W83L785TS 1047
-#define I2C_DRIVERID_SMSC47B397 1050
 
 /*
  * ---- Adapter types ----------------------------------------------------
-- 
cgit v1.2.3


From 7c72ccf09b6debe55b8e049377ad3183ed4f4cb3 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Sun, 18 Dec 2005 17:25:18 +0100
Subject: [PATCH] i2c: i2c-nforce2 add nforce4 MCP-04 device ID

One more supported PCI ID for the i2c-nforce2 driver.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/i2c/busses/i2c-nforce2 | 3 ++-
 drivers/i2c/busses/i2c-nforce2.c     | 2 ++
 include/linux/pci_ids.h              | 1 +
 3 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/i2c/busses/i2c-nforce2 b/Documentation/i2c/busses/i2c-nforce2
index e379e182e64f..d751282d9b2a 100644
--- a/Documentation/i2c/busses/i2c-nforce2
+++ b/Documentation/i2c/busses/i2c-nforce2
@@ -5,7 +5,8 @@ Supported adapters:
   * nForce2 Ultra 400 MCP      10de:0084 
   * nForce3 Pro150 MCP         10de:00D4 
   * nForce3 250Gb MCP          10de:00E4 
-  * nForce4 MCP	       	       10de:0052
+  * nForce4 MCP                10de:0052
+  * nForce4 MCP-04             10de:0034
 
 Datasheet: not publically available, but seems to be similar to the
            AMD-8111 SMBus 2.0 adapter.
diff --git a/drivers/i2c/busses/i2c-nforce2.c b/drivers/i2c/busses/i2c-nforce2.c
index 4d18e6e5f159..2d80eb26f688 100644
--- a/drivers/i2c/busses/i2c-nforce2.c
+++ b/drivers/i2c/busses/i2c-nforce2.c
@@ -30,6 +30,7 @@
     nForce3 Pro150 MCP		00D4
     nForce3 250Gb MCP		00E4
     nForce4 MCP			0052
+    nForce4 MCP-04		0034
 
     This driver supports the 2 SMBuses that are included in the MCP of the
     nForce2/3/4 chipsets.
@@ -257,6 +258,7 @@ static struct pci_device_id nforce2_ids[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE3S_SMBUS) },
 	{ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE4_SMBUS) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS) },
 	{ 0 }
 };
 
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4f01710485cd..96a0403f61f6 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -976,6 +976,7 @@
 #define PCI_DEVICE_ID_NVIDIA_TNT_UNKNOWN        0x002a
 #define PCI_DEVICE_ID_NVIDIA_VTNT2		0x002C
 #define PCI_DEVICE_ID_NVIDIA_UVTNT2		0x002D
+#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SMBUS	0x0034
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_IDE	0x0035
 #define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP04_SATA	0x0036
 #define PCI_DEVICE_ID_NVIDIA_NVENET_10		0x0037
-- 
cgit v1.2.3


From 8ffdc6550c47f75ca4e6c9f30a2a89063e035cf2 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Fri, 6 Jan 2006 09:49:03 +0100
Subject: [BLOCK] add @uptodate to end_that_request_last() and @error to
 rq_end_io_fn()

add @uptodate argument to end_that_request_last() and @error
to rq_end_io_fn().  there's no generic way to pass error code
to request completion function, making generic error handling
of non-fs request difficult (rq->errors is driver-specific and
each driver uses it differently).  this patch adds @uptodate
to end_that_request_last() and @error to rq_end_io_fn().

for fs requests, this doesn't really matter, so just using the
same uptodate argument used in the last call to
end_that_request_first() should suffice.  imho, this can also
help the generic command-carrying request jens is working on.

Signed-off-by: tejun heo <htejun@gmail.com>
Signed-Off-By: Jens Axboe <axboe@suse.de>
---
 block/elevator.c                |  2 +-
 block/ll_rw_blk.c               | 22 +++++++++++++++-------
 drivers/block/DAC960.c          |  2 +-
 drivers/block/cciss.c           |  2 +-
 drivers/block/cpqarray.c        |  2 +-
 drivers/block/floppy.c          |  2 +-
 drivers/block/nbd.c             |  2 +-
 drivers/block/sx8.c             |  2 +-
 drivers/block/ub.c              |  2 +-
 drivers/block/viodasd.c         |  2 +-
 drivers/cdrom/cdu31a.c          |  2 +-
 drivers/ide/ide-cd.c            |  4 ++--
 drivers/ide/ide-io.c            |  6 +++---
 drivers/message/i2o/i2o_block.c |  2 +-
 drivers/mmc/mmc_block.c         |  4 ++--
 drivers/s390/block/dasd.c       |  2 +-
 drivers/s390/char/tape_block.c  |  2 +-
 drivers/scsi/ide-scsi.c         |  4 ++--
 drivers/scsi/scsi_lib.c         |  2 +-
 drivers/scsi/sd.c               |  2 +-
 include/linux/blkdev.h          |  6 +++---
 21 files changed, 42 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 6c3fc8a10bf2..85a11cee7d1c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -498,7 +498,7 @@ struct request *elv_next_request(request_queue_t *q)
 			blkdev_dequeue_request(rq);
 			rq->flags |= REQ_QUIET;
 			end_that_request_chunk(rq, 0, nr_bytes);
-			end_that_request_last(rq);
+			end_that_request_last(rq, 0);
 		} else {
 			printk(KERN_ERR "%s: bad return=%d\n", __FUNCTION__,
 								ret);
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index e02c88ca8fb5..8b1ae69bc5ac 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -344,7 +344,7 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
 /*
  * Cache flushing for ordered writes handling
  */
-static void blk_pre_flush_end_io(struct request *flush_rq)
+static void blk_pre_flush_end_io(struct request *flush_rq, int error)
 {
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
@@ -362,7 +362,7 @@ static void blk_pre_flush_end_io(struct request *flush_rq)
 	}
 }
 
-static void blk_post_flush_end_io(struct request *flush_rq)
+static void blk_post_flush_end_io(struct request *flush_rq, int error)
 {
 	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
@@ -2317,7 +2317,7 @@ EXPORT_SYMBOL(blk_rq_map_kern);
  */
 void blk_execute_rq_nowait(request_queue_t *q, struct gendisk *bd_disk,
 			   struct request *rq, int at_head,
-			   void (*done)(struct request *))
+			   rq_end_io_fn *done)
 {
 	int where = at_head ? ELEVATOR_INSERT_FRONT : ELEVATOR_INSERT_BACK;
 
@@ -2521,7 +2521,7 @@ EXPORT_SYMBOL(blk_put_request);
  * blk_end_sync_rq - executes a completion event on a request
  * @rq: request to complete
  */
-void blk_end_sync_rq(struct request *rq)
+void blk_end_sync_rq(struct request *rq, int error)
 {
 	struct completion *waiting = rq->waiting;
 
@@ -3183,9 +3183,17 @@ EXPORT_SYMBOL(end_that_request_chunk);
 /*
  * queue lock must be held
  */
-void end_that_request_last(struct request *req)
+void end_that_request_last(struct request *req, int uptodate)
 {
 	struct gendisk *disk = req->rq_disk;
+	int error;
+
+	/*
+	 * extend uptodate bool to allow < 0 value to be direct io error
+	 */
+	error = 0;
+	if (end_io_error(uptodate))
+		error = !uptodate ? -EIO : uptodate;
 
 	if (unlikely(laptop_mode) && blk_fs_request(req))
 		laptop_io_completion();
@@ -3200,7 +3208,7 @@ void end_that_request_last(struct request *req)
 		disk->in_flight--;
 	}
 	if (req->end_io)
-		req->end_io(req);
+		req->end_io(req, error);
 	else
 		__blk_put_request(req->q, req);
 }
@@ -3212,7 +3220,7 @@ void end_request(struct request *req, int uptodate)
 	if (!end_that_request_first(req, uptodate, req->hard_cur_sectors)) {
 		add_disk_randomness(req->rq_disk);
 		blkdev_dequeue_request(req);
-		end_that_request_last(req);
+		end_that_request_last(req, uptodate);
 	}
 }
 
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 70eaa5c7ac08..21097a39a057 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3471,7 +3471,7 @@ static inline boolean DAC960_ProcessCompletedRequest(DAC960_Command_T *Command,
 
 	 if (!end_that_request_first(Request, UpToDate, Command->BlockCount)) {
 
- 	 	end_that_request_last(Request);
+ 	 	end_that_request_last(Request, UpToDate);
 
 		if (Command->Completion) {
 			complete(Command->Completion);
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index c3441b3f086e..d2815b7a9150 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -2310,7 +2310,7 @@ static inline void complete_command( ctlr_info_t *h, CommandList_struct *cmd,
 	printk("Done with %p\n", cmd->rq);
 #endif /* CCISS_DEBUG */ 
 
-	end_that_request_last(cmd->rq);
+	end_that_request_last(cmd->rq, status ? 1 : -EIO);
 	cmd_free(h,cmd,1);
 }
 
diff --git a/drivers/block/cpqarray.c b/drivers/block/cpqarray.c
index cf1822a6361c..9bddb6874873 100644
--- a/drivers/block/cpqarray.c
+++ b/drivers/block/cpqarray.c
@@ -1036,7 +1036,7 @@ static inline void complete_command(cmdlist_t *cmd, int timeout)
 	complete_buffers(cmd->rq->bio, ok);
 
         DBGPX(printk("Done with %p\n", cmd->rq););
-	end_that_request_last(cmd->rq);
+	end_that_request_last(cmd->rq, ok ? 1 : -EIO);
 }
 
 /*
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index f7e765a1d313..a5b857c5c4b8 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -2301,7 +2301,7 @@ static void floppy_end_request(struct request *req, int uptodate)
 	add_disk_randomness(req->rq_disk);
 	floppy_off((long)req->rq_disk->private_data);
 	blkdev_dequeue_request(req);
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 
 	/* We're done with the request */
 	current_req = NULL;
diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9e268ddedfbd..485345c8e632 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -136,7 +136,7 @@ static void nbd_end_request(struct request *req)
 
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (!end_that_request_first(req, uptodate, req->nr_sectors)) {
-		end_that_request_last(req);
+		end_that_request_last(req, uptodate);
 	}
 	spin_unlock_irqrestore(q->queue_lock, flags);
 }
diff --git a/drivers/block/sx8.c b/drivers/block/sx8.c
index 1ded3b433459..9251f4131b53 100644
--- a/drivers/block/sx8.c
+++ b/drivers/block/sx8.c
@@ -770,7 +770,7 @@ static inline void carm_end_request_queued(struct carm_host *host,
 	rc = end_that_request_first(req, uptodate, req->hard_nr_sectors);
 	assert(rc == 0);
 
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 
 	rc = carm_put_request(host, crq);
 	assert(rc == 0);
diff --git a/drivers/block/ub.c b/drivers/block/ub.c
index 10740a065088..a05fe5843e6c 100644
--- a/drivers/block/ub.c
+++ b/drivers/block/ub.c
@@ -951,7 +951,7 @@ static void ub_rw_cmd_done(struct ub_dev *sc, struct ub_scsi_cmd *cmd)
 static void ub_end_rq(struct request *rq, int uptodate)
 {
 	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
-	end_that_request_last(rq);
+	end_that_request_last(rq, uptodate);
 }
 
 static int ub_rw_cmd_retry(struct ub_dev *sc, struct ub_lun *lun,
diff --git a/drivers/block/viodasd.c b/drivers/block/viodasd.c
index 2d518aa2720a..063f0304a163 100644
--- a/drivers/block/viodasd.c
+++ b/drivers/block/viodasd.c
@@ -305,7 +305,7 @@ static void viodasd_end_request(struct request *req, int uptodate,
 	if (end_that_request_first(req, uptodate, num_sectors))
 		return;
 	add_disk_randomness(req->rq_disk);
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 }
 
 /*
diff --git a/drivers/cdrom/cdu31a.c b/drivers/cdrom/cdu31a.c
index ac96de15d833..378e88d20757 100644
--- a/drivers/cdrom/cdu31a.c
+++ b/drivers/cdrom/cdu31a.c
@@ -1402,7 +1402,7 @@ static void do_cdu31a_request(request_queue_t * q)
 			if (!end_that_request_first(req, 1, nblock)) {
 				spin_lock_irq(q->queue_lock);
 				blkdev_dequeue_request(req);
-				end_that_request_last(req);
+				end_that_request_last(req, 1);
 				spin_unlock_irq(q->queue_lock);
 			}
 			continue;
diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
index 70aeb3a60120..d31117eb95aa 100644
--- a/drivers/ide/ide-cd.c
+++ b/drivers/ide/ide-cd.c
@@ -614,7 +614,7 @@ static void cdrom_end_request (ide_drive_t *drive, int uptodate)
 			 */
 			spin_lock_irqsave(&ide_lock, flags);
 			end_that_request_chunk(failed, 0, failed->data_len);
-			end_that_request_last(failed);
+			end_that_request_last(failed, 0);
 			spin_unlock_irqrestore(&ide_lock, flags);
 		}
 
@@ -1735,7 +1735,7 @@ end_request:
 
 	spin_lock_irqsave(&ide_lock, flags);
 	blkdev_dequeue_request(rq);
-	end_that_request_last(rq);
+	end_that_request_last(rq, 1);
 	HWGROUP(drive)->rq = NULL;
 	spin_unlock_irqrestore(&ide_lock, flags);
 	return ide_stopped;
diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index ecfafcdafea4..8435b44a700b 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -89,7 +89,7 @@ int __ide_end_request(ide_drive_t *drive, struct request *rq, int uptodate,
 
 		blkdev_dequeue_request(rq);
 		HWGROUP(drive)->rq = NULL;
-		end_that_request_last(rq);
+		end_that_request_last(rq, uptodate);
 		ret = 0;
 	}
 	return ret;
@@ -247,7 +247,7 @@ static void ide_complete_pm_request (ide_drive_t *drive, struct request *rq)
 	}
 	blkdev_dequeue_request(rq);
 	HWGROUP(drive)->rq = NULL;
-	end_that_request_last(rq);
+	end_that_request_last(rq, 1);
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
@@ -379,7 +379,7 @@ void ide_end_drive_cmd (ide_drive_t *drive, u8 stat, u8 err)
 	blkdev_dequeue_request(rq);
 	HWGROUP(drive)->rq = NULL;
 	rq->errors = err;
-	end_that_request_last(rq);
+	end_that_request_last(rq, !rq->errors);
 	spin_unlock_irqrestore(&ide_lock, flags);
 }
 
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index f283b5bafdd3..4f522527b7ed 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -466,7 +466,7 @@ static void i2o_block_end_request(struct request *req, int uptodate,
 
 	spin_lock_irqsave(q->queue_lock, flags);
 
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 
 	if (likely(dev)) {
 		dev->open_queue_depth--;
diff --git a/drivers/mmc/mmc_block.c b/drivers/mmc/mmc_block.c
index abcf19116d70..8e380c14bf65 100644
--- a/drivers/mmc/mmc_block.c
+++ b/drivers/mmc/mmc_block.c
@@ -263,7 +263,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 			 */
 			add_disk_randomness(req->rq_disk);
 			blkdev_dequeue_request(req);
-			end_that_request_last(req);
+			end_that_request_last(req, 1);
 		}
 		spin_unlock_irq(&md->lock);
 	} while (ret);
@@ -289,7 +289,7 @@ static int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 
 	add_disk_randomness(req->rq_disk);
 	blkdev_dequeue_request(req);
-	end_that_request_last(req);
+	end_that_request_last(req, 0);
 	spin_unlock_irq(&md->lock);
 
 	return 0;
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 7008d32433bf..fdb61380c523 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -1035,7 +1035,7 @@ dasd_end_request(struct request *req, int uptodate)
 	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
 		BUG();
 	add_disk_randomness(req->rq_disk);
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 }
 
 /*
diff --git a/drivers/s390/char/tape_block.c b/drivers/s390/char/tape_block.c
index 1efc9f21229e..559d51490e2f 100644
--- a/drivers/s390/char/tape_block.c
+++ b/drivers/s390/char/tape_block.c
@@ -78,7 +78,7 @@ tapeblock_end_request(struct request *req, int uptodate)
 {
 	if (end_that_request_first(req, uptodate, req->hard_nr_sectors))
 		BUG();
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 }
 
 static void
diff --git a/drivers/scsi/ide-scsi.c b/drivers/scsi/ide-scsi.c
index 4cb1f3ed9100..3c688ef54660 100644
--- a/drivers/scsi/ide-scsi.c
+++ b/drivers/scsi/ide-scsi.c
@@ -1046,7 +1046,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 
 	/* kill current request */
 	blkdev_dequeue_request(req);
-	end_that_request_last(req);
+	end_that_request_last(req, 0);
 	if (req->flags & REQ_SENSE)
 		kfree(scsi->pc->buffer);
 	kfree(scsi->pc);
@@ -1056,7 +1056,7 @@ static int idescsi_eh_reset (struct scsi_cmnd *cmd)
 	/* now nuke the drive queue */
 	while ((req = elv_next_request(drive->queue))) {
 		blkdev_dequeue_request(req);
-		end_that_request_last(req);
+		end_that_request_last(req, 0);
 	}
 
 	HWGROUP(drive)->rq = NULL;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index a7f3f0c84db7..53551f1dfe21 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -791,7 +791,7 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate,
 	spin_lock_irqsave(q->queue_lock, flags);
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(q, req);
-	end_that_request_last(req);
+	end_that_request_last(req, uptodate);
 	spin_unlock_irqrestore(q->queue_lock, flags);
 
 	/*
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3d3ad7d1b779..d651150ee76d 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -748,7 +748,7 @@ static void sd_end_flush(request_queue_t *q, struct request *flush_rq)
 		 * force journal abort of barriers
 		 */
 		end_that_request_first(rq, -EOPNOTSUPP, rq->hard_nr_sectors);
-		end_that_request_last(rq);
+		end_that_request_last(rq, -EOPNOTSUPP);
 	}
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a18500d196e1..a0ce8c585165 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -102,7 +102,7 @@ void copy_io_context(struct io_context **pdst, struct io_context **psrc);
 void swap_io_context(struct io_context **ioc1, struct io_context **ioc2);
 
 struct request;
-typedef void (rq_end_io_fn)(struct request *);
+typedef void (rq_end_io_fn)(struct request *, int);
 
 struct request_list {
 	int count[2];
@@ -560,7 +560,7 @@ extern void register_disk(struct gendisk *dev);
 extern void generic_make_request(struct bio *bio);
 extern void blk_put_request(struct request *);
 extern void __blk_put_request(request_queue_t *, struct request *);
-extern void blk_end_sync_rq(struct request *rq);
+extern void blk_end_sync_rq(struct request *rq, int error);
 extern void blk_attempt_remerge(request_queue_t *, struct request *);
 extern struct request *blk_get_request(request_queue_t *, int, gfp_t);
 extern void blk_insert_request(request_queue_t *, struct request *, int, void *);
@@ -614,7 +614,7 @@ static inline void blk_run_address_space(struct address_space *mapping)
  */
 extern int end_that_request_first(struct request *, int, int);
 extern int end_that_request_chunk(struct request *, int, int);
-extern void end_that_request_last(struct request *);
+extern void end_that_request_last(struct request *, int);
 extern void end_request(struct request *req, int uptodate);
 
 /*
-- 
cgit v1.2.3


From 797e7dbbee0a91fa1349192f18ad5c454997d876 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Fri, 6 Jan 2006 09:51:03 +0100
Subject: [BLOCK] reimplement handling of barrier request

Reimplement handling of barrier requests.

* Flexible handling to deal with various capabilities of
  target devices.
* Retry support for falling back.
* Tagged queues which don't support ordered tag can do ordered.

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
---
 block/elevator.c         |  84 +++++++----
 block/ll_rw_blk.c        | 384 ++++++++++++++++++++++++++++++-----------------
 include/linux/blkdev.h   |  82 +++++++---
 include/linux/elevator.h |   1 +
 4 files changed, 359 insertions(+), 192 deletions(-)

(limited to 'include/linux')

diff --git a/block/elevator.c b/block/elevator.c
index 85a11cee7d1c..39dcccc82ada 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -304,15 +304,7 @@ void elv_requeue_request(request_queue_t *q, struct request *rq)
 
 	rq->flags &= ~REQ_STARTED;
 
-	/*
-	 * if this is the flush, requeue the original instead and drop the flush
-	 */
-	if (rq->flags & REQ_BAR_FLUSH) {
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		rq = rq->end_io_data;
-	}
-
-	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+	__elv_add_request(q, rq, ELEVATOR_INSERT_REQUEUE, 0);
 }
 
 static void elv_drain_elevator(request_queue_t *q)
@@ -332,7 +324,18 @@ static void elv_drain_elevator(request_queue_t *q)
 void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		       int plug)
 {
+	struct list_head *pos;
+	unsigned ordseq;
+
+	if (q->ordcolor)
+		rq->flags |= REQ_ORDERED_COLOR;
+
 	if (rq->flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) {
+		/*
+		 * toggle ordered color
+		 */
+		q->ordcolor ^= 1;
+
 		/*
 		 * barriers implicitly indicate back insertion
 		 */
@@ -393,6 +396,30 @@ void __elv_add_request(request_queue_t *q, struct request *rq, int where,
 		q->elevator->ops->elevator_add_req_fn(q, rq);
 		break;
 
+	case ELEVATOR_INSERT_REQUEUE:
+		/*
+		 * If ordered flush isn't in progress, we do front
+		 * insertion; otherwise, requests should be requeued
+		 * in ordseq order.
+		 */
+		rq->flags |= REQ_SOFTBARRIER;
+
+		if (q->ordseq == 0) {
+			list_add(&rq->queuelist, &q->queue_head);
+			break;
+		}
+
+		ordseq = blk_ordered_req_seq(rq);
+
+		list_for_each(pos, &q->queue_head) {
+			struct request *pos_rq = list_entry_rq(pos);
+			if (ordseq <= blk_ordered_req_seq(pos_rq))
+				break;
+		}
+
+		list_add_tail(&rq->queuelist, pos);
+		break;
+
 	default:
 		printk(KERN_ERR "%s: bad insertion point %d\n",
 		       __FUNCTION__, where);
@@ -422,25 +449,16 @@ static inline struct request *__elv_next_request(request_queue_t *q)
 {
 	struct request *rq;
 
-	if (unlikely(list_empty(&q->queue_head) &&
-		     !q->elevator->ops->elevator_dispatch_fn(q, 0)))
-		return NULL;
-
-	rq = list_entry_rq(q->queue_head.next);
-
-	/*
-	 * if this is a barrier write and the device has to issue a
-	 * flush sequence to support it, check how far we are
-	 */
-	if (blk_fs_request(rq) && blk_barrier_rq(rq)) {
-		BUG_ON(q->ordered == QUEUE_ORDERED_NONE);
+	while (1) {
+		while (!list_empty(&q->queue_head)) {
+			rq = list_entry_rq(q->queue_head.next);
+			if (blk_do_ordered(q, &rq))
+				return rq;
+		}
 
-		if (q->ordered == QUEUE_ORDERED_FLUSH &&
-		    !blk_barrier_preflush(rq))
-			rq = blk_start_pre_flush(q, rq);
+		if (!q->elevator->ops->elevator_dispatch_fn(q, 0))
+			return NULL;
 	}
-
-	return rq;
 }
 
 struct request *elv_next_request(request_queue_t *q)
@@ -593,7 +611,21 @@ void elv_completed_request(request_queue_t *q, struct request *rq)
 	 * request is released from the driver, io must be done
 	 */
 	if (blk_account_rq(rq)) {
+		struct request *first_rq = list_entry_rq(q->queue_head.next);
+
 		q->in_flight--;
+
+		/*
+		 * Check if the queue is waiting for fs requests to be
+		 * drained for flush sequence.
+		 */
+		if (q->ordseq && q->in_flight == 0 &&
+		    blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN &&
+		    blk_ordered_req_seq(first_rq) > QUEUE_ORDSEQ_DRAIN) {
+			blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0);
+			q->request_fn(q);
+		}
+
 		if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn)
 			e->ops->elevator_completed_req_fn(q, rq);
 	}
diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c
index 65c4efc02adf..91d3b4828c49 100644
--- a/block/ll_rw_blk.c
+++ b/block/ll_rw_blk.c
@@ -290,8 +290,8 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
 
 /**
  * blk_queue_ordered - does this queue support ordered writes
- * @q:     the request queue
- * @flag:  see below
+ * @q:        the request queue
+ * @ordered:  one of QUEUE_ORDERED_*
  *
  * Description:
  *   For journalled file systems, doing ordered writes on a commit
@@ -300,28 +300,30 @@ static inline void rq_init(request_queue_t *q, struct request *rq)
  *   feature should call this function and indicate so.
  *
  **/
-void blk_queue_ordered(request_queue_t *q, int flag)
-{
-	switch (flag) {
-		case QUEUE_ORDERED_NONE:
-			if (q->flush_rq)
-				kmem_cache_free(request_cachep, q->flush_rq);
-			q->flush_rq = NULL;
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_TAG:
-			q->ordered = flag;
-			break;
-		case QUEUE_ORDERED_FLUSH:
-			q->ordered = flag;
-			if (!q->flush_rq)
-				q->flush_rq = kmem_cache_alloc(request_cachep,
-								GFP_KERNEL);
-			break;
-		default:
-			printk("blk_queue_ordered: bad value %d\n", flag);
-			break;
+int blk_queue_ordered(request_queue_t *q, unsigned ordered,
+		      prepare_flush_fn *prepare_flush_fn)
+{
+	if (ordered & (QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH) &&
+	    prepare_flush_fn == NULL) {
+		printk(KERN_ERR "blk_queue_ordered: prepare_flush_fn required\n");
+		return -EINVAL;
+	}
+
+	if (ordered != QUEUE_ORDERED_NONE &&
+	    ordered != QUEUE_ORDERED_DRAIN &&
+	    ordered != QUEUE_ORDERED_DRAIN_FLUSH &&
+	    ordered != QUEUE_ORDERED_DRAIN_FUA &&
+	    ordered != QUEUE_ORDERED_TAG &&
+	    ordered != QUEUE_ORDERED_TAG_FLUSH &&
+	    ordered != QUEUE_ORDERED_TAG_FUA) {
+		printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered);
+		return -EINVAL;
 	}
+
+	q->next_ordered = ordered;
+	q->prepare_flush_fn = prepare_flush_fn;
+
+	return 0;
 }
 
 EXPORT_SYMBOL(blk_queue_ordered);
@@ -346,167 +348,265 @@ EXPORT_SYMBOL(blk_queue_issue_flush_fn);
 /*
  * Cache flushing for ordered writes handling
  */
-static void blk_pre_flush_end_io(struct request *flush_rq, int error)
+inline unsigned blk_ordered_cur_seq(request_queue_t *q)
 {
-	struct request *rq = flush_rq->end_io_data;
-	request_queue_t *q = rq->q;
-
-	elv_completed_request(q, flush_rq);
-
-	rq->flags |= REQ_BAR_PREFLUSH;
-
-	if (!flush_rq->errors)
-		elv_requeue_request(q, rq);
-	else {
-		q->end_flush_fn(q, flush_rq);
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		q->request_fn(q);
-	}
+	if (!q->ordseq)
+		return 0;
+	return 1 << ffz(q->ordseq);
 }
 
-static void blk_post_flush_end_io(struct request *flush_rq, int error)
+unsigned blk_ordered_req_seq(struct request *rq)
 {
-	struct request *rq = flush_rq->end_io_data;
 	request_queue_t *q = rq->q;
 
-	elv_completed_request(q, flush_rq);
+	BUG_ON(q->ordseq == 0);
 
-	rq->flags |= REQ_BAR_POSTFLUSH;
+	if (rq == &q->pre_flush_rq)
+		return QUEUE_ORDSEQ_PREFLUSH;
+	if (rq == &q->bar_rq)
+		return QUEUE_ORDSEQ_BAR;
+	if (rq == &q->post_flush_rq)
+		return QUEUE_ORDSEQ_POSTFLUSH;
 
-	q->end_flush_fn(q, flush_rq);
-	clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-	q->request_fn(q);
+	if ((rq->flags & REQ_ORDERED_COLOR) ==
+	    (q->orig_bar_rq->flags & REQ_ORDERED_COLOR))
+		return QUEUE_ORDSEQ_DRAIN;
+	else
+		return QUEUE_ORDSEQ_DONE;
 }
 
-struct request *blk_start_pre_flush(request_queue_t *q, struct request *rq)
+void blk_ordered_complete_seq(request_queue_t *q, unsigned seq, int error)
 {
-	struct request *flush_rq = q->flush_rq;
-
-	BUG_ON(!blk_barrier_rq(rq));
+	struct request *rq;
+	int uptodate;
 
-	if (test_and_set_bit(QUEUE_FLAG_FLUSH, &q->queue_flags))
-		return NULL;
+	if (error && !q->orderr)
+		q->orderr = error;
 
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
+	BUG_ON(q->ordseq & seq);
+	q->ordseq |= seq;
 
-	/*
-	 * prepare_flush returns 0 if no flush is needed, just mark both
-	 * pre and post flush as done in that case
-	 */
-	if (!q->prepare_flush_fn(q, flush_rq)) {
-		rq->flags |= REQ_BAR_PREFLUSH | REQ_BAR_POSTFLUSH;
-		clear_bit(QUEUE_FLAG_FLUSH, &q->queue_flags);
-		return rq;
-	}
+	if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE)
+		return;
 
 	/*
-	 * some drivers dequeue requests right away, some only after io
-	 * completion. make sure the request is dequeued.
+	 * Okay, sequence complete.
 	 */
-	if (!list_empty(&rq->queuelist))
-		blkdev_dequeue_request(rq);
+	rq = q->orig_bar_rq;
+	uptodate = q->orderr ? q->orderr : 1;
 
-	flush_rq->end_io_data = rq;
-	flush_rq->end_io = blk_pre_flush_end_io;
+	q->ordseq = 0;
 
-	__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-	return flush_rq;
+	end_that_request_first(rq, uptodate, rq->hard_nr_sectors);
+	end_that_request_last(rq, uptodate);
 }
 
-static void blk_start_post_flush(request_queue_t *q, struct request *rq)
+static void pre_flush_end_io(struct request *rq, int error)
 {
-	struct request *flush_rq = q->flush_rq;
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error);
+}
 
-	BUG_ON(!blk_barrier_rq(rq));
+static void bar_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error);
+}
 
-	rq_init(q, flush_rq);
-	flush_rq->elevator_private = NULL;
-	flush_rq->flags = REQ_BAR_FLUSH;
-	flush_rq->rq_disk = rq->rq_disk;
-	flush_rq->rl = NULL;
+static void post_flush_end_io(struct request *rq, int error)
+{
+	elv_completed_request(rq->q, rq);
+	blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error);
+}
 
-	if (q->prepare_flush_fn(q, flush_rq)) {
-		flush_rq->end_io_data = rq;
-		flush_rq->end_io = blk_post_flush_end_io;
+static void queue_flush(request_queue_t *q, unsigned which)
+{
+	struct request *rq;
+	rq_end_io_fn *end_io;
 
-		__elv_add_request(q, flush_rq, ELEVATOR_INSERT_FRONT, 0);
-		q->request_fn(q);
+	if (which == QUEUE_ORDERED_PREFLUSH) {
+		rq = &q->pre_flush_rq;
+		end_io = pre_flush_end_io;
+	} else {
+		rq = &q->post_flush_rq;
+		end_io = post_flush_end_io;
 	}
+
+	rq_init(q, rq);
+	rq->flags = REQ_HARDBARRIER;
+	rq->elevator_private = NULL;
+	rq->rq_disk = q->bar_rq.rq_disk;
+	rq->rl = NULL;
+	rq->end_io = end_io;
+	q->prepare_flush_fn(q, rq);
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
 }
 
-static inline int blk_check_end_barrier(request_queue_t *q, struct request *rq,
-					int sectors)
+static inline struct request *start_ordered(request_queue_t *q,
+					    struct request *rq)
 {
-	if (sectors > rq->nr_sectors)
-		sectors = rq->nr_sectors;
+	q->bi_size = 0;
+	q->orderr = 0;
+	q->ordered = q->next_ordered;
+	q->ordseq |= QUEUE_ORDSEQ_STARTED;
+
+	/*
+	 * Prep proxy barrier request.
+	 */
+	blkdev_dequeue_request(rq);
+	q->orig_bar_rq = rq;
+	rq = &q->bar_rq;
+	rq_init(q, rq);
+	rq->flags = bio_data_dir(q->orig_bar_rq->bio);
+	rq->flags |= q->ordered & QUEUE_ORDERED_FUA ? REQ_FUA : 0;
+	rq->elevator_private = NULL;
+	rq->rl = NULL;
+	init_request_from_bio(rq, q->orig_bar_rq->bio);
+	rq->end_io = bar_end_io;
+
+	/*
+	 * Queue ordered sequence.  As we stack them at the head, we
+	 * need to queue in reverse order.  Note that we rely on that
+	 * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs
+	 * request gets inbetween ordered sequence.
+	 */
+	if (q->ordered & QUEUE_ORDERED_POSTFLUSH)
+		queue_flush(q, QUEUE_ORDERED_POSTFLUSH);
+	else
+		q->ordseq |= QUEUE_ORDSEQ_POSTFLUSH;
+
+	__elv_add_request(q, rq, ELEVATOR_INSERT_FRONT, 0);
+
+	if (q->ordered & QUEUE_ORDERED_PREFLUSH) {
+		queue_flush(q, QUEUE_ORDERED_PREFLUSH);
+		rq = &q->pre_flush_rq;
+	} else
+		q->ordseq |= QUEUE_ORDSEQ_PREFLUSH;
 
-	rq->nr_sectors -= sectors;
-	return rq->nr_sectors;
+	if ((q->ordered & QUEUE_ORDERED_TAG) || q->in_flight == 0)
+		q->ordseq |= QUEUE_ORDSEQ_DRAIN;
+	else
+		rq = NULL;
+
+	return rq;
 }
 
-static int __blk_complete_barrier_rq(request_queue_t *q, struct request *rq,
-				     int sectors, int queue_locked)
+int blk_do_ordered(request_queue_t *q, struct request **rqp)
 {
-	if (q->ordered != QUEUE_ORDERED_FLUSH)
-		return 0;
-	if (!blk_fs_request(rq) || !blk_barrier_rq(rq))
-		return 0;
-	if (blk_barrier_postflush(rq))
-		return 0;
+	struct request *rq = *rqp, *allowed_rq;
+	int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq);
 
-	if (!blk_check_end_barrier(q, rq, sectors)) {
-		unsigned long flags = 0;
+	if (!q->ordseq) {
+		if (!is_barrier)
+			return 1;
 
-		if (!queue_locked)
-			spin_lock_irqsave(q->queue_lock, flags);
+		if (q->next_ordered != QUEUE_ORDERED_NONE) {
+			*rqp = start_ordered(q, rq);
+			return 1;
+		} else {
+			/*
+			 * This can happen when the queue switches to
+			 * ORDERED_NONE while this request is on it.
+			 */
+			blkdev_dequeue_request(rq);
+			end_that_request_first(rq, -EOPNOTSUPP,
+					       rq->hard_nr_sectors);
+			end_that_request_last(rq, -EOPNOTSUPP);
+			*rqp = NULL;
+			return 0;
+		}
+	}
 
-		blk_start_post_flush(q, rq);
+	if (q->ordered & QUEUE_ORDERED_TAG) {
+		if (is_barrier && rq != &q->bar_rq)
+			*rqp = NULL;
+		return 1;
+	}
 
-		if (!queue_locked)
-			spin_unlock_irqrestore(q->queue_lock, flags);
+	switch (blk_ordered_cur_seq(q)) {
+	case QUEUE_ORDSEQ_PREFLUSH:
+		allowed_rq = &q->pre_flush_rq;
+		break;
+	case QUEUE_ORDSEQ_BAR:
+		allowed_rq = &q->bar_rq;
+		break;
+	case QUEUE_ORDSEQ_POSTFLUSH:
+		allowed_rq = &q->post_flush_rq;
+		break;
+	default:
+		allowed_rq = NULL;
+		break;
 	}
 
+	if (rq != allowed_rq &&
+	    (blk_fs_request(rq) || rq == &q->pre_flush_rq ||
+	     rq == &q->post_flush_rq))
+		*rqp = NULL;
+
 	return 1;
 }
 
-/**
- * blk_complete_barrier_rq - complete possible barrier request
- * @q:  the request queue for the device
- * @rq:  the request
- * @sectors:  number of sectors to complete
- *
- * Description:
- *   Used in driver end_io handling to determine whether to postpone
- *   completion of a barrier request until a post flush has been done. This
- *   is the unlocked variant, used if the caller doesn't already hold the
- *   queue lock.
- **/
-int blk_complete_barrier_rq(request_queue_t *q, struct request *rq, int sectors)
+static int flush_dry_bio_endio(struct bio *bio, unsigned int bytes, int error)
 {
-	return __blk_complete_barrier_rq(q, rq, sectors, 0);
+	request_queue_t *q = bio->bi_private;
+	struct bio_vec *bvec;
+	int i;
+
+	/*
+	 * This is dry run, restore bio_sector and size.  We'll finish
+	 * this request again with the original bi_end_io after an
+	 * error occurs or post flush is complete.
+	 */
+	q->bi_size += bytes;
+
+	if (bio->bi_size)
+		return 1;
+
+	/* Rewind bvec's */
+	bio->bi_idx = 0;
+	bio_for_each_segment(bvec, bio, i) {
+		bvec->bv_len += bvec->bv_offset;
+		bvec->bv_offset = 0;
+	}
+
+	/* Reset bio */
+	set_bit(BIO_UPTODATE, &bio->bi_flags);
+	bio->bi_size = q->bi_size;
+	bio->bi_sector -= (q->bi_size >> 9);
+	q->bi_size = 0;
+
+	return 0;
 }
-EXPORT_SYMBOL(blk_complete_barrier_rq);
 
-/**
- * blk_complete_barrier_rq_locked - complete possible barrier request
- * @q:  the request queue for the device
- * @rq:  the request
- * @sectors:  number of sectors to complete
- *
- * Description:
- *   See blk_complete_barrier_rq(). This variant must be used if the caller
- *   holds the queue lock.
- **/
-int blk_complete_barrier_rq_locked(request_queue_t *q, struct request *rq,
-				   int sectors)
+static inline int ordered_bio_endio(struct request *rq, struct bio *bio,
+				    unsigned int nbytes, int error)
 {
-	return __blk_complete_barrier_rq(q, rq, sectors, 1);
+	request_queue_t *q = rq->q;
+	bio_end_io_t *endio;
+	void *private;
+
+	if (&q->bar_rq != rq)
+		return 0;
+
+	/*
+	 * Okay, this is the barrier request in progress, dry finish it.
+	 */
+	if (error && !q->orderr)
+		q->orderr = error;
+
+	endio = bio->bi_end_io;
+	private = bio->bi_private;
+	bio->bi_end_io = flush_dry_bio_endio;
+	bio->bi_private = q;
+
+	bio_endio(bio, nbytes, error);
+
+	bio->bi_end_io = endio;
+	bio->bi_private = private;
+
+	return 1;
 }
-EXPORT_SYMBOL(blk_complete_barrier_rq_locked);
 
 /**
  * blk_queue_bounce_limit - set bounce buffer limit for queue
@@ -1047,6 +1147,7 @@ static const char * const rq_flags[] = {
 	"REQ_SORTED",
 	"REQ_SOFTBARRIER",
 	"REQ_HARDBARRIER",
+	"REQ_FUA",
 	"REQ_CMD",
 	"REQ_NOMERGE",
 	"REQ_STARTED",
@@ -1066,6 +1167,7 @@ static const char * const rq_flags[] = {
 	"REQ_PM_SUSPEND",
 	"REQ_PM_RESUME",
 	"REQ_PM_SHUTDOWN",
+	"REQ_ORDERED_COLOR",
 };
 
 void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -1643,8 +1745,6 @@ void blk_cleanup_queue(request_queue_t * q)
 	if (q->queue_tags)
 		__blk_queue_free_tags(q);
 
-	blk_queue_ordered(q, QUEUE_ORDERED_NONE);
-
 	kmem_cache_free(requestq_cachep, q);
 }
 
@@ -2714,7 +2814,7 @@ static int __make_request(request_queue_t *q, struct bio *bio)
 	spin_lock_prefetch(q->queue_lock);
 
 	barrier = bio_barrier(bio);
-	if (unlikely(barrier) && (q->ordered == QUEUE_ORDERED_NONE)) {
+	if (unlikely(barrier) && (q->next_ordered == QUEUE_ORDERED_NONE)) {
 		err = -EOPNOTSUPP;
 		goto end_io;
 	}
@@ -3075,7 +3175,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 		if (nr_bytes >= bio->bi_size) {
 			req->bio = bio->bi_next;
 			nbytes = bio->bi_size;
-			bio_endio(bio, nbytes, error);
+			if (!ordered_bio_endio(req, bio, nbytes, error))
+				bio_endio(bio, nbytes, error);
 			next_idx = 0;
 			bio_nbytes = 0;
 		} else {
@@ -3130,7 +3231,8 @@ static int __end_that_request_first(struct request *req, int uptodate,
 	 * if the request wasn't completed, update state
 	 */
 	if (bio_nbytes) {
-		bio_endio(bio, bio_nbytes, error);
+		if (!ordered_bio_endio(req, bio, bio_nbytes, error))
+			bio_endio(bio, bio_nbytes, error);
 		bio->bi_idx += next_idx;
 		bio_iovec(bio)->bv_offset += nr_bytes;
 		bio_iovec(bio)->bv_len -= nr_bytes;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a0ce8c585165..15db0f112d0a 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -207,6 +207,7 @@ enum rq_flag_bits {
 	__REQ_SORTED,		/* elevator knows about this request */
 	__REQ_SOFTBARRIER,	/* may not be passed by ioscheduler */
 	__REQ_HARDBARRIER,	/* may not be passed by drive either */
+	__REQ_FUA,		/* forced unit access */
 	__REQ_CMD,		/* is a regular fs rw request */
 	__REQ_NOMERGE,		/* don't touch this for merging */
 	__REQ_STARTED,		/* drive already may have started this one */
@@ -230,9 +231,7 @@ enum rq_flag_bits {
 	__REQ_PM_SUSPEND,	/* suspend request */
 	__REQ_PM_RESUME,	/* resume request */
 	__REQ_PM_SHUTDOWN,	/* shutdown request */
-	__REQ_BAR_PREFLUSH,	/* barrier pre-flush done */
-	__REQ_BAR_POSTFLUSH,	/* barrier post-flush */
-	__REQ_BAR_FLUSH,	/* rq is the flush request */
+	__REQ_ORDERED_COLOR,	/* is before or after barrier */
 	__REQ_NR_BITS,		/* stops here */
 };
 
@@ -241,6 +240,7 @@ enum rq_flag_bits {
 #define REQ_SORTED	(1 << __REQ_SORTED)
 #define REQ_SOFTBARRIER	(1 << __REQ_SOFTBARRIER)
 #define REQ_HARDBARRIER	(1 << __REQ_HARDBARRIER)
+#define REQ_FUA		(1 << __REQ_FUA)
 #define REQ_CMD		(1 << __REQ_CMD)
 #define REQ_NOMERGE	(1 << __REQ_NOMERGE)
 #define REQ_STARTED	(1 << __REQ_STARTED)
@@ -260,9 +260,7 @@ enum rq_flag_bits {
 #define REQ_PM_SUSPEND	(1 << __REQ_PM_SUSPEND)
 #define REQ_PM_RESUME	(1 << __REQ_PM_RESUME)
 #define REQ_PM_SHUTDOWN	(1 << __REQ_PM_SHUTDOWN)
-#define REQ_BAR_PREFLUSH	(1 << __REQ_BAR_PREFLUSH)
-#define REQ_BAR_POSTFLUSH	(1 << __REQ_BAR_POSTFLUSH)
-#define REQ_BAR_FLUSH	(1 << __REQ_BAR_FLUSH)
+#define REQ_ORDERED_COLOR	(1 << __REQ_ORDERED_COLOR)
 
 /*
  * State information carried for REQ_PM_SUSPEND and REQ_PM_RESUME
@@ -292,8 +290,7 @@ struct bio_vec;
 typedef int (merge_bvec_fn) (request_queue_t *, struct bio *, struct bio_vec *);
 typedef void (activity_fn) (void *data, int rw);
 typedef int (issue_flush_fn) (request_queue_t *, struct gendisk *, sector_t *);
-typedef int (prepare_flush_fn) (request_queue_t *, struct request *);
-typedef void (end_flush_fn) (request_queue_t *, struct request *);
+typedef void (prepare_flush_fn) (request_queue_t *, struct request *);
 
 enum blk_queue_state {
 	Queue_down,
@@ -335,7 +332,6 @@ struct request_queue
 	activity_fn		*activity_fn;
 	issue_flush_fn		*issue_flush_fn;
 	prepare_flush_fn	*prepare_flush_fn;
-	end_flush_fn		*end_flush_fn;
 
 	/*
 	 * Dispatch queue sorting
@@ -420,14 +416,11 @@ struct request_queue
 	/*
 	 * reserved for flush operations
 	 */
-	struct request		*flush_rq;
-	unsigned char		ordered;
-};
-
-enum {
-	QUEUE_ORDERED_NONE,
-	QUEUE_ORDERED_TAG,
-	QUEUE_ORDERED_FLUSH,
+	unsigned int		ordered, next_ordered, ordseq;
+	int			orderr, ordcolor;
+	struct request		pre_flush_rq, bar_rq, post_flush_rq;
+	struct request		*orig_bar_rq;
+	unsigned int		bi_size;
 };
 
 #define RQ_INACTIVE		(-1)
@@ -445,12 +438,51 @@ enum {
 #define QUEUE_FLAG_REENTER	6	/* Re-entrancy avoidance */
 #define QUEUE_FLAG_PLUGGED	7	/* queue is plugged */
 #define QUEUE_FLAG_ELVSWITCH	8	/* don't use elevator, just do FIFO */
-#define QUEUE_FLAG_FLUSH	9	/* doing barrier flush sequence */
+
+enum {
+	/*
+	 * Hardbarrier is supported with one of the following methods.
+	 *
+	 * NONE		: hardbarrier unsupported
+	 * DRAIN	: ordering by draining is enough
+	 * DRAIN_FLUSH	: ordering by draining w/ pre and post flushes
+	 * DRAIN_FUA	: ordering by draining w/ pre flush and FUA write
+	 * TAG		: ordering by tag is enough
+	 * TAG_FLUSH	: ordering by tag w/ pre and post flushes
+	 * TAG_FUA	: ordering by tag w/ pre flush and FUA write
+	 */
+	QUEUE_ORDERED_NONE	= 0x00,
+	QUEUE_ORDERED_DRAIN	= 0x01,
+	QUEUE_ORDERED_TAG	= 0x02,
+
+	QUEUE_ORDERED_PREFLUSH	= 0x10,
+	QUEUE_ORDERED_POSTFLUSH	= 0x20,
+	QUEUE_ORDERED_FUA	= 0x40,
+
+	QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
+	QUEUE_ORDERED_DRAIN_FUA	= QUEUE_ORDERED_DRAIN |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+	QUEUE_ORDERED_TAG_FLUSH	= QUEUE_ORDERED_TAG |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_POSTFLUSH,
+	QUEUE_ORDERED_TAG_FUA	= QUEUE_ORDERED_TAG |
+			QUEUE_ORDERED_PREFLUSH | QUEUE_ORDERED_FUA,
+
+	/*
+	 * Ordered operation sequence
+	 */
+	QUEUE_ORDSEQ_STARTED	= 0x01,	/* flushing in progress */
+	QUEUE_ORDSEQ_DRAIN	= 0x02,	/* waiting for the queue to be drained */
+	QUEUE_ORDSEQ_PREFLUSH	= 0x04,	/* pre-flushing in progress */
+	QUEUE_ORDSEQ_BAR	= 0x08,	/* original barrier req in progress */
+	QUEUE_ORDSEQ_POSTFLUSH	= 0x10,	/* post-flushing in progress */
+	QUEUE_ORDSEQ_DONE	= 0x20,
+};
 
 #define blk_queue_plugged(q)	test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
 #define blk_queue_tagged(q)	test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
 #define blk_queue_stopped(q)	test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
-#define blk_queue_flushing(q)	test_bit(QUEUE_FLAG_FLUSH, &(q)->queue_flags)
+#define blk_queue_flushing(q)	((q)->ordseq)
 
 #define blk_fs_request(rq)	((rq)->flags & REQ_CMD)
 #define blk_pc_request(rq)	((rq)->flags & REQ_BLOCK_PC)
@@ -466,8 +498,7 @@ enum {
 
 #define blk_sorted_rq(rq)	((rq)->flags & REQ_SORTED)
 #define blk_barrier_rq(rq)	((rq)->flags & REQ_HARDBARRIER)
-#define blk_barrier_preflush(rq)	((rq)->flags & REQ_BAR_PREFLUSH)
-#define blk_barrier_postflush(rq)	((rq)->flags & REQ_BAR_POSTFLUSH)
+#define blk_fua_rq(rq)		((rq)->flags & REQ_FUA)
 
 #define list_entry_rq(ptr)	list_entry((ptr), struct request, queuelist)
 
@@ -665,11 +696,12 @@ extern void blk_queue_prep_rq(request_queue_t *, prep_rq_fn *pfn);
 extern void blk_queue_merge_bvec(request_queue_t *, merge_bvec_fn *);
 extern void blk_queue_dma_alignment(request_queue_t *, int);
 extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern void blk_queue_ordered(request_queue_t *, int);
+extern int blk_queue_ordered(request_queue_t *, unsigned, prepare_flush_fn *);
 extern void blk_queue_issue_flush_fn(request_queue_t *, issue_flush_fn *);
-extern struct request *blk_start_pre_flush(request_queue_t *,struct request *);
-extern int blk_complete_barrier_rq(request_queue_t *, struct request *, int);
-extern int blk_complete_barrier_rq_locked(request_queue_t *, struct request *, int);
+extern int blk_do_ordered(request_queue_t *, struct request **);
+extern unsigned blk_ordered_cur_seq(request_queue_t *);
+extern unsigned blk_ordered_req_seq(struct request *);
+extern void blk_ordered_complete_seq(request_queue_t *, unsigned, int);
 
 extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
 extern void blk_dump_rq_flags(struct request *, char *);
diff --git a/include/linux/elevator.h b/include/linux/elevator.h
index a74c27e460ba..fb80fa44c4dd 100644
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -130,6 +130,7 @@ extern int elv_try_last_merge(request_queue_t *, struct bio *);
 #define ELEVATOR_INSERT_FRONT	1
 #define ELEVATOR_INSERT_BACK	2
 #define ELEVATOR_INSERT_SORT	3
+#define ELEVATOR_INSERT_REQUEUE	4
 
 /*
  * return values from elevator_may_queue_fn
-- 
cgit v1.2.3


From 9a3dccc42556537a48f39ee9a9e7ab90a933f766 Mon Sep 17 00:00:00 2001
From: Tejun Heo <htejun@gmail.com>
Date: Fri, 6 Jan 2006 09:56:18 +0100
Subject: [BLOCK] add FUA support to libata

Signed-off-by: Tejun Heo <htejun@gmail.com>
Signed-off-by: Jens Axboe <axboe@suse.de>
---
 drivers/scsi/libata-core.c | 31 +++++++++++++++++++++++++------
 drivers/scsi/libata-scsi.c | 32 ++++++++++++++++++++++++++------
 drivers/scsi/libata.h      |  4 +++-
 include/linux/ata.h        |  6 +++++-
 include/linux/libata.h     |  3 ++-
 5 files changed, 61 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 9ea102587914..bdfb0a88cd6f 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -562,16 +562,28 @@ static const u8 ata_rw_cmds[] = {
 	ATA_CMD_WRITE_MULTI,
 	ATA_CMD_READ_MULTI_EXT,
 	ATA_CMD_WRITE_MULTI_EXT,
+	0,
+	0,
+	0,
+	ATA_CMD_WRITE_MULTI_FUA_EXT,
 	/* pio */
 	ATA_CMD_PIO_READ,
 	ATA_CMD_PIO_WRITE,
 	ATA_CMD_PIO_READ_EXT,
 	ATA_CMD_PIO_WRITE_EXT,
+	0,
+	0,
+	0,
+	0,
 	/* dma */
 	ATA_CMD_READ,
 	ATA_CMD_WRITE,
 	ATA_CMD_READ_EXT,
-	ATA_CMD_WRITE_EXT
+	ATA_CMD_WRITE_EXT,
+	0,
+	0,
+	0,
+	ATA_CMD_WRITE_FUA_EXT
 };
 
 /**
@@ -584,25 +596,32 @@ static const u8 ata_rw_cmds[] = {
  *	LOCKING:
  *	caller.
  */
-void ata_rwcmd_protocol(struct ata_queued_cmd *qc)
+int ata_rwcmd_protocol(struct ata_queued_cmd *qc)
 {
 	struct ata_taskfile *tf = &qc->tf;
 	struct ata_device *dev = qc->dev;
+	u8 cmd;
 
-	int index, lba48, write;
+	int index, fua, lba48, write;
  
+	fua = (tf->flags & ATA_TFLAG_FUA) ? 4 : 0;
 	lba48 = (tf->flags & ATA_TFLAG_LBA48) ? 2 : 0;
 	write = (tf->flags & ATA_TFLAG_WRITE) ? 1 : 0;
 
 	if (dev->flags & ATA_DFLAG_PIO) {
 		tf->protocol = ATA_PROT_PIO;
-		index = dev->multi_count ? 0 : 4;
+		index = dev->multi_count ? 0 : 8;
 	} else {
 		tf->protocol = ATA_PROT_DMA;
-		index = 8;
+		index = 16;
 	}
 
-	tf->command = ata_rw_cmds[index + lba48 + write];
+	cmd = ata_rw_cmds[index + fua + lba48 + write];
+	if (cmd) {
+		tf->command = cmd;
+		return 0;
+	}
+	return -1;
 }
 
 static const char * const xfer_mode_str[] = {
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index e0439be4b573..2c644cbb6e9c 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -1080,11 +1080,13 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm
 	    scsicmd[0] == WRITE_16)
 		tf->flags |= ATA_TFLAG_WRITE;
 
-	/* Calculate the SCSI LBA and transfer length. */
+	/* Calculate the SCSI LBA, transfer length and FUA. */
 	switch (scsicmd[0]) {
 	case READ_10:
 	case WRITE_10:
 		scsi_10_lba_len(scsicmd, &block, &n_block);
+		if (unlikely(scsicmd[1] & (1 << 3)))
+			tf->flags |= ATA_TFLAG_FUA;
 		break;
 	case READ_6:
 	case WRITE_6:
@@ -1099,6 +1101,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm
 	case READ_16:
 	case WRITE_16:
 		scsi_16_lba_len(scsicmd, &block, &n_block);
+		if (unlikely(scsicmd[1] & (1 << 3)))
+			tf->flags |= ATA_TFLAG_FUA;
 		break;
 	default:
 		DPRINTK("no-byte command\n");
@@ -1142,7 +1146,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm
 			tf->device |= (block >> 24) & 0xf;
 		}
 
-		ata_rwcmd_protocol(qc);
+		if (unlikely(ata_rwcmd_protocol(qc) < 0))
+			goto invalid_fld;
 
 		qc->nsect = n_block;
 		tf->nsect = n_block & 0xff;
@@ -1160,7 +1165,8 @@ static unsigned int ata_scsi_rw_xlat(struct ata_queued_cmd *qc, const u8 *scsicm
 		if ((block >> 28) || (n_block > 256))
 			goto out_of_range;
 
-		ata_rwcmd_protocol(qc);
+		if (unlikely(ata_rwcmd_protocol(qc) < 0))
+			goto invalid_fld;
 
 		/* Convert LBA to CHS */
 		track = (u32)block / dev->sectors;
@@ -1695,6 +1701,7 @@ static unsigned int ata_msense_rw_recovery(u8 **ptr_io, const u8 *last)
 unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 				  unsigned int buflen)
 {
+	struct ata_device *dev = args->dev;
 	u8 *scsicmd = args->cmd->cmnd, *p, *last;
 	const u8 sat_blk_desc[] = {
 		0, 0, 0, 0,	/* number of blocks: sat unspecified */
@@ -1703,6 +1710,7 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 	};
 	u8 pg, spg;
 	unsigned int ebd, page_control, six_byte, output_len, alloc_len, minlen;
+	u8 dpofua;
 
 	VPRINTK("ENTER\n");
 
@@ -1771,9 +1779,17 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 
 	if (minlen < 1)
 		return 0;
+
+	dpofua = 0;
+	if (ata_id_has_fua(args->id) && dev->flags & ATA_DFLAG_LBA48 &&
+	    (!(dev->flags & ATA_DFLAG_PIO) || dev->multi_count))
+		dpofua = 1 << 4;
+
 	if (six_byte) {
 		output_len--;
 		rbuf[0] = output_len;
+		if (minlen > 2)
+			rbuf[2] |= dpofua;
 		if (ebd) {
 			if (minlen > 3)
 				rbuf[3] = sizeof(sat_blk_desc);
@@ -1786,6 +1802,8 @@ unsigned int ata_scsiop_mode_sense(struct ata_scsi_args *args, u8 *rbuf,
 		rbuf[0] = output_len >> 8;
 		if (minlen > 1)
 			rbuf[1] = output_len;
+		if (minlen > 3)
+			rbuf[3] |= dpofua;
 		if (ebd) {
 			if (minlen > 7)
 				rbuf[7] = sizeof(sat_blk_desc);
@@ -2446,7 +2464,7 @@ int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmnd *))
 		if (xlat_func)
 			ata_scsi_translate(ap, dev, cmd, done, xlat_func);
 		else
-			ata_scsi_simulate(dev->id, cmd, done);
+			ata_scsi_simulate(ap, dev, cmd, done);
 	} else
 		ata_scsi_translate(ap, dev, cmd, done, atapi_xlat);
 
@@ -2469,14 +2487,16 @@ out_unlock:
  *	spin_lock_irqsave(host_set lock)
  */
 
-void ata_scsi_simulate(u16 *id,
+void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev,
 		      struct scsi_cmnd *cmd,
 		      void (*done)(struct scsi_cmnd *))
 {
 	struct ata_scsi_args args;
 	const u8 *scsicmd = cmd->cmnd;
 
-	args.id = id;
+	args.ap = ap;
+	args.dev = dev;
+	args.id = dev->id;
 	args.cmd = cmd;
 	args.done = done;
 
diff --git a/drivers/scsi/libata.h b/drivers/scsi/libata.h
index 251e53bdc6e0..e03ce48b7b4b 100644
--- a/drivers/scsi/libata.h
+++ b/drivers/scsi/libata.h
@@ -32,6 +32,8 @@
 #define DRV_VERSION	"1.20"	/* must be exactly four chars */
 
 struct ata_scsi_args {
+	struct ata_port		*ap;
+	struct ata_device	*dev;
 	u16			*id;
 	struct scsi_cmnd	*cmd;
 	void			(*done)(struct scsi_cmnd *);
@@ -41,7 +43,7 @@ struct ata_scsi_args {
 extern int atapi_enabled;
 extern struct ata_queued_cmd *ata_qc_new_init(struct ata_port *ap,
 				      struct ata_device *dev);
-extern void ata_rwcmd_protocol(struct ata_queued_cmd *qc);
+extern int ata_rwcmd_protocol(struct ata_queued_cmd *qc);
 extern void ata_qc_free(struct ata_queued_cmd *qc);
 extern int ata_qc_issue(struct ata_queued_cmd *qc);
 extern int ata_check_atapi_dma(struct ata_queued_cmd *qc);
diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2873b732bb1..f63dad4165b1 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -129,6 +129,7 @@ enum {
 	ATA_CMD_READ_EXT	= 0x25,
 	ATA_CMD_WRITE		= 0xCA,
 	ATA_CMD_WRITE_EXT	= 0x35,
+	ATA_CMD_WRITE_FUA_EXT	= 0x3D,
 	ATA_CMD_PIO_READ	= 0x20,
 	ATA_CMD_PIO_READ_EXT	= 0x24,
 	ATA_CMD_PIO_WRITE	= 0x30,
@@ -137,6 +138,7 @@ enum {
 	ATA_CMD_READ_MULTI_EXT	= 0x29,
 	ATA_CMD_WRITE_MULTI	= 0xC5,
 	ATA_CMD_WRITE_MULTI_EXT	= 0x39,
+	ATA_CMD_WRITE_MULTI_FUA_EXT = 0xCE,
 	ATA_CMD_SET_FEATURES	= 0xEF,
 	ATA_CMD_PACKET		= 0xA0,
 	ATA_CMD_VERIFY		= 0x40,
@@ -192,6 +194,7 @@ enum {
 	ATA_TFLAG_DEVICE	= (1 << 2), /* enable r/w to device reg */
 	ATA_TFLAG_WRITE		= (1 << 3), /* data dir: host->dev==1 (write) */
 	ATA_TFLAG_LBA		= (1 << 4), /* enable LBA */
+	ATA_TFLAG_FUA		= (1 << 5), /* enable FUA */
 };
 
 enum ata_tf_protocols {
@@ -245,7 +248,8 @@ struct ata_taskfile {
 #define ata_id_is_sata(id)	((id)[93] == 0)
 #define ata_id_rahead_enabled(id) ((id)[85] & (1 << 6))
 #define ata_id_wcache_enabled(id) ((id)[85] & (1 << 5))
-#define ata_id_has_flush(id) ((id)[83] & (1 << 12))
+#define ata_id_has_fua(id)	((id)[84] & (1 << 6))
+#define ata_id_has_flush(id)	((id)[83] & (1 << 12))
 #define ata_id_has_flush_ext(id) ((id)[83] & (1 << 13))
 #define ata_id_has_lba48(id)	((id)[83] & (1 << 10))
 #define ata_id_has_wcache(id)	((id)[82] & (1 << 5))
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e828e172ccbf..6db2c0845731 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -480,7 +480,8 @@ extern u8   ata_bmdma_status(struct ata_port *ap);
 extern void ata_bmdma_irq_clear(struct ata_port *ap);
 extern void ata_qc_complete(struct ata_queued_cmd *qc);
 extern void ata_eng_timeout(struct ata_port *ap);
-extern void ata_scsi_simulate(u16 *id, struct scsi_cmnd *cmd,
+extern void ata_scsi_simulate(struct ata_port *ap, struct ata_device *dev,
+			      struct scsi_cmnd *cmd,
 			      void (*done)(struct scsi_cmnd *));
 extern int ata_std_bios_param(struct scsi_device *sdev,
 			      struct block_device *bdev,
-- 
cgit v1.2.3


From 15fc858a0067c800f410a24551a7b461978abf0b Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 6 Jan 2006 10:00:50 +0100
Subject: [BLOCK] Correct blk_execute_rq_nowait() prototype

---
 include/linux/blkdev.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 15db0f112d0a..fb0985377421 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -613,8 +613,7 @@ extern int blk_rq_map_user_iov(request_queue_t *, struct request *, struct sg_io
 extern int blk_execute_rq(request_queue_t *, struct gendisk *,
 			  struct request *, int);
 extern void blk_execute_rq_nowait(request_queue_t *, struct gendisk *,
-				  struct request *, int,
-				  void (*done)(struct request *));
+				  struct request *, int, rq_end_io_fn *);
 
 static inline request_queue_t *bdev_get_queue(struct block_device *bdev)
 {
-- 
cgit v1.2.3


From 4b2f0260c74324abca76ccaa42d426af163125e7 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 6 Jan 2006 00:09:47 -0800
Subject: [PATCH] nbd: fix TX/RX race condition

Janos Haar of First NetCenter Bt.  reported numerous crashes involving the
NBD driver.  With his help, this was tracked down to bogus bio vectors
which in turn was the result of a race condition between the
receive/transmit routines in the NBD driver.

The bug manifests itself like this:

CPU0				CPU1
do_nbd_request
	add req to queuelist
	nbd_send_request
		send req head
		for each bio
			kmap
			send
				nbd_read_stat
					nbd_find_request
					nbd_end_request
			kunmap

When CPU1 finishes nbd_end_request, the request and all its associated
bio's are freed.  So when CPU0 calls kunmap whose argument is derived from
the last bio, it may crash.

Under normal circumstances, the race occurs only on the last bio.  However,
if an error is encountered on the remote NBD server (such as an incorrect
magic number in the request), or if there were a bug in the server, it is
possible for the nbd_end_request to occur any time after the request's
addition to the queuelist.

The following patch fixes this problem by making sure that requests are not
added to the queuelist until after they have been completed transmission.

In order for the receiving side to be ready for responses involving
requests still being transmitted, the patch introduces the concept of the
active request.

When a response matches the current active request, its processing is
delayed until after the tranmission has come to a stop.

This has been tested by Janos and it has been successful in curing this
race condition.

From: Herbert Xu <herbert@gondor.apana.org.au>

  Here is an updated patch which removes the active_req wait in
  nbd_clear_queue and the associated memory barrier.

  I've also clarified this in the comment.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Cc: <djani22@dynamicweb.hu>
Cc: Paul Clements <Paul.Clements@SteelEye.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/block/nbd.c | 122 ++++++++++++++++++++++++++--------------------------
 include/linux/nbd.h |   8 ++++
 2 files changed, 68 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 9e268ddedfbd..d5c8ee7d9815 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -54,11 +54,15 @@
 #include <linux/errno.h>
 #include <linux/file.h>
 #include <linux/ioctl.h>
+#include <linux/compiler.h>
+#include <linux/err.h>
+#include <linux/kernel.h>
 #include <net/sock.h>
 
 #include <linux/devfs_fs_kernel.h>
 
 #include <asm/uaccess.h>
+#include <asm/system.h>
 #include <asm/types.h>
 
 #include <linux/nbd.h>
@@ -230,14 +234,6 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 	request.len = htonl(size);
 	memcpy(request.handle, &req, sizeof(req));
 
-	down(&lo->tx_lock);
-
-	if (!sock || !lo->sock) {
-		printk(KERN_ERR "%s: Attempted send on closed socket\n",
-				lo->disk->disk_name);
-		goto error_out;
-	}
-
 	dprintk(DBG_TX, "%s: request %p: sending control (%s@%llu,%luB)\n",
 			lo->disk->disk_name, req,
 			nbdcmd_to_ascii(nbd_cmd(req)),
@@ -276,11 +272,9 @@ static int nbd_send_req(struct nbd_device *lo, struct request *req)
 			}
 		}
 	}
-	up(&lo->tx_lock);
 	return 0;
 
 error_out:
-	up(&lo->tx_lock);
 	return 1;
 }
 
@@ -289,9 +283,14 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle)
 	struct request *req;
 	struct list_head *tmp;
 	struct request *xreq;
+	int err;
 
 	memcpy(&xreq, handle, sizeof(xreq));
 
+	err = wait_event_interruptible(lo->active_wq, lo->active_req != xreq);
+	if (unlikely(err))
+		goto out;
+
 	spin_lock(&lo->queue_lock);
 	list_for_each(tmp, &lo->queue_head) {
 		req = list_entry(tmp, struct request, queuelist);
@@ -302,7 +301,11 @@ static struct request *nbd_find_request(struct nbd_device *lo, char *handle)
 		return req;
 	}
 	spin_unlock(&lo->queue_lock);
-	return NULL;
+
+	err = -ENOENT;
+
+out:
+	return ERR_PTR(err);
 }
 
 static inline int sock_recv_bvec(struct socket *sock, struct bio_vec *bvec)
@@ -331,7 +334,11 @@ static struct request *nbd_read_stat(struct nbd_device *lo)
 		goto harderror;
 	}
 	req = nbd_find_request(lo, reply.handle);
-	if (req == NULL) {
+	if (unlikely(IS_ERR(req))) {
+		result = PTR_ERR(req);
+		if (result != -ENOENT)
+			goto harderror;
+
 		printk(KERN_ERR "%s: Unexpected reply (%p)\n",
 				lo->disk->disk_name, reply.handle);
 		result = -EBADR;
@@ -395,19 +402,24 @@ static void nbd_clear_que(struct nbd_device *lo)
 
 	BUG_ON(lo->magic != LO_MAGIC);
 
-	do {
-		req = NULL;
-		spin_lock(&lo->queue_lock);
-		if (!list_empty(&lo->queue_head)) {
-			req = list_entry(lo->queue_head.next, struct request, queuelist);
-			list_del_init(&req->queuelist);
-		}
-		spin_unlock(&lo->queue_lock);
-		if (req) {
-			req->errors++;
-			nbd_end_request(req);
-		}
-	} while (req);
+	/*
+	 * Because we have set lo->sock to NULL under the tx_lock, all
+	 * modifications to the list must have completed by now.  For
+	 * the same reason, the active_req must be NULL.
+	 *
+	 * As a consequence, we don't need to take the spin lock while
+	 * purging the list here.
+	 */
+	BUG_ON(lo->sock);
+	BUG_ON(lo->active_req);
+
+	while (!list_empty(&lo->queue_head)) {
+		req = list_entry(lo->queue_head.next, struct request,
+				 queuelist);
+		list_del_init(&req->queuelist);
+		req->errors++;
+		nbd_end_request(req);
+	}
 }
 
 /*
@@ -435,11 +447,6 @@ static void do_nbd_request(request_queue_t * q)
 
 		BUG_ON(lo->magic != LO_MAGIC);
 
-		if (!lo->file) {
-			printk(KERN_ERR "%s: Request when not-ready\n",
-					lo->disk->disk_name);
-			goto error_out;
-		}
 		nbd_cmd(req) = NBD_CMD_READ;
 		if (rq_data_dir(req) == WRITE) {
 			nbd_cmd(req) = NBD_CMD_WRITE;
@@ -453,32 +460,34 @@ static void do_nbd_request(request_queue_t * q)
 		req->errors = 0;
 		spin_unlock_irq(q->queue_lock);
 
-		spin_lock(&lo->queue_lock);
-
-		if (!lo->file) {
-			spin_unlock(&lo->queue_lock);
-			printk(KERN_ERR "%s: failed between accept and semaphore, file lost\n",
-					lo->disk->disk_name);
+		down(&lo->tx_lock);
+		if (unlikely(!lo->sock)) {
+			up(&lo->tx_lock);
+			printk(KERN_ERR "%s: Attempted send on closed socket\n",
+			       lo->disk->disk_name);
 			req->errors++;
 			nbd_end_request(req);
 			spin_lock_irq(q->queue_lock);
 			continue;
 		}
 
-		list_add(&req->queuelist, &lo->queue_head);
-		spin_unlock(&lo->queue_lock);
+		lo->active_req = req;
 
 		if (nbd_send_req(lo, req) != 0) {
 			printk(KERN_ERR "%s: Request send failed\n",
 					lo->disk->disk_name);
-			if (nbd_find_request(lo, (char *)&req) != NULL) {
-				/* we still own req */
-				req->errors++;
-				nbd_end_request(req);
-			} else /* we're racing with nbd_clear_que */
-				printk(KERN_DEBUG "nbd: can't find req\n");
+			req->errors++;
+			nbd_end_request(req);
+		} else {
+			spin_lock(&lo->queue_lock);
+			list_add(&req->queuelist, &lo->queue_head);
+			spin_unlock(&lo->queue_lock);
 		}
 
+		lo->active_req = NULL;
+		up(&lo->tx_lock);
+		wake_up_all(&lo->active_wq);
+
 		spin_lock_irq(q->queue_lock);
 		continue;
 
@@ -529,17 +538,10 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 		down(&lo->tx_lock);
 		lo->sock = NULL;
 		up(&lo->tx_lock);
-		spin_lock(&lo->queue_lock);
 		file = lo->file;
 		lo->file = NULL;
-		spin_unlock(&lo->queue_lock);
 		nbd_clear_que(lo);
-		spin_lock(&lo->queue_lock);
-		if (!list_empty(&lo->queue_head)) {
-			printk(KERN_ERR "nbd: disconnect: some requests are in progress -> please try again.\n");
-			error = -EBUSY;
-		}
-		spin_unlock(&lo->queue_lock);
+		BUG_ON(!list_empty(&lo->queue_head));
 		if (file)
 			fput(file);
 		return error;
@@ -598,24 +600,19 @@ static int nbd_ioctl(struct inode *inode, struct file *file,
 			lo->sock = NULL;
 		}
 		up(&lo->tx_lock);
-		spin_lock(&lo->queue_lock);
 		file = lo->file;
 		lo->file = NULL;
-		spin_unlock(&lo->queue_lock);
 		nbd_clear_que(lo);
 		printk(KERN_WARNING "%s: queue cleared\n", lo->disk->disk_name);
 		if (file)
 			fput(file);
 		return lo->harderror;
 	case NBD_CLEAR_QUE:
-		down(&lo->tx_lock);
-		if (lo->sock) {
-			up(&lo->tx_lock);
-			return 0; /* probably should be error, but that would
-				   * break "nbd-client -d", so just return 0 */
-		}
-		up(&lo->tx_lock);
-		nbd_clear_que(lo);
+		/*
+		 * This is for compatibility only.  The queue is always cleared
+		 * by NBD_DO_IT or NBD_CLEAR_SOCK.
+		 */
+		BUG_ON(!lo->sock && !list_empty(&lo->queue_head));
 		return 0;
 	case NBD_PRINT_DEBUG:
 		printk(KERN_INFO "%s: next = %p, prev = %p, head = %p\n",
@@ -688,6 +685,7 @@ static int __init nbd_init(void)
 		spin_lock_init(&nbd_dev[i].queue_lock);
 		INIT_LIST_HEAD(&nbd_dev[i].queue_head);
 		init_MUTEX(&nbd_dev[i].tx_lock);
+		init_waitqueue_head(&nbd_dev[i].active_wq);
 		nbd_dev[i].blksize = 1024;
 		nbd_dev[i].bytesize = 0x7ffffc00ULL << 10; /* 2TB */
 		disk->major = NBD_MAJOR;
diff --git a/include/linux/nbd.h b/include/linux/nbd.h
index 090e210e98f0..f95d51fae733 100644
--- a/include/linux/nbd.h
+++ b/include/linux/nbd.h
@@ -37,18 +37,26 @@ enum {
 /* userspace doesn't need the nbd_device structure */
 #ifdef __KERNEL__
 
+#include <linux/wait.h>
+
 /* values for flags field */
 #define NBD_READ_ONLY 0x0001
 #define NBD_WRITE_NOCHK 0x0002
 
+struct request;
+
 struct nbd_device {
 	int flags;
 	int harderror;		/* Code of hard error			*/
 	struct socket * sock;
 	struct file * file; 	/* If == NULL, device is not ready, yet	*/
 	int magic;
+
 	spinlock_t queue_lock;
 	struct list_head queue_head;/* Requests are added here...	*/
+	struct request *active_req;
+	wait_queue_head_t active_wq;
+
 	struct semaphore tx_lock;
 	struct gendisk *disk;
 	int blksize;
-- 
cgit v1.2.3


From d7339071f6a8b50101d7ba327926b770f22d5d8b Mon Sep 17 00:00:00 2001
From: Hans Reiser <reiser@namesys.com>
Date: Fri, 6 Jan 2006 00:10:36 -0800
Subject: [PATCH] reiser4: vfs: add truncate_inode_pages_range()

This patch makes truncate_inode_pages_range from truncate_inode_pages.
truncate_inode_pages became a one-liner call to truncate_inode_pages_range.

Reiser4 needs truncate_inode_pages_ranges because it tries to keep
correspondence between existences of metadata pointing to data pages and pages
to which those metadata point to.  So, when metadata of certain part of file
is removed from filesystem tree, only pages of corresponding range are to be
truncated.

(Needed by the madvise(MADV_REMOVE) patch)

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h |  2 ++
 mm/truncate.c      | 44 +++++++++++++++++++++++++++++++++++++-------
 2 files changed, 39 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index a06a84d347fb..92acae9f1f4c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -896,6 +896,8 @@ extern unsigned long do_brk(unsigned long, unsigned long);
 /* filemap.c */
 extern unsigned long page_unuse(struct page *);
 extern void truncate_inode_pages(struct address_space *, loff_t);
+extern void truncate_inode_pages_range(struct address_space *,
+				       loff_t lstart, loff_t lend);
 
 /* generic vm_area_ops exported for stackable file systems */
 extern struct page *filemap_nopage(struct vm_area_struct *, unsigned long, int *);
diff --git a/mm/truncate.c b/mm/truncate.c
index 9173ab500604..7dee32745901 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -82,12 +82,15 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
 }
 
 /**
- * truncate_inode_pages - truncate *all* the pages from an offset
+ * truncate_inode_pages - truncate range of pages specified by start and
+ * end byte offsets
  * @mapping: mapping to truncate
  * @lstart: offset from which to truncate
+ * @lend: offset to which to truncate
  *
- * Truncate the page cache at a set offset, removing the pages that are beyond
- * that offset (and zeroing out partial pages).
+ * Truncate the page cache, removing the pages that are between
+ * specified offsets (and zeroing out partial page
+ * (if lstart is not page aligned)).
  *
  * Truncate takes two passes - the first pass is nonblocking.  It will not
  * block on page locks and it will not block on writeback.  The second pass
@@ -101,12 +104,12 @@ invalidate_complete_page(struct address_space *mapping, struct page *page)
  * We pass down the cache-hot hint to the page freeing code.  Even if the
  * mapping is large, it is probably the case that the final pages are the most
  * recently touched, and freeing happens in ascending file offset order.
- *
- * Called under (and serialised by) inode->i_sem.
  */
-void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
+void truncate_inode_pages_range(struct address_space *mapping,
+				loff_t lstart, loff_t lend)
 {
 	const pgoff_t start = (lstart + PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT;
+	pgoff_t end;
 	const unsigned partial = lstart & (PAGE_CACHE_SIZE - 1);
 	struct pagevec pvec;
 	pgoff_t next;
@@ -115,13 +118,22 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 	if (mapping->nrpages == 0)
 		return;
 
+	BUG_ON((lend & (PAGE_CACHE_SIZE - 1)) != (PAGE_CACHE_SIZE - 1));
+	end = (lend >> PAGE_CACHE_SHIFT);
+
 	pagevec_init(&pvec, 0);
 	next = start;
-	while (pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
+	while (next <= end &&
+	       pagevec_lookup(&pvec, mapping, next, PAGEVEC_SIZE)) {
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 			pgoff_t page_index = page->index;
 
+			if (page_index > end) {
+				next = page_index;
+				break;
+			}
+
 			if (page_index > next)
 				next = page_index;
 			next++;
@@ -157,9 +169,15 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 			next = start;
 			continue;
 		}
+		if (pvec.pages[0]->index > end) {
+			pagevec_release(&pvec);
+			break;
+		}
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
+			if (page->index > end)
+				break;
 			lock_page(page);
 			wait_on_page_writeback(page);
 			if (page->index > next)
@@ -171,7 +189,19 @@ void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
 		pagevec_release(&pvec);
 	}
 }
+EXPORT_SYMBOL(truncate_inode_pages_range);
 
+/**
+ * truncate_inode_pages - truncate *all* the pages from an offset
+ * @mapping: mapping to truncate
+ * @lstart: offset from which to truncate
+ *
+ * Called under (and serialised by) inode->i_sem.
+ */
+void truncate_inode_pages(struct address_space *mapping, loff_t lstart)
+{
+	truncate_inode_pages_range(mapping, lstart, (loff_t)-1);
+}
 EXPORT_SYMBOL(truncate_inode_pages);
 
 /**
-- 
cgit v1.2.3


From f6b3ec238d12c8cc6cc71490c6e3127988460349 Mon Sep 17 00:00:00 2001
From: Badari Pulavarty <pbadari@us.ibm.com>
Date: Fri, 6 Jan 2006 00:10:38 -0800
Subject: [PATCH] madvise(MADV_REMOVE): remove pages from tmpfs shm backing
 store

Here is the patch to implement madvise(MADV_REMOVE) - which frees up a
given range of pages & its associated backing store.  Current
implementation supports only shmfs/tmpfs and other filesystems return
-ENOSYS.

"Some app allocates large tmpfs files, then when some task quits and some
client disconnect, some memory can be released.  However the only way to
release tmpfs-swap is to MADV_REMOVE". - Andrea Arcangeli

Databases want to use this feature to drop a section of their bufferpool
(shared memory segments) - without writing back to disk/swap space.

This feature is also useful for supporting hot-plug memory on UML.

Concerns raised by Andrew Morton:

- "We have no plan for holepunching!  If we _do_ have such a plan (or
  might in the future) then what would the API look like?  I think
  sys_holepunch(fd, start, len), so we should start out with that."

- Using madvise is very weird, because people will ask "why do I need to
  mmap my file before I can stick a hole in it?"

- None of the other madvise operations call into the filesystem in this
  manner.  A broad question is: is this capability an MM operation or a
  filesytem operation?  truncate, for example, is a filesystem operation
  which sometimes has MM side-effects.  madvise is an mm operation and with
  this patch, it gains FS side-effects, only they're really, really
  significant ones."

Comments:

- Andrea suggested the fs operation too but then it's more efficient to
  have it as a mm operation with fs side effects, because they don't
  immediatly know fd and physical offset of the range.  It's possible to
  fixup in userland and to use the fs operation but it's more expensive,
  the vmas are already in the kernel and we can use them.

Short term plan &  Future Direction:

- We seem to need this interface only for shmfs/tmpfs files in the short
  term.  We have to add hooks into the filesystem for correctness and
  completeness.  This is what this patch does.

- In the future, plan is to support both fs and mmap apis also.  This
  also involves (other) filesystem specific functions to be implemented.

- Current patch doesn't support VM_NONLINEAR - which can be addressed in
  the future.

Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Andrea Arcangeli <andrea@suse.de>
Cc: Michael Kerrisk <mtk-manpages@gmx.net>
Cc: Ulrich Drepper <drepper@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/mman.h   |  1 +
 include/asm-arm/mman.h     |  1 +
 include/asm-arm26/mman.h   |  1 +
 include/asm-cris/mman.h    |  1 +
 include/asm-frv/mman.h     |  1 +
 include/asm-h8300/mman.h   |  1 +
 include/asm-i386/mman.h    |  1 +
 include/asm-ia64/mman.h    |  1 +
 include/asm-m32r/mman.h    |  1 +
 include/asm-m68k/mman.h    |  1 +
 include/asm-mips/mman.h    |  1 +
 include/asm-parisc/mman.h  |  1 +
 include/asm-powerpc/mman.h |  1 +
 include/asm-s390/mman.h    |  1 +
 include/asm-sh/mman.h      |  1 +
 include/asm-sparc/mman.h   |  1 +
 include/asm-sparc64/mman.h |  1 +
 include/asm-v850/mman.h    |  1 +
 include/asm-x86_64/mman.h  |  1 +
 include/asm-xtensa/mman.h  |  1 +
 include/linux/fs.h         |  1 +
 include/linux/mm.h         |  1 +
 mm/madvise.c               | 35 +++++++++++++++++++++++++++++++++++
 mm/memory.c                | 25 ++++++++++++++++++++++++-
 mm/shmem.c                 | 32 ++++++++++++++++++++++++--------
 25 files changed, 105 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h
index eb9c279045ef..f6439532a262 100644
--- a/include/asm-alpha/mman.h
+++ b/include/asm-alpha/mman.h
@@ -42,6 +42,7 @@
 #define MADV_WILLNEED	3		/* will need these pages */
 #define	MADV_SPACEAVAIL	5		/* ensure resources are available */
 #define MADV_DONTNEED	6		/* don't need these pages */
+#define MADV_REMOVE	7		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h
index 8e4f69c4fa5f..f0bebca2ac21 100644
--- a/include/asm-arm/mman.h
+++ b/include/asm-arm/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h
index cc27b8240265..0ed7780541fa 100644
--- a/include/asm-arm26/mman.h
+++ b/include/asm-arm26/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h
index 8570e72b9502..5a382b8bf3f7 100644
--- a/include/asm-cris/mman.h
+++ b/include/asm-cris/mman.h
@@ -37,6 +37,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h
index c684720dfbdd..8af4a41c255e 100644
--- a/include/asm-frv/mman.h
+++ b/include/asm-frv/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h
index 63f727a59850..744a8fb485c2 100644
--- a/include/asm-h8300/mman.h
+++ b/include/asm-h8300/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h
index 196619a83854..ba4941e6f643 100644
--- a/include/asm-i386/mman.h
+++ b/include/asm-i386/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h
index 1c0a73af1461..828beb24a20e 100644
--- a/include/asm-ia64/mman.h
+++ b/include/asm-ia64/mman.h
@@ -43,6 +43,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h
index 011f6d9ec5cc..12e29747bc84 100644
--- a/include/asm-m32r/mman.h
+++ b/include/asm-m32r/mman.h
@@ -37,6 +37,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h
index f831c4eeae6e..ea262ab88b3b 100644
--- a/include/asm-m68k/mman.h
+++ b/include/asm-m68k/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h
index 62060957ba93..dd17c8bd62a1 100644
--- a/include/asm-mips/mman.h
+++ b/include/asm-mips/mman.h
@@ -65,6 +65,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON       MAP_ANONYMOUS
diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h
index e829607eb8bc..736b0abcac05 100644
--- a/include/asm-parisc/mman.h
+++ b/include/asm-parisc/mman.h
@@ -38,6 +38,7 @@
 #define MADV_SPACEAVAIL 5               /* insure that resources are reserved */
 #define MADV_VPS_PURGE  6               /* Purge pages from VM page cache */
 #define MADV_VPS_INHERIT 7              /* Inherit parents page size */
+#define MADV_REMOVE     8		/* remove these pages & resources */
 
 /* The range 12-64 is reserved for page size specification. */
 #define MADV_4K_PAGES   12              /* Use 4K pages  */
diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h
index f5e5342fcac5..a2e34c21b44f 100644
--- a/include/asm-powerpc/mman.h
+++ b/include/asm-powerpc/mman.h
@@ -44,6 +44,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h
index ea86bd12204f..c8d5409b5d56 100644
--- a/include/asm-s390/mman.h
+++ b/include/asm-s390/mman.h
@@ -43,6 +43,7 @@
 #define MADV_SEQUENTIAL        0x2             /* read-ahead aggressively */
 #define MADV_WILLNEED  0x3              /* pre-fault pages */
 #define MADV_DONTNEED  0x4              /* discard these pages */
+#define MADV_REMOVE    0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h
index 3ebab5f79db7..693bd55a3710 100644
--- a/include/asm-sh/mman.h
+++ b/include/asm-sh/mman.h
@@ -35,6 +35,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h
index 138eb81dd70d..98435ad8619e 100644
--- a/include/asm-sparc/mman.h
+++ b/include/asm-sparc/mman.h
@@ -54,6 +54,7 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
+#define MADV_REMOVE	0x6		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h
index 01cecf54357b..cb4b6156194d 100644
--- a/include/asm-sparc64/mman.h
+++ b/include/asm-sparc64/mman.h
@@ -54,6 +54,7 @@
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
 #define MADV_FREE	0x5		/* (Solaris) contents can be freed */
+#define MADV_REMOVE	0x6		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-v850/mman.h b/include/asm-v850/mman.h
index e2b90081b56f..edc79965193a 100644
--- a/include/asm-v850/mman.h
+++ b/include/asm-v850/mman.h
@@ -32,6 +32,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h
index 78e60a4fd4ee..d0e97b74f735 100644
--- a/include/asm-x86_64/mman.h
+++ b/include/asm-x86_64/mman.h
@@ -36,6 +36,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON	MAP_ANONYMOUS
diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h
index 9a95a45df996..082a7504925e 100644
--- a/include/asm-xtensa/mman.h
+++ b/include/asm-xtensa/mman.h
@@ -72,6 +72,7 @@
 #define MADV_SEQUENTIAL	0x2		/* read-ahead aggressively */
 #define MADV_WILLNEED	0x3		/* pre-fault pages */
 #define MADV_DONTNEED	0x4		/* discard these pages */
+#define MADV_REMOVE	0x5		/* remove these pages & resources */
 
 /* compatibility flags */
 #define MAP_ANON       MAP_ANONYMOUS
diff --git a/include/linux/fs.h b/include/linux/fs.h
index ed9a41a71e8b..115e72be25d0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1050,6 +1050,7 @@ struct inode_operations {
 	ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
 	ssize_t (*listxattr) (struct dentry *, char *, size_t);
 	int (*removexattr) (struct dentry *, const char *);
+	void (*truncate_range)(struct inode *, loff_t, loff_t);
 };
 
 struct seq_file;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 92acae9f1f4c..6c9be99429f3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -690,6 +690,7 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
 }
 
 extern int vmtruncate(struct inode * inode, loff_t offset);
+extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
 extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
diff --git a/mm/madvise.c b/mm/madvise.c
index 2b7cf0400a21..ae0ae3ea299a 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -140,6 +140,36 @@ static long madvise_dontneed(struct vm_area_struct * vma,
 	return 0;
 }
 
+/*
+ * Application wants to free up the pages and associated backing store.
+ * This is effectively punching a hole into the middle of a file.
+ *
+ * NOTE: Currently, only shmfs/tmpfs is supported for this operation.
+ * Other filesystems return -ENOSYS.
+ */
+static long madvise_remove(struct vm_area_struct *vma,
+				unsigned long start, unsigned long end)
+{
+	struct address_space *mapping;
+        loff_t offset, endoff;
+
+	if (vma->vm_flags & (VM_LOCKED|VM_NONLINEAR|VM_HUGETLB))
+		return -EINVAL;
+
+	if (!vma->vm_file || !vma->vm_file->f_mapping
+		|| !vma->vm_file->f_mapping->host) {
+			return -EINVAL;
+	}
+
+	mapping = vma->vm_file->f_mapping;
+
+	offset = (loff_t)(start - vma->vm_start)
+			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+	endoff = (loff_t)(end - vma->vm_start - 1)
+			+ ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
+	return  vmtruncate_range(mapping->host, offset, endoff);
+}
+
 static long
 madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 		unsigned long start, unsigned long end, int behavior)
@@ -152,6 +182,9 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
 	case MADV_RANDOM:
 		error = madvise_behavior(vma, prev, start, end, behavior);
 		break;
+	case MADV_REMOVE:
+		error = madvise_remove(vma, start, end);
+		break;
 
 	case MADV_WILLNEED:
 		error = madvise_willneed(vma, prev, start, end);
@@ -190,6 +223,8 @@ madvise_vma(struct vm_area_struct *vma, struct vm_area_struct **prev,
  *		some pages ahead.
  *  MADV_DONTNEED - the application is finished with the given range,
  *		so the kernel can free resources associated with it.
+ *  MADV_REMOVE - the application wants to free up the given range of
+ *		pages and associated backing store.
  *
  * return values:
  *  zero    - success
diff --git a/mm/memory.c b/mm/memory.c
index d8dde07a3656..e249088908c4 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1770,9 +1770,32 @@ out_big:
 out_busy:
 	return -ETXTBSY;
 }
-
 EXPORT_SYMBOL(vmtruncate);
 
+int vmtruncate_range(struct inode *inode, loff_t offset, loff_t end)
+{
+	struct address_space *mapping = inode->i_mapping;
+
+	/*
+	 * If the underlying filesystem is not going to provide
+	 * a way to truncate a range of blocks (punch a hole) -
+	 * we should return failure right now.
+	 */
+	if (!inode->i_op || !inode->i_op->truncate_range)
+		return -ENOSYS;
+
+	down(&inode->i_sem);
+	down_write(&inode->i_alloc_sem);
+	unmap_mapping_range(mapping, offset, (end - offset), 1);
+	truncate_inode_pages_range(mapping, offset, end);
+	inode->i_op->truncate_range(inode, offset, end);
+	up_write(&inode->i_alloc_sem);
+	up(&inode->i_sem);
+
+	return 0;
+}
+EXPORT_SYMBOL(vmtruncate_range);
+
 /* 
  * Primitive swap readahead code. We simply read an aligned block of
  * (1 << page_cluster) entries in the swap area. This method is chosen
diff --git a/mm/shmem.c b/mm/shmem.c
index d9fc277940da..65c148efa2ed 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -457,7 +457,7 @@ static void shmem_free_pages(struct list_head *next)
 	} while (next);
 }
 
-static void shmem_truncate(struct inode *inode)
+static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
 {
 	struct shmem_inode_info *info = SHMEM_I(inode);
 	unsigned long idx;
@@ -475,18 +475,27 @@ static void shmem_truncate(struct inode *inode)
 	long nr_swaps_freed = 0;
 	int offset;
 	int freed;
+	int punch_hole = 0;
 
 	inode->i_ctime = inode->i_mtime = CURRENT_TIME;
-	idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	idx = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (idx >= info->next_index)
 		return;
 
 	spin_lock(&info->lock);
 	info->flags |= SHMEM_TRUNCATE;
-	limit = info->next_index;
-	info->next_index = idx;
+	if (likely(end == (loff_t) -1)) {
+		limit = info->next_index;
+		info->next_index = idx;
+	} else {
+		limit = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		if (limit > info->next_index)
+			limit = info->next_index;
+		punch_hole = 1;
+	}
+
 	topdir = info->i_indirect;
-	if (topdir && idx <= SHMEM_NR_DIRECT) {
+	if (topdir && idx <= SHMEM_NR_DIRECT && !punch_hole) {
 		info->i_indirect = NULL;
 		nr_pages_to_free++;
 		list_add(&topdir->lru, &pages_to_free);
@@ -573,11 +582,12 @@ static void shmem_truncate(struct inode *inode)
 			set_page_private(subdir, page_private(subdir) - freed);
 			if (offset)
 				spin_unlock(&info->lock);
-			BUG_ON(page_private(subdir) > offset);
+			if (!punch_hole)
+				BUG_ON(page_private(subdir) > offset);
 		}
 		if (offset)
 			offset = 0;
-		else if (subdir) {
+		else if (subdir && !page_private(subdir)) {
 			dir[diroff] = NULL;
 			nr_pages_to_free++;
 			list_add(&subdir->lru, &pages_to_free);
@@ -594,7 +604,7 @@ done2:
 		 * Also, though shmem_getpage checks i_size before adding to
 		 * cache, no recheck after: so fix the narrow window there too.
 		 */
-		truncate_inode_pages(inode->i_mapping, inode->i_size);
+		truncate_inode_pages_range(inode->i_mapping, start, end);
 	}
 
 	spin_lock(&info->lock);
@@ -614,6 +624,11 @@ done2:
 	}
 }
 
+static void shmem_truncate(struct inode *inode)
+{
+	shmem_truncate_range(inode, inode->i_size, (loff_t)-1);
+}
+
 static int shmem_notify_change(struct dentry *dentry, struct iattr *attr)
 {
 	struct inode *inode = dentry->d_inode;
@@ -2083,6 +2098,7 @@ static struct file_operations shmem_file_operations = {
 static struct inode_operations shmem_inode_operations = {
 	.truncate	= shmem_truncate,
 	.setattr	= shmem_notify_change,
+	.truncate_range	= shmem_truncate_range,
 };
 
 static struct inode_operations shmem_dir_inode_operations = {
-- 
cgit v1.2.3


From 5da7ca86078964cbfe6c83efc1205904587706fe Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:10:46 -0800
Subject: [PATCH] Add NUMA policy support for huge pages.

The huge_zonelist() function in the memory policy layer provides an list of
zones ordered by NUMA distance.  The hugetlb layer will walk that list looking
for a zone that has available huge pages but is also in the nodeset of the
current cpuset.

This patch does not contain the folding of find_or_alloc_huge_page() that was
controversial in the earlier discussion.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/hugetlb.h   |  4 ++--
 include/linux/mempolicy.h |  8 ++++++++
 mm/hugetlb.c              | 24 ++++++++++++++----------
 mm/mempolicy.c            | 39 ++++++++++++++++++++++++++++++---------
 4 files changed, 54 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 1056717ee501..68d82ad6b17c 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -22,7 +22,7 @@ int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
 int is_hugepage_mem_enough(size_t);
 unsigned long hugetlb_total_pages(void);
-struct page *alloc_huge_page(void);
+struct page *alloc_huge_page(struct vm_area_struct *, unsigned long);
 void free_huge_page(struct page *);
 int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, int write_access);
@@ -97,7 +97,7 @@ static inline unsigned long hugetlb_total_pages(void)
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \
 						do { } while (0)
-#define alloc_huge_page()			({ NULL; })
+#define alloc_huge_page(vma, addr)		({ NULL; })
 #define free_huge_page(p)			({ (void)(p); BUG(); })
 #define hugetlb_fault(mm, vma, addr, write)	({ BUG(); 0; })
 
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 8b67cf837ca9..817db6427113 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -156,6 +156,8 @@ extern void numa_default_policy(void);
 extern void numa_policy_init(void);
 extern void numa_policy_rebind(const nodemask_t *old, const nodemask_t *new);
 extern struct mempolicy default_policy;
+extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+		unsigned long addr);
 
 #else
 
@@ -232,6 +234,12 @@ static inline void numa_policy_rebind(const nodemask_t *old,
 {
 }
 
+static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+		unsigned long addr)
+{
+	return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+}
+
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e93bd63462f0..eb405565949d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -11,6 +11,8 @@
 #include <linux/highmem.h>
 #include <linux/nodemask.h>
 #include <linux/pagemap.h>
+#include <linux/mempolicy.h>
+
 #include <asm/page.h>
 #include <asm/pgtable.h>
 
@@ -36,11 +38,12 @@ static void enqueue_huge_page(struct page *page)
 	free_huge_pages_node[nid]++;
 }
 
-static struct page *dequeue_huge_page(void)
+static struct page *dequeue_huge_page(struct vm_area_struct *vma,
+				unsigned long address)
 {
 	int nid = numa_node_id();
 	struct page *page = NULL;
-	struct zonelist *zonelist = NODE_DATA(nid)->node_zonelists;
+	struct zonelist *zonelist = huge_zonelist(vma, address);
 	struct zone **z;
 
 	for (z = zonelist->zones; *z; z++) {
@@ -87,13 +90,13 @@ void free_huge_page(struct page *page)
 	spin_unlock(&hugetlb_lock);
 }
 
-struct page *alloc_huge_page(void)
+struct page *alloc_huge_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	struct page *page;
 	int i;
 
 	spin_lock(&hugetlb_lock);
-	page = dequeue_huge_page();
+	page = dequeue_huge_page(vma, addr);
 	if (!page) {
 		spin_unlock(&hugetlb_lock);
 		return NULL;
@@ -196,7 +199,7 @@ static unsigned long set_max_huge_pages(unsigned long count)
 	spin_lock(&hugetlb_lock);
 	try_to_free_low(count);
 	while (count < nr_huge_pages) {
-		struct page *page = dequeue_huge_page();
+		struct page *page = dequeue_huge_page(NULL, 0);
 		if (!page)
 			break;
 		update_and_free_page(page);
@@ -365,8 +368,9 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	flush_tlb_range(vma, start, end);
 }
 
-static struct page *find_or_alloc_huge_page(struct address_space *mapping,
-				unsigned long idx, int shared)
+static struct page *find_or_alloc_huge_page(struct vm_area_struct *vma,
+			unsigned long addr, struct address_space *mapping,
+			unsigned long idx, int shared)
 {
 	struct page *page;
 	int err;
@@ -378,7 +382,7 @@ retry:
 
 	if (hugetlb_get_quota(mapping))
 		goto out;
-	page = alloc_huge_page();
+	page = alloc_huge_page(vma, addr);
 	if (!page) {
 		hugetlb_put_quota(mapping);
 		goto out;
@@ -418,7 +422,7 @@ static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 	}
 
 	page_cache_get(old_page);
-	new_page = alloc_huge_page();
+	new_page = alloc_huge_page(vma, address);
 
 	if (!new_page) {
 		page_cache_release(old_page);
@@ -467,7 +471,7 @@ int hugetlb_no_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	 * Use page lock to guard against racing truncation
 	 * before we get page_table_lock.
 	 */
-	page = find_or_alloc_huge_page(mapping, idx,
+	page = find_or_alloc_huge_page(vma, address, mapping, idx,
 			vma->vm_flags & VM_SHARED);
 	if (!page)
 		goto out;
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 72f402cc9c9a..45c51ac63443 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -785,6 +785,34 @@ static unsigned offset_il_node(struct mempolicy *pol,
 	return nid;
 }
 
+/* Determine a node number for interleave */
+static inline unsigned interleave_nid(struct mempolicy *pol,
+		 struct vm_area_struct *vma, unsigned long addr, int shift)
+{
+	if (vma) {
+		unsigned long off;
+
+		off = vma->vm_pgoff;
+		off += (addr - vma->vm_start) >> shift;
+		return offset_il_node(pol, vma, off);
+	} else
+		return interleave_nodes(pol);
+}
+
+/* Return a zonelist suitable for a huge page allocation. */
+struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr)
+{
+	struct mempolicy *pol = get_vma_policy(current, vma, addr);
+
+	if (pol->policy == MPOL_INTERLEAVE) {
+		unsigned nid;
+
+		nid = interleave_nid(pol, vma, addr, HPAGE_SHIFT);
+		return NODE_DATA(nid)->node_zonelists + gfp_zone(GFP_HIGHUSER);
+	}
+	return zonelist_policy(GFP_HIGHUSER, pol);
+}
+
 /* Allocate a page in interleaved policy.
    Own path because it needs to do special accounting. */
 static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
@@ -833,15 +861,8 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr)
 
 	if (unlikely(pol->policy == MPOL_INTERLEAVE)) {
 		unsigned nid;
-		if (vma) {
-			unsigned long off;
-			off = vma->vm_pgoff;
-			off += (addr - vma->vm_start) >> PAGE_SHIFT;
-			nid = offset_il_node(pol, vma, off);
-		} else {
-			/* fall back to process interleaving */
-			nid = interleave_nodes(pol);
-		}
+
+		nid = interleave_nid(pol, vma, addr, PAGE_SHIFT);
 		return alloc_page_interleave(gfp, 0, nid);
 	}
 	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
-- 
cgit v1.2.3


From 21abb1478a87e26f5fa71dbcb7cf4264272c2248 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:10:47 -0800
Subject: [PATCH] Remove old node based policy interface from mempolicy.c

mempolicy.c contains provisional interface for huge page allocation based on
node numbers.  This is in use in SLES9 but was never used (AFAIK) in upstream
versions of Linux.

Huge page allocations now use zonelists to figure out where to allocate pages.
 The use of zonelists allows us to find the closest hugepage which was the
consideration of the NUMA distance for huge page allocations.

Remove the obsolete functions.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Cc: Andi Kleen <ak@muc.de>
Acked-by: William Lee Irwin III <wli@holomorphy.com>
Cc: Adam Litke <agl@us.ibm.com>
Acked-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mempolicy.h | 19 -------------------
 mm/mempolicy.c            | 48 -----------------------------------------------
 2 files changed, 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 817db6427113..b972f985a3c5 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -109,14 +109,6 @@ static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
 
 #define mpol_set_vma_default(vma) ((vma)->vm_policy = NULL)
 
-/*
- * Hugetlb policy. i386 hugetlb so far works with node numbers
- * instead of zone lists, so give it special interfaces for now.
- */
-extern int mpol_first_node(struct vm_area_struct *vma, unsigned long addr);
-extern int mpol_node_valid(int nid, struct vm_area_struct *vma,
-			unsigned long addr);
-
 /*
  * Tree of shared policies for a shared memory region.
  * Maintain the policies in a pseudo mm that contains vmas. The vmas
@@ -184,17 +176,6 @@ static inline struct mempolicy *mpol_copy(struct mempolicy *old)
 	return NULL;
 }
 
-static inline int mpol_first_node(struct vm_area_struct *vma, unsigned long a)
-{
-	return numa_node_id();
-}
-
-static inline int
-mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long a)
-{
-	return 1;
-}
-
 struct shared_policy {};
 
 static inline int mpol_set_shared_policy(struct shared_policy *info,
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 45c51ac63443..96714e2646ad 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -960,54 +960,6 @@ void __mpol_free(struct mempolicy *p)
 	kmem_cache_free(policy_cache, p);
 }
 
-/*
- * Hugetlb policy. Same as above, just works with node numbers instead of
- * zonelists.
- */
-
-/* Find first node suitable for an allocation */
-int mpol_first_node(struct vm_area_struct *vma, unsigned long addr)
-{
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
-	switch (pol->policy) {
-	case MPOL_DEFAULT:
-		return numa_node_id();
-	case MPOL_BIND:
-		return pol->v.zonelist->zones[0]->zone_pgdat->node_id;
-	case MPOL_INTERLEAVE:
-		return interleave_nodes(pol);
-	case MPOL_PREFERRED:
-		return pol->v.preferred_node >= 0 ?
-				pol->v.preferred_node : numa_node_id();
-	}
-	BUG();
-	return 0;
-}
-
-/* Find secondary valid nodes for an allocation */
-int mpol_node_valid(int nid, struct vm_area_struct *vma, unsigned long addr)
-{
-	struct mempolicy *pol = get_vma_policy(current, vma, addr);
-
-	switch (pol->policy) {
-	case MPOL_PREFERRED:
-	case MPOL_DEFAULT:
-	case MPOL_INTERLEAVE:
-		return 1;
-	case MPOL_BIND: {
-		struct zone **z;
-		for (z = pol->v.zonelist->zones; *z; z++)
-			if ((*z)->zone_pgdat->node_id == nid)
-				return 1;
-		return 0;
-	}
-	default:
-		BUG();
-		return 0;
-	}
-}
-
 /*
  * Shared memory backing store policy support.
  *
-- 
cgit v1.2.3


From 9f3fd602aef96c2a490e3bfd669d06475aeba8d8 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Fri, 6 Jan 2006 00:10:50 -0800
Subject: [PATCH] mm: kvaddr_to_nid not used in common code

kvaddr_to_nid() isn't used in common code nor in i386 code.  Remove these
definitions.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/mmzone.h | 5 -----
 include/linux/mmzone.h    | 5 -----
 2 files changed, 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-i386/mmzone.h b/include/asm-i386/mmzone.h
index 620a90641ea8..74f595d80579 100644
--- a/include/asm-i386/mmzone.h
+++ b/include/asm-i386/mmzone.h
@@ -76,11 +76,6 @@ static inline int pfn_to_nid(unsigned long pfn)
  * Following are macros that each numa implmentation must define.
  */
 
-/*
- * Given a kernel address, find the home node of the underlying memory.
- */
-#define kvaddr_to_nid(kaddr)	pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
-
 #define node_start_pfn(nid)	(NODE_DATA(nid)->node_start_pfn)
 #define node_end_pfn(nid)						\
 ({									\
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9f22090df7dd..3c49f786f90c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -564,11 +564,6 @@ static inline int valid_section_nr(unsigned long nr)
 	return valid_section(__nr_to_section(nr));
 }
 
-/*
- * Given a kernel address, find the home node of the underlying memory.
- */
-#define kvaddr_to_nid(kaddr)	pfn_to_nid(__pa(kaddr) >> PAGE_SHIFT)
-
 static inline struct mem_section *__pfn_to_section(unsigned long pfn)
 {
 	return __nr_to_section(pfn_to_section_nr(pfn));
-- 
cgit v1.2.3


From d5afa6dcf74c0efb60ce07c63d0a727be93c67c5 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Fri, 6 Jan 2006 00:10:50 -0800
Subject: [PATCH] mm: pfn_to_pgdat not used in common code

pfn_to_pgdat() isn't used in common code.  Remove definition.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 3c49f786f90c..28f8496abcb9 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -596,11 +596,6 @@ static inline int pfn_valid(unsigned long pfn)
 #define pfn_to_nid		early_pfn_to_nid
 #endif
 
-#define pfn_to_pgdat(pfn)						\
-({									\
-	NODE_DATA(pfn_to_nid(pfn));					\
-})
-
 #define early_pfn_valid(pfn)	pfn_valid(pfn)
 void sparse_init(void);
 #else
-- 
cgit v1.2.3


From a94b3ab7eab4edcc9b2cb474b188f774c331adf7 Mon Sep 17 00:00:00 2001
From: Mike Kravetz <kravetz@us.ibm.com>
Date: Fri, 6 Jan 2006 00:10:51 -0800
Subject: [PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES

The NODES_SPAN_OTHER_NODES config option was created so that DISCONTIGMEM
could handle pSeries numa layouts.  However, support for DISCONTIGMEM has
been replaced by SPARSEMEM on powerpc.  As a result, this config option and
supporting code is no longer needed.

I have already sent a patch to Paul that removes the option from powerpc
specific code.  This removes the arch independent piece.  Doesn't really
matter which is applied first.

Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 ------
 mm/page_alloc.c        | 2 --
 2 files changed, 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 28f8496abcb9..d294b57a4016 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -603,12 +603,6 @@ void sparse_init(void);
 #define sparse_index_init(_sec, _nid)  do {} while (0)
 #endif /* CONFIG_SPARSEMEM */
 
-#ifdef CONFIG_NODES_SPAN_OTHER_NODES
-#define early_pfn_in_nid(pfn, nid)	(early_pfn_to_nid(pfn) == (nid))
-#else
-#define early_pfn_in_nid(pfn, nid)	(1)
-#endif
-
 #ifndef early_pfn_valid
 #define early_pfn_valid(pfn)	(1)
 #endif
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 1e49dc7cd619..07825c637a58 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1708,8 +1708,6 @@ void __devinit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
 	for (pfn = start_pfn; pfn < end_pfn; pfn++, page++) {
 		if (!early_pfn_valid(pfn))
 			continue;
-		if (!early_pfn_in_nid(pfn, nid))
-			continue;
 		page = pfn_to_page(pfn);
 		set_page_links(page, zone, nid, pfn);
 		set_page_count(page, 1);
-- 
cgit v1.2.3


From 03b00ebcc804180829d513df9e92e5fe8f72aacf Mon Sep 17 00:00:00 2001
From: Russell King <rmk@arm.linux.org.uk>
Date: Fri, 6 Jan 2006 00:10:52 -0800
Subject: [PATCH] Shut up warnings in ipc/shm.c

Fix two warnings in ipc/shm.c

ipc/shm.c:122: warning: statement with no effect
ipc/shm.c:560: warning: statement with no effect

by converting the macros to empty inline functions.  For safety, let's do
all three.  This also has the advantage that typechecking gets performed
even without CONFIG_SHMEM enabled.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Manfred Spraul <manfred@colorfullife.com>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6c9be99429f3..75ec04e2f184 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -634,9 +634,24 @@ struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 int shmem_lock(struct file *file, int lock, struct user_struct *user);
 #else
 #define shmem_nopage filemap_nopage
-#define shmem_lock(a, b, c) 	({0;})	/* always in memory, no need to lock */
-#define shmem_set_policy(a, b)	(0)
-#define shmem_get_policy(a, b)	(NULL)
+
+static inline int shmem_lock(struct file *file, int lock,
+			     struct user_struct *user)
+{
+	return 0;
+}
+
+static inline int shmem_set_policy(struct vm_area_struct *vma,
+				   struct mempolicy *new)
+{
+	return 0;
+}
+
+static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
+						 unsigned long addr)
+{
+	return NULL;
+}
 #endif
 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
 
-- 
cgit v1.2.3


From 2bdaf115b1c364d89484b59d5b937973f1c5a5c3 Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Fri, 6 Jan 2006 00:10:53 -0800
Subject: [PATCH] flatmem split out memory model

There are three places we define pfn_to_nid().  Two in linux/mmzone.h and one
in asm/mmzone.h.  These in essence represent the three memory models.  The
definition in linux/mmzone.h under !NEED_MULTIPLE_NODES is both the FLATMEM
definition and the optimisation for single NUMA nodes; the one under SPARSEMEM
is the NUMA sparsemem one; the one in asm/mmzone.h under DISCONTIGMEM is the
discontigmem one.  This is not in the least bit obvious, particularly the
connection between the non-NUMA optimisations and the memory models.

Two patches:

flatmem-split-out-memory-model: simplifies the selection of pfn_to_nid()
implementations.  The selection is based primarily off the memory model
selected.  Optimisations for non-NUMA are applied where needed.

sparse-provide-pfn_to_nid: implement pfn_to_nid() for SPARSEMEM

This patch:

pfn_to_nid is memory model specific

The pfn_to_nid() call is memory model specific.  It represents the locality
identifier for the memory passed.  Classically this would be a NUMA node,
but not a chunk of memory under DISCONTIGMEM.

The SPARSEMEM and FLATMEM memory model non-NUMA versions of pfn_to_nid()
are folded together under NEED_MULTIPLE_NODES, while DISCONTIGMEM has its
own optimisation.  This is all very confusing.

This patch splits out each implementation of pfn_to_nid() so that we can
see them and the optimisations to each.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index d294b57a4016..ee9f7b74e613 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -435,7 +435,6 @@ extern struct pglist_data contig_page_data;
 #define NODE_DATA(nid)		(&contig_page_data)
 #define NODE_MEM_MAP(nid)	mem_map
 #define MAX_NODES_SHIFT		1
-#define pfn_to_nid(pfn)		(0)
 
 #else /* CONFIG_NEED_MULTIPLE_NODES */
 
@@ -470,6 +469,10 @@ extern struct pglist_data contig_page_data;
 #define early_pfn_to_nid(nid)  (0UL)
 #endif
 
+#ifdef CONFIG_FLATMEM
+#define pfn_to_nid(pfn)		(0)
+#endif
+
 #define pfn_to_section_nr(pfn) ((pfn) >> PFN_SECTION_SHIFT)
 #define section_nr_to_pfn(sec) ((sec) << PFN_SECTION_SHIFT)
 
@@ -594,6 +597,8 @@ static inline int pfn_valid(unsigned long pfn)
  */
 #ifdef CONFIG_NUMA
 #define pfn_to_nid		early_pfn_to_nid
+#else
+#define pfn_to_nid(pfn)		(0)
 #endif
 
 #define early_pfn_valid(pfn)	pfn_valid(pfn)
-- 
cgit v1.2.3


From 161599ff39a3c3cdea0a1be05ac53accd2c45cdd Mon Sep 17 00:00:00 2001
From: Andy Whitcroft <apw@shadowen.org>
Date: Fri, 6 Jan 2006 00:10:54 -0800
Subject: [PATCH] sparsemem: provide pfn_to_nid

Before SPARSEMEM is initialised we cannot provide an efficient pfn_to_nid()
implmentation; before initialisation is complete we use early_pfn_to_nid()
to provide location information.  Until recently there was no non-init user
of this functionality.  Provide a post init pfn_to_nid() implementation.

Note that this implmentation assumes that the pfn passed has been validated
with pfn_valid().  The current single user of this function already has
this check.

Signed-off-by: Andy Whitcroft <apw@shadowen.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ee9f7b74e613..8cba76c6a28c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -596,7 +596,11 @@ static inline int pfn_valid(unsigned long pfn)
  * this restriction.
  */
 #ifdef CONFIG_NUMA
-#define pfn_to_nid		early_pfn_to_nid
+#define pfn_to_nid(pfn)							\
+({									\
+	unsigned long __pfn_to_nid_pfn = (pfn);				\
+	page_to_nid(pfn_to_page(__pfn_to_nid_pfn));			\
+})
 #else
 #define pfn_to_nid(pfn)		(0)
 #endif
-- 
cgit v1.2.3


From 2d92c5c9150a2a9ca3dc25da58d5042e17a96b6a Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:10:59 -0800
Subject: [PATCH] mm: remove pcp low

struct per_cpu_pages.low is useless.  Remove it.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 1 -
 mm/page_alloc.c        | 9 ++-------
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8cba76c6a28c..0d1a5981bb94 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -46,7 +46,6 @@ struct zone_padding {
 
 struct per_cpu_pages {
 	int count;		/* number of pages in the list */
-	int low;		/* low watermark, refill needed */
 	int high;		/* high watermark, emptying needed */
 	int batch;		/* chunk size for buddy add/remove */
 	struct list_head list;	/* the list of pages */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 088712f2ac02..7cff958e7813 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -740,7 +740,7 @@ again:
 		page = NULL;
 		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 		local_irq_save(flags);
-		if (pcp->count <= pcp->low)
+		if (!pcp->count)
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
 		if (likely(pcp->count)) {
@@ -1345,10 +1345,9 @@ void show_free_areas(void)
 			pageset = zone_pcp(zone, cpu);
 
 			for (temperature = 0; temperature < 2; temperature++)
-				printk("cpu %d %s: low %d, high %d, batch %d used:%d\n",
+				printk("cpu %d %s: high %d, batch %d used:%d\n",
 					cpu,
 					temperature ? "cold" : "hot",
-					pageset->pcp[temperature].low,
 					pageset->pcp[temperature].high,
 					pageset->pcp[temperature].batch,
 					pageset->pcp[temperature].count);
@@ -1790,14 +1789,12 @@ inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
 
 	pcp = &p->pcp[0];		/* hot */
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 6 * batch;
 	pcp->batch = max(1UL, 1 * batch);
 	INIT_LIST_HEAD(&pcp->list);
 
 	pcp = &p->pcp[1];		/* cold*/
 	pcp->count = 0;
-	pcp->low = 0;
 	pcp->high = 2 * batch;
 	pcp->batch = max(1UL, batch/2);
 	INIT_LIST_HEAD(&pcp->list);
@@ -2193,12 +2190,10 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 				seq_printf(m,
 					   "\n    cpu: %i pcp: %i"
 					   "\n              count: %i"
-					   "\n              low:   %i"
 					   "\n              high:  %i"
 					   "\n              batch: %i",
 					   i, j,
 					   pageset->pcp[j].count,
-					   pageset->pcp[j].low,
 					   pageset->pcp[j].high,
 					   pageset->pcp[j].batch);
 			}
-- 
cgit v1.2.3


From 008857c1a49ccffc31a54c3ea7e182833bd61304 Mon Sep 17 00:00:00 2001
From: Ravikiran G Thirumalai <kiran@scalex86.org>
Date: Fri, 6 Jan 2006 00:11:01 -0800
Subject: [PATCH] Cleanup bootmem allocator and fix alloc_bootmem_low

Patch cleans up the alloc_bootmem fix for swiotlb.  Patch removes
alloc_bootmem_*_limit api and fixes alloc_boot_*low api to do the right
thing -- allocate from low32 memory.

Signed-off-by: Ravikiran Thirumalai <kiran@scalex86.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/bootmem.h | 46 ++++++++++++----------------------------------
 lib/swiotlb.c           |  3 +--
 mm/bootmem.c            | 38 +++++++++++++++++++++++++++++++-------
 3 files changed, 44 insertions(+), 43 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 3b03b0b868dd..993da8cc9706 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -43,50 +43,38 @@ typedef struct bootmem_data {
 extern unsigned long __init bootmem_bootmap_pages (unsigned long);
 extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
 extern void __init free_bootmem (unsigned long addr, unsigned long size);
-extern void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal, unsigned long limit);
+extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal);
+extern void * __init __alloc_bootmem_low(unsigned long size,
+					 unsigned long align,
+					 unsigned long goal);
+extern void * __init __alloc_bootmem_low_node(pg_data_t *pgdat,
+					      unsigned long size,
+					      unsigned long align,
+					      unsigned long goal);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
 #define alloc_bootmem(x) \
 	__alloc_bootmem((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low(x) \
-	__alloc_bootmem((x), SMP_CACHE_BYTES, 0)
+	__alloc_bootmem_low((x), SMP_CACHE_BYTES, 0)
 #define alloc_bootmem_pages(x) \
 	__alloc_bootmem((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages(x) \
-	__alloc_bootmem((x), PAGE_SIZE, 0)
-
-#define alloc_bootmem_limit(x, limit)						\
-	__alloc_bootmem_limit((x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit))
-#define alloc_bootmem_low_limit(x, limit)			\
-	__alloc_bootmem_limit((x), SMP_CACHE_BYTES, 0, (limit))
-#define alloc_bootmem_pages_limit(x, limit)					\
-	__alloc_bootmem_limit((x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit))
-#define alloc_bootmem_low_pages_limit(x, limit)		\
-	__alloc_bootmem_limit((x), PAGE_SIZE, 0, (limit))
-
+	__alloc_bootmem_low((x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 extern unsigned long __init free_all_bootmem (void);
-
+extern void * __init __alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal);
 extern unsigned long __init init_bootmem_node (pg_data_t *pgdat, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn);
 extern void __init reserve_bootmem_node (pg_data_t *pgdat, unsigned long physaddr, unsigned long size);
 extern void __init free_bootmem_node (pg_data_t *pgdat, unsigned long addr, unsigned long size);
 extern unsigned long __init free_all_bootmem_node (pg_data_t *pgdat);
-extern void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align, unsigned long goal, unsigned long limit);
 #ifndef CONFIG_HAVE_ARCH_BOOTMEM_NODE
 #define alloc_bootmem_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages_node(pgdat, x) \
 	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_low_pages_node(pgdat, x) \
-	__alloc_bootmem_node((pgdat), (x), PAGE_SIZE, 0)
-
-#define alloc_bootmem_node_limit(pgdat, x, limit)				\
-	__alloc_bootmem_node_limit((pgdat), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS), (limit))
-#define alloc_bootmem_pages_node_limit(pgdat, x, limit)				\
-	__alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS), (limit))
-#define alloc_bootmem_low_pages_node_limit(pgdat, x, limit)		\
-	__alloc_bootmem_node_limit((pgdat), (x), PAGE_SIZE, 0, (limit))
-
+	__alloc_bootmem_low_node((pgdat), (x), PAGE_SIZE, 0)
 #endif /* !CONFIG_HAVE_ARCH_BOOTMEM_NODE */
 
 #ifdef CONFIG_HAVE_ARCH_ALLOC_REMAP
@@ -123,15 +111,5 @@ extern void *__init alloc_large_system_hash(const char *tablename,
 #endif
 extern int __initdata hashdist;		/* Distribute hashes across NUMA nodes? */
 
-static inline void *__alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal)
-{
-	return __alloc_bootmem_limit(size, align, goal, 0);
-}
-
-static inline void *__alloc_bootmem_node (pg_data_t *pgdat, unsigned long size, unsigned long align,
-				     unsigned long goal)
-{
-	return __alloc_bootmem_node_limit(pgdat, size, align, goal, 0);
-}
 
 #endif /* _LINUX_BOOTMEM_H */
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 1ff8dcebf7c6..3b482052f403 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -142,8 +142,7 @@ swiotlb_init_with_default_size (size_t default_size)
 	/*
 	 * Get IO TLB memory from the low pages
 	 */
-	io_tlb_start = alloc_bootmem_low_pages_limit(io_tlb_nslabs *
-					     (1 << IO_TLB_SHIFT), 0x100000000);
+	io_tlb_start = alloc_bootmem_low_pages(io_tlb_nslabs * (1 << IO_TLB_SHIFT));
 	if (!io_tlb_start)
 		panic("Cannot allocate SWIOTLB buffer");
 	io_tlb_end = io_tlb_start + io_tlb_nslabs * (1 << IO_TLB_SHIFT);
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 16b9465eb4eb..cbb82ee14fb5 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -393,15 +393,14 @@ unsigned long __init free_all_bootmem (void)
 	return(free_all_bootmem_core(NODE_DATA(0)));
 }
 
-void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, unsigned long goal,
-				unsigned long limit)
+void * __init __alloc_bootmem(unsigned long size, unsigned long align, unsigned long goal)
 {
 	pg_data_t *pgdat = pgdat_list;
 	void *ptr;
 
 	for_each_pgdat(pgdat)
 		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
-						 align, goal, limit)))
+						 align, goal, 0)))
 			return(ptr);
 
 	/*
@@ -413,15 +412,40 @@ void * __init __alloc_bootmem_limit (unsigned long size, unsigned long align, un
 }
 
 
-void * __init __alloc_bootmem_node_limit (pg_data_t *pgdat, unsigned long size, unsigned long align,
-				     unsigned long goal, unsigned long limit)
+void * __init __alloc_bootmem_node(pg_data_t *pgdat, unsigned long size, unsigned long align,
+				   unsigned long goal)
 {
 	void *ptr;
 
-	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, limit);
+	ptr = __alloc_bootmem_core(pgdat->bdata, size, align, goal, 0);
 	if (ptr)
 		return (ptr);
 
-	return __alloc_bootmem_limit(size, align, goal, limit);
+	return __alloc_bootmem(size, align, goal);
 }
 
+#define LOW32LIMIT 0xffffffff
+
+void * __init __alloc_bootmem_low(unsigned long size, unsigned long align, unsigned long goal)
+{
+	pg_data_t *pgdat = pgdat_list;
+	void *ptr;
+
+	for_each_pgdat(pgdat)
+		if ((ptr = __alloc_bootmem_core(pgdat->bdata, size,
+						 align, goal, LOW32LIMIT)))
+			return(ptr);
+
+	/*
+	 * Whoops, we cannot satisfy the allocation request.
+	 */
+	printk(KERN_ALERT "low bootmem alloc of %lu bytes failed!\n", size);
+	panic("Out of low memory");
+	return NULL;
+}
+
+void * __init __alloc_bootmem_low_node(pg_data_t *pgdat, unsigned long size,
+				       unsigned long align, unsigned long goal)
+{
+	return __alloc_bootmem_core(pgdat->bdata, size, align, goal, LOW32LIMIT);
+}
-- 
cgit v1.2.3


From 7756b9e4e321c3c83c7aa5b9532d3e7fd7ddeb4a Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Fri, 6 Jan 2006 00:11:09 -0800
Subject: [PATCH] kill last zone_reclaim() bits

Remove the last bits of Martin's ill-fated sys_set_zone_reclaim().

Cc: Martin Hicks <mort@wildopensource.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-i386/unistd.h |  2 +-
 include/asm-ia64/unistd.h |  2 +-
 include/linux/swap.h      |  1 -
 mm/vmscan.c               | 80 -----------------------------------------------
 4 files changed, 2 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h
index 0f92e78dfea1..fe38b9a96233 100644
--- a/include/asm-i386/unistd.h
+++ b/include/asm-i386/unistd.h
@@ -256,7 +256,7 @@
 #define __NR_io_submit		248
 #define __NR_io_cancel		249
 #define __NR_fadvise64		250
-#define __NR_set_zone_reclaim	251
+/* 251 is available for reuse (was briefly sys_set_zone_reclaim) */
 #define __NR_exit_group		252
 #define __NR_lookup_dcookie	253
 #define __NR_epoll_create	254
diff --git a/include/asm-ia64/unistd.h b/include/asm-ia64/unistd.h
index 6d96a67439be..2bf543493cb8 100644
--- a/include/asm-ia64/unistd.h
+++ b/include/asm-ia64/unistd.h
@@ -265,7 +265,7 @@
 #define __NR_keyctl			1273
 #define __NR_ioprio_set			1274
 #define __NR_ioprio_get			1275
-#define __NR_set_zone_reclaim		1276
+/* 1276 is available for reuse (was briefly sys_set_zone_reclaim) */
 #define __NR_inotify_init		1277
 #define __NR_inotify_add_watch		1278
 #define __NR_inotify_rm_watch		1279
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 508668f840b6..bd6641784107 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -172,7 +172,6 @@ extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
 extern int try_to_free_pages(struct zone **, gfp_t);
-extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
 extern int shrink_all_memory(int);
 extern int vm_swappiness;
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 795a050fe471..b2baca7645d7 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -74,9 +74,6 @@ struct scan_control {
 
 	int may_writepage;
 
-	/* Can pages be swapped as part of reclaim? */
-	int may_swap;
-
 	/* This context's SWAP_CLUSTER_MAX. If freeing memory for
 	 * suspend, we effectively ignore SWAP_CLUSTER_MAX.
 	 * In this context, it doesn't matter that we scan the
@@ -430,8 +427,6 @@ static int shrink_list(struct list_head *page_list, struct scan_control *sc)
 		 * Try to allocate it some swap space here.
 		 */
 		if (PageAnon(page) && !PageSwapCache(page)) {
-			if (!sc->may_swap)
-				goto keep_locked;
 			if (!add_to_swap(page))
 				goto activate_locked;
 		}
@@ -952,7 +947,6 @@ int try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
 
 	sc.gfp_mask = gfp_mask;
 	sc.may_writepage = 0;
-	sc.may_swap = 1;
 
 	inc_page_state(allocstall);
 
@@ -1055,7 +1049,6 @@ loop_again:
 	total_reclaimed = 0;
 	sc.gfp_mask = GFP_KERNEL;
 	sc.may_writepage = 0;
-	sc.may_swap = 1;
 	sc.nr_mapped = read_page_state(nr_mapped);
 
 	inc_page_state(pageoutrun);
@@ -1353,76 +1346,3 @@ static int __init kswapd_init(void)
 }
 
 module_init(kswapd_init)
-
-
-/*
- * Try to free up some pages from this zone through reclaim.
- */
-int zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
-{
-	struct scan_control sc;
-	int nr_pages = 1 << order;
-	int total_reclaimed = 0;
-
-	/* The reclaim may sleep, so don't do it if sleep isn't allowed */
-	if (!(gfp_mask & __GFP_WAIT))
-		return 0;
-	if (zone->all_unreclaimable)
-		return 0;
-
-	sc.gfp_mask = gfp_mask;
-	sc.may_writepage = 0;
-	sc.may_swap = 0;
-	sc.nr_mapped = read_page_state(nr_mapped);
-	sc.nr_scanned = 0;
-	sc.nr_reclaimed = 0;
-	/* scan at the highest priority */
-	sc.priority = 0;
-	disable_swap_token();
-
-	if (nr_pages > SWAP_CLUSTER_MAX)
-		sc.swap_cluster_max = nr_pages;
-	else
-		sc.swap_cluster_max = SWAP_CLUSTER_MAX;
-
-	/* Don't reclaim the zone if there are other reclaimers active */
-	if (atomic_read(&zone->reclaim_in_progress) > 0)
-		goto out;
-
-	shrink_zone(zone, &sc);
-	total_reclaimed = sc.nr_reclaimed;
-
- out:
-	return total_reclaimed;
-}
-
-asmlinkage long sys_set_zone_reclaim(unsigned int node, unsigned int zone,
-				     unsigned int state)
-{
-	struct zone *z;
-	int i;
-
-	if (!capable(CAP_SYS_ADMIN))
-		return -EACCES;
-
-	if (node >= MAX_NUMNODES || !node_online(node))
-		return -EINVAL;
-
-	/* This will break if we ever add more zones */
-	if (!(zone & (1<<ZONE_DMA|1<<ZONE_NORMAL|1<<ZONE_HIGHMEM)))
-		return -EINVAL;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		if (!(zone & 1<<i))
-			continue;
-
-		z = &NODE_DATA(node)->node_zones[i];
-
-		if (state)
-			z->reclaim_pages = 1;
-		else
-			z->reclaim_pages = 0;
-	}
-
-	return 0;
-}
-- 
cgit v1.2.3


From 9328b8faae922e52073785ed6c1eaa8565648a0e Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:10 -0800
Subject: [PATCH] mm: dma32 zone statistics

Add dma32 to zone statistics.  Also attempt to arrange struct page_state a
bit better (visually).

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Andi Kleen <ak@muc.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h     | 11 +++++++++++
 include/linux/page-flags.h | 38 ++++++++++++++++++++++++--------------
 mm/page_alloc.c            | 14 +++++++++++---
 3 files changed, 46 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d1a5981bb94..8d6caa414c4c 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -397,6 +397,7 @@ static inline int is_normal_idx(int idx)
 {
 	return (idx == ZONE_NORMAL);
 }
+
 /**
  * is_highmem - helper function to quickly check if a struct zone is a 
  *              highmem zone or not.  This is an attempt to keep references
@@ -413,6 +414,16 @@ static inline int is_normal(struct zone *zone)
 	return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
 }
 
+static inline int is_dma32(struct zone *zone)
+{
+	return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+}
+
+static inline int is_dma(struct zone *zone)
+{
+	return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
+}
+
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
 struct file;
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 343083fec258..32d09c8d952b 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -97,32 +97,40 @@ struct page_state {
 	unsigned long pgpgout;		/* Disk writes */
 	unsigned long pswpin;		/* swap reads */
 	unsigned long pswpout;		/* swap writes */
-	unsigned long pgalloc_high;	/* page allocations */
 
+	unsigned long pgalloc_high;	/* page allocations */
 	unsigned long pgalloc_normal;
+	unsigned long pgalloc_dma32;
 	unsigned long pgalloc_dma;
+
 	unsigned long pgfree;		/* page freeings */
 	unsigned long pgactivate;	/* pages moved inactive->active */
 	unsigned long pgdeactivate;	/* pages moved active->inactive */
 
 	unsigned long pgfault;		/* faults (major+minor) */
 	unsigned long pgmajfault;	/* faults (major only) */
+
 	unsigned long pgrefill_high;	/* inspected in refill_inactive_zone */
 	unsigned long pgrefill_normal;
+	unsigned long pgrefill_dma32;
 	unsigned long pgrefill_dma;
 
 	unsigned long pgsteal_high;	/* total highmem pages reclaimed */
 	unsigned long pgsteal_normal;
+	unsigned long pgsteal_dma32;
 	unsigned long pgsteal_dma;
+
 	unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
 	unsigned long pgscan_kswapd_normal;
-
+	unsigned long pgscan_kswapd_dma32;
 	unsigned long pgscan_kswapd_dma;
+
 	unsigned long pgscan_direct_high;/* total highmem pages scanned */
 	unsigned long pgscan_direct_normal;
+	unsigned long pgscan_direct_dma32;
 	unsigned long pgscan_direct_dma;
-	unsigned long pginodesteal;	/* pages reclaimed via inode freeing */
 
+	unsigned long pginodesteal;	/* pages reclaimed via inode freeing */
 	unsigned long slabs_scanned;	/* slab objects scanned */
 	unsigned long kswapd_steal;	/* pages reclaimed by kswapd */
 	unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
@@ -150,17 +158,19 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 #define add_page_state(member,delta) mod_page_state(member, (delta))
 #define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
 
-#define mod_page_state_zone(zone, member, delta)				\
-	do {									\
-		unsigned offset;						\
-		if (is_highmem(zone))						\
-			offset = offsetof(struct page_state, member##_high);	\
-		else if (is_normal(zone))					\
-			offset = offsetof(struct page_state, member##_normal);	\
-		else								\
-			offset = offsetof(struct page_state, member##_dma);	\
-		__mod_page_state(offset, (delta));				\
-	} while (0)
+#define mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	unsigned offset;						\
+	if (is_highmem(zone))						\
+		offset = offsetof(struct page_state, member##_high);	\
+	else if (is_normal(zone))					\
+		offset = offsetof(struct page_state, member##_normal);	\
+	else if (is_dma32(zone))					\
+		offset = offsetof(struct page_state, member##_dma32);	\
+	else								\
+		offset = offsetof(struct page_state, member##_dma);	\
+	__mod_page_state(offset, (delta));				\
+ } while (0)
 
 /*
  * Manipulation of page state flags
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cdad3249cf7f..e12154d9c4ed 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2277,32 +2277,40 @@ static char *vmstat_text[] = {
 	"pgpgout",
 	"pswpin",
 	"pswpout",
-	"pgalloc_high",
 
+	"pgalloc_high",
 	"pgalloc_normal",
+	"pgalloc_dma32",
 	"pgalloc_dma",
+
 	"pgfree",
 	"pgactivate",
 	"pgdeactivate",
 
 	"pgfault",
 	"pgmajfault",
+
 	"pgrefill_high",
 	"pgrefill_normal",
+	"pgrefill_dma32",
 	"pgrefill_dma",
 
 	"pgsteal_high",
 	"pgsteal_normal",
+	"pgsteal_dma32",
 	"pgsteal_dma",
+
 	"pgscan_kswapd_high",
 	"pgscan_kswapd_normal",
-
+	"pgscan_kswapd_dma32",
 	"pgscan_kswapd_dma",
+
 	"pgscan_direct_high",
 	"pgscan_direct_normal",
+	"pgscan_direct_dma32",
 	"pgscan_direct_dma",
-	"pginodesteal",
 
+	"pginodesteal",
 	"slabs_scanned",
 	"kswapd_steal",
 	"kswapd_inodesteal",
-- 
cgit v1.2.3


From 9617d95e6e9ffd883cf90a89724fe60d7ab22f9a Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:12 -0800
Subject: [PATCH] mm: rmap optimisation

Optimise rmap functions by minimising atomic operations when we know there
will be no concurrent modifications.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/exec.c            |  2 +-
 include/linux/rmap.h |  1 +
 mm/memory.c          |  6 +++---
 mm/rmap.c            | 49 ++++++++++++++++++++++++++++++++++++++-----------
 4 files changed, 43 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/exec.c b/fs/exec.c
index 22533cce0611..e75a9548da8e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -324,7 +324,7 @@ void install_arg_page(struct vm_area_struct *vma,
 	lru_cache_add_active(page);
 	set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
 					page, vma->vm_page_prot))));
-	page_add_anon_rmap(page, vma, address);
+	page_add_new_anon_rmap(page, vma, address);
 	pte_unmap_unlock(pte, ptl);
 
 	/* no need for flush_tlb */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 33261f1d2239..9d6fbeef2104 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -71,6 +71,7 @@ void __anon_vma_link(struct vm_area_struct *);
  * rmap interfaces called when adding or removing pte of page
  */
 void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
+void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
 void page_add_file_rmap(struct page *);
 void page_remove_rmap(struct page *);
 
diff --git a/mm/memory.c b/mm/memory.c
index e249088908c4..d7ca7de10f4d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1498,7 +1498,7 @@ gotten:
 		update_mmu_cache(vma, address, entry);
 		lazy_mmu_prot_update(entry);
 		lru_cache_add_active(new_page);
-		page_add_anon_rmap(new_page, vma, address);
+		page_add_new_anon_rmap(new_page, vma, address);
 
 		/* Free the old page.. */
 		new_page = old_page;
@@ -1978,7 +1978,7 @@ static int do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		inc_mm_counter(mm, anon_rss);
 		lru_cache_add_active(page);
 		SetPageReferenced(page);
-		page_add_anon_rmap(page, vma, address);
+		page_add_new_anon_rmap(page, vma, address);
 	} else {
 		/* Map the ZERO_PAGE - vm_page_prot is readonly */
 		page = ZERO_PAGE(address);
@@ -2109,7 +2109,7 @@ retry:
 		if (anon) {
 			inc_mm_counter(mm, anon_rss);
 			lru_cache_add_active(new_page);
-			page_add_anon_rmap(new_page, vma, address);
+			page_add_new_anon_rmap(new_page, vma, address);
 		} else {
 			inc_mm_counter(mm, file_rss);
 			page_add_file_rmap(new_page);
diff --git a/mm/rmap.c b/mm/rmap.c
index f853c6def159..4107f64ff749 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -434,6 +434,26 @@ int page_referenced(struct page *page, int is_locked)
 	return referenced;
 }
 
+/**
+ * page_set_anon_rmap - setup new anonymous rmap
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ */
+static void __page_set_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	struct anon_vma *anon_vma = vma->anon_vma;
+
+	BUG_ON(!anon_vma);
+	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
+	page->mapping = (struct address_space *) anon_vma;
+
+	page->index = linear_page_index(vma, address);
+
+	inc_page_state(nr_mapped);
+}
+
 /**
  * page_add_anon_rmap - add pte mapping to an anonymous page
  * @page:	the page to add the mapping to
@@ -445,20 +465,27 @@ int page_referenced(struct page *page, int is_locked)
 void page_add_anon_rmap(struct page *page,
 	struct vm_area_struct *vma, unsigned long address)
 {
-	if (atomic_inc_and_test(&page->_mapcount)) {
-		struct anon_vma *anon_vma = vma->anon_vma;
-
-		BUG_ON(!anon_vma);
-		anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
-		page->mapping = (struct address_space *) anon_vma;
-
-		page->index = linear_page_index(vma, address);
-
-		inc_page_state(nr_mapped);
-	}
+	if (atomic_inc_and_test(&page->_mapcount))
+		__page_set_anon_rmap(page, vma, address);
 	/* else checking page index and mapping is racy */
 }
 
+/*
+ * page_add_new_anon_rmap - add pte mapping to a new anonymous page
+ * @page:	the page to add the mapping to
+ * @vma:	the vm area in which the mapping is added
+ * @address:	the user virtual address mapped
+ *
+ * Same as page_add_anon_rmap but must only be called on *new* pages.
+ * This means the inc-and-test can be bypassed.
+ */
+void page_add_new_anon_rmap(struct page *page,
+	struct vm_area_struct *vma, unsigned long address)
+{
+	atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+	__page_set_anon_rmap(page, vma, address);
+}
+
 /**
  * page_add_file_rmap - add pte mapping to a file page
  * @page: the page to add the mapping to
-- 
cgit v1.2.3


From f3fe65122da05e1cd4c9140340d96ea2f95d0c49 Mon Sep 17 00:00:00 2001
From: Con Kolivas <kernel@kolivas.org>
Date: Fri, 6 Jan 2006 00:11:15 -0800
Subject: [PATCH] mm: add populated_zone() helper

There are numerous places we check whether a zone is populated or not.

Provide a helper function to check for populated zones and convert all
checks for zone->present_pages.

Signed-off-by: Con Kolivas <kernel@kolivas.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mmzone.h | 5 +++++
 mm/page_alloc.c        | 8 ++++----
 mm/vmscan.c            | 8 ++++----
 3 files changed, 13 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 8d6caa414c4c..c34f4a2c62f8 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -388,6 +388,11 @@ static inline struct zone *next_zone(struct zone *zone)
 #define for_each_zone(zone) \
 	for (zone = pgdat_list->node_zones; zone; zone = next_zone(zone))
 
+static inline int populated_zone(struct zone *zone)
+{
+	return (!!zone->present_pages);
+}
+
 static inline int is_highmem_idx(int idx)
 {
 	return (idx == ZONE_HIGHMEM);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b9fd2c238f13..8f3de5af92dd 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1358,7 +1358,7 @@ void show_free_areas(void)
 		show_node(zone);
 		printk("%s per-cpu:", zone->name);
 
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk(" empty\n");
 			continue;
 		} else
@@ -1435,7 +1435,7 @@ void show_free_areas(void)
 
 		show_node(zone);
 		printk("%s: ", zone->name);
-		if (!zone->present_pages) {
+		if (!populated_zone(zone)) {
 			printk("empty\n");
 			continue;
 		}
@@ -2134,7 +2134,7 @@ static int frag_show(struct seq_file *m, void *arg)
 	int order;
 
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) {
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
@@ -2167,7 +2167,7 @@ static int zoneinfo_show(struct seq_file *m, void *arg)
 	for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) {
 		int i;
 
-		if (!zone->present_pages)
+		if (!populated_zone(zone))
 			continue;
 
 		spin_lock_irqsave(&zone->lock, flags);
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 5c8a412b43f4..7681d8ee04fe 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -897,7 +897,7 @@ shrink_caches(struct zone **zones, struct scan_control *sc)
 	for (i = 0; zones[i] != NULL; i++) {
 		struct zone *zone = zones[i];
 
-		if (zone->present_pages == 0)
+		if (!populated_zone(zone))
 			continue;
 
 		if (!cpuset_zone_allowed(zone, __GFP_HARDWALL))
@@ -1069,7 +1069,7 @@ loop_again:
 			for (i = pgdat->nr_zones - 1; i >= 0; i--) {
 				struct zone *zone = pgdat->node_zones + i;
 
-				if (zone->present_pages == 0)
+				if (!populated_zone(zone))
 					continue;
 
 				if (zone->all_unreclaimable &&
@@ -1106,7 +1106,7 @@ scan:
 			struct zone *zone = pgdat->node_zones + i;
 			int nr_slab;
 
-			if (zone->present_pages == 0)
+			if (!populated_zone(zone))
 				continue;
 
 			if (zone->all_unreclaimable && priority != DEF_PRIORITY)
@@ -1258,7 +1258,7 @@ void wakeup_kswapd(struct zone *zone, int order)
 {
 	pg_data_t *pgdat;
 
-	if (zone->present_pages == 0)
+	if (!populated_zone(zone))
 		return;
 
 	pgdat = zone->zone_pgdat;
-- 
cgit v1.2.3


From 4be38e351c5f455f6f490f5aff29053e33ab4f99 Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:17 -0800
Subject: [PATCH] mm: move determination of policy_zone into page allocator

Currently the function to build a zonelist for a BIND policy has the side
effect to set the policy_zone.  This seems to be a bit strange.  policy
zone seems to not be initialized elsewhere and therefore 0.  Do we police
ZONE_DMA if no bind policy has been used yet?

This patch moves the determination of the zone to apply policies to into
the page allocator.  We determine the zone while building the zonelist for
nodes.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mempolicy.h | 11 +++++++++++
 mm/mempolicy.c            | 15 +++------------
 mm/page_alloc.c           |  2 ++
 3 files changed, 16 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index b972f985a3c5..ed00b278cb93 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -151,6 +151,14 @@ extern struct mempolicy default_policy;
 extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 		unsigned long addr);
 
+extern int policy_zone;
+
+static inline void check_highest_zone(int k)
+{
+	if (k > policy_zone)
+		policy_zone = k;
+}
+
 #else
 
 struct mempolicy {};
@@ -221,6 +229,9 @@ static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
 	return NODE_DATA(0)->node_zonelists + gfp_zone(GFP_HIGHUSER);
 }
 
+static inline void check_highest_zone(int k)
+{
+}
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 96714e2646ad..0f1d2b8a952b 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -93,7 +93,7 @@ static kmem_cache_t *sn_cache;
 
 /* Highest zone. An specific allocation for a zone below that is not
    policied. */
-static int policy_zone;
+int policy_zone = ZONE_DMA;
 
 struct mempolicy default_policy = {
 	.refcnt = ATOMIC_INIT(1), /* never free it */
@@ -131,17 +131,8 @@ static struct zonelist *bind_zonelist(nodemask_t *nodes)
 	if (!zl)
 		return NULL;
 	num = 0;
-	for_each_node_mask(nd, *nodes) {
-		int k;
-		for (k = MAX_NR_ZONES-1; k >= 0; k--) {
-			struct zone *z = &NODE_DATA(nd)->node_zones[k];
-			if (!z->present_pages)
-				continue;
-			zl->zones[num++] = z;
-			if (k > policy_zone)
-				policy_zone = k;
-		}
-	}
+	for_each_node_mask(nd, *nodes)
+		zl->zones[num++] = &NODE_DATA(nd)->node_zones[policy_zone];
 	zl->zones[num] = NULL;
 	return zl;
 }
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7adc9526d329..512e3f4d4963 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -36,6 +36,7 @@
 #include <linux/memory_hotplug.h>
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
+#include <linux/mempolicy.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -1470,6 +1471,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat,
 			BUG_ON(zone - pgdat->node_zones > ZONE_NORMAL);
 #endif
 			zonelist->zones[j++] = zone;
+			check_highest_zone(k);
 		}
 	}
 	return j;
-- 
cgit v1.2.3


From d3cb487149bd706aa6aeb02042332a450978dc1c Mon Sep 17 00:00:00 2001
From: Christoph Lameter <clameter@engr.sgi.com>
Date: Fri, 6 Jan 2006 00:11:20 -0800
Subject: [PATCH] atomic_long_t & include/asm-generic/atomic.h V2

Several counters already have the need to use 64 atomic variables on 64 bit
platforms (see mm_counter_t in sched.h).  We have to do ugly ifdefs to fall
back to 32 bit atomic on 32 bit platforms.

The VM statistics patch that I am working on will also make more extensive
use of atomic64.

This patch introduces a new type atomic_long_t by providing definitions in
asm-generic/atomic.h that works similar to the c "long" type.  Its 32 bits
on 32 bit platforms and 64 bits on 64 bit platforms.

Also cleans up the determination of the mm_counter_t in sched.h.

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/asm-alpha/atomic.h     |   1 +
 include/asm-arm/atomic.h       |   1 +
 include/asm-arm26/atomic.h     |   1 +
 include/asm-cris/atomic.h      |   1 +
 include/asm-frv/atomic.h       |   1 +
 include/asm-generic/atomic.h   | 116 +++++++++++++++++++++++++++++++++++++++++
 include/asm-h8300/atomic.h     |   1 +
 include/asm-i386/atomic.h      |   1 +
 include/asm-ia64/atomic.h      |   1 +
 include/asm-m32r/atomic.h      |   1 +
 include/asm-m68k/atomic.h      |   1 +
 include/asm-m68knommu/atomic.h |   1 +
 include/asm-mips/atomic.h      |   1 +
 include/asm-parisc/atomic.h    |   1 +
 include/asm-powerpc/atomic.h   |   1 +
 include/asm-s390/atomic.h      |   1 +
 include/asm-sh/atomic.h        |   1 +
 include/asm-sh64/atomic.h      |   1 +
 include/asm-sparc/atomic.h     |   1 +
 include/asm-sparc64/atomic.h   |   1 +
 include/asm-v850/atomic.h      |   1 +
 include/asm-x86_64/atomic.h    |   1 +
 include/asm-xtensa/atomic.h    |   1 +
 include/linux/sched.h          |  25 +++------
 24 files changed, 144 insertions(+), 19 deletions(-)
 create mode 100644 include/asm-generic/atomic.h

(limited to 'include/linux')

diff --git a/include/asm-alpha/atomic.h b/include/asm-alpha/atomic.h
index 6183eab006d4..cb03bbe92cdf 100644
--- a/include/asm-alpha/atomic.h
+++ b/include/asm-alpha/atomic.h
@@ -216,4 +216,5 @@ static __inline__ long atomic64_sub_return(long i, atomic64_t * v)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
+#include <asm-generic/atomic.h>
 #endif /* _ALPHA_ATOMIC_H */
diff --git a/include/asm-arm/atomic.h b/include/asm-arm/atomic.h
index d586f65c8228..f72b63309bc5 100644
--- a/include/asm-arm/atomic.h
+++ b/include/asm-arm/atomic.h
@@ -205,5 +205,6 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif
 #endif
diff --git a/include/asm-arm26/atomic.h b/include/asm-arm26/atomic.h
index a47cadc59686..3074b0e76343 100644
--- a/include/asm-arm26/atomic.h
+++ b/include/asm-arm26/atomic.h
@@ -118,5 +118,6 @@ static inline void atomic_clear_mask(unsigned long mask, unsigned long *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif
 #endif
diff --git a/include/asm-cris/atomic.h b/include/asm-cris/atomic.h
index 683b05a57d88..2df2c7aa19b7 100644
--- a/include/asm-cris/atomic.h
+++ b/include/asm-cris/atomic.h
@@ -156,4 +156,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc()     barrier()
 
+#include <asm-generic/atomic.h>
 #endif
diff --git a/include/asm-frv/atomic.h b/include/asm-frv/atomic.h
index f6539ff569c5..3f54fea2b051 100644
--- a/include/asm-frv/atomic.h
+++ b/include/asm-frv/atomic.h
@@ -426,4 +426,5 @@ extern uint32_t __cmpxchg_32(uint32_t *v, uint32_t test, uint32_t new);
 })
 #define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0)
 
+#include <asm-generic/atomic.h>
 #endif /* _ASM_ATOMIC_H */
diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h
new file mode 100644
index 000000000000..e0a28b925ef0
--- /dev/null
+++ b/include/asm-generic/atomic.h
@@ -0,0 +1,116 @@
+#ifndef _ASM_GENERIC_ATOMIC_H
+#define _ASM_GENERIC_ATOMIC_H
+/*
+ * Copyright (C) 2005 Silicon Graphics, Inc.
+ *	Christoph Lameter <clameter@sgi.com>
+ *
+ * Allows to provide arch independent atomic definitions without the need to
+ * edit all arch specific atomic.h files.
+ */
+
+
+/*
+ * Suppport for atomic_long_t
+ *
+ * Casts for parameters are avoided for existing atomic functions in order to
+ * avoid issues with cast-as-lval under gcc 4.x and other limitations that the
+ * macros of a platform may have.
+ */
+
+#if BITS_PER_LONG == 64
+
+typedef atomic64_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC64_INIT(i)
+
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	return (long)atomic64_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic64_t *v = (atomic64_t *)l;
+
+	atomic64_sub(i, v);
+}
+
+#else
+
+typedef atomic_t atomic_long_t;
+
+#define ATOMIC_LONG_INIT(i)	ATOMIC_INIT(i)
+static inline long atomic_long_read(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	return (long)atomic_read(v);
+}
+
+static inline void atomic_long_set(atomic_long_t *l, long i)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_set(v, i);
+}
+
+static inline void atomic_long_inc(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_inc(v);
+}
+
+static inline void atomic_long_dec(atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_dec(v);
+}
+
+static inline void atomic_long_add(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_add(i, v);
+}
+
+static inline void atomic_long_sub(long i, atomic_long_t *l)
+{
+	atomic_t *v = (atomic_t *)l;
+
+	atomic_sub(i, v);
+}
+
+#endif
+#endif
diff --git a/include/asm-h8300/atomic.h b/include/asm-h8300/atomic.h
index f23d86819ea8..d891541e89c3 100644
--- a/include/asm-h8300/atomic.h
+++ b/include/asm-h8300/atomic.h
@@ -137,4 +137,5 @@ static __inline__ void atomic_set_mask(unsigned long mask, unsigned long *v)
 #define smp_mb__before_atomic_inc()    barrier()
 #define smp_mb__after_atomic_inc() barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __ARCH_H8300_ATOMIC __ */
diff --git a/include/asm-i386/atomic.h b/include/asm-i386/atomic.h
index c68557aa04b2..7a5472d77091 100644
--- a/include/asm-i386/atomic.h
+++ b/include/asm-i386/atomic.h
@@ -254,4 +254,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif
diff --git a/include/asm-ia64/atomic.h b/include/asm-ia64/atomic.h
index 2fbebf85c31d..15cf7984c48e 100644
--- a/include/asm-ia64/atomic.h
+++ b/include/asm-ia64/atomic.h
@@ -192,4 +192,5 @@ atomic64_add_negative (__s64 i, atomic64_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* _ASM_IA64_ATOMIC_H */
diff --git a/include/asm-m32r/atomic.h b/include/asm-m32r/atomic.h
index ef1fb8ea4726..70761278b6cb 100644
--- a/include/asm-m32r/atomic.h
+++ b/include/asm-m32r/atomic.h
@@ -313,4 +313,5 @@ static __inline__ void atomic_set_mask(unsigned long  mask, atomic_t *addr)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif	/* _ASM_M32R_ATOMIC_H */
diff --git a/include/asm-m68k/atomic.h b/include/asm-m68k/atomic.h
index e3c962eeabf3..b8a4e75d679d 100644
--- a/include/asm-m68k/atomic.h
+++ b/include/asm-m68k/atomic.h
@@ -157,4 +157,5 @@ static inline void atomic_set_mask(unsigned long mask, unsigned long *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __ARCH_M68K_ATOMIC __ */
diff --git a/include/asm-m68knommu/atomic.h b/include/asm-m68knommu/atomic.h
index 3c1cc153c415..1702dbe9318c 100644
--- a/include/asm-m68knommu/atomic.h
+++ b/include/asm-m68knommu/atomic.h
@@ -143,4 +143,5 @@ static inline int atomic_sub_return(int i, atomic_t * v)
 #define atomic_dec_return(v) atomic_sub_return(1,(v))
 #define atomic_inc_return(v) atomic_add_return(1,(v))
 
+#include <asm-generic/atomic.h>
 #endif /* __ARCH_M68KNOMMU_ATOMIC __ */
diff --git a/include/asm-mips/atomic.h b/include/asm-mips/atomic.h
index 55c37c106ef0..92256e43a938 100644
--- a/include/asm-mips/atomic.h
+++ b/include/asm-mips/atomic.h
@@ -713,4 +713,5 @@ static __inline__ long atomic64_sub_if_positive(long i, atomic64_t * v)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
+#include <asm-generic/atomic.h>
 #endif /* _ASM_ATOMIC_H */
diff --git a/include/asm-parisc/atomic.h b/include/asm-parisc/atomic.h
index 983e9a2b6042..64ebd086c40d 100644
--- a/include/asm-parisc/atomic.h
+++ b/include/asm-parisc/atomic.h
@@ -216,4 +216,5 @@ static __inline__ int atomic_read(const atomic_t *v)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
+#include <asm-generic/atomic.h>
 #endif
diff --git a/include/asm-powerpc/atomic.h b/include/asm-powerpc/atomic.h
index ec4b14468959..ae395a0632a6 100644
--- a/include/asm-powerpc/atomic.h
+++ b/include/asm-powerpc/atomic.h
@@ -402,5 +402,6 @@ static __inline__ long atomic64_dec_if_positive(atomic64_t *v)
 
 #endif /* __powerpc64__ */
 
+#include <asm-generic/atomic.h>
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_ATOMIC_H_ */
diff --git a/include/asm-s390/atomic.h b/include/asm-s390/atomic.h
index b3bd4f679f72..6d07c7df4b40 100644
--- a/include/asm-s390/atomic.h
+++ b/include/asm-s390/atomic.h
@@ -215,5 +215,6 @@ atomic_compare_and_swap(int expected_oldval,int new_val,atomic_t *v)
 #define smp_mb__before_atomic_inc()	smp_mb()
 #define smp_mb__after_atomic_inc()	smp_mb()
 
+#include <asm-generic/atomic.h>
 #endif /* __KERNEL__ */
 #endif /* __ARCH_S390_ATOMIC__  */
diff --git a/include/asm-sh/atomic.h b/include/asm-sh/atomic.h
index aabfd334462c..618d8e0de348 100644
--- a/include/asm-sh/atomic.h
+++ b/include/asm-sh/atomic.h
@@ -140,4 +140,5 @@ static __inline__ void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __ASM_SH_ATOMIC_H */
diff --git a/include/asm-sh64/atomic.h b/include/asm-sh64/atomic.h
index 927a2bc27b30..f3ce5c0df13a 100644
--- a/include/asm-sh64/atomic.h
+++ b/include/asm-sh64/atomic.h
@@ -152,4 +152,5 @@ static __inline__ void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __ASM_SH64_ATOMIC_H */
diff --git a/include/asm-sparc/atomic.h b/include/asm-sparc/atomic.h
index 62bec7ad271c..accb4967e9d2 100644
--- a/include/asm-sparc/atomic.h
+++ b/include/asm-sparc/atomic.h
@@ -159,4 +159,5 @@ static inline int __atomic24_sub(int i, atomic24_t *v)
 
 #endif /* !(__KERNEL__) */
 
+#include <asm-generic/atomic.h>
 #endif /* !(__ARCH_SPARC_ATOMIC__) */
diff --git a/include/asm-sparc64/atomic.h b/include/asm-sparc64/atomic.h
index 3789fe315992..11f5aa5d108c 100644
--- a/include/asm-sparc64/atomic.h
+++ b/include/asm-sparc64/atomic.h
@@ -96,4 +96,5 @@ extern int atomic64_sub_ret(int, atomic64_t *);
 #define smp_mb__after_atomic_inc()	barrier()
 #endif
 
+#include <asm-generic/atomic.h>
 #endif /* !(__ARCH_SPARC64_ATOMIC__) */
diff --git a/include/asm-v850/atomic.h b/include/asm-v850/atomic.h
index bede3172ce7f..f5b9ab6f4e70 100644
--- a/include/asm-v850/atomic.h
+++ b/include/asm-v850/atomic.h
@@ -126,4 +126,5 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __V850_ATOMIC_H__ */
diff --git a/include/asm-x86_64/atomic.h b/include/asm-x86_64/atomic.h
index 50db9f39274f..72eb071488c7 100644
--- a/include/asm-x86_64/atomic.h
+++ b/include/asm-x86_64/atomic.h
@@ -424,4 +424,5 @@ __asm__ __volatile__(LOCK "orl %0,%1" \
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif
diff --git a/include/asm-xtensa/atomic.h b/include/asm-xtensa/atomic.h
index 3670cc7695da..e2ce06b101ad 100644
--- a/include/asm-xtensa/atomic.h
+++ b/include/asm-xtensa/atomic.h
@@ -286,6 +286,7 @@ static inline void atomic_set_mask(unsigned int mask, atomic_t *v)
 #define smp_mb__before_atomic_inc()	barrier()
 #define smp_mb__after_atomic_inc()	barrier()
 
+#include <asm-generic/atomic.h>
 #endif /* __KERNEL__ */
 
 #endif /* _XTENSA_ATOMIC_H */
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b0ad6f30679e..7da33619d5d0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -254,25 +254,12 @@ extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
  * The mm counters are not protected by its page_table_lock,
  * so must be incremented atomically.
  */
-#ifdef ATOMIC64_INIT
-#define set_mm_counter(mm, member, value) atomic64_set(&(mm)->_##member, value)
-#define get_mm_counter(mm, member) ((unsigned long)atomic64_read(&(mm)->_##member))
-#define add_mm_counter(mm, member, value) atomic64_add(value, &(mm)->_##member)
-#define inc_mm_counter(mm, member) atomic64_inc(&(mm)->_##member)
-#define dec_mm_counter(mm, member) atomic64_dec(&(mm)->_##member)
-typedef atomic64_t mm_counter_t;
-#else /* !ATOMIC64_INIT */
-/*
- * The counters wrap back to 0 at 2^32 * PAGE_SIZE,
- * that is, at 16TB if using 4kB page size.
- */
-#define set_mm_counter(mm, member, value) atomic_set(&(mm)->_##member, value)
-#define get_mm_counter(mm, member) ((unsigned long)atomic_read(&(mm)->_##member))
-#define add_mm_counter(mm, member, value) atomic_add(value, &(mm)->_##member)
-#define inc_mm_counter(mm, member) atomic_inc(&(mm)->_##member)
-#define dec_mm_counter(mm, member) atomic_dec(&(mm)->_##member)
-typedef atomic_t mm_counter_t;
-#endif /* !ATOMIC64_INIT */
+#define set_mm_counter(mm, member, value) atomic_long_set(&(mm)->_##member, value)
+#define get_mm_counter(mm, member) ((unsigned long)atomic_long_read(&(mm)->_##member))
+#define add_mm_counter(mm, member, value) atomic_long_add(value, &(mm)->_##member)
+#define inc_mm_counter(mm, member) atomic_long_inc(&(mm)->_##member)
+#define dec_mm_counter(mm, member) atomic_long_dec(&(mm)->_##member)
+typedef atomic_long_t mm_counter_t;
 
 #else  /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
 /*
-- 
cgit v1.2.3


From a74609fafa2e5cc31d558012abaaa55ec9ad9da4 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:20 -0800
Subject: [PATCH] mm: page_state opt

Optimise page_state manipulations by introducing interrupt unsafe accessors
to page_state fields.  Callers must provide their own locking (either
disable interrupts or not update from interrupt context).

Switch over the hot callsites that can easily be moved under interrupts off
sections.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Cc: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 43 ++++++++++++++++------
 mm/page_alloc.c            | 89 ++++++++++++++++++++++++++--------------------
 mm/rmap.c                  | 10 ++++--
 mm/vmscan.c                | 27 +++++++-------
 4 files changed, 104 insertions(+), 65 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 32d09c8d952b..dede8d412dca 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -144,22 +144,33 @@ struct page_state {
 extern void get_page_state(struct page_state *ret);
 extern void get_page_state_node(struct page_state *ret, int node);
 extern void get_full_page_state(struct page_state *ret);
-extern unsigned long __read_page_state(unsigned long offset);
-extern void __mod_page_state(unsigned long offset, unsigned long delta);
+extern unsigned long read_page_state_offset(unsigned long offset);
+extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
+extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
 
 #define read_page_state(member) \
-	__read_page_state(offsetof(struct page_state, member))
+	read_page_state_offset(offsetof(struct page_state, member))
 
 #define mod_page_state(member, delta)	\
-	__mod_page_state(offsetof(struct page_state, member), (delta))
+	mod_page_state_offset(offsetof(struct page_state, member), (delta))
 
-#define inc_page_state(member)	mod_page_state(member, 1UL)
-#define dec_page_state(member)	mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
+#define __mod_page_state(member, delta)	\
+	__mod_page_state_offset(offsetof(struct page_state, member), (delta))
 
-#define mod_page_state_zone(zone, member, delta)			\
- do {									\
+#define inc_page_state(member)		mod_page_state(member, 1UL)
+#define dec_page_state(member)		mod_page_state(member, 0UL - 1)
+#define add_page_state(member,delta)	mod_page_state(member, (delta))
+#define sub_page_state(member,delta)	mod_page_state(member, 0UL - (delta))
+
+#define __inc_page_state(member)	__mod_page_state(member, 1UL)
+#define __dec_page_state(member)	__mod_page_state(member, 0UL - 1)
+#define __add_page_state(member,delta)	__mod_page_state(member, (delta))
+#define __sub_page_state(member,delta)	__mod_page_state(member, 0UL - (delta))
+
+#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
+
+#define state_zone_offset(zone, member)					\
+({									\
 	unsigned offset;						\
 	if (is_highmem(zone))						\
 		offset = offsetof(struct page_state, member##_high);	\
@@ -169,7 +180,17 @@ extern void __mod_page_state(unsigned long offset, unsigned long delta);
 		offset = offsetof(struct page_state, member##_dma32);	\
 	else								\
 		offset = offsetof(struct page_state, member##_dma);	\
-	__mod_page_state(offset, (delta));				\
+	offset;								\
+})
+
+#define __mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	__mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
+ } while (0)
+
+#define mod_page_state_zone(zone, member, delta)			\
+ do {									\
+	mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
  } while (0)
 
 /*
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 7f580779abdb..fd47494cb989 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -424,9 +424,9 @@ void __free_pages_ok(struct page *page, unsigned int order)
 		return;
 
 	list_add(&page->lru, &list);
-	mod_page_state(pgfree, 1 << order);
 	kernel_map_pages(page, 1<<order, 0);
 	local_irq_save(flags);
+	__mod_page_state(pgfree, 1 << order);
 	free_pages_bulk(page_zone(page), 1, &list, order);
 	local_irq_restore(flags);
 }
@@ -674,18 +674,14 @@ void drain_local_pages(void)
 }
 #endif /* CONFIG_PM */
 
-static void zone_statistics(struct zonelist *zonelist, struct zone *z)
+static void zone_statistics(struct zonelist *zonelist, struct zone *z, int cpu)
 {
 #ifdef CONFIG_NUMA
-	unsigned long flags;
-	int cpu;
 	pg_data_t *pg = z->zone_pgdat;
 	pg_data_t *orig = zonelist->zones[0]->zone_pgdat;
 	struct per_cpu_pageset *p;
 
-	local_irq_save(flags);
-	cpu = smp_processor_id();
-	p = zone_pcp(z,cpu);
+	p = zone_pcp(z, cpu);
 	if (pg == orig) {
 		p->numa_hit++;
 	} else {
@@ -696,7 +692,6 @@ static void zone_statistics(struct zonelist *zonelist, struct zone *z)
 		p->local_node++;
 	else
 		p->other_node++;
-	local_irq_restore(flags);
 #endif
 }
 
@@ -716,11 +711,11 @@ static void fastcall free_hot_cold_page(struct page *page, int cold)
 	if (free_pages_check(page))
 		return;
 
-	inc_page_state(pgfree);
 	kernel_map_pages(page, 1, 0);
 
 	pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
 	local_irq_save(flags);
+	__inc_page_state(pgfree);
 	list_add(&page->lru, &pcp->list);
 	pcp->count++;
 	if (pcp->count >= pcp->high)
@@ -753,49 +748,58 @@ static inline void prep_zero_page(struct page *page, int order, gfp_t gfp_flags)
  * we cheat by calling it from here, in the order > 0 path.  Saves a branch
  * or two.
  */
-static struct page *
-buffered_rmqueue(struct zone *zone, int order, gfp_t gfp_flags)
+static struct page *buffered_rmqueue(struct zonelist *zonelist,
+			struct zone *zone, int order, gfp_t gfp_flags)
 {
 	unsigned long flags;
 	struct page *page;
 	int cold = !!(gfp_flags & __GFP_COLD);
+	int cpu;
 
 again:
+	cpu  = get_cpu();
 	if (order == 0) {
 		struct per_cpu_pages *pcp;
 
-		page = NULL;
-		pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
+		pcp = &zone_pcp(zone, cpu)->pcp[cold];
 		local_irq_save(flags);
-		if (!pcp->count)
+		if (!pcp->count) {
 			pcp->count += rmqueue_bulk(zone, 0,
 						pcp->batch, &pcp->list);
-		if (likely(pcp->count)) {
-			page = list_entry(pcp->list.next, struct page, lru);
-			list_del(&page->lru);
-			pcp->count--;
+			if (unlikely(!pcp->count))
+				goto failed;
 		}
-		local_irq_restore(flags);
-		put_cpu();
+		page = list_entry(pcp->list.next, struct page, lru);
+		list_del(&page->lru);
+		pcp->count--;
 	} else {
 		spin_lock_irqsave(&zone->lock, flags);
 		page = __rmqueue(zone, order);
-		spin_unlock_irqrestore(&zone->lock, flags);
+		spin_unlock(&zone->lock);
+		if (!page)
+			goto failed;
 	}
 
-	if (page != NULL) {
-		BUG_ON(bad_range(zone, page));
-		mod_page_state_zone(zone, pgalloc, 1 << order);
-		if (prep_new_page(page, order))
-			goto again;
+	__mod_page_state_zone(zone, pgalloc, 1 << order);
+	zone_statistics(zonelist, zone, cpu);
+	local_irq_restore(flags);
+	put_cpu();
 
-		if (gfp_flags & __GFP_ZERO)
-			prep_zero_page(page, order, gfp_flags);
+	BUG_ON(bad_range(zone, page));
+	if (prep_new_page(page, order))
+		goto again;
 
-		if (order && (gfp_flags & __GFP_COMP))
-			prep_compound_page(page, order);
-	}
+	if (gfp_flags & __GFP_ZERO)
+		prep_zero_page(page, order, gfp_flags);
+
+	if (order && (gfp_flags & __GFP_COMP))
+		prep_compound_page(page, order);
 	return page;
+
+failed:
+	local_irq_restore(flags);
+	put_cpu();
+	return NULL;
 }
 
 #define ALLOC_NO_WATERMARKS	0x01 /* don't check watermarks at all */
@@ -871,9 +875,8 @@ get_page_from_freelist(gfp_t gfp_mask, unsigned int order,
 				continue;
 		}
 
-		page = buffered_rmqueue(*z, order, gfp_mask);
+		page = buffered_rmqueue(zonelist, *z, order, gfp_mask);
 		if (page) {
-			zone_statistics(zonelist, *z);
 			break;
 		}
 	} while (*(++z) != NULL);
@@ -1248,7 +1251,7 @@ void get_full_page_state(struct page_state *ret)
 	__get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
 }
 
-unsigned long __read_page_state(unsigned long offset)
+unsigned long read_page_state_offset(unsigned long offset)
 {
 	unsigned long ret = 0;
 	int cpu;
@@ -1262,18 +1265,26 @@ unsigned long __read_page_state(unsigned long offset)
 	return ret;
 }
 
-void __mod_page_state(unsigned long offset, unsigned long delta)
+void __mod_page_state_offset(unsigned long offset, unsigned long delta)
+{
+	void *ptr;
+
+	ptr = &__get_cpu_var(page_states);
+	*(unsigned long *)(ptr + offset) += delta;
+}
+EXPORT_SYMBOL(__mod_page_state_offset);
+
+void mod_page_state_offset(unsigned long offset, unsigned long delta)
 {
 	unsigned long flags;
-	void* ptr;
+	void *ptr;
 
 	local_irq_save(flags);
 	ptr = &__get_cpu_var(page_states);
-	*(unsigned long*)(ptr + offset) += delta;
+	*(unsigned long *)(ptr + offset) += delta;
 	local_irq_restore(flags);
 }
-
-EXPORT_SYMBOL(__mod_page_state);
+EXPORT_SYMBOL(mod_page_state_offset);
 
 void __get_zone_counts(unsigned long *active, unsigned long *inactive,
 			unsigned long *free, struct pglist_data *pgdat)
diff --git a/mm/rmap.c b/mm/rmap.c
index 4107f64ff749..6f3f7db27128 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -451,7 +451,11 @@ static void __page_set_anon_rmap(struct page *page,
 
 	page->index = linear_page_index(vma, address);
 
-	inc_page_state(nr_mapped);
+	/*
+	 * nr_mapped state can be updated without turning off
+	 * interrupts because it is not modified via interrupt.
+	 */
+	__inc_page_state(nr_mapped);
 }
 
 /**
@@ -498,7 +502,7 @@ void page_add_file_rmap(struct page *page)
 	BUG_ON(!pfn_valid(page_to_pfn(page)));
 
 	if (atomic_inc_and_test(&page->_mapcount))
-		inc_page_state(nr_mapped);
+		__inc_page_state(nr_mapped);
 }
 
 /**
@@ -522,7 +526,7 @@ void page_remove_rmap(struct page *page)
 		 */
 		if (page_test_and_clear_dirty(page))
 			set_page_dirty(page);
-		dec_page_state(nr_mapped);
+		__dec_page_state(nr_mapped);
 	}
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 7681d8ee04fe..be8235fb1939 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -645,16 +645,17 @@ static void shrink_cache(struct zone *zone, struct scan_control *sc)
 			goto done;
 
 		max_scan -= nr_scan;
-		if (current_is_kswapd())
-			mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
-		else
-			mod_page_state_zone(zone, pgscan_direct, nr_scan);
 		nr_freed = shrink_list(&page_list, sc);
-		if (current_is_kswapd())
-			mod_page_state(kswapd_steal, nr_freed);
-		mod_page_state_zone(zone, pgsteal, nr_freed);
 
-		spin_lock_irq(&zone->lru_lock);
+		local_irq_disable();
+		if (current_is_kswapd()) {
+			__mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
+			__mod_page_state(kswapd_steal, nr_freed);
+		} else
+			__mod_page_state_zone(zone, pgscan_direct, nr_scan);
+		__mod_page_state_zone(zone, pgsteal, nr_freed);
+
+		spin_lock(&zone->lru_lock);
 		/*
 		 * Put back any unfreeable pages.
 		 */
@@ -816,11 +817,13 @@ refill_inactive_zone(struct zone *zone, struct scan_control *sc)
 		}
 	}
 	zone->nr_active += pgmoved;
-	spin_unlock_irq(&zone->lru_lock);
-	pagevec_release(&pvec);
+	spin_unlock(&zone->lru_lock);
+
+	__mod_page_state_zone(zone, pgrefill, pgscanned);
+	__mod_page_state(pgdeactivate, pgdeactivate);
+	local_irq_enable();
 
-	mod_page_state_zone(zone, pgrefill, pgscanned);
-	mod_page_state(pgdeactivate, pgdeactivate);
+	pagevec_release(&pvec);
 }
 
 /*
-- 
cgit v1.2.3


From b09eb1c06a14641209e6b86e9a5b28ea8287f193 Mon Sep 17 00:00:00 2001
From: Nick Piggin <nickpiggin@yahoo.com.au>
Date: Fri, 6 Jan 2006 00:11:21 -0800
Subject: [PATCH] mm: page_state opt docs

Comment the new locking rules for page_state statistics.

Signed-off-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/page-flags.h | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index dede8d412dca..d52999c43336 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -79,13 +79,23 @@
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
  * allowed.
+ *
+ * - Fields can be modified with xxx_page_state and xxx_page_state_zone at
+ * any time safely (which protects the instance from modification by
+ * interrupt.
+ * - The __xxx_page_state variants can be used safely when interrupts are
+ * disabled.
+ * - The __xxx_page_state variants can be used if the field is only
+ * modified from process context, or only modified from interrupt context.
+ * In this case, the field should be commented here.
  */
 struct page_state {
 	unsigned long nr_dirty;		/* Dirty writeable pages */
 	unsigned long nr_writeback;	/* Pages under writeback */
 	unsigned long nr_unstable;	/* NFS unstable pages */
 	unsigned long nr_page_table_pages;/* Pages used for pagetables */
-	unsigned long nr_mapped;	/* mapped into pagetables */
+	unsigned long nr_mapped;	/* mapped into pagetables.
+					 * only modified from process context */
 	unsigned long nr_slab;		/* In slab */
 #define GET_PAGE_STATE_LAST nr_slab
 
-- 
cgit v1.2.3


From 8d9067bda99c68e1a17d93e78cf3a5a3f67e0c35 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Jan 2006 00:11:24 -0800
Subject: [PATCH] Keys: Remove key duplication

Remove the key duplication stuff since there's nothing that uses it, no way
to get at it and it's awkward to deal with for LSM purposes.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/keys.txt       | 18 -------------
 include/keys/user-type.h     |  1 -
 include/linux/key.h          |  8 ------
 security/keys/key.c          | 56 +++-----------------------------------
 security/keys/keyring.c      | 64 --------------------------------------------
 security/keys/user_defined.c | 33 -----------------------
 6 files changed, 3 insertions(+), 177 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/keys.txt b/Documentation/keys.txt
index 31154882000a..6304db59bfe4 100644
--- a/Documentation/keys.txt
+++ b/Documentation/keys.txt
@@ -860,24 +860,6 @@ The structure has a number of fields, some of which are mandatory:
      It is safe to sleep in this method.
 
 
- (*) int (*duplicate)(struct key *key, const struct key *source);
-
-     If this type of key can be duplicated, then this method should be
-     provided. It is called to copy the payload attached to the source into the
-     new key. The data length on the new key will have been updated and the
-     quota adjusted already.
-
-     This method will be called with the source key's semaphore read-locked to
-     prevent its payload from being changed, thus RCU constraints need not be
-     applied to the source key.
-
-     This method does not have to lock the destination key in order to attach a
-     payload. The fact that KEY_FLAG_INSTANTIATED is not set in key->flags
-     prevents anything else from gaining access to the key.
-
-     It is safe to sleep in this method.
-
-
  (*) int (*update)(struct key *key, const void *data, size_t datalen);
 
      If this type of key can be updated, then this method should be provided.
diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index 26f6ec38577a..a3dae1803f45 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -35,7 +35,6 @@ struct user_key_payload {
 extern struct key_type key_type_user;
 
 extern int user_instantiate(struct key *key, const void *data, size_t datalen);
-extern int user_duplicate(struct key *key, const struct key *source);
 extern int user_update(struct key *key, const void *data, size_t datalen);
 extern int user_match(const struct key *key, const void *criterion);
 extern void user_destroy(struct key *key);
diff --git a/include/linux/key.h b/include/linux/key.h
index 53513a3be53b..4d189e51bc6c 100644
--- a/include/linux/key.h
+++ b/include/linux/key.h
@@ -193,14 +193,6 @@ struct key_type {
 	 */
 	int (*instantiate)(struct key *key, const void *data, size_t datalen);
 
-	/* duplicate a key of this type (optional)
-	 * - the source key will be locked against change
-	 * - the new description will be attached
-	 * - the quota will have been adjusted automatically from
-	 *   source->quotalen
-	 */
-	int (*duplicate)(struct key *key, const struct key *source);
-
 	/* update a key of this type (optional)
 	 * - this method should call key_payload_reserve() to recalculate the
 	 *   quota consumption
diff --git a/security/keys/key.c b/security/keys/key.c
index 01bcfecb7eae..bb036623d0a8 100644
--- a/security/keys/key.c
+++ b/security/keys/key.c
@@ -240,9 +240,9 @@ static inline void key_alloc_serial(struct key *key)
 /*
  * allocate a key of the specified type
  * - update the user's quota to reflect the existence of the key
- * - called from a key-type operation with key_types_sem read-locked by either
- *   key_create_or_update() or by key_duplicate(); this prevents unregistration
- *   of the key type
+ * - called from a key-type operation with key_types_sem read-locked by
+ *   key_create_or_update()
+ *   - this prevents unregistration of the key type
  * - upon return the key is as yet uninstantiated; the caller needs to either
  *   instantiate the key or discard it before returning
  */
@@ -887,56 +887,6 @@ int key_update(key_ref_t key_ref, const void *payload, size_t plen)
 
 EXPORT_SYMBOL(key_update);
 
-/*****************************************************************************/
-/*
- * duplicate a key, potentially with a revised description
- * - must be supported by the keytype (keyrings for instance can be duplicated)
- */
-struct key *key_duplicate(struct key *source, const char *desc)
-{
-	struct key *key;
-	int ret;
-
-	key_check(source);
-
-	if (!desc)
-		desc = source->description;
-
-	down_read(&key_types_sem);
-
-	ret = -EINVAL;
-	if (!source->type->duplicate)
-		goto error;
-
-	/* allocate and instantiate a key */
-	key = key_alloc(source->type, desc, current->fsuid, current->fsgid,
-			source->perm, 0);
-	if (IS_ERR(key))
-		goto error_k;
-
-	down_read(&source->sem);
-	ret = key->type->duplicate(key, source);
-	up_read(&source->sem);
-	if (ret < 0)
-		goto error2;
-
-	atomic_inc(&key->user->nikeys);
-	set_bit(KEY_FLAG_INSTANTIATED, &key->flags);
-
- error_k:
-	up_read(&key_types_sem);
- out:
-	return key;
-
- error2:
-	key_put(key);
- error:
-	up_read(&key_types_sem);
-	key = ERR_PTR(ret);
-	goto out;
-
-} /* end key_duplicate() */
-
 /*****************************************************************************/
 /*
  * revoke a key
diff --git a/security/keys/keyring.c b/security/keys/keyring.c
index 4e9fa8be44b8..0acecbd4fa37 100644
--- a/security/keys/keyring.c
+++ b/security/keys/keyring.c
@@ -48,7 +48,6 @@ static inline unsigned keyring_hash(const char *desc)
  */
 static int keyring_instantiate(struct key *keyring,
 			       const void *data, size_t datalen);
-static int keyring_duplicate(struct key *keyring, const struct key *source);
 static int keyring_match(const struct key *keyring, const void *criterion);
 static void keyring_destroy(struct key *keyring);
 static void keyring_describe(const struct key *keyring, struct seq_file *m);
@@ -59,7 +58,6 @@ struct key_type key_type_keyring = {
 	.name		= "keyring",
 	.def_datalen	= sizeof(struct keyring_list),
 	.instantiate	= keyring_instantiate,
-	.duplicate	= keyring_duplicate,
 	.match		= keyring_match,
 	.destroy	= keyring_destroy,
 	.describe	= keyring_describe,
@@ -118,68 +116,6 @@ static int keyring_instantiate(struct key *keyring,
 
 } /* end keyring_instantiate() */
 
-/*****************************************************************************/
-/*
- * duplicate the list of subscribed keys from a source keyring into this one
- */
-static int keyring_duplicate(struct key *keyring, const struct key *source)
-{
-	struct keyring_list *sklist, *klist;
-	unsigned max;
-	size_t size;
-	int loop, ret;
-
-	const unsigned limit =
-		(PAGE_SIZE - sizeof(*klist)) / sizeof(struct key *);
-
-	ret = 0;
-
-	/* find out how many keys are currently linked */
-	rcu_read_lock();
-	sklist = rcu_dereference(source->payload.subscriptions);
-	max = 0;
-	if (sklist)
-		max = sklist->nkeys;
-	rcu_read_unlock();
-
-	/* allocate a new payload and stuff load with key links */
-	if (max > 0) {
-		BUG_ON(max > limit);
-
-		max = (max + 3) & ~3;
-		if (max > limit)
-			max = limit;
-
-		ret = -ENOMEM;
-		size = sizeof(*klist) + sizeof(struct key *) * max;
-		klist = kmalloc(size, GFP_KERNEL);
-		if (!klist)
-			goto error;
-
-		/* set links */
-		rcu_read_lock();
-		sklist = rcu_dereference(source->payload.subscriptions);
-
-		klist->maxkeys = max;
-		klist->nkeys = sklist->nkeys;
-		memcpy(klist->keys,
-		       sklist->keys,
-		       sklist->nkeys * sizeof(struct key *));
-
-		for (loop = klist->nkeys - 1; loop >= 0; loop--)
-			atomic_inc(&klist->keys[loop]->usage);
-
-		rcu_read_unlock();
-
-		rcu_assign_pointer(keyring->payload.subscriptions, klist);
-		ret = 0;
-	}
-
- error:
-	return ret;
-
-} /* end keyring_duplicate() */
-
 /*****************************************************************************/
 /*
  * match keyrings on their name
diff --git a/security/keys/user_defined.c b/security/keys/user_defined.c
index cbda3b2780a1..8e71895b97a7 100644
--- a/security/keys/user_defined.c
+++ b/security/keys/user_defined.c
@@ -26,7 +26,6 @@
 struct key_type key_type_user = {
 	.name		= "user",
 	.instantiate	= user_instantiate,
-	.duplicate	= user_duplicate,
 	.update		= user_update,
 	.match		= user_match,
 	.destroy	= user_destroy,
@@ -68,40 +67,8 @@ error:
 	return ret;
 
 } /* end user_instantiate() */
-
 EXPORT_SYMBOL_GPL(user_instantiate);
 
-/*****************************************************************************/
-/*
- * duplicate a user defined key
- * - both keys' semaphores are locked against further modification
- * - the new key cannot yet be accessed
- */
-int user_duplicate(struct key *key, const struct key *source)
-{
-	struct user_key_payload *upayload, *spayload;
-	int ret;
-
-	/* just copy the payload */
-	ret = -ENOMEM;
-	upayload = kmalloc(sizeof(*upayload) + source->datalen, GFP_KERNEL);
-	if (upayload) {
-		spayload = rcu_dereference(source->payload.data);
-		BUG_ON(source->datalen != spayload->datalen);
-
-		upayload->datalen = key->datalen = spayload->datalen;
-		memcpy(upayload->data, spayload->data, key->datalen);
-
-		key->payload.data = upayload;
-		ret = 0;
-	}
-
-	return ret;
-
-} /* end user_duplicate() */
-
-EXPORT_SYMBOL_GPL(user_duplicate);
-
 /*****************************************************************************/
 /*
  * dispose of the old data from an updated user defined key
-- 
cgit v1.2.3


From 642fb4d1f1dd2417aa69189fe5ceb81e4fb72900 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Jan 2006 00:11:41 -0800
Subject: [PATCH] NOMMU: Provide shared-writable mmap support on ramfs

The attached patch makes ramfs support shared-writable mmaps by:

 (1) Attempting to perform a contiguous block allocation to the requested size
     when truncate attempts to increase the file from zero size, such as
     happens when:

	fd = shm_open("/file/on/ramfs", ...):
	ftruncate(fd, size_requested);
	addr = mmap(NULL, subsize, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_SHARED,
		    fd, offset);

 (2) Permitting any shared-writable mapping over any contiguous set of extant
     pages. get_unmapped_area() will return the address into the actual ramfs
     pages. The mapping may start anywhere and be of any size, but may not go
     over the end of file. Multiple mappings may overlap in any way.

 (3) Not permitting a file to be shrunk if it would truncate any shared
     mappings (private mappings are copied).

Thus this patch provides support for POSIX shared memory on NOMMU kernels,
with certain limitations such as there being a large enough block of pages
available to support the allocation and it only working on directly mappable
filesystems.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ramfs/Makefile     |   4 +-
 fs/ramfs/file-mmu.c   |  57 ++++++++++
 fs/ramfs/file-nommu.c | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/ramfs/inode.c      |  22 +---
 fs/ramfs/internal.h   |  15 +++
 include/linux/ramfs.h |  10 ++
 6 files changed, 378 insertions(+), 22 deletions(-)
 create mode 100644 fs/ramfs/file-mmu.c
 create mode 100644 fs/ramfs/file-nommu.c
 create mode 100644 fs/ramfs/internal.h

(limited to 'include/linux')

diff --git a/fs/ramfs/Makefile b/fs/ramfs/Makefile
index f096f3007091..5a0236e02ee1 100644
--- a/fs/ramfs/Makefile
+++ b/fs/ramfs/Makefile
@@ -4,4 +4,6 @@
 
 obj-$(CONFIG_RAMFS) += ramfs.o
 
-ramfs-objs := inode.o
+file-mmu-y := file-nommu.o
+file-mmu-$(CONFIG_MMU) := file-mmu.o
+ramfs-objs += inode.o $(file-mmu-y)
diff --git a/fs/ramfs/file-mmu.c b/fs/ramfs/file-mmu.c
new file mode 100644
index 000000000000..2115383dcc8d
--- /dev/null
+++ b/fs/ramfs/file-mmu.c
@@ -0,0 +1,57 @@
+/* file-mmu.c: ramfs MMU-based file operations
+ *
+ * Resizable simple ram filesystem for Linux.
+ *
+ * Copyright (C) 2000 Linus Torvalds.
+ *               2000 Transmeta Corp.
+ *
+ * Usage limits added by David Gibson, Linuxcare Australia.
+ * This file is released under the GPL.
+ */
+
+/*
+ * NOTE! This filesystem is probably most useful
+ * not as a real filesystem, but as an example of
+ * how virtual filesystems can be written.
+ *
+ * It doesn't get much simpler than this. Consider
+ * that this file implements the full semantics of
+ * a POSIX-compliant read-write filesystem.
+ *
+ * Note in particular how the filesystem does not
+ * need to implement any data structures of its own
+ * to keep track of the virtual data: using the VFS
+ * caches is sufficient.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/ramfs.h>
+
+#include <asm/uaccess.h>
+#include "internal.h"
+
+struct address_space_operations ramfs_aops = {
+	.readpage	= simple_readpage,
+	.prepare_write	= simple_prepare_write,
+	.commit_write	= simple_commit_write
+};
+
+struct file_operations ramfs_file_operations = {
+	.read		= generic_file_read,
+	.write		= generic_file_write,
+	.mmap		= generic_file_mmap,
+	.fsync		= simple_sync_file,
+	.sendfile	= generic_file_sendfile,
+	.llseek		= generic_file_llseek,
+};
+
+struct inode_operations ramfs_file_inode_operations = {
+	.getattr	= simple_getattr,
+};
diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c
new file mode 100644
index 000000000000..3f810acd0bfa
--- /dev/null
+++ b/fs/ramfs/file-nommu.c
@@ -0,0 +1,292 @@
+/* file-nommu.c: no-MMU version of ramfs
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+#include <linux/init.h>
+#include <linux/string.h>
+#include <linux/smp_lock.h>
+#include <linux/backing-dev.h>
+#include <linux/ramfs.h>
+#include <linux/quotaops.h>
+#include <linux/pagevec.h>
+#include <linux/mman.h>
+
+#include <asm/uaccess.h>
+#include "internal.h"
+
+static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
+
+struct address_space_operations ramfs_aops = {
+	.readpage		= simple_readpage,
+	.prepare_write		= simple_prepare_write,
+	.commit_write		= simple_commit_write
+};
+
+struct file_operations ramfs_file_operations = {
+	.mmap			= ramfs_nommu_mmap,
+	.get_unmapped_area	= ramfs_nommu_get_unmapped_area,
+	.read			= generic_file_read,
+	.write			= generic_file_write,
+	.fsync			= simple_sync_file,
+	.sendfile		= generic_file_sendfile,
+	.llseek			= generic_file_llseek,
+};
+
+struct inode_operations ramfs_file_inode_operations = {
+	.setattr		= ramfs_nommu_setattr,
+	.getattr		= simple_getattr,
+};
+
+/*****************************************************************************/
+/*
+ * add a contiguous set of pages into a ramfs inode when it's truncated from
+ * size 0 on the assumption that it's going to be used for an mmap of shared
+ * memory
+ */
+static int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
+{
+	struct pagevec lru_pvec;
+	unsigned long npages, xpages, loop, limit;
+	struct page *pages;
+	unsigned order;
+	void *data;
+	int ret;
+
+	/* make various checks */
+	order = get_order(newsize);
+	if (unlikely(order >= MAX_ORDER))
+		goto too_big;
+
+	limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
+	if (limit != RLIM_INFINITY && newsize > limit)
+		goto fsize_exceeded;
+
+	if (newsize > inode->i_sb->s_maxbytes)
+		goto too_big;
+
+	i_size_write(inode, newsize);
+
+	/* allocate enough contiguous pages to be able to satisfy the
+	 * request */
+	pages = alloc_pages(mapping_gfp_mask(inode->i_mapping), order);
+	if (!pages)
+		return -ENOMEM;
+
+	/* split the high-order page into an array of single pages */
+	xpages = 1UL << order;
+	npages = (newsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+
+	for (loop = 0; loop < npages; loop++)
+		set_page_count(pages + loop, 1);
+
+	/* trim off any pages we don't actually require */
+	for (loop = npages; loop < xpages; loop++)
+		__free_page(pages + loop);
+
+	/* clear the memory we allocated */
+	newsize = PAGE_SIZE * npages;
+	data = page_address(pages);
+	memset(data, 0, newsize);
+
+	/* attach all the pages to the inode's address space */
+	pagevec_init(&lru_pvec, 0);
+	for (loop = 0; loop < npages; loop++) {
+		struct page *page = pages + loop;
+
+		ret = add_to_page_cache(page, inode->i_mapping, loop, GFP_KERNEL);
+		if (ret < 0)
+			goto add_error;
+
+		if (!pagevec_add(&lru_pvec, page))
+			__pagevec_lru_add(&lru_pvec);
+
+		unlock_page(page);
+	}
+
+	pagevec_lru_add(&lru_pvec);
+	return 0;
+
+ fsize_exceeded:
+	send_sig(SIGXFSZ, current, 0);
+ too_big:
+	return -EFBIG;
+
+ add_error:
+	page_cache_release(pages + loop);
+	for (loop++; loop < npages; loop++)
+		__free_page(pages + loop);
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * check that file shrinkage doesn't leave any VMAs dangling in midair
+ */
+static int ramfs_nommu_check_mappings(struct inode *inode,
+				      size_t newsize, size_t size)
+{
+	struct vm_area_struct *vma;
+	struct prio_tree_iter iter;
+
+	/* search for VMAs that fall within the dead zone */
+	vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap,
+			      newsize >> PAGE_SHIFT,
+			      (size + PAGE_SIZE - 1) >> PAGE_SHIFT
+			      ) {
+		/* found one - only interested if it's shared out of the page
+		 * cache */
+		if (vma->vm_flags & VM_SHARED)
+			return -ETXTBSY; /* not quite true, but near enough */
+	}
+
+	return 0;
+}
+
+/*****************************************************************************/
+/*
+ *
+ */
+static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
+{
+	int ret;
+
+	/* assume a truncate from zero size is going to be for the purposes of
+	 * shared mmap */
+	if (size == 0) {
+		if (unlikely(newsize >> 32))
+			return -EFBIG;
+
+		return ramfs_nommu_expand_for_mapping(inode, newsize);
+	}
+
+	/* check that a decrease in size doesn't cut off any shared mappings */
+	if (newsize < size) {
+		ret = ramfs_nommu_check_mappings(inode, newsize, size);
+		if (ret < 0)
+			return ret;
+	}
+
+	ret = vmtruncate(inode, size);
+
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * handle a change of attributes
+ * - we're specifically interested in a change of size
+ */
+static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
+{
+	struct inode *inode = dentry->d_inode;
+	unsigned int old_ia_valid = ia->ia_valid;
+	int ret = 0;
+
+	/* by providing our own setattr() method, we skip this quotaism */
+	if ((old_ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
+	    (old_ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid))
+		ret = DQUOT_TRANSFER(inode, ia) ? -EDQUOT : 0;
+
+	/* pick out size-changing events */
+	if (ia->ia_valid & ATTR_SIZE) {
+		loff_t size = i_size_read(inode);
+		if (ia->ia_size != size) {
+			ret = ramfs_nommu_resize(inode, ia->ia_size, size);
+			if (ret < 0 || ia->ia_valid == ATTR_SIZE)
+				goto out;
+		} else {
+			/* we skipped the truncate but must still update
+			 * timestamps
+			 */
+			ia->ia_valid |= ATTR_MTIME|ATTR_CTIME;
+		}
+	}
+
+	ret = inode_setattr(inode, ia);
+ out:
+	ia->ia_valid = old_ia_valid;
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * try to determine where a shared mapping can be made
+ * - we require that:
+ *   - the pages to be mapped must exist
+ *   - the pages be physically contiguous in sequence
+ */
+unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+					    unsigned long addr, unsigned long len,
+					    unsigned long pgoff, unsigned long flags)
+{
+	unsigned long maxpages, lpages, nr, loop, ret;
+	struct inode *inode = file->f_dentry->d_inode;
+	struct page **pages = NULL, **ptr, *page;
+	loff_t isize;
+
+	if (!(flags & MAP_SHARED))
+		return addr;
+
+	/* the mapping mustn't extend beyond the EOF */
+	lpages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	isize = i_size_read(inode);
+
+	ret = -EINVAL;
+	maxpages = (isize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	if (pgoff >= maxpages)
+		goto out;
+
+	if (maxpages - pgoff < lpages)
+		goto out;
+
+	/* gang-find the pages */
+	ret = -ENOMEM;
+	pages = kzalloc(lpages * sizeof(struct page *), GFP_KERNEL);
+	if (!pages)
+		goto out;
+
+	nr = find_get_pages(inode->i_mapping, pgoff, lpages, pages);
+	if (nr != lpages)
+		goto out; /* leave if some pages were missing */
+
+	/* check the pages for physical adjacency */
+	ptr = pages;
+	page = *ptr++;
+	page++;
+	for (loop = lpages; loop > 1; loop--)
+		if (*ptr++ != page++)
+			goto out;
+
+	/* okay - all conditions fulfilled */
+	ret = (unsigned long) page_address(pages[0]);
+
+ out:
+	if (pages) {
+		ptr = pages;
+		for (loop = lpages; loop > 0; loop--)
+			put_page(*ptr++);
+		kfree(pages);
+	}
+
+	return ret;
+}
+
+/*****************************************************************************/
+/*
+ * set up a mapping
+ */
+int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	return 0;
+}
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 0a88917605ae..c66bd5e4c05c 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -34,13 +34,12 @@
 #include <linux/ramfs.h>
 
 #include <asm/uaccess.h>
+#include "internal.h"
 
 /* some random number */
 #define RAMFS_MAGIC	0x858458f6
 
 static struct super_operations ramfs_ops;
-static struct address_space_operations ramfs_aops;
-static struct inode_operations ramfs_file_inode_operations;
 static struct inode_operations ramfs_dir_inode_operations;
 
 static struct backing_dev_info ramfs_backing_dev_info = {
@@ -142,25 +141,6 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
 	return error;
 }
 
-static struct address_space_operations ramfs_aops = {
-	.readpage	= simple_readpage,
-	.prepare_write	= simple_prepare_write,
-	.commit_write	= simple_commit_write
-};
-
-struct file_operations ramfs_file_operations = {
-	.read		= generic_file_read,
-	.write		= generic_file_write,
-	.mmap		= generic_file_mmap,
-	.fsync		= simple_sync_file,
-	.sendfile	= generic_file_sendfile,
-	.llseek		= generic_file_llseek,
-};
-
-static struct inode_operations ramfs_file_inode_operations = {
-	.getattr	= simple_getattr,
-};
-
 static struct inode_operations ramfs_dir_inode_operations = {
 	.create		= ramfs_create,
 	.lookup		= simple_lookup,
diff --git a/fs/ramfs/internal.h b/fs/ramfs/internal.h
new file mode 100644
index 000000000000..272c8a7120b0
--- /dev/null
+++ b/fs/ramfs/internal.h
@@ -0,0 +1,15 @@
+/* internal.h: ramfs internal definitions
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+
+extern struct address_space_operations ramfs_aops;
+extern struct file_operations ramfs_file_operations;
+extern struct inode_operations ramfs_file_inode_operations;
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index e0a4faa9610c..953b6df5d037 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -5,6 +5,16 @@ struct inode *ramfs_get_inode(struct super_block *sb, int mode, dev_t dev);
 struct super_block *ramfs_get_sb(struct file_system_type *fs_type,
 	 int flags, const char *dev_name, void *data);
 
+#ifndef CONFIG_MMU
+extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
+						   unsigned long addr,
+						   unsigned long len,
+						   unsigned long pgoff,
+						   unsigned long flags);
+
+extern int ramfs_nommu_mmap(struct file *file, struct vm_area_struct *vma);
+#endif
+
 extern struct file_operations ramfs_file_operations;
 extern struct vm_operations_struct generic_file_vm_ops;
 
-- 
cgit v1.2.3


From b0e15190ead07056ab0c3844a499ff35e66d27cc Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Jan 2006 00:11:42 -0800
Subject: [PATCH] NOMMU: Make SYSV IPC SHM use ramfs facilities on NOMMU

The attached patch makes the SYSV IPC shared memory facilities use the new
ramfs facilities on a no-MMU kernel.

The following changes are made:

 (1) There are now shmem_mmap() and shmem_get_unmapped_area() functions to
     allow the IPC SHM facilities to commune with the tiny-shmem and shmem
     code.

 (2) ramfs files now need resizing using do_truncate() rather than by modifying
     the inode size directly (see shmem_file_setup()). This causes ramfs to
     attempt to bind a block of pages of sufficient size to the inode.

 (3) CONFIG_SYSVIPC is no longer contingent on CONFIG_MMU.

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h |  9 +++++++++
 init/Kconfig       |  1 -
 ipc/shm.c          | 18 +++++++++++++-----
 mm/nommu.c         |  7 +++++++
 mm/shmem.c         |  2 +-
 mm/tiny-shmem.c    | 29 ++++++++++++++++++++++++++++-
 6 files changed, 58 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 75ec04e2f184..26f3094911a5 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -654,9 +654,18 @@ static inline struct mempolicy *shmem_get_policy(struct vm_area_struct *vma,
 }
 #endif
 struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags);
+extern int shmem_mmap(struct file *file, struct vm_area_struct *vma);
 
 int shmem_zero_setup(struct vm_area_struct *);
 
+#ifndef CONFIG_MMU
+extern unsigned long shmem_get_unmapped_area(struct file *file,
+					     unsigned long addr,
+					     unsigned long len,
+					     unsigned long pgoff,
+					     unsigned long flags);
+#endif
+
 static inline int can_do_mlock(void)
 {
 	if (capable(CAP_IPC_LOCK))
diff --git a/init/Kconfig b/init/Kconfig
index ce737e02c5a2..24e0f7c756c0 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -105,7 +105,6 @@ config SWAP
 
 config SYSVIPC
 	bool "System V IPC"
-	depends on MMU
 	---help---
 	  Inter Process Communication is a suite of library functions and
 	  system calls which let processes (running programs) synchronize and
diff --git a/ipc/shm.c b/ipc/shm.c
index 587d836d80d9..0ef4a1cf3e27 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -157,14 +157,22 @@ static void shm_close (struct vm_area_struct *shmd)
 
 static int shm_mmap(struct file * file, struct vm_area_struct * vma)
 {
-	file_accessed(file);
-	vma->vm_ops = &shm_vm_ops;
-	shm_inc(file->f_dentry->d_inode->i_ino);
-	return 0;
+	int ret;
+
+	ret = shmem_mmap(file, vma);
+	if (ret == 0) {
+		vma->vm_ops = &shm_vm_ops;
+		shm_inc(file->f_dentry->d_inode->i_ino);
+	}
+
+	return ret;
 }
 
 static struct file_operations shm_file_operations = {
-	.mmap	= shm_mmap
+	.mmap	= shm_mmap,
+#ifndef CONFIG_MMU
+	.get_unmapped_area = shmem_get_unmapped_area,
+#endif
 };
 
 static struct vm_operations_struct shm_vm_ops = {
diff --git a/mm/nommu.c b/mm/nommu.c
index c1196812876b..c10262d68232 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1177,3 +1177,10 @@ int in_gate_area_no_task(unsigned long addr)
 {
 	return 0;
 }
+
+struct page *filemap_nopage(struct vm_area_struct *area,
+			unsigned long address, int *type)
+{
+	BUG();
+	return NULL;
+}
diff --git a/mm/shmem.c b/mm/shmem.c
index 65c148efa2ed..a1f2f02af724 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1270,7 +1270,7 @@ out_nomem:
 	return retval;
 }
 
-static int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+int shmem_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	file_accessed(file);
 	vma->vm_ops = &shmem_vm_ops;
diff --git a/mm/tiny-shmem.c b/mm/tiny-shmem.c
index b58abcf44ed6..cdc6d431972b 100644
--- a/mm/tiny-shmem.c
+++ b/mm/tiny-shmem.c
@@ -81,13 +81,19 @@ struct file *shmem_file_setup(char *name, loff_t size, unsigned long flags)
 		goto close_file;
 
 	d_instantiate(dentry, inode);
-	inode->i_size = size;
 	inode->i_nlink = 0;	/* It is unlinked */
+
 	file->f_vfsmnt = mntget(shm_mnt);
 	file->f_dentry = dentry;
 	file->f_mapping = inode->i_mapping;
 	file->f_op = &ramfs_file_operations;
 	file->f_mode = FMODE_WRITE | FMODE_READ;
+
+	/* notify everyone as to the change of file size */
+	error = do_truncate(dentry, size, file);
+	if (error < 0)
+		goto close_file;
+
 	return file;
 
 close_file:
@@ -123,3 +129,24 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
 {
 	return 0;
 }
+
+int shmem_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	file_accessed(file);
+#ifndef CONFIG_MMU
+	return ramfs_nommu_mmap(file, vma);
+#else
+	return 0;
+#endif
+}
+
+#ifndef CONFIG_MMU
+unsigned long shmem_get_unmapped_area(struct file *file,
+				      unsigned long addr,
+				      unsigned long len,
+				      unsigned long pgoff,
+				      unsigned long flags)
+{
+	return ramfs_nommu_get_unmapped_area(file, addr, len, pgoff, flags);
+}
+#endif
-- 
cgit v1.2.3


From 7ee1dd3fee22f15728f545d266403fc977e1eb99 Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Fri, 6 Jan 2006 00:11:44 -0800
Subject: [PATCH] FRV: Make futex code compilable on nommu [try #2]

Make the futex code compilable and usable on NOMMU by making the attempt to
handle page faults conditional on CONFIG_MMU.  If this is not enabled, then
we can assume that EFAULT returned from futex_atomic_op_inuser() is not
recoverable, and that the address lies outside of valid memory.

handle_mm_fault() is made to BUG if called on NOMMU without attempting to
invoke the actual handler (__handle_mm_fault).

Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mm.h | 22 +++++++++++++++++++---
 kernel/futex.c     |  7 +++++++
 2 files changed, 26 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 26f3094911a5..bc01fff3aa01 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -717,12 +717,28 @@ extern int vmtruncate(struct inode * inode, loff_t offset);
 extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
 extern int install_page(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, struct page *page, pgprot_t prot);
 extern int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long addr, unsigned long pgoff, pgprot_t prot);
-extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma, unsigned long address, int write_access);
 
-static inline int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, unsigned long address, int write_access)
+#ifdef CONFIG_MMU
+extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
+			unsigned long address, int write_access);
+
+static inline int handle_mm_fault(struct mm_struct *mm,
+			struct vm_area_struct *vma, unsigned long address,
+			int write_access)
 {
-	return __handle_mm_fault(mm, vma, address, write_access) & (~VM_FAULT_WRITE);
+	return __handle_mm_fault(mm, vma, address, write_access) &
+				(~VM_FAULT_WRITE);
 }
+#else
+static inline int handle_mm_fault(struct mm_struct *mm,
+			struct vm_area_struct *vma, unsigned long address,
+			int write_access)
+{
+	/* should never happen if there's no MMU */
+	BUG();
+	return VM_FAULT_SIGBUS;
+}
+#endif
 
 extern int make_pages_present(unsigned long addr, unsigned long end);
 extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write);
diff --git a/kernel/futex.c b/kernel/futex.c
index 5e71a6bf6f6b..5efa2f978032 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -356,6 +356,13 @@ retry:
 		if (bh1 != bh2)
 			spin_unlock(&bh2->lock);
 
+#ifndef CONFIG_MMU
+		/* we don't get EFAULT from MMU faults if we don't have an MMU,
+		 * but we might get them from range checking */
+		ret = op_ret;
+		goto out;
+#endif
+
 		if (unlikely(op_ret != -EFAULT)) {
 			ret = op_ret;
 			goto out;
-- 
cgit v1.2.3


From f90b8116032f4216d260e31f966a3585319387ac Mon Sep 17 00:00:00 2001
From: Jordan Crouse <jordan.crouse@amd.com>
Date: Fri, 6 Jan 2006 00:12:14 -0800
Subject: [PATCH] Base support for AMD Geode GX/LX processors

Provide basic support for the AMD Geode GX and LX processors.

Signed-off-by: Jordan Crouse <jordan.crouse@amd.com>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 MAINTAINERS                  |  7 +++++++
 arch/i386/Kconfig.cpu        | 14 ++++++++++----
 arch/i386/kernel/cpu/amd.c   |  7 ++++++-
 arch/i386/kernel/cpu/cyrix.c | 27 ++++++++++++++++++++++++++-
 include/asm-i386/module.h    |  4 +++-
 include/linux/pci_ids.h      | 10 ++++++++++
 6 files changed, 62 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/MAINTAINERS b/MAINTAINERS
index e9db0d6b928a..cb536bbed9ff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -258,6 +258,13 @@ P:	Ivan Kokshaysky
 M:	ink@jurassic.park.msu.ru
 S:	Maintained for 2.4; PCI support for 2.6.
 
+AMD GEODE PROCESSOR/CHIPSET SUPPORT
+P:      Jordan Crouse
+M:      info-linux@geode.amd.com
+L:	info-linux@geode.amd.com
+W:	http://www.amd.com/us-en/ConnectivitySolutions/TechnicalResources/0,,50_2334_2452_11363,00.html
+S:	Supported
+
 APM DRIVER
 P:	Stephen Rothwell
 M:	sfr@canb.auug.org.au
diff --git a/arch/i386/Kconfig.cpu b/arch/i386/Kconfig.cpu
index 53bbb3c008ee..79603b3471f9 100644
--- a/arch/i386/Kconfig.cpu
+++ b/arch/i386/Kconfig.cpu
@@ -39,6 +39,7 @@ config M386
 	  - "Winchip-2" for IDT Winchip 2.
 	  - "Winchip-2A" for IDT Winchips with 3dNow! capabilities.
 	  - "GeodeGX1" for Geode GX1 (Cyrix MediaGX).
+	  - "Geode GX/LX" For AMD Geode GX and LX processors.
 	  - "CyrixIII/VIA C3" for VIA Cyrix III or VIA C3.
 	  - "VIA C3-2 for VIA C3-2 "Nehemiah" (model 9 and above).
 
@@ -171,6 +172,11 @@ config MGEODEGX1
 	help
 	  Select this for a Geode GX1 (Cyrix MediaGX) chip.
 
+config MGEODE_LX
+       bool "Geode GX/LX"
+       help
+         Select this for AMD Geode GX and LX processors.
+
 config MCYRIXIII
 	bool "CyrixIII/VIA-C3"
 	help
@@ -220,8 +226,8 @@ config X86_XADD
 config X86_L1_CACHE_SHIFT
 	int
 	default "7" if MPENTIUM4 || X86_GENERIC
-	default "4" if X86_ELAN || M486 || M386
-	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODEGX1
+	default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
+	default "5" if MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
 	default "6" if MK7 || MK8 || MPENTIUMM
 
 config RWSEM_GENERIC_SPINLOCK
@@ -290,12 +296,12 @@ config X86_INTEL_USERCOPY
 
 config X86_USE_PPRO_CHECKSUM
 	bool
-	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON
+	depends on MWINCHIP3D || MWINCHIP2 || MWINCHIPC6 || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || MK8 || MVIAC3_2 || MEFFICEON || MGEODE_LX
 	default y
 
 config X86_USE_3DNOW
 	bool
-	depends on MCYRIXIII || MK7
+	depends on MCYRIXIII || MK7 || MGEODE_LX
 	default y
 
 config X86_OOSTORE
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index e344ef88cfcd..e7697e077f6b 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -161,8 +161,13 @@ static void __init init_amd(struct cpuinfo_x86 *c)
 					set_bit(X86_FEATURE_K6_MTRR, c->x86_capability);
 				break;
 			}
-			break;
 
+			if (c->x86_model == 10) {
+				/* AMD Geode LX is model 10 */
+				/* placeholder for any needed mods */
+				break;
+			}
+			break;
 		case 6: /* An Athlon/Duron */
  
 			/* Bit 15 of Athlon specific MSR 15, needs to be 0
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index ff87cc22b323..75015975d038 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -342,6 +342,31 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
 	return;
 }
 
+/*
+ * Handle National Semiconductor branded processors
+ */
+static void __devinit init_nsc(struct cpuinfo_x86 *c)
+{
+	/* There may be GX1 processors in the wild that are branded
+	 * NSC and not Cyrix.
+	 *
+	 * This function only handles the GX processor, and kicks every
+	 * thing else to the Cyrix init function above - that should
+	 * cover any processors that might have been branded differently
+	 * after NSC aquired Cyrix.
+	 *
+	 * If this breaks your GX1 horribly, please e-mail
+	 * info-linux@ldcmail.amd.com to tell us.
+	 */
+
+	/* Handle the GX (Formally known as the GX2) */
+
+	if (c->x86 == 5 && c->x86_model == 5)
+		display_cacheinfo(c);
+	else
+		init_cyrix(c);
+}
+
 /*
  * Cyrix CPUs without cpuid or with cpuid not yet enabled can be detected
  * by the fact that they preserve the flags across the division of 5/2.
@@ -422,7 +447,7 @@ int __init cyrix_init_cpu(void)
 static struct cpu_dev nsc_cpu_dev __initdata = {
 	.c_vendor	= "NSC",
 	.c_ident 	= { "Geode by NSC" },
-	.c_init		= init_cyrix,
+	.c_init		= init_nsc,
 	.c_identify	= generic_identify,
 };
 
diff --git a/include/asm-i386/module.h b/include/asm-i386/module.h
index eb7f2b4234aa..424661d25bd3 100644
--- a/include/asm-i386/module.h
+++ b/include/asm-i386/module.h
@@ -52,8 +52,10 @@ struct mod_arch_specific
 #define MODULE_PROC_FAMILY "CYRIXIII "
 #elif defined CONFIG_MVIAC3_2
 #define MODULE_PROC_FAMILY "VIAC3-2 "
-#elif CONFIG_MGEODEGX1
+#elif defined CONFIG_MGEODEGX1
 #define MODULE_PROC_FAMILY "GEODEGX1 "
+#elif defined CONFIG_MGEODE_LX
+#define MODULE_PROC_FAMILY "GEODE "
 #else
 #error unknown processor family
 #endif
diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h
index 4f01710485cd..24db7248301a 100644
--- a/include/linux/pci_ids.h
+++ b/include/linux/pci_ids.h
@@ -394,6 +394,13 @@
 #define PCI_DEVICE_ID_NS_87410		0xd001
 #define PCI_DEVICE_ID_NS_CS5535_IDE	0x002d
 
+#define PCI_DEVICE_ID_NS_CS5535_HOST_BRIDGE  0x0028
+#define PCI_DEVICE_ID_NS_CS5535_ISA_BRIDGE   0x002b
+#define PCI_DEVICE_ID_NS_CS5535_IDE          0x002d
+#define PCI_DEVICE_ID_NS_CS5535_AUDIO        0x002e
+#define PCI_DEVICE_ID_NS_CS5535_USB          0x002f
+#define PCI_DEVICE_ID_NS_CS5535_VIDEO        0x0030
+
 #define PCI_VENDOR_ID_TSENG		0x100c
 #define PCI_DEVICE_ID_TSENG_W32P_2	0x3202
 #define PCI_DEVICE_ID_TSENG_W32P_b	0x3205
@@ -496,6 +503,9 @@
 
 #define PCI_DEVICE_ID_AMD_CS5536_IDE	0x209A
 
+#define PCI_DEVICE_ID_AMD_LX_VIDEO  0x2081
+#define PCI_DEVICE_ID_AMD_LX_AES    0x2082
+
 #define PCI_VENDOR_ID_TRIDENT		0x1023
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_DX	0x2000
 #define PCI_DEVICE_ID_TRIDENT_4DWAVE_NX	0x2001
-- 
cgit v1.2.3


From eee45269b0f5979c70bc151c6c2f4e5f4f5ababe Mon Sep 17 00:00:00 2001
From: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Date: Fri, 6 Jan 2006 00:12:21 -0800
Subject: [PATCH] Alpha: convert to generic irq framework (generic part)

Thanks to Christoph for doing most of the work.

This allows automatic SMP IRQ affinity assignment other than default "all
interrupts on all CPUs" which is rather expensive.  This might be useful if
the hardware can be programmed to distribute interrupts among different
CPUs, like Alpha does.

Signed-off-by: Ivan Kokshaysky <ink@jurassic.park.msu.ru>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Richard Henderson <rth@twiddle.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/irq.h | 11 +++++++++++
 kernel/irq/manage.c |  2 ++
 kernel/irq/proc.c   |  4 +++-
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/irq.h b/include/linux/irq.h
index f04ba20712a2..60f8bc78a35a 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -221,6 +221,17 @@ extern void note_interrupt(unsigned int irq, irq_desc_t *desc,
 extern int can_request_irq(unsigned int irq, unsigned long irqflags);
 
 extern void init_irq_proc(void);
+
+#ifdef CONFIG_AUTO_IRQ_AFFINITY
+extern int select_smp_affinity(unsigned int irq);
+#else
+static inline int
+select_smp_affinity(unsigned int irq)
+{
+	return 1;
+}
+#endif
+
 #endif
 
 extern hw_irq_controller no_irq_type;  /* needed in every arch ? */
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 81c49a4d679e..97d5559997d2 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -366,6 +366,8 @@ int request_irq(unsigned int irq,
 	action->next = NULL;
 	action->dev_id = dev_id;
 
+	select_smp_affinity(irq);
+
 	retval = setup_irq(irq, action);
 	if (retval)
 		kfree(action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index f26e534c6585..8a64a4844cde 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -68,7 +68,9 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer,
 	 */
 	cpus_and(tmp, new_value, cpu_online_map);
 	if (cpus_empty(tmp))
-		return -EINVAL;
+		/* Special case for empty set - allow the architecture
+		   code to set default SMP affinity. */
+		return select_smp_affinity(irq) ? -EINVAL : full_count;
 
 	proc_set_irq_affinity(irq, new_value);
 
-- 
cgit v1.2.3


From 7088a5c00103ef48782d6c359cd12b13a10666e6 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 6 Jan 2006 00:13:05 -0800
Subject: [PATCH] swsusp: introduce the swap map structure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch introduces the swap map structure that can be used by swsusp for
keeping tracks of data pages written to the swap.   The structure itself is
described in a comment within the patch.

The overall idea is to reduce the amount of metadata written to the swap and
to write and read the image pages sequentially, in a file-alike way.  This
makes the swap-handling part of swsusp fairly independent of its
snapshot-handling part and will hopefully allow us to completely separate
these two parts in the future.

This patch is needed to remove the suspend image size limit imposed by the
limited size of the swsusp_info structure, which is essential for x86-64
systems with more than 512 MB of RAM.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h |   6 +-
 kernel/power/disk.c     |   8 +-
 kernel/power/power.h    |  13 +-
 kernel/power/snapshot.c |  14 +-
 kernel/power/swsusp.c   | 558 ++++++++++++++++++++++++++++++++++--------------
 5 files changed, 418 insertions(+), 181 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index a61c04f804b2..33bbaea23aaf 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -14,11 +14,7 @@
 typedef struct pbe {
 	unsigned long address;		/* address of the copy */
 	unsigned long orig_address;	/* original address of page */
-	swp_entry_t swap_address;	
-
-	struct pbe *next;	/* also used as scratch space at
-				 * end of page (see link, diskpage)
-				 */
+	struct pbe *next;
 } suspend_pagedir_t;
 
 #define for_each_pbe(pbe, pblist) \
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 4d944b281b28..76a5131b0e80 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -25,9 +25,9 @@
 extern suspend_disk_method_t pm_disk_mode;
 
 extern int swsusp_suspend(void);
-extern int swsusp_write(void);
+extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages);
 extern int swsusp_check(void);
-extern int swsusp_read(void);
+extern int swsusp_read(struct pbe **pblist_ptr);
 extern void swsusp_close(void);
 extern int swsusp_resume(void);
 
@@ -176,7 +176,7 @@ int pm_suspend_disk(void)
 	if (in_suspend) {
 		device_resume();
 		pr_debug("PM: writing image.\n");
-		error = swsusp_write();
+		error = swsusp_write(pagedir_nosave, nr_copy_pages);
 		if (!error)
 			power_down(pm_disk_mode);
 		else {
@@ -247,7 +247,7 @@ static int software_resume(void)
 
 	pr_debug("PM: Reading swsusp image.\n");
 
-	if ((error = swsusp_read())) {
+	if ((error = swsusp_read(&pagedir_nosave))) {
 		swsusp_free();
 		goto Thaw;
 	}
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 6c042b5ee14b..977877c6dcfc 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -9,19 +9,14 @@
 #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1)
 #endif
 
-#define MAX_PBES	((PAGE_SIZE - sizeof(struct new_utsname) \
-			- 4 - 3*sizeof(unsigned long) - sizeof(int) \
-			- sizeof(void *)) / sizeof(swp_entry_t))
-
 struct swsusp_info {
 	struct new_utsname	uts;
 	u32			version_code;
 	unsigned long		num_physpages;
 	int			cpus;
 	unsigned long		image_pages;
-	unsigned long		pagedir_pages;
-	suspend_pagedir_t	* suspend_pagedir;
-	swp_entry_t		pagedir[MAX_PBES];
+	unsigned long		pages;
+	swp_entry_t		start;
 } __attribute__((aligned(PAGE_SIZE)));
 
 
@@ -67,6 +62,8 @@ extern asmlinkage int swsusp_arch_resume(void);
 
 extern void free_pagedir(struct pbe *pblist);
 extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed);
-extern void create_pbe_list(struct pbe *pblist, unsigned nr_pages);
 extern void swsusp_free(void);
 extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed);
+extern unsigned int snapshot_nr_pages(void);
+extern struct pbe *snapshot_pblist(void);
+extern void snapshot_pblist_set(struct pbe *pblist);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 4a6dbcefd378..152d56cdf017 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -33,6 +33,9 @@
 
 #include "power.h"
 
+struct pbe *pagedir_nosave;
+unsigned int nr_copy_pages;
+
 #ifdef CONFIG_HIGHMEM
 struct highmem_page {
 	char *data;
@@ -244,7 +247,7 @@ static inline void fill_pb_page(struct pbe *pbpage)
  *	of memory pages allocated with alloc_pagedir()
  */
 
-void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
+static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
 {
 	struct pbe *pbpage, *p;
 	unsigned int num = PBES_PER_PAGE;
@@ -261,7 +264,6 @@ void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
 			p->next = p + 1;
 		p->next = NULL;
 	}
-	pr_debug("create_pbe_list(): initialized %d PBEs\n", num);
 }
 
 /**
@@ -332,7 +334,8 @@ struct pbe *alloc_pagedir(unsigned int nr_pages, gfp_t gfp_mask, int safe_needed
 	if (!pbe) { /* get_zeroed_page() failed */
 		free_pagedir(pblist);
 		pblist = NULL;
-        }
+        } else
+        	create_pbe_list(pblist, nr_pages);
 	return pblist;
 }
 
@@ -395,7 +398,6 @@ static struct pbe *swsusp_alloc(unsigned int nr_pages)
 		printk(KERN_ERR "suspend: Allocating pagedir failed.\n");
 		return NULL;
 	}
-	create_pbe_list(pblist, nr_pages);
 
 	if (alloc_data_pages(pblist, GFP_ATOMIC | __GFP_COLD, 0)) {
 		printk(KERN_ERR "suspend: Allocating image pages failed.\n");
@@ -421,10 +423,6 @@ asmlinkage int swsusp_save(void)
 		 (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE,
 		 PAGES_FOR_IO, nr_free_pages());
 
-	/* This is needed because of the fixed size of swsusp_info */
-	if (MAX_PBES < (nr_pages + PBES_PER_PAGE - 1) / PBES_PER_PAGE)
-		return -ENOSPC;
-
 	if (!enough_free_mem(nr_pages)) {
 		printk(KERN_ERR "swsusp: Not enough free memory\n");
 		return -ENOMEM;
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index bd3097c583bf..b09bd7c0998d 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -30,6 +30,9 @@
  * Alex Badea <vampire@go.ro>:
  * Fixed runaway init
  *
+ * Rafael J. Wysocki <rjw@sisk.pl>
+ * Added the swap map data structure and reworked the handling of swap
+ *
  * More state savers are welcome. Especially for the scsi layer...
  *
  * For TODOs,FIXMEs also look in Documentation/power/swsusp.txt
@@ -76,18 +79,6 @@ static int restore_highmem(void) { return 0; }
 
 extern char resume_file[];
 
-/* Local variables that should not be affected by save */
-unsigned int nr_copy_pages __nosavedata = 0;
-
-/* Suspend pagedir is allocated before final copy, therefore it
-   must be freed after resume
-
-   Warning: this is even more evil than it seems. Pagedirs this file
-   talks about are completely different from page directories used by
-   MMU hardware.
- */
-suspend_pagedir_t *pagedir_nosave __nosavedata = NULL;
-
 #define SWSUSP_SIG	"S1SUSPEND"
 
 static struct swsusp_header {
@@ -238,48 +229,205 @@ static int write_page(unsigned long addr, swp_entry_t *loc)
 }
 
 /**
- *	data_free - Free the swap entries used by the saved image.
+ *	Swap map-handling functions
+ *
+ *	The swap map is a data structure used for keeping track of each page
+ *	written to the swap.  It consists of many swap_map_page structures
+ *	that contain each an array of MAP_PAGE_SIZE swap entries.
+ *	These structures are linked together with the help of either the
+ *	.next (in memory) or the .next_swap (in swap) member.
  *
- *	Walk the list of used swap entries and free each one.
- *	This is only used for cleanup when suspend fails.
+ *	The swap map is created during suspend.  At that time we need to keep
+ *	it in memory, because we have to free all of the allocated swap
+ *	entries if an error occurs.  The memory needed is preallocated
+ *	so that we know in advance if there's enough of it.
+ *
+ *	The first swap_map_page structure is filled with the swap entries that
+ *	correspond to the first MAP_PAGE_SIZE data pages written to swap and
+ *	so on.  After the all of the data pages have been written, the order
+ *	of the swap_map_page structures in the map is reversed so that they
+ *	can be read from swap in the original order.  This causes the data
+ *	pages to be loaded in exactly the same order in which they have been
+ *	saved.
+ *
+ *	During resume we only need to use one swap_map_page structure
+ *	at a time, which means that we only need to use two memory pages for
+ *	reading the image - one for reading the swap_map_page structures
+ *	and the second for reading the data pages from swap.
  */
-static void data_free(void)
+
+#define MAP_PAGE_SIZE	((PAGE_SIZE - sizeof(swp_entry_t) - sizeof(void *)) \
+			/ sizeof(swp_entry_t))
+
+struct swap_map_page {
+	swp_entry_t		entries[MAP_PAGE_SIZE];
+	swp_entry_t		next_swap;
+	struct swap_map_page	*next;
+};
+
+static inline void free_swap_map(struct swap_map_page *swap_map)
 {
-	swp_entry_t entry;
-	struct pbe *p;
+	struct swap_map_page *swp;
 
-	for_each_pbe (p, pagedir_nosave) {
-		entry = p->swap_address;
-		if (entry.val)
-			swap_free(entry);
-		else
-			break;
+	while (swap_map) {
+		swp = swap_map->next;
+		free_page((unsigned long)swap_map);
+		swap_map = swp;
+	}
+}
+
+static struct swap_map_page *alloc_swap_map(unsigned int nr_pages)
+{
+	struct swap_map_page *swap_map, *swp;
+	unsigned n = 0;
+
+	if (!nr_pages)
+		return NULL;
+
+	pr_debug("alloc_swap_map(): nr_pages = %d\n", nr_pages);
+	swap_map = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+	swp = swap_map;
+	for (n = MAP_PAGE_SIZE; n < nr_pages; n += MAP_PAGE_SIZE) {
+		swp->next = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+		swp = swp->next;
+		if (!swp) {
+			free_swap_map(swap_map);
+			return NULL;
+		}
 	}
+	return swap_map;
 }
 
 /**
- *	data_write - Write saved image to swap.
- *
- *	Walk the list of pages in the image and sync each one to swap.
+ *	reverse_swap_map - reverse the order of pages in the swap map
+ *	@swap_map
  */
-static int data_write(void)
+
+static inline struct swap_map_page *reverse_swap_map(struct swap_map_page *swap_map)
 {
-	int error = 0, i = 0;
-	unsigned int mod = nr_copy_pages / 100;
-	struct pbe *p;
+	struct swap_map_page *prev, *next;
+
+	prev = NULL;
+	while (swap_map) {
+		next = swap_map->next;
+		swap_map->next = prev;
+		prev = swap_map;
+		swap_map = next;
+	}
+	return prev;
+}
 
-	if (!mod)
-		mod = 1;
+/**
+ *	free_swap_map_entries - free the swap entries allocated to store
+ *	the swap map @swap_map (this is only called in case of an error)
+ */
+static inline void free_swap_map_entries(struct swap_map_page *swap_map)
+{
+	while (swap_map) {
+		if (swap_map->next_swap.val)
+			swap_free(swap_map->next_swap);
+		swap_map = swap_map->next;
+	}
+}
 
-	printk( "Writing data to swap (%d pages)...     ", nr_copy_pages );
-	for_each_pbe (p, pagedir_nosave) {
-		if (!(i%mod))
-			printk( "\b\b\b\b%3d%%", i / mod );
-		if ((error = write_page(p->address, &p->swap_address)))
+/**
+ *	save_swap_map - save the swap map used for tracing the data pages
+ *	stored in the swap
+ */
+
+static int save_swap_map(struct swap_map_page *swap_map, swp_entry_t *start)
+{
+	swp_entry_t entry = (swp_entry_t){0};
+	int error;
+
+	while (swap_map) {
+		swap_map->next_swap = entry;
+		if ((error = write_page((unsigned long)swap_map, &entry)))
 			return error;
-		i++;
+		swap_map = swap_map->next;
 	}
-	printk("\b\b\b\bdone\n");
+	*start = entry;
+	return 0;
+}
+
+/**
+ *	free_image_entries - free the swap entries allocated to store
+ *	the image data pages (this is only called in case of an error)
+ */
+
+static inline void free_image_entries(struct swap_map_page *swp)
+{
+	unsigned k;
+
+	while (swp) {
+		for (k = 0; k < MAP_PAGE_SIZE; k++)
+			if (swp->entries[k].val)
+				swap_free(swp->entries[k]);
+		swp = swp->next;
+	}
+}
+
+/**
+ *	The swap_map_handle structure is used for handling the swap map in
+ *	a file-alike way
+ */
+
+struct swap_map_handle {
+	struct swap_map_page *cur;
+	unsigned int k;
+};
+
+static inline void init_swap_map_handle(struct swap_map_handle *handle,
+                                        struct swap_map_page *map)
+{
+	handle->cur = map;
+	handle->k = 0;
+}
+
+static inline int swap_map_write_page(struct swap_map_handle *handle,
+                                      unsigned long addr)
+{
+	int error;
+
+	error = write_page(addr, handle->cur->entries + handle->k);
+	if (error)
+		return error;
+	if (++handle->k >= MAP_PAGE_SIZE) {
+		handle->cur = handle->cur->next;
+		handle->k = 0;
+	}
+	return 0;
+}
+
+/**
+ *	save_image_data - save the data pages pointed to by the PBEs
+ *	from the list @pblist using the swap map handle @handle
+ *	(assume there are @nr_pages data pages to save)
+ */
+
+static int save_image_data(struct pbe *pblist,
+                           struct swap_map_handle *handle,
+                           unsigned int nr_pages)
+{
+	unsigned int m;
+	struct pbe *p;
+	int error = 0;
+
+	printk("Saving image data pages (%u pages) ...     ", nr_pages);
+	m = nr_pages / 100;
+	if (!m)
+		m = 1;
+	nr_pages = 0;
+	for_each_pbe (p, pblist) {
+		error = swap_map_write_page(handle, p->address);
+		if (error)
+			break;
+		if (!(nr_pages % m))
+			printk("\b\b\b\b%3d%%", nr_pages / m);
+		nr_pages++;
+	}
+	if (!error)
+		printk("\b\b\b\bdone\n");
 	return error;
 }
 
@@ -295,19 +443,20 @@ static void dump_info(void)
 	pr_debug(" swsusp: UTS Domain: %s\n",swsusp_info.uts.domainname);
 	pr_debug(" swsusp: CPUs: %d\n",swsusp_info.cpus);
 	pr_debug(" swsusp: Image: %ld Pages\n",swsusp_info.image_pages);
-	pr_debug(" swsusp: Pagedir: %ld Pages\n",swsusp_info.pagedir_pages);
+	pr_debug(" swsusp: Total: %ld Pages\n", swsusp_info.pages);
 }
 
-static void init_header(void)
+static void init_header(unsigned int nr_pages)
 {
 	memset(&swsusp_info, 0, sizeof(swsusp_info));
 	swsusp_info.version_code = LINUX_VERSION_CODE;
 	swsusp_info.num_physpages = num_physpages;
 	memcpy(&swsusp_info.uts, &system_utsname, sizeof(system_utsname));
 
-	swsusp_info.suspend_pagedir = pagedir_nosave;
 	swsusp_info.cpus = num_online_cpus();
-	swsusp_info.image_pages = nr_copy_pages;
+	swsusp_info.image_pages = nr_pages;
+	swsusp_info.pages = nr_pages +
+		((nr_pages * sizeof(long) + PAGE_SIZE - 1) >> PAGE_SHIFT);
 }
 
 static int close_swap(void)
@@ -326,39 +475,53 @@ static int close_swap(void)
 }
 
 /**
- *	free_pagedir_entries - Free pages used by the page directory.
- *
- *	This is used during suspend for error recovery.
+ *	pack_orig_addresses - the .orig_address fields of the PBEs from the
+ *	list starting at @pbe are stored in the array @buf[] (1 page)
  */
 
-static void free_pagedir_entries(void)
+static inline struct pbe *pack_orig_addresses(unsigned long *buf,
+                                              struct pbe *pbe)
 {
-	int i;
+	int j;
 
-	for (i = 0; i < swsusp_info.pagedir_pages; i++)
-		swap_free(swsusp_info.pagedir[i]);
+	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
+		buf[j] = pbe->orig_address;
+		pbe = pbe->next;
+	}
+	if (!pbe)
+		for (; j < PAGE_SIZE / sizeof(long); j++)
+			buf[j] = 0;
+	return pbe;
 }
 
-
 /**
- *	write_pagedir - Write the array of pages holding the page directory.
- *	@last:	Last swap entry we write (needed for header).
+ *	save_image_metadata - save the .orig_address fields of the PBEs
+ *	from the list @pblist using the swap map handle @handle
  */
 
-static int write_pagedir(void)
+static int save_image_metadata(struct pbe *pblist,
+                               struct swap_map_handle *handle)
 {
-	int error = 0;
+	unsigned long *buf;
 	unsigned int n = 0;
-	struct pbe *pbe;
+	struct pbe *p;
+	int error = 0;
 
-	printk( "Writing pagedir...");
-	for_each_pb_page (pbe, pagedir_nosave) {
-		if ((error = write_page((unsigned long)pbe, &swsusp_info.pagedir[n++])))
-			return error;
+	printk("Saving image metadata ... ");
+	buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
+	if (!buf)
+		return -ENOMEM;
+	p = pblist;
+	while (p) {
+		p = pack_orig_addresses(buf, p);
+		error = swap_map_write_page(handle, (unsigned long)buf);
+		if (error)
+			break;
+		n++;
 	}
-
-	swsusp_info.pagedir_pages = n;
-	printk("done (%u pages)\n", n);
+	free_page((unsigned long)buf);
+	if (!error)
+		printk("done (%u pages saved)\n", n);
 	return error;
 }
 
@@ -384,33 +547,48 @@ static int enough_swap(unsigned int nr_pages)
 
 /**
  *	write_suspend_image - Write entire image and metadata.
- *
  */
-static int write_suspend_image(void)
+static int write_suspend_image(struct pbe *pblist, unsigned int nr_pages)
 {
+	struct swap_map_page *swap_map;
+	struct swap_map_handle handle;
 	int error;
 
-	if (!enough_swap(nr_copy_pages)) {
+	if (!enough_swap(nr_pages)) {
 		printk(KERN_ERR "swsusp: Not enough free swap\n");
 		return -ENOSPC;
 	}
 
-	init_header();
-	if ((error = data_write()))
-		goto FreeData;
+	init_header(nr_pages);
+	swap_map = alloc_swap_map(swsusp_info.pages);
+	if (!swap_map)
+		return -ENOMEM;
+	init_swap_map_handle(&handle, swap_map);
 
-	if ((error = write_pagedir()))
-		goto FreePagedir;
+	error = save_image_metadata(pblist, &handle);
+	if (!error)
+		error = save_image_data(pblist, &handle, nr_pages);
+	if (error)
+		goto Free_image_entries;
 
-	if ((error = close_swap()))
-		goto FreePagedir;
- Done:
+	swap_map = reverse_swap_map(swap_map);
+	error = save_swap_map(swap_map, &swsusp_info.start);
+	if (error)
+		goto Free_map_entries;
+
+	error = close_swap();
+	if (error)
+		goto Free_map_entries;
+
+Free_swap_map:
+	free_swap_map(swap_map);
 	return error;
- FreePagedir:
-	free_pagedir_entries();
- FreeData:
-	data_free();
-	goto Done;
+
+Free_map_entries:
+	free_swap_map_entries(swap_map);
+Free_image_entries:
+	free_image_entries(swap_map);
+	goto Free_swap_map;
 }
 
 /* It is important _NOT_ to umount filesystems at this point. We want
@@ -418,7 +596,7 @@ static int write_suspend_image(void)
  * filesystem clean: it is not. (And it does not matter, if we resume
  * correctly, we'll mark system clean, anyway.)
  */
-int swsusp_write(void)
+int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
 {
 	int error;
 
@@ -427,14 +605,12 @@ int swsusp_write(void)
 		return error;
 	}
 	lock_swapdevices();
-	error = write_suspend_image();
+	error = write_suspend_image(pblist, nr_pages);
 	/* This will unlock ignored swap devices since writing is finished */
 	lock_swapdevices();
 	return error;
 }
 
-
-
 int swsusp_suspend(void)
 {
 	int error;
@@ -531,7 +707,6 @@ static void copy_page_backup_list(struct pbe *dst, struct pbe *src)
 	/* We assume both lists contain the same number of elements */
 	while (src) {
 		dst->orig_address = src->orig_address;
-		dst->swap_address = src->swap_address;
 		dst = dst->next;
 		src = src->next;
 	}
@@ -611,6 +786,61 @@ static int bio_write_page(pgoff_t page_off, void *page)
 	return submit(WRITE, page_off, page);
 }
 
+/**
+ *	The following functions allow us to read data using a swap map
+ *	in a file-alike way
+ */
+
+static inline void release_swap_map_reader(struct swap_map_handle *handle)
+{
+	if (handle->cur)
+		free_page((unsigned long)handle->cur);
+	handle->cur = NULL;
+}
+
+static inline int get_swap_map_reader(struct swap_map_handle *handle,
+                                      swp_entry_t start)
+{
+	int error;
+
+	if (!swp_offset(start))
+		return -EINVAL;
+	handle->cur = (struct swap_map_page *)get_zeroed_page(GFP_ATOMIC);
+	if (!handle->cur)
+		return -ENOMEM;
+	error = bio_read_page(swp_offset(start), handle->cur);
+	if (error) {
+		release_swap_map_reader(handle);
+		return error;
+	}
+	handle->k = 0;
+	return 0;
+}
+
+static inline int swap_map_read_page(struct swap_map_handle *handle, void *buf)
+{
+	unsigned long offset;
+	int error;
+
+	if (!handle->cur)
+		return -EINVAL;
+	offset = swp_offset(handle->cur->entries[handle->k]);
+	if (!offset)
+		return -EINVAL;
+	error = bio_read_page(offset, buf);
+	if (error)
+		return error;
+	if (++handle->k >= MAP_PAGE_SIZE) {
+		handle->k = 0;
+		offset = swp_offset(handle->cur->next_swap);
+		if (!offset)
+			release_swap_map_reader(handle);
+		else
+			error = bio_read_page(offset, handle->cur);
+	}
+	return error;
+}
+
 /*
  * Sanity check if this image makes sense with this kernel/swap context
  * I really don't think that it's foolproof but more than nothing..
@@ -639,7 +869,6 @@ static const char *sanity_check(void)
 	return NULL;
 }
 
-
 static int check_header(void)
 {
 	const char *reason = NULL;
@@ -653,7 +882,6 @@ static int check_header(void)
 		printk(KERN_ERR "swsusp: Resume mismatch: %s\n",reason);
 		return -EPERM;
 	}
-	nr_copy_pages = swsusp_info.image_pages;
 	return error;
 }
 
@@ -680,75 +908,88 @@ static int check_sig(void)
 }
 
 /**
- *	data_read - Read image pages from swap.
- *
- *	You do not need to check for overlaps, check_pagedir()
- *	already did that.
+ *	load_image_data - load the image data using the swap map handle
+ *	@handle and store them using the page backup list @pblist
+ *	(assume there are @nr_pages pages to load)
  */
 
-static int data_read(struct pbe *pblist)
+static int load_image_data(struct pbe *pblist,
+                           struct swap_map_handle *handle,
+                           unsigned int nr_pages)
 {
+	int error;
+	unsigned int m;
 	struct pbe *p;
-	int error = 0;
-	int i = 0;
-	int mod = swsusp_info.image_pages / 100;
-
-	if (!mod)
-		mod = 1;
-
-	printk("swsusp: Reading image data (%lu pages):     ",
-			swsusp_info.image_pages);
-
-	for_each_pbe (p, pblist) {
-		if (!(i % mod))
-			printk("\b\b\b\b%3d%%", i / mod);
 
-		if ((error = bio_read_page(swp_offset(p->swap_address),
-						(void *)p->address)))
-			return error;
-
-		i++;
+	if (!pblist)
+		return -EINVAL;
+	printk("Loading image data pages (%u pages) ...     ", nr_pages);
+	m = nr_pages / 100;
+	if (!m)
+		m = 1;
+	nr_pages = 0;
+	p = pblist;
+	while (p) {
+		error = swap_map_read_page(handle, (void *)p->address);
+		if (error)
+			break;
+		p = p->next;
+		if (!(nr_pages % m))
+			printk("\b\b\b\b%3d%%", nr_pages / m);
+		nr_pages++;
 	}
-	printk("\b\b\b\bdone\n");
+	if (!error)
+		printk("\b\b\b\bdone\n");
 	return error;
 }
 
 /**
- *	read_pagedir - Read page backup list pages from swap
+ *	unpack_orig_addresses - copy the elements of @buf[] (1 page) to
+ *	the PBEs in the list starting at @pbe
  */
 
-static int read_pagedir(struct pbe *pblist)
+static inline struct pbe *unpack_orig_addresses(unsigned long *buf,
+                                                struct pbe *pbe)
 {
-	struct pbe *pbpage, *p;
-	unsigned int i = 0;
-	int error;
+	int j;
 
-	if (!pblist)
-		return -EFAULT;
+	for (j = 0; j < PAGE_SIZE / sizeof(long) && pbe; j++) {
+		pbe->orig_address = buf[j];
+		pbe = pbe->next;
+	}
+	return pbe;
+}
 
-	printk("swsusp: Reading pagedir (%lu pages)\n",
-			swsusp_info.pagedir_pages);
+/**
+ *	load_image_metadata - load the image metadata using the swap map
+ *	handle @handle and put them into the PBEs in the list @pblist
+ */
 
-	for_each_pb_page (pbpage, pblist) {
-		unsigned long offset = swp_offset(swsusp_info.pagedir[i++]);
+static int load_image_metadata(struct pbe *pblist, struct swap_map_handle *handle)
+{
+	struct pbe *p;
+	unsigned long *buf;
+	unsigned int n = 0;
+	int error = 0;
 
-		error = -EFAULT;
-		if (offset) {
-			p = (pbpage + PB_PAGE_SKIP)->next;
-			error = bio_read_page(offset, (void *)pbpage);
-			(pbpage + PB_PAGE_SKIP)->next = p;
-		}
+	printk("Loading image metadata ... ");
+	buf = (unsigned long *)get_zeroed_page(GFP_ATOMIC);
+	if (!buf)
+		return -ENOMEM;
+	p = pblist;
+	while (p) {
+		error = swap_map_read_page(handle, buf);
 		if (error)
 			break;
+		p = unpack_orig_addresses(buf, p);
+		n++;
 	}
-
+	free_page((unsigned long)buf);
 	if (!error)
-		BUG_ON(i != swsusp_info.pagedir_pages);
-
+		printk("done (%u pages loaded)\n", n);
 	return error;
 }
 
-
 static int check_suspend_image(void)
 {
 	int error = 0;
@@ -762,34 +1003,39 @@ static int check_suspend_image(void)
 	return 0;
 }
 
-static int read_suspend_image(void)
+static int read_suspend_image(struct pbe **pblist_ptr)
 {
 	int error = 0;
-	struct pbe *p;
+	struct pbe *p, *pblist;
+	struct swap_map_handle handle;
+	unsigned int nr_pages = swsusp_info.image_pages;
 
-	if (!(p = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 0)))
+	p = alloc_pagedir(nr_pages, GFP_ATOMIC, 0);
+	if (!p)
 		return -ENOMEM;
-
-	if ((error = read_pagedir(p)))
+	error = get_swap_map_reader(&handle, swsusp_info.start);
+	if (error)
+		/* The PBE list at p will be released by swsusp_free() */
 		return error;
-	create_pbe_list(p, nr_copy_pages);
-	mark_unsafe_pages(p);
-	pagedir_nosave = alloc_pagedir(nr_copy_pages, GFP_ATOMIC, 1);
-	if (pagedir_nosave) {
-		create_pbe_list(pagedir_nosave, nr_copy_pages);
-		copy_page_backup_list(pagedir_nosave, p);
+	error = load_image_metadata(p, &handle);
+	if (!error) {
+		mark_unsafe_pages(p);
+		pblist = alloc_pagedir(nr_pages, GFP_ATOMIC, 1);
+		if (pblist)
+			copy_page_backup_list(pblist, p);
+		free_pagedir(p);
+		if (!pblist)
+			error = -ENOMEM;
+
+		/* Allocate memory for the image and read the data from swap */
+		if (!error)
+			error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
+		if (!error)
+			error = load_image_data(pblist, &handle, nr_pages);
+		if (!error)
+			*pblist_ptr = pblist;
 	}
-	free_pagedir(p);
-	if (!pagedir_nosave)
-		return -ENOMEM;
-
-	/* Allocate memory for the image and read the data from swap */
-
-	error = alloc_data_pages(pagedir_nosave, GFP_ATOMIC, 1);
-
-	if (!error)
-		error = data_read(pagedir_nosave);
-
+	release_swap_map_reader(&handle);
 	return error;
 }
 
@@ -821,7 +1067,7 @@ int swsusp_check(void)
  *	swsusp_read - Read saved image from swap.
  */
 
-int swsusp_read(void)
+int swsusp_read(struct pbe **pblist_ptr)
 {
 	int error;
 
@@ -830,7 +1076,7 @@ int swsusp_read(void)
 		return PTR_ERR(resume_bdev);
 	}
 
-	error = read_suspend_image();
+	error = read_suspend_image(pblist_ptr);
 	blkdev_put(resume_bdev);
 
 	if (!error)
-- 
cgit v1.2.3


From 72a97e08394a3b2e75481ff680ec2a0591e3cba4 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 6 Jan 2006 00:13:46 -0800
Subject: [PATCH] swsusp: improve freeing of memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch makes swsusp free only as much memory as needed to complete the
suspend and not as much as possible.   In the most of cases this should speed
up the suspend and make the system much more responsive after resume,
especially if a GUI (eg.  X Windows) is used.

If needed, the old behavior (ie to free as much memory as possible during
suspend) can be restored by unsetting FAST_FREE in power.h

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/suspend.h |  2 +-
 kernel/power/disk.c     | 30 +++--------------------
 kernel/power/power.h    | 14 ++++++++---
 kernel/power/snapshot.c | 65 +++++++++++++++++++++++++++++++++++++++++++++----
 kernel/power/swsusp.c   | 52 ++++++++++++++++++++++++++++++++++++++-
 5 files changed, 126 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/suspend.h b/include/linux/suspend.h
index 33bbaea23aaf..5dc94e777fab 100644
--- a/include/linux/suspend.h
+++ b/include/linux/suspend.h
@@ -73,6 +73,6 @@ unsigned long get_safe_page(gfp_t gfp_mask);
  * XXX: We try to keep some more pages free so that I/O operations succeed
  * without paging. Might this be more?
  */
-#define PAGES_FOR_IO	512
+#define PAGES_FOR_IO	1024
 
 #endif /* _LINUX_SWSUSP_H */
diff --git a/kernel/power/disk.c b/kernel/power/disk.c
index 76a5131b0e80..9e51cdf7b78d 100644
--- a/kernel/power/disk.c
+++ b/kernel/power/disk.c
@@ -24,6 +24,7 @@
 
 extern suspend_disk_method_t pm_disk_mode;
 
+extern int swsusp_shrink_memory(void);
 extern int swsusp_suspend(void);
 extern int swsusp_write(struct pbe *pblist, unsigned int nr_pages);
 extern int swsusp_check(void);
@@ -73,31 +74,6 @@ static void power_down(suspend_disk_method_t mode)
 static int in_suspend __nosavedata = 0;
 
 
-/**
- *	free_some_memory -  Try to free as much memory as possible
- *
- *	... but do not OOM-kill anyone
- *
- *	Notice: all userland should be stopped at this point, or
- *	livelock is possible.
- */
-
-static void free_some_memory(void)
-{
-	unsigned int i = 0;
-	unsigned int tmp;
-	unsigned long pages = 0;
-	char *p = "-\\|/";
-
-	printk("Freeing memory...  ");
-	while ((tmp = shrink_all_memory(10000))) {
-		pages += tmp;
-		printk("\b%c", p[i++ % 4]);
-	}
-	printk("\bdone (%li pages freed)\n", pages);
-}
-
-
 static inline void platform_finish(void)
 {
 	if (pm_disk_mode == PM_DISK_PLATFORM) {
@@ -127,8 +103,8 @@ static int prepare_processes(void)
 	}
 
 	/* Free memory before shutting down devices. */
-	free_some_memory();
-	return 0;
+	if (!(error = swsusp_shrink_memory()))
+		return 0;
 thaw:
 	thaw_processes();
 	enable_nonboot_cpus();
diff --git a/kernel/power/power.h b/kernel/power/power.h
index 977877c6dcfc..acdc83b3d890 100644
--- a/kernel/power/power.h
+++ b/kernel/power/power.h
@@ -49,18 +49,26 @@ extern void thaw_processes(void);
 extern int pm_prepare_console(void);
 extern void pm_restore_console(void);
 
-
 /* References to section boundaries */
 extern const void __nosave_begin, __nosave_end;
 
 extern unsigned int nr_copy_pages;
-extern suspend_pagedir_t *pagedir_nosave;
-extern suspend_pagedir_t *pagedir_save;
+extern struct pbe *pagedir_nosave;
+
+/*
+ * This compilation switch determines the way in which memory will be freed
+ * during suspend.  If defined, only as much memory will be freed as needed
+ * to complete the suspend, which will make it go faster.  Otherwise, the
+ * largest possible amount of memory will be freed.
+ */
+#define FAST_FREE	1
 
 extern asmlinkage int swsusp_arch_suspend(void);
 extern asmlinkage int swsusp_arch_resume(void);
 
+extern unsigned int count_data_pages(void);
 extern void free_pagedir(struct pbe *pblist);
+extern void release_eaten_pages(void);
 extern struct pbe *alloc_pagedir(unsigned nr_pages, gfp_t gfp_mask, int safe_needed);
 extern void swsusp_free(void);
 extern int alloc_data_pages(struct pbe *pblist, gfp_t gfp_mask, int safe_needed);
diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 152d56cdf017..e80d282dbf58 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -37,6 +37,31 @@ struct pbe *pagedir_nosave;
 unsigned int nr_copy_pages;
 
 #ifdef CONFIG_HIGHMEM
+unsigned int count_highmem_pages(void)
+{
+	struct zone *zone;
+	unsigned long zone_pfn;
+	unsigned int n = 0;
+
+	for_each_zone (zone)
+		if (is_highmem(zone)) {
+			mark_free_pages(zone);
+			for (zone_pfn = 0; zone_pfn < zone->spanned_pages; zone_pfn++) {
+				struct page *page;
+				unsigned long pfn = zone_pfn + zone->zone_start_pfn;
+				if (!pfn_valid(pfn))
+					continue;
+				page = pfn_to_page(pfn);
+				if (PageReserved(page))
+					continue;
+				if (PageNosaveFree(page))
+					continue;
+				n++;
+			}
+		}
+	return n;
+}
+
 struct highmem_page {
 	char *data;
 	struct page *page;
@@ -152,17 +177,15 @@ static int saveable(struct zone *zone, unsigned long *zone_pfn)
 	BUG_ON(PageReserved(page) && PageNosave(page));
 	if (PageNosave(page))
 		return 0;
-	if (PageReserved(page) && pfn_is_nosave(pfn)) {
-		pr_debug("[nosave pfn 0x%lx]", pfn);
+	if (PageReserved(page) && pfn_is_nosave(pfn))
 		return 0;
-	}
 	if (PageNosaveFree(page))
 		return 0;
 
 	return 1;
 }
 
-static unsigned count_data_pages(void)
+unsigned int count_data_pages(void)
 {
 	struct zone *zone;
 	unsigned long zone_pfn;
@@ -266,6 +289,35 @@ static inline void create_pbe_list(struct pbe *pblist, unsigned int nr_pages)
 	}
 }
 
+/**
+ *	On resume it is necessary to trace and eventually free the unsafe
+ *	pages that have been allocated, because they are needed for I/O
+ *	(on x86-64 we likely will "eat" these pages once again while
+ *	creating the temporary page translation tables)
+ */
+
+struct eaten_page {
+	struct eaten_page *next;
+	char padding[PAGE_SIZE - sizeof(void *)];
+};
+
+static struct eaten_page *eaten_pages = NULL;
+
+void release_eaten_pages(void)
+{
+	struct eaten_page *p, *q;
+
+	p = eaten_pages;
+	while (p) {
+		q = p->next;
+		/* We don't want swsusp_free() to free this page again */
+		ClearPageNosave(virt_to_page(p));
+		free_page((unsigned long)p);
+		p = q;
+	}
+	eaten_pages = NULL;
+}
+
 /**
  *	@safe_needed - on resume, for storing the PBE list and the image,
  *	we can only use memory pages that do not conflict with the pages
@@ -284,9 +336,12 @@ static inline void *alloc_image_page(gfp_t gfp_mask, int safe_needed)
 	if (safe_needed)
 		do {
 			res = (void *)get_zeroed_page(gfp_mask);
-			if (res && PageNosaveFree(virt_to_page(res)))
+			if (res && PageNosaveFree(virt_to_page(res))) {
 				/* This is for swsusp_free() */
 				SetPageNosave(virt_to_page(res));
+				((struct eaten_page *)res)->next = eaten_pages;
+				eaten_pages = res;
+			}
 		} while (res && PageNosaveFree(virt_to_page(res)));
 	else
 		res = (void *)get_zeroed_page(gfp_mask);
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index b09bd7c0998d..f77f9397a364 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -70,11 +70,13 @@
 #include "power.h"
 
 #ifdef CONFIG_HIGHMEM
+unsigned int count_highmem_pages(void);
 int save_highmem(void);
 int restore_highmem(void);
 #else
 static int save_highmem(void) { return 0; }
 static int restore_highmem(void) { return 0; }
+static unsigned int count_highmem_pages(void) { return 0; }
 #endif
 
 extern char resume_file[];
@@ -611,6 +613,52 @@ int swsusp_write(struct pbe *pblist, unsigned int nr_pages)
 	return error;
 }
 
+/**
+ *	swsusp_shrink_memory -  Try to free as much memory as needed
+ *
+ *	... but do not OOM-kill anyone
+ *
+ *	Notice: all userland should be stopped before it is called, or
+ *	livelock is possible.
+ */
+
+#define SHRINK_BITE	10000
+
+int swsusp_shrink_memory(void)
+{
+	long tmp;
+	struct zone *zone;
+	unsigned long pages = 0;
+	unsigned int i = 0;
+	char *p = "-\\|/";
+
+	printk("Shrinking memory...  ");
+	do {
+#ifdef FAST_FREE
+		tmp = 2 * count_highmem_pages();
+		tmp += tmp / 50 + count_data_pages();
+		tmp += (tmp + PBES_PER_PAGE - 1) / PBES_PER_PAGE +
+			PAGES_FOR_IO;
+		for_each_zone (zone)
+			if (!is_highmem(zone))
+				tmp -= zone->free_pages;
+		if (tmp > 0) {
+			tmp = shrink_all_memory(SHRINK_BITE);
+			if (!tmp)
+				return -ENOMEM;
+			pages += tmp;
+		}
+#else
+		tmp = shrink_all_memory(SHRINK_BITE);
+		pages += tmp;
+#endif
+		printk("\b%c", p[i++%4]);
+	} while (tmp > 0);
+	printk("\bdone (%lu pages freed)\n", pages);
+
+	return 0;
+}
+
 int swsusp_suspend(void)
 {
 	int error;
@@ -1030,8 +1078,10 @@ static int read_suspend_image(struct pbe **pblist_ptr)
 		/* Allocate memory for the image and read the data from swap */
 		if (!error)
 			error = alloc_data_pages(pblist, GFP_ATOMIC, 1);
-		if (!error)
+		if (!error) {
+			release_eaten_pages();
 			error = load_image_data(pblist, &handle, nr_pages);
+		}
 		if (!error)
 			*pblist_ptr = pblist;
 	}
-- 
cgit v1.2.3


From 3a291a20bd6fcfafb2109031f0760a0d3e92ecd7 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 6 Jan 2006 00:16:37 -0800
Subject: [PATCH] mm: add a new function (needed for swap suspend)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This adds the function get_swap_page_of_type() allowing us to specify an index
in swap_info[] and select a swap_info_struct structure to be used for
allocating a swap page.

This function (or another one of similar functionality) will be necessary for
implementing the image-writing part of swsusp in the user space.   It can also
be used for simplifying the current in-kernel implementation of the
image-writing part of swsusp.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/swap.h |  1 +
 mm/swapfile.c        | 20 ++++++++++++++++++++
 2 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/swap.h b/include/linux/swap.h
index bd6641784107..556617bcf7ac 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -209,6 +209,7 @@ extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern void si_swapinfo(struct sysinfo *);
 extern swp_entry_t get_swap_page(void);
+extern swp_entry_t get_swap_page_of_type(int type);
 extern int swap_duplicate(swp_entry_t);
 extern int valid_swaphandles(swp_entry_t, unsigned long *);
 extern void swap_free(swp_entry_t);
diff --git a/mm/swapfile.c b/mm/swapfile.c
index edafeace301f..6da4b28b896b 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -211,6 +211,26 @@ noswap:
 	return (swp_entry_t) {0};
 }
 
+swp_entry_t get_swap_page_of_type(int type)
+{
+	struct swap_info_struct *si;
+	pgoff_t offset;
+
+	spin_lock(&swap_lock);
+	si = swap_info + type;
+	if (si->flags & SWP_WRITEOK) {
+		nr_swap_pages--;
+		offset = scan_swap_map(si);
+		if (offset) {
+			spin_unlock(&swap_lock);
+			return swp_entry(type, offset);
+		}
+		nr_swap_pages++;
+	}
+	spin_unlock(&swap_lock);
+	return (swp_entry_t) {0};
+}
+
 static struct swap_info_struct * swap_info_get(swp_entry_t entry)
 {
 	struct swap_info_struct * p;
-- 
cgit v1.2.3


From 347a8dc3b815f0c0fa62a1df075184ffe4cbdcf1 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Fri, 6 Jan 2006 00:19:28 -0800
Subject: [PATCH] s390: cleanup Kconfig

Sanitize some s390 Kconfig options.  We have ARCH_S390, ARCH_S390X,
ARCH_S390_31, 64BIT, S390_SUPPORT and COMPAT.  Replace these 6 options by
S390, 64BIT and COMPAT.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/s390/Kconfig                  | 27 +++++++--------------------
 arch/s390/Makefile                 |  6 ++----
 arch/s390/appldata/appldata_base.c |  8 ++++----
 arch/s390/crypto/crypt_s390.h      | 10 +++++-----
 arch/s390/defconfig                |  4 +---
 arch/s390/kernel/Makefile          | 15 +++++----------
 arch/s390/kernel/cpcmd.c           | 16 ++++++++--------
 arch/s390/kernel/entry64.S         | 18 +++++++++---------
 arch/s390/kernel/head.S            |  4 ++--
 arch/s390/kernel/module.c          | 12 ++++++------
 arch/s390/kernel/process.c         | 12 ++++++------
 arch/s390/kernel/ptrace.c          | 24 ++++++++++++------------
 arch/s390/kernel/reipl_diag.c      |  2 +-
 arch/s390/kernel/setup.c           | 14 +++++++-------
 arch/s390/kernel/signal.c          |  2 +-
 arch/s390/kernel/smp.c             |  8 ++++----
 arch/s390/kernel/sys_s390.c        | 12 +++++-------
 arch/s390/kernel/traps.c           | 10 +++++-----
 arch/s390/kernel/vmlinux.lds.S     |  2 +-
 arch/s390/lib/Makefile             |  5 ++---
 arch/s390/lib/spinlock.c           |  2 +-
 arch/s390/mm/extmem.c              |  2 +-
 arch/s390/mm/fault.c               | 18 +++++++++---------
 arch/s390/mm/init.c                |  8 ++++----
 arch/s390/mm/mmap.c                |  2 +-
 block/Kconfig                      |  2 +-
 crypto/Kconfig                     |  8 ++++----
 drivers/char/Kconfig               |  2 +-
 drivers/char/hangcheck-timer.c     |  2 +-
 drivers/char/watchdog/Kconfig      |  2 +-
 drivers/input/evdev.c              |  2 +-
 drivers/net/phy/Kconfig            |  2 +-
 drivers/s390/block/Kconfig         |  8 ++++----
 drivers/s390/block/dasd.c          |  2 +-
 drivers/s390/block/dasd_diag.c     |  2 +-
 drivers/s390/block/dasd_diag.h     |  6 +++---
 drivers/s390/block/dasd_eckd.c     |  2 +-
 drivers/s390/block/dasd_fba.c      |  2 +-
 drivers/s390/block/xpram.c         |  4 ++--
 drivers/s390/char/vmwatchdog.c     |  2 +-
 drivers/s390/cio/cio.c             |  2 +-
 drivers/s390/cio/device_id.c       |  2 +-
 drivers/s390/cio/ioasm.h           |  4 ++--
 drivers/s390/cio/qdio.c            |  2 +-
 drivers/s390/cio/qdio.h            | 34 +++++++++++++++++-----------------
 drivers/s390/crypto/z90hardware.c  |  8 ++++----
 drivers/s390/net/Kconfig           |  2 +-
 drivers/s390/net/claw.c            |  6 +++---
 drivers/s390/s390mach.c            | 10 +++++-----
 drivers/s390/sysinfo.c             |  2 +-
 drivers/scsi/Kconfig               |  2 +-
 fs/partitions/Kconfig              |  2 +-
 fs/proc/array.c                    |  2 +-
 include/asm-s390/unistd.h          |  2 +-
 include/linux/irq.h                |  2 +-
 init/Kconfig                       |  2 +-
 init/do_mounts_rd.c                |  4 ++--
 kernel/panic.c                     |  4 ++--
 kernel/sysctl.c                    |  6 +++---
 lib/Kconfig.debug                  |  2 +-
 60 files changed, 183 insertions(+), 208 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 1846fbfd6bf2..6fe532d82417 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -23,14 +23,14 @@ config GENERIC_BUST_SPINLOCK
 
 mainmenu "Linux Kernel Configuration"
 
-config ARCH_S390
+config S390
 	bool
 	default y
 
 config UID16
 	bool
 	default y
-	depends on ARCH_S390X = 'n'
+	depends on !64BIT
 
 source "init/Kconfig"
 
@@ -38,20 +38,12 @@ menu "Base setup"
 
 comment "Processor type and features"
 
-config ARCH_S390X
+config 64BIT
 	bool "64 bit kernel"
 	help
 	  Select this option if you have a 64 bit IBM zSeries machine
 	  and want to use the 64 bit addressing mode.
 
-config 64BIT
-	def_bool ARCH_S390X
-
-config ARCH_S390_31
-	bool
-	depends on ARCH_S390X = 'n'
-	default y
-
 config SMP
 	bool "Symmetric multi-processing support"
 	---help---
@@ -101,20 +93,15 @@ config MATHEMU
 	  on older S/390 machines. Say Y unless you know your machine doesn't
 	  need this.
 
-config S390_SUPPORT
+config COMPAT
 	bool "Kernel support for 31 bit emulation"
-	depends on ARCH_S390X
+	depends on 64BIT
 	help
 	  Select this option if you want to enable your system kernel to
 	  handle system-calls from ELF binaries for 31 bit ESA.  This option
 	  (and some other stuff like libraries and such) is needed for
 	  executing 31 bit applications.  It is safe to say "Y".
 
-config COMPAT
-	bool
-	depends on S390_SUPPORT
-	default y
-
 config SYSVIPC_COMPAT
 	bool
 	depends on COMPAT && SYSVIPC
@@ -122,7 +109,7 @@ config SYSVIPC_COMPAT
 
 config BINFMT_ELF32
 	tristate "Kernel support for 31 bit ELF binaries"
-	depends on S390_SUPPORT
+	depends on COMPAT
 	help
 	  This allows you to run 32-bit Linux/ELF binaries on your zSeries
 	  in 64 bit mode. Everybody wants this; say Y.
@@ -135,7 +122,7 @@ choice
 
 config MARCH_G5
 	bool "S/390 model G5 and G6"
-	depends on ARCH_S390_31
+	depends on !64BIT
 	help
 	  Select this to build a 31 bit kernel that works
 	  on all S/390 and zSeries machines.
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 73a09a6ee6c8..6c6b197898d0 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -13,16 +13,14 @@
 # Copyright (C) 1994 by Linus Torvalds
 #
 
-ifdef CONFIG_ARCH_S390_31
+ifndef CONFIG_64BIT
 LDFLAGS		:= -m elf_s390
 CFLAGS		+= -m31
 AFLAGS		+= -m31
 UTS_MACHINE	:= s390
 STACK_SIZE	:= 8192
 CHECKFLAGS	+= -D__s390__
-endif
-
-ifdef CONFIG_ARCH_S390X
+else
 LDFLAGS		:= -m elf64_s390
 MODFLAGS	+= -fpic -D__PIC__
 CFLAGS		+= -m64
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index dee6ab54984d..d06a8d71c71d 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -40,7 +40,7 @@
 
 #define TOD_MICRO	0x01000			/* nr. of TOD clock units
 						   for 1 microsecond */
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 
 #define APPLDATA_START_INTERVAL_REC 0x00   	/* Function codes for */
 #define APPLDATA_STOP_REC	    0x01	/* DIAG 0xDC	  */
@@ -54,13 +54,13 @@
 #define APPLDATA_GEN_EVENT_RECORD   0x82
 #define APPLDATA_START_CONFIG_REC   0x83
 
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 
 /*
  * Parameter list for DIAGNOSE X'DC'
  */
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 struct appldata_parameter_list {
 	u16 diag;		/* The DIAGNOSE code X'00DC'          */
 	u8  function;		/* The function code for the DIAGNOSE */
@@ -82,7 +82,7 @@ struct appldata_parameter_list {
 	u64 product_id_addr;
 	u64 buffer_addr;
 };
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 /*
  * /proc entries (sysctl)
diff --git a/arch/s390/crypto/crypt_s390.h b/arch/s390/crypto/crypt_s390.h
index d6712cfa6def..d1c259a7fe33 100644
--- a/arch/s390/crypto/crypt_s390.h
+++ b/arch/s390/crypto/crypt_s390.h
@@ -112,7 +112,7 @@ struct crypt_s390_query_status {
  * [ret] is the variable to receive the error code
  * [ERR] is the error code value
  */
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
 #define __crypt_s390_fixup \
 	".section .fixup,\"ax\" \n"	\
 	"7:	lhi	%0,%h[e1] \n"	\
@@ -129,7 +129,7 @@ struct crypt_s390_query_status {
 	"	.long	0b,7b \n"	\
 	"	.long	1b,8b \n"	\
 	".previous"
-#else /* __s390x__ */
+#else /* CONFIG_64BIT */
 #define __crypt_s390_fixup \
 	".section .fixup,\"ax\" \n"	\
 	"7:	lhi	%0,%h[e1] \n"	\
@@ -142,7 +142,7 @@ struct crypt_s390_query_status {
 	"	.quad	0b,7b \n"	\
 	"	.quad	1b,8b \n"	\
 	".previous"
-#endif /* __s390x__ */
+#endif /* CONFIG_64BIT */
 
 /*
  * Standard code for setting the result of s390 crypto instructions.
@@ -150,10 +150,10 @@ struct crypt_s390_query_status {
  * [result]: the register containing the result (e.g. second operand length
  * to compute number of processed bytes].
  */
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
 #define __crypt_s390_set_result \
 	"	lr	%0,%[result] \n"
-#else /* __s390x__ */
+#else /* CONFIG_64BIT */
 #define __crypt_s390_set_result \
 	"	lgr	%0,%[result] \n"
 #endif
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index f195c7ea1d7b..7d23edc6facb 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -6,7 +6,7 @@
 CONFIG_MMU=y
 CONFIG_RWSEM_XCHGADD_ALGORITHM=y
 CONFIG_GENERIC_CALIBRATE_DELAY=y
-CONFIG_ARCH_S390=y
+CONFIG_S390=y
 CONFIG_UID16=y
 
 #
@@ -89,9 +89,7 @@ CONFIG_DEFAULT_IOSCHED="anticipatory"
 #
 # Processor type and features
 #
-# CONFIG_ARCH_S390X is not set
 # CONFIG_64BIT is not set
-CONFIG_ARCH_S390_31=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=32
 CONFIG_HOTPLUG_CPU=y
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 7434c32bc631..4865e4b49464 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -8,31 +8,26 @@ obj-y	:=  bitmap.o traps.o time.o process.o \
             setup.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o \
             semaphore.o s390_ext.o debug.o profile.o irq.o reipl_diag.o
 
+obj-y	+= $(if $(CONFIG_64BIT),entry64.o,entry.o)
+obj-y	+= $(if $(CONFIG_64BIT),reipl64.o,reipl.o)
+
 extra-y				+= head.o init_task.o vmlinux.lds
 
 obj-$(CONFIG_MODULES)		+= s390_ksyms.o module.o
 obj-$(CONFIG_SMP)		+= smp.o
 
-obj-$(CONFIG_S390_SUPPORT)	+= compat_linux.o compat_signal.o \
+obj-$(CONFIG_COMPAT)		+= compat_linux.o compat_signal.o \
 					compat_ioctl.o compat_wrapper.o \
 					compat_exec_domain.o
 obj-$(CONFIG_BINFMT_ELF32)	+= binfmt_elf32.o
 
-obj-$(CONFIG_ARCH_S390_31)	+= entry.o reipl.o
-obj-$(CONFIG_ARCH_S390X)	+= entry64.o reipl64.o
-
 obj-$(CONFIG_VIRT_TIMER)	+= vtime.o
 
 # Kexec part
 S390_KEXEC_OBJS := machine_kexec.o crash.o
-ifeq ($(CONFIG_ARCH_S390X),y)
-S390_KEXEC_OBJS += relocate_kernel64.o
-else
-S390_KEXEC_OBJS += relocate_kernel.o
-endif
+S390_KEXEC_OBJS += $(if $(CONFIG_64BIT),relocate_kernel64.o,relocate_kernel.o)
 obj-$(CONFIG_KEXEC) += $(S390_KEXEC_OBJS)
 
-
 #
 # This is just to get the dependencies...
 #
diff --git a/arch/s390/kernel/cpcmd.c b/arch/s390/kernel/cpcmd.c
index d47fecb42cc5..4ef44e536b2c 100644
--- a/arch/s390/kernel/cpcmd.c
+++ b/arch/s390/kernel/cpcmd.c
@@ -39,7 +39,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 
 	if (response != NULL && rlen > 0) {
 		memset(response, 0, rlen);
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		asm volatile (	"lra	2,0(%2)\n"
 				"lr	4,%3\n"
 				"o	4,%6\n"
@@ -55,7 +55,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 				: "a" (cpcmd_buf), "d" (cmdlen),
 				"a" (response), "d" (rlen), "m" (mask)
 				: "cc", "2", "3", "4", "5" );
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
                 asm volatile (	"lrag	2,0(%2)\n"
 				"lgr	4,%3\n"
 				"o	4,%6\n"
@@ -73,11 +73,11 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 				: "a" (cpcmd_buf), "d" (cmdlen),
 				"a" (response), "d" (rlen), "m" (mask)
 				: "cc", "2", "3", "4", "5" );
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
                 EBCASC(response, rlen);
         } else {
 		return_len = 0;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
                 asm volatile (	"lra	2,0(%1)\n"
 				"lr	3,%2\n"
 				"diag	2,3,0x8\n"
@@ -85,7 +85,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 				: "=d" (return_code)
 				: "a" (cpcmd_buf), "d" (cmdlen)
 				: "2", "3"  );
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
                 asm volatile (	"lrag	2,0(%1)\n"
 				"lgr	3,%2\n"
 				"sam31\n"
@@ -95,7 +95,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 				: "=d" (return_code)
 				: "a" (cpcmd_buf), "d" (cmdlen)
 				: "2", "3" );
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
         }
 	spin_unlock_irqrestore(&cpcmd_lock, flags);
 	if (response_code != NULL)
@@ -105,7 +105,7 @@ int  __cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 
 EXPORT_SYMBOL(__cpcmd);
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 {
 	char *lowbuf;
@@ -129,4 +129,4 @@ int cpcmd(const char *cmd, char *response, int rlen, int *response_code)
 }
 
 EXPORT_SYMBOL(cpcmd);
-#endif		/* CONFIG_ARCH_S390X */
+#endif		/* CONFIG_64BIT */
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 4eb71ffcf484..369ab4413ec7 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -213,7 +213,7 @@ sysc_nr_ok:
 	mvc	SP_ARGS(8,%r15),SP_R7(%r15)
 sysc_do_restart:
 	larl    %r10,sys_call_table
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 	tm	__TI_flags+5(%r9),(_TIF_31BIT>>16)  # running in 31 bit mode ?
 	jno	sysc_noemu
 	larl    %r10,sys_call_table_emu  # use 31 bit emulation system calls
@@ -361,7 +361,7 @@ sys_clone_glue:
         la      %r2,SP_PTREGS(%r15)    # load pt_regs
         jg      sys_clone              # branch to sys_clone
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_clone_glue: 
         la      %r2,SP_PTREGS(%r15)    # load pt_regs
         jg      sys32_clone            # branch to sys32_clone
@@ -383,7 +383,7 @@ sys_execve_glue:
         bnz     0(%r12)               # it did fail -> store result in gpr2
         b       6(%r12)               # SKIP STG 2,SP_R2(15) in
                                       # system_call/sysc_tracesys
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_execve_glue:        
         la      %r2,SP_PTREGS(%r15)   # load pt_regs
 	lgr     %r12,%r14             # save return address
@@ -398,7 +398,7 @@ sys_sigreturn_glue:
         la      %r2,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys_sigreturn         # branch to sys_sigreturn
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_sigreturn_glue:     
         la      %r2,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys32_sigreturn       # branch to sys32_sigreturn
@@ -408,7 +408,7 @@ sys_rt_sigreturn_glue:
         la      %r2,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys_rt_sigreturn      # branch to sys_sigreturn
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_rt_sigreturn_glue:     
         la      %r2,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys32_rt_sigreturn    # branch to sys32_sigreturn
@@ -429,7 +429,7 @@ sys_sigsuspend_glue:
 	la      %r14,6(%r14)          # skip store of return value
         jg      sys_sigsuspend        # branch to sys_sigsuspend
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_sigsuspend_glue:    
 	llgfr	%r4,%r4               # unsigned long			
         lgr     %r5,%r4               # move mask back
@@ -449,7 +449,7 @@ sys_rt_sigsuspend_glue:
 	la      %r14,6(%r14)          # skip store of return value
         jg      sys_rt_sigsuspend     # branch to sys_rt_sigsuspend
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_rt_sigsuspend_glue: 
 	llgfr	%r3,%r3               # size_t			
         lgr     %r4,%r3               # move sigsetsize parameter
@@ -464,7 +464,7 @@ sys_sigaltstack_glue:
         la      %r4,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys_sigaltstack       # branch to sys_sigreturn
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 sys32_sigaltstack_glue:
         la      %r4,SP_PTREGS(%r15)   # load pt_regs as parameter
         jg      sys32_sigaltstack_wrapper # branch to sys_sigreturn
@@ -1009,7 +1009,7 @@ sys_call_table:
 #include "syscalls.S"
 #undef SYSCALL
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 
 #define SYSCALL(esa,esame,emu)	.long emu
 	.globl  sys_call_table_emu
diff --git a/arch/s390/kernel/head.S b/arch/s390/kernel/head.S
index d31a97c89f68..ea88d066bf04 100644
--- a/arch/s390/kernel/head.S
+++ b/arch/s390/kernel/head.S
@@ -30,7 +30,7 @@
 #include <asm/thread_info.h>
 #include <asm/page.h>
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 #define ARCH_OFFSET	4
 #else
 #define ARCH_OFFSET	0
@@ -539,7 +539,7 @@ ipl_devno:
 	.word 0
 .endm
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 #include "head64.S"
 #else
 #include "head31.S"
diff --git a/arch/s390/kernel/module.c b/arch/s390/kernel/module.c
index 607d506689c8..c271cdab58e2 100644
--- a/arch/s390/kernel/module.c
+++ b/arch/s390/kernel/module.c
@@ -37,11 +37,11 @@
 #define DEBUGP(fmt , ...)
 #endif
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 #define PLT_ENTRY_SIZE 12
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 #define PLT_ENTRY_SIZE 20
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 void *module_alloc(unsigned long size)
 {
@@ -294,17 +294,17 @@ apply_rela(Elf_Rela *rela, Elf_Addr base, Elf_Sym *symtab,
 			unsigned int *ip;
 			ip = me->module_core + me->arch.plt_offset +
 				info->plt_offset;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 			ip[0] = 0x0d105810; /* basr 1,0; l 1,6(1); br 1 */
 			ip[1] = 0x100607f1;
 			ip[2] = val;
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 			ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */
 			ip[1] = 0x100a0004;
 			ip[2] = 0x07f10000;
 			ip[3] = (unsigned int) (val >> 32);
 			ip[4] = (unsigned int) val;
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 			info->plt_initialized = 1;
 		}
 		if (r_type == R_390_PLTOFF16 ||
diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
index 78b64fe5e7c2..a942bf2d58e9 100644
--- a/arch/s390/kernel/process.c
+++ b/arch/s390/kernel/process.c
@@ -235,7 +235,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
 	/* Save access registers to new thread structure. */
 	save_access_regs(&p->thread.acrs[0]);
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
         /*
 	 * save fprs to current->thread.fp_regs to merge them with
 	 * the emulated registers and then copy the result to the child.
@@ -247,7 +247,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
 	/* Set a new TLS ?  */
 	if (clone_flags & CLONE_SETTLS)
 		p->thread.acrs[0] = regs->gprs[6];
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	/* Save the fpu registers to new thread structure. */
 	save_fp_regs(&p->thread.fp_regs);
         p->thread.user_seg = __pa((unsigned long) p->mm->pgd) | _REGION_TABLE;
@@ -260,7 +260,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long new_stackp,
 			p->thread.acrs[1] = (unsigned int) regs->gprs[6];
 		}
 	}
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	/* start new process with ar4 pointing to the correct address space */
 	p->thread.mm_segment = get_fs();
         /* Don't copy debug registers */
@@ -339,16 +339,16 @@ out:
  */
 int dump_fpu (struct pt_regs * regs, s390_fp_regs *fpregs)
 {
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
         /*
 	 * save fprs to current->thread.fp_regs to merge them with
 	 * the emulated registers and then copy the result to the dump.
 	 */
 	save_fp_regs(&current->thread.fp_regs);
 	memcpy(fpregs, &current->thread.fp_regs, sizeof(s390_fp_regs));
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	save_fp_regs(fpregs);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	return 1;
 }
 
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 06afa3103ace..8ecda6d66de4 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -42,7 +42,7 @@
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 #include "compat_ptrace.h"
 #endif
 
@@ -59,7 +59,7 @@ FixPerRegisters(struct task_struct *task)
 	
 	if (per_info->single_step) {
 		per_info->control_regs.bits.starting_addr = 0;
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 		if (test_thread_flag(TIF_31BIT))
 			per_info->control_regs.bits.ending_addr = 0x7fffffffUL;
 		else
@@ -112,7 +112,7 @@ ptrace_disable(struct task_struct *child)
 	clear_single_step(child);
 }
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 # define __ADDR_MASK 3
 #else
 # define __ADDR_MASK 7
@@ -138,7 +138,7 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
 	 * an alignment of 4. Programmers from hell...
 	 */
 	mask = __ADDR_MASK;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	if (addr >= (addr_t) &dummy->regs.acrs &&
 	    addr < (addr_t) &dummy->regs.orig_gpr2)
 		mask = 3;
@@ -160,7 +160,7 @@ peek_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * access registers are stored in the thread structure
 		 */
 		offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		/*
 		 * Very special case: old & broken 64 bit gdb reading
 		 * from acrs[15]. Result is a 64 bit value. Read the
@@ -218,7 +218,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
 	 * an alignment of 4. Programmers from hell indeed...
 	 */
 	mask = __ADDR_MASK;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	if (addr >= (addr_t) &dummy->regs.acrs &&
 	    addr < (addr_t) &dummy->regs.orig_gpr2)
 		mask = 3;
@@ -231,13 +231,13 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * psw and gprs are stored on the stack
 		 */
 		if (addr == (addr_t) &dummy->regs.psw.mask &&
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 		    data != PSW_MASK_MERGE(PSW_USER32_BITS, data) &&
 #endif
 		    data != PSW_MASK_MERGE(PSW_USER_BITS, data))
 			/* Invalid psw mask. */
 			return -EINVAL;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		if (addr == (addr_t) &dummy->regs.psw.addr)
 			/* I'd like to reject addresses without the
 			   high order bit but older gdb's rely on it */
@@ -250,7 +250,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
 		 * access registers are stored in the thread structure
 		 */
 		offset = addr - (addr_t) &dummy->regs.acrs;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		/*
 		 * Very special case: old & broken 64 bit gdb writing
 		 * to acrs[15] with a 64 bit value. Ignore the lower
@@ -357,7 +357,7 @@ do_ptrace_normal(struct task_struct *child, long request, long addr, long data)
 	return ptrace_request(child, request, addr, data);
 }
 
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 /*
  * Now the fun part starts... a 31 bit program running in the
  * 31 bit emulation tracing another program. PTRACE_PEEKTEXT,
@@ -629,7 +629,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 			return peek_user(child, addr, data);
 		if (request == PTRACE_POKEUSR && addr == PT_IEEE_IP)
 			return poke_user(child, addr, data);
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 		if (request == PTRACE_PEEKUSR &&
 		    addr == PT32_IEEE_IP && test_thread_flag(TIF_31BIT))
 			return peek_user_emu31(child, addr, data);
@@ -695,7 +695,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	/* Do requests that differ for 31/64 bit */
 	default:
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 		if (test_thread_flag(TIF_31BIT))
 			return do_ptrace_emu31(child, request, addr, data);
 #endif
diff --git a/arch/s390/kernel/reipl_diag.c b/arch/s390/kernel/reipl_diag.c
index 83cb42bc0b76..1f33951ba439 100644
--- a/arch/s390/kernel/reipl_diag.c
+++ b/arch/s390/kernel/reipl_diag.c
@@ -26,7 +26,7 @@ void reipl_diag(void)
 		"   st   %%r4,%0\n"
 		"   st   %%r5,%1\n"
                 ".section __ex_table,\"a\"\n"
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
                 "   .align 8\n"
                 "   .quad 0b, 0b\n"
 #else
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 31e7b19348b7..b03847d100d9 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -427,7 +427,7 @@ setup_lowcore(void)
 		__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0) + PAGE_SIZE;
 	lc->current_task = (unsigned long) init_thread_union.thread_info.task;
 	lc->thread_info = (unsigned long) &init_thread_union;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE) {
 		lc->extended_save_area_addr = (__u32)
 			__alloc_bootmem(PAGE_SIZE, PAGE_SIZE, 0);
@@ -562,21 +562,21 @@ setup_arch(char **cmdline_p)
         /*
          * print what head.S has found out about the machine
          */
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	printk((MACHINE_IS_VM) ?
 	       "We are running under VM (31 bit mode)\n" :
 	       "We are running native (31 bit mode)\n");
 	printk((MACHINE_HAS_IEEE) ?
 	       "This machine has an IEEE fpu\n" :
 	       "This machine has no IEEE fpu\n");
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	printk((MACHINE_IS_VM) ?
 	       "We are running under VM (64 bit mode)\n" :
 	       "We are running native (64 bit mode)\n");
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
         ROOT_DEV = Root_RAM0;
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	memory_end = memory_size & ~0x400000UL;  /* align memory end to 4MB */
         /*
          * We need some free virtual space to be able to do vmalloc.
@@ -585,9 +585,9 @@ setup_arch(char **cmdline_p)
          */
         if (memory_end > 1920*1024*1024)
                 memory_end = 1920*1024*1024;
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	memory_end = memory_size & ~0x200000UL;  /* detected in head.s */
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 	init_mm.start_code = PAGE_OFFSET;
 	init_mm.end_code = (unsigned long) &_etext;
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 13592d00a10f..6ae4a77270b5 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -501,7 +501,7 @@ int do_signal(struct pt_regs *regs, sigset_t *oldset)
 
 	if (signr > 0) {
 		/* Whee!  Actually deliver the signal.  */
-#ifdef CONFIG_S390_SUPPORT
+#ifdef CONFIG_COMPAT
 		if (test_thread_flag(TIF_31BIT)) {
 			extern void handle_signal32(unsigned long sig,
 						    struct k_sigaction *ka,
diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c
index bd5b311006be..e10f4ca00499 100644
--- a/arch/s390/kernel/smp.c
+++ b/arch/s390/kernel/smp.c
@@ -402,7 +402,7 @@ static void smp_ext_bitcall_others(ec_bit_sig sig)
         }
 }
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 /*
  * this function sends a 'purge tlb' signal to another CPU.
  */
@@ -416,7 +416,7 @@ void smp_ptlb_all(void)
         on_each_cpu(smp_ptlb_callback, NULL, 0, 1);
 }
 EXPORT_SYMBOL(smp_ptlb_all);
-#endif /* ! CONFIG_ARCH_S390X */
+#endif /* ! CONFIG_64BIT */
 
 /*
  * this function sends a 'reschedule' IPI to another CPU.
@@ -783,7 +783,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		if (stack == 0ULL)
 			panic("smp_boot_cpus failed to allocate memory\n");
 		lowcore_ptr[i]->panic_stack = stack + (PAGE_SIZE);
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
 		if (MACHINE_HAS_IEEE) {
 			lowcore_ptr[i]->extended_save_area_addr =
 				(__u32) __get_free_pages(GFP_KERNEL,0);
@@ -793,7 +793,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 		}
 #endif
 	}
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
 	if (MACHINE_HAS_IEEE)
 		ctl_set_bit(14, 29); /* enable extended save area */
 #endif
diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
index efe6b83b53f7..6a63553493c5 100644
--- a/arch/s390/kernel/sys_s390.c
+++ b/arch/s390/kernel/sys_s390.c
@@ -26,9 +26,7 @@
 #include <linux/mman.h>
 #include <linux/file.h>
 #include <linux/utsname.h>
-#ifdef CONFIG_ARCH_S390X
 #include <linux/personality.h>
-#endif /* CONFIG_ARCH_S390X */
 
 #include <asm/uaccess.h>
 #include <asm/ipc.h>
@@ -121,7 +119,7 @@ out:
 	return error;
 }
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 struct sel_arg_struct {
 	unsigned long n;
 	fd_set *inp, *outp, *exp;
@@ -138,7 +136,7 @@ asmlinkage long old_select(struct sel_arg_struct __user *arg)
 	return sys_select(a.n, a.inp, a.outp, a.exp, a.tvp);
 
 }
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 /*
  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
@@ -211,7 +209,7 @@ asmlinkage long sys_ipc(uint call, int first, unsigned long second,
 	return -EINVAL;
 }
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 asmlinkage long s390x_newuname(struct new_utsname __user *name)
 {
 	int ret = sys_newuname(name);
@@ -235,12 +233,12 @@ asmlinkage long s390x_personality(unsigned long personality)
 
 	return ret;
 }
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 /*
  * Wrapper function for sys_fadvise64/fadvise64_64
  */
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 
 asmlinkage long
 s390_fadvise64(int fd, u32 offset_high, u32 offset_low, size_t len, int advice)
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index c5bd36fae56b..95d109968619 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -67,13 +67,13 @@ extern pgm_check_handler_t do_monitor_call;
 
 #define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 #define FOURLONG "%08lx %08lx %08lx %08lx\n"
 static int kstack_depth_to_print = 12;
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 #define FOURLONG "%016lx %016lx %016lx %016lx\n"
 static int kstack_depth_to_print = 20;
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 /*
  * For show_trace we have tree different stack to consider:
@@ -702,12 +702,12 @@ void __init trap_init(void)
         pgm_check_table[0x11] = &do_dat_exception;
         pgm_check_table[0x12] = &translation_exception;
         pgm_check_table[0x13] = &special_op_exception;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
         pgm_check_table[0x38] = &do_dat_exception;
 	pgm_check_table[0x39] = &do_dat_exception;
 	pgm_check_table[0x3A] = &do_dat_exception;
         pgm_check_table[0x3B] = &do_dat_exception;
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
         pgm_check_table[0x15] = &operand_exception;
         pgm_check_table[0x1C] = &space_switch_exception;
         pgm_check_table[0x1D] = &hfp_sqrt_exception;
diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S
index 89fdb3808bc0..9289face3027 100644
--- a/arch/s390/kernel/vmlinux.lds.S
+++ b/arch/s390/kernel/vmlinux.lds.S
@@ -5,7 +5,7 @@
 #include <asm-generic/vmlinux.lds.h>
 #include <linux/config.h>
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 OUTPUT_FORMAT("elf32-s390", "elf32-s390", "elf32-s390")
 OUTPUT_ARCH(s390)
 ENTRY(_start)
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index b701efa1f00e..d9b97b3c597f 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -4,6 +4,5 @@
 
 EXTRA_AFLAGS := -traditional
 
-lib-y += delay.o string.o
-lib-$(CONFIG_ARCH_S390_31) += uaccess.o spinlock.o
-lib-$(CONFIG_ARCH_S390X) += uaccess64.o spinlock.o
+lib-y += delay.o string.o spinlock.o
+lib-y += $(if $(CONFIG_64BIT),uaccess64.o,uaccess.o)
diff --git a/arch/s390/lib/spinlock.c b/arch/s390/lib/spinlock.c
index 2dc14e9c8327..68d79c502081 100644
--- a/arch/s390/lib/spinlock.c
+++ b/arch/s390/lib/spinlock.c
@@ -29,7 +29,7 @@ __setup("spin_retry=", spin_retry_setup);
 static inline void
 _diag44(void)
 {
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	if (MACHINE_HAS_DIAG44)
 #endif
 		asm volatile("diag 0,0,0x44");
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 506a33b51e4f..a9566bcab682 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -143,7 +143,7 @@ dcss_diag (__u8 func, void *parameter,
 	rx = (unsigned long) parameter;
 	ry = (unsigned long) func;
 	__asm__ __volatile__(
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		"   sam31\n" // switch to 31 bit
 		"   diag    %0,%1,0x64\n"
 		"   sam64\n" // switch back to 64 bit
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index fb2607c369ed..81ade401b073 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -31,17 +31,17 @@
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 #define __FAIL_ADDR_MASK 0x7ffff000
 #define __FIXUP_MASK 0x7fffffff
 #define __SUBCODE_MASK 0x0200
 #define __PF_RES_FIELD 0ULL
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 #define __FAIL_ADDR_MASK -4096L
 #define __FIXUP_MASK ~0L
 #define __SUBCODE_MASK 0x0600
 #define __PF_RES_FIELD 0x8000000000000000ULL
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 #ifdef CONFIG_SYSCTL
 extern int sysctl_userprocess_debug;
@@ -393,11 +393,11 @@ int pfault_init(void)
 		"2:\n"
 		".section __ex_table,\"a\"\n"
 		"   .align 4\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		"   .long  0b,1b\n"
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 		"   .quad  0b,1b\n"
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 		".previous"
                 : "=d" (rc) : "a" (&refbk), "m" (refbk) : "cc" );
         __ctl_set_bit(0, 9);
@@ -417,11 +417,11 @@ void pfault_fini(void)
 		"0:\n"
 		".section __ex_table,\"a\"\n"
 		"   .align 4\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		"   .long  0b,0b\n"
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 		"   .quad  0b,0b\n"
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 		".previous"
 		: : "a" (&refbk), "m" (refbk) : "cc" );
 }
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 6ec5cd981e74..df953383724d 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -44,7 +44,7 @@ void diag10(unsigned long addr)
 {
         if (addr >= 0x7ff00000)
                 return;
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
         asm volatile (
 		"   sam31\n"
 		"   diag %0,%0,0x10\n"
@@ -106,7 +106,7 @@ extern unsigned long __initdata zholes_size[];
  * paging_init() sets up the page tables
  */
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 void __init paging_init(void)
 {
         pgd_t * pg_dir;
@@ -175,7 +175,7 @@ void __init paging_init(void)
         return;
 }
 
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 void __init paging_init(void)
 {
         pgd_t * pg_dir;
@@ -256,7 +256,7 @@ void __init paging_init(void)
 
         return;
 }
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 void __init mem_init(void)
 {
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index fb187e5a54b4..356257c171de 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -50,7 +50,7 @@ static inline unsigned long mmap_base(void)
 
 static inline int mmap_is_legacy(void)
 {
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	/*
 	 * Force standard allocation for 64 bit programs.
 	 */
diff --git a/block/Kconfig b/block/Kconfig
index eb48edb80c1d..377f6dd20e17 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -5,7 +5,7 @@
 #for instance.
 config LBD
 	bool "Support for Large Block Devices"
-	depends on X86 || (MIPS && 32BIT) || PPC32 || ARCH_S390_31 || SUPERH || UML
+	depends on X86 || (MIPS && 32BIT) || PPC32 || (S390 && !64BIT) || SUPERH || UML
 	help
 	  Say Y here if you want to attach large (bigger than 2TB) discs to
 	  your machine, or if you want to have a raid or loopback device
diff --git a/crypto/Kconfig b/crypto/Kconfig
index c696f7ab729e..52e1d4108a99 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -42,7 +42,7 @@ config CRYPTO_SHA1
 
 config CRYPTO_SHA1_S390
 	tristate "SHA1 digest algorithm (s390)"
-	depends on CRYPTO && ARCH_S390
+	depends on CRYPTO && S390
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
@@ -58,7 +58,7 @@ config CRYPTO_SHA256
 
 config CRYPTO_SHA256_S390
 	tristate "SHA256 digest algorithm (s390)"
-	depends on CRYPTO && ARCH_S390
+	depends on CRYPTO && S390
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  SHA256 secure hash standard (DFIPS 180-2).
@@ -111,7 +111,7 @@ config CRYPTO_DES
 
 config CRYPTO_DES_S390
 	tristate "DES and Triple DES cipher algorithms (s390)"
-	depends on CRYPTO && ARCH_S390
+	depends on CRYPTO && S390
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
@@ -217,7 +217,7 @@ config CRYPTO_AES_X86_64
 
 config CRYPTO_AES_S390
 	tristate "AES cipher algorithms (s390)"
-	depends on CRYPTO && ARCH_S390
+	depends on CRYPTO && S390
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 84e68cdd451b..5ebd06b1b4ca 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -985,7 +985,7 @@ config HPET_MMAP
 
 config HANGCHECK_TIMER
 	tristate "Hangcheck timer"
-	depends on X86 || IA64 || PPC64 || ARCH_S390
+	depends on X86 || IA64 || PPC64 || S390
 	help
 	  The hangcheck-timer module detects when the system has gone
 	  out to lunch past a certain margin.  It can reboot the system
diff --git a/drivers/char/hangcheck-timer.c b/drivers/char/hangcheck-timer.c
index 66e53dd450ff..40a67c86420c 100644
--- a/drivers/char/hangcheck-timer.c
+++ b/drivers/char/hangcheck-timer.c
@@ -120,7 +120,7 @@ __setup("hcheck_dump_tasks", hangcheck_parse_dump_tasks);
 #if defined(CONFIG_X86)
 # define HAVE_MONOTONIC
 # define TIMER_FREQ 1000000000ULL
-#elif defined(CONFIG_ARCH_S390)
+#elif defined(CONFIG_S390)
 /* FA240000 is 1 Second in the IBM time universe (Page 4-38 Principles of Op for zSeries */
 # define TIMER_FREQ 0xFA240000ULL
 #elif defined(CONFIG_IA64)
diff --git a/drivers/char/watchdog/Kconfig b/drivers/char/watchdog/Kconfig
index 344001b45af9..a6544790af60 100644
--- a/drivers/char/watchdog/Kconfig
+++ b/drivers/char/watchdog/Kconfig
@@ -438,7 +438,7 @@ config INDYDOG
 
 config ZVM_WATCHDOG
 	tristate "z/VM Watchdog Timer"
-	depends on WATCHDOG && ARCH_S390
+	depends on WATCHDOG && S390
 	help
 	  IBM s/390 and zSeries machines running under z/VM 5.1 or later
 	  provide a virtual watchdog timer to their guest that cause a
diff --git a/drivers/input/evdev.c b/drivers/input/evdev.c
index 9f2352bd8348..a1e660e3531d 100644
--- a/drivers/input/evdev.c
+++ b/drivers/input/evdev.c
@@ -157,7 +157,7 @@ struct input_event_compat {
 #  define COMPAT_TEST test_thread_flag(TIF_IA32)
 #elif defined(CONFIG_IA64)
 #  define COMPAT_TEST IS_IA32_PROCESS(ia64_task_regs(current))
-#elif defined(CONFIG_ARCH_S390)
+#elif defined(CONFIG_S390)
 #  define COMPAT_TEST test_thread_flag(TIF_31BIT)
 #elif defined(CONFIG_MIPS)
 #  define COMPAT_TEST (current->thread.mflags & MF_32BIT_ADDR)
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index c782a6329805..fa39b944bc46 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -6,7 +6,7 @@ menu "PHY device support"
 
 config PHYLIB
 	tristate "PHY Device support and infrastructure"
-	depends on NET_ETHERNET && (BROKEN || !ARCH_S390)
+	depends on NET_ETHERNET && (BROKEN || !S390)
 	help
 	  Ethernet controllers are usually attached to PHY
 	  devices.  This option provides infrastructure for
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 6e7d7b06421d..6f50cc9323d9 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -1,11 +1,11 @@
-if ARCH_S390
+if S390
 
 comment "S/390 block device drivers"
-	depends on ARCH_S390
+	depends on S390
 
 config BLK_DEV_XPRAM
 	tristate "XPRAM disk support"
-	depends on ARCH_S390
+	depends on S390
 	help
 	  Select this option if you want to use your expanded storage on S/390
 	  or zSeries as a disk.  This is useful as a _fast_ swap device if you
@@ -49,7 +49,7 @@ config DASD_FBA
 
 config DASD_DIAG
 	tristate "Support for DIAG access to Disks"
-	depends on DASD && ( ARCH_S390X = 'n' || EXPERIMENTAL)
+	depends on DASD && ( 64BIT = 'n' || EXPERIMENTAL)
 	help
 	  Select this option if you want to use Diagnose250 command to access
 	  Disks under VM.  If you are not running under VM or unsure what it is,
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index 1141a5963b67..041e1a621885 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -604,7 +604,7 @@ dasd_smalloc_request(char *magic, int cplength, int datasize,
 void
 dasd_kfree_request(struct dasd_ccw_req * cqr, struct dasd_device * device)
 {
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	struct ccw1 *ccw;
 
 	/* Clear any idals used for the request. */
diff --git a/drivers/s390/block/dasd_diag.c b/drivers/s390/block/dasd_diag.c
index a33d4064b537..ba80fdea7ebf 100644
--- a/drivers/s390/block/dasd_diag.c
+++ b/drivers/s390/block/dasd_diag.c
@@ -75,7 +75,7 @@ dia250(void *iob, int cmd)
 	int rc;
 
 	__asm__ __volatile__(
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		"	lghi	%0,3\n"
 		"	lgr	0,%3\n"
 		"	diag	0,%2,0x250\n"
diff --git a/drivers/s390/block/dasd_diag.h b/drivers/s390/block/dasd_diag.h
index 37edf6e91715..a4f80bd735f1 100644
--- a/drivers/s390/block/dasd_diag.h
+++ b/drivers/s390/block/dasd_diag.h
@@ -45,7 +45,7 @@ struct dasd_diag_characteristics {
 } __attribute__ ((packed, aligned(4)));
 
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 #define DASD_DIAG_FLAGA_DEFAULT		DASD_DIAG_FLAGA_FORMAT_64BIT
 
 typedef u64 blocknum_t;
@@ -86,7 +86,7 @@ struct dasd_diag_rw_io {
 	struct dasd_diag_bio *bio_list;
 	u8  spare4[8];
 } __attribute__ ((packed, aligned(8)));
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 #define DASD_DIAG_FLAGA_DEFAULT		0x0
 
 typedef u32 blocknum_t;
@@ -125,4 +125,4 @@ struct dasd_diag_rw_io {
 	u32 interrupt_params;
 	u8 spare3[20];
 } __attribute__ ((packed, aligned(8)));
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index efc4cf62496e..96eb48258580 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -1041,7 +1041,7 @@ dasd_eckd_build_cp(struct dasd_device * device, struct request *req)
 				/* Eckd can only do full blocks. */
 				return ERR_PTR(-EINVAL);
 			count += bv->bv_len >> (device->s2b_shift + 9);
-#if defined(CONFIG_ARCH_S390X)
+#if defined(CONFIG_64BIT)
 			if (idal_is_needed (page_address(bv->bv_page),
 					    bv->bv_len))
 				cidaw += bv->bv_len >> (device->s2b_shift + 9);
diff --git a/drivers/s390/block/dasd_fba.c b/drivers/s390/block/dasd_fba.c
index 9bac8d87a9cc..8ec75dc08e2c 100644
--- a/drivers/s390/block/dasd_fba.c
+++ b/drivers/s390/block/dasd_fba.c
@@ -271,7 +271,7 @@ dasd_fba_build_cp(struct dasd_device * device, struct request *req)
 				/* Fba can only do full blocks. */
 				return ERR_PTR(-EINVAL);
 			count += bv->bv_len >> (device->s2b_shift + 9);
-#if defined(CONFIG_ARCH_S390X)
+#if defined(CONFIG_64BIT)
 			if (idal_is_needed (page_address(bv->bv_page),
 					    bv->bv_len))
 				cidaw += bv->bv_len / blksize;
diff --git a/drivers/s390/block/xpram.c b/drivers/s390/block/xpram.c
index d428c909b8a0..bf3a67c3cc5e 100644
--- a/drivers/s390/block/xpram.c
+++ b/drivers/s390/block/xpram.c
@@ -160,7 +160,7 @@ static int xpram_page_in (unsigned long page_addr, unsigned int xpage_index)
                 "0: ipm   %0\n"
 		"   srl   %0,28\n"
 		"1:\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		".section __ex_table,\"a\"\n"
 		"   .align 4\n"
 		"   .long  0b,1b\n"
@@ -208,7 +208,7 @@ static long xpram_page_out (unsigned long page_addr, unsigned int xpage_index)
                 "0: ipm   %0\n"
 		"   srl   %0,28\n"
 		"1:\n"
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 		".section __ex_table,\"a\"\n"
 		"   .align 4\n"
 		"   .long  0b,1b\n"
diff --git a/drivers/s390/char/vmwatchdog.c b/drivers/s390/char/vmwatchdog.c
index 5473c23fcb52..5acc0ace3d7d 100644
--- a/drivers/s390/char/vmwatchdog.c
+++ b/drivers/s390/char/vmwatchdog.c
@@ -66,7 +66,7 @@ static int __diag288(enum vmwdt_func func, unsigned int timeout,
 	__cmdl = len;
 	err = 0;
 	asm volatile (
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 		       "diag %2,%4,0x288\n"
 		"1:	\n"
 		".section .fixup,\"ax\"\n"
diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
index 6f274f4f92eb..7376bc87206d 100644
--- a/drivers/s390/cio/cio.c
+++ b/drivers/s390/cio/cio.c
@@ -195,7 +195,7 @@ cio_start_key (struct subchannel *sch,	/* subchannel structure */
 	sch->orb.spnd = sch->options.suspend;
 	sch->orb.ssic = sch->options.suspend && sch->options.inter;
 	sch->orb.lpm = (lpm != 0) ? (lpm & sch->opm) : sch->lpm;
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	/*
 	 * for 64 bit we always support 64 bit IDAWs with 4k page size only
 	 */
diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
index 3c77c3fd461d..04ceba343db8 100644
--- a/drivers/s390/cio/device_id.c
+++ b/drivers/s390/cio/device_id.c
@@ -27,7 +27,7 @@
 /*
  * diag210 is used under VM to get information about a virtual device
  */
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 int
 diag210(struct diag210 * addr)
 {
diff --git a/drivers/s390/cio/ioasm.h b/drivers/s390/cio/ioasm.h
index 62b0e2ad507f..95a9462f9a91 100644
--- a/drivers/s390/cio/ioasm.h
+++ b/drivers/s390/cio/ioasm.h
@@ -50,7 +50,7 @@ static inline int stsch_err(struct subchannel_id schid,
 		"0:  ipm  %0\n"
 		"    srl  %0,28\n"
 		"1:\n"
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		".section __ex_table,\"a\"\n"
 		"   .align 8\n"
 		"   .quad 0b,1b\n"
@@ -95,7 +95,7 @@ static inline int msch_err(struct subchannel_id schid,
 		"0:  ipm  %0\n"
 		"    srl  %0,28\n"
 		"1:\n"
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 		".section __ex_table,\"a\"\n"
 		"   .align 8\n"
 		"   .quad 0b,1b\n"
diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c
index 035c77af9cd3..30a836ffc31f 100644
--- a/drivers/s390/cio/qdio.c
+++ b/drivers/s390/cio/qdio.c
@@ -2394,7 +2394,7 @@ tiqdio_check_chsc_availability(void)
 	sprintf(dbf_text,"hydrati%1x", hydra_thinints);
 	QDIO_DBF_TEXT0(0,setup,dbf_text);
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	/* Check for QEBSM support in general (bit 58). */
 	is_passthrough = css_general_characteristics.qebsm;
 #endif
diff --git a/drivers/s390/cio/qdio.h b/drivers/s390/cio/qdio.h
index 43b840af5300..fa385e761fe1 100644
--- a/drivers/s390/cio/qdio.h
+++ b/drivers/s390/cio/qdio.h
@@ -271,7 +271,7 @@ static inline int
 do_sqbs(unsigned long sch, unsigned char state, int queue,
        unsigned int *start, unsigned int *count)
 {
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
        register unsigned long _ccq asm ("0") = *count;
        register unsigned long _sch asm ("1") = sch;
        unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
@@ -295,7 +295,7 @@ static inline int
 do_eqbs(unsigned long sch, unsigned char *state, int queue,
 	unsigned int *start, unsigned int *count)
 {
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 	register unsigned long _ccq asm ("0") = *count;
 	register unsigned long _sch asm ("1") = sch;
 	unsigned long _queuestart = ((unsigned long)queue << 32) | *start;
@@ -323,7 +323,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
 {
 	int cc;
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	asm volatile (
 		"lhi	0,2	\n\t"
 		"lr	1,%1	\n\t"
@@ -336,7 +336,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
 		: "d" (schid), "d" (mask1), "d" (mask2)
 		: "cc", "0", "1", "2", "3"
 		);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	asm volatile (
 		"lghi	0,2	\n\t"
 		"llgfr	1,%1	\n\t"
@@ -349,7 +349,7 @@ do_siga_sync(struct subchannel_id schid, unsigned int mask1, unsigned int mask2)
 		: "d" (schid), "d" (mask1), "d" (mask2)
 		: "cc", "0", "1", "2", "3"
 		);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	return cc;
 }
 
@@ -358,7 +358,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask)
 {
 	int cc;
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	asm volatile (
 		"lhi	0,1	\n\t"
 		"lr	1,%1	\n\t"
@@ -370,7 +370,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask)
 		: "d" (schid), "d" (mask)
 		: "cc", "0", "1", "2", "memory"
 		);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	asm volatile (
 		"lghi	0,1	\n\t"
 		"llgfr	1,%1	\n\t"
@@ -382,7 +382,7 @@ do_siga_input(struct subchannel_id schid, unsigned int mask)
 		: "d" (schid), "d" (mask)
 		: "cc", "0", "1", "2", "memory"
 		);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	
 	return cc;
 }
@@ -394,7 +394,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
 	int cc;
 	__u32 busy_bit;
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	asm volatile (
 		"lhi	0,0	\n\t"
 		"lr	1,%2	\n\t"
@@ -424,7 +424,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
 		"i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION)
 		: "cc", "0", "1", "2", "memory"
 		);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	asm volatile (
         	"llgfr  0,%5    \n\t"
                 "lgr    1,%2    \n\t"
@@ -449,7 +449,7 @@ do_siga_output(unsigned long schid, unsigned long mask, __u32 *bb,
 		"i" (QDIO_SIGA_ERROR_ACCESS_EXCEPTION), "d" (fc)
 		: "cc", "0", "1", "2", "memory"
 		);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	
 	(*bb) = busy_bit;
 	return cc;
@@ -461,21 +461,21 @@ do_clear_global_summary(void)
 
 	unsigned long time;
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	asm volatile (
 		"lhi	1,3	\n\t"
 		".insn	rre,0xb2650000,2,0	\n\t"
 		"lr	%0,3	\n\t"
 		: "=d" (time) : : "cc", "1", "2", "3"
 		);
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 	asm volatile (
 		"lghi	1,3	\n\t"
 		".insn	rre,0xb2650000,2,0	\n\t"
 		"lgr	%0,3	\n\t"
 		: "=d" (time) : : "cc", "1", "2", "3"
 		);
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 	
 	return time;
 }
@@ -542,11 +542,11 @@ struct qdio_perf_stats {
 
 #define MY_MODULE_STRING(x) #x
 
-#ifdef CONFIG_ARCH_S390X
+#ifdef CONFIG_64BIT
 #define QDIO_GET_ADDR(x) ((__u32)(unsigned long)x)
-#else /* CONFIG_ARCH_S390X */
+#else /* CONFIG_64BIT */
 #define QDIO_GET_ADDR(x) ((__u32)(long)x)
-#endif /* CONFIG_ARCH_S390X */
+#endif /* CONFIG_64BIT */
 
 struct qdio_q {
 	volatile struct slsb slsb;
diff --git a/drivers/s390/crypto/z90hardware.c b/drivers/s390/crypto/z90hardware.c
index 7c3ed52e03e1..d7f7494a0cbe 100644
--- a/drivers/s390/crypto/z90hardware.c
+++ b/drivers/s390/crypto/z90hardware.c
@@ -785,7 +785,7 @@ testq(int q_nr, int *q_depth, int *dev_type, struct ap_status_word *stat)
 	int ccode;
 
 	asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	("	llgfr	0,%4		\n"
 	 "	slgr	1,1		\n"
 	 "	lgr	2,1		\n"
@@ -855,7 +855,7 @@ resetq(int q_nr, struct ap_status_word *stat_p)
 	int ccode;
 
 	asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	("	llgfr	0,%2		\n"
 	 "	lghi	1,1		\n"
 	 "	sll	1,24		\n"
@@ -921,7 +921,7 @@ sen(int msg_len, unsigned char *msg_ext, struct ap_status_word *stat)
 	int ccode;
 
 	asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	("	lgr	6,%3		\n"
 	 "	llgfr	7,%2		\n"
 	 "	llgt	0,0(6)		\n"
@@ -1000,7 +1000,7 @@ rec(int q_nr, int buff_l, unsigned char *rsp, unsigned char *id,
 	int ccode;
 
 	asm volatile
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	("	llgfr	0,%2		\n"
 	 "	lgr	3,%4		\n"
 	 "	lgr	6,%3		\n"
diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index a7efc394515e..548854754921 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -1,5 +1,5 @@
 menu "S/390 network device drivers"
-	depends on NETDEVICES && ARCH_S390
+	depends on NETDEVICES && S390
 
 config LCS
 	tristate "Lan Channel Station Interface"
diff --git a/drivers/s390/net/claw.c b/drivers/s390/net/claw.c
index 6b63d21612ec..e70af7f39946 100644
--- a/drivers/s390/net/claw.c
+++ b/drivers/s390/net/claw.c
@@ -1603,7 +1603,7 @@ dumpit(char* buf, int len)
         __u32      ct, sw, rm, dup;
         char       *ptr, *rptr;
         char       tbuf[82], tdup[82];
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
         char       addr[22];
 #else
         char       addr[12];
@@ -1619,7 +1619,7 @@ dumpit(char* buf, int len)
         dup = 0;
         for ( ct=0; ct < len; ct++, ptr++, rptr++ )  {
                 if (sw == 0) {
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
                         sprintf(addr, "%16.16lX",(unsigned long)rptr);
 #else
                         sprintf(addr, "%8.8X",(__u32)rptr);
@@ -1634,7 +1634,7 @@ dumpit(char* buf, int len)
                 if (sw == 8) {
                         strcat(bhex, "  ");
                 }
-#if (CONFIG_ARCH_S390X)
+#if (CONFIG_64BIT)
                 sprintf(tbuf,"%2.2lX", (unsigned long)*ptr);
 #else
                 sprintf(tbuf,"%2.2X", (__u32)*ptr);
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index 7dad597ff86e..3bf466603512 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -246,7 +246,7 @@ s390_revalidate_registers(struct mci *mci)
 		 */
 		kill_task = 1;
 
-#ifndef __s390x__
+#ifndef CONFIG_64BIT
 	asm volatile("ld 0,0(%0)\n"
 		     "ld 2,8(%0)\n"
 		     "ld 4,16(%0)\n"
@@ -255,7 +255,7 @@ s390_revalidate_registers(struct mci *mci)
 #endif
 
 	if (MACHINE_HAS_IEEE) {
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 		fpt_save_area = &S390_lowcore.floating_pt_save_area;
 		fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
 #else
@@ -314,7 +314,7 @@ s390_revalidate_registers(struct mci *mci)
 		 */
 		s390_handle_damage("invalid control registers.");
 	else
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 		asm volatile("lctlg 0,15,0(%0)"
 			     : : "a" (&S390_lowcore.cregs_save_area));
 #else
@@ -327,7 +327,7 @@ s390_revalidate_registers(struct mci *mci)
 	 * can't write something sensible into that register.
 	 */
 
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 	/*
 	 * See if we can revalidate the TOD programmable register with its
 	 * old contents (should be zero) otherwise set it to zero.
@@ -384,7 +384,7 @@ s390_do_machine_check(struct pt_regs *regs)
 		if (mci->b) {
 			/* Processing backup -> verify if we can survive this */
 			u64 z_mcic, o_mcic, t_mcic;
-#ifdef __s390x__
+#ifdef CONFIG_64BIT
 			z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
 			o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
 				  1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
diff --git a/drivers/s390/sysinfo.c b/drivers/s390/sysinfo.c
index 87c2db1bd4f5..66da840c9316 100644
--- a/drivers/s390/sysinfo.c
+++ b/drivers/s390/sysinfo.c
@@ -106,7 +106,7 @@ static inline int stsi (void *sysinfo,
 {
 	int cc, retv;
 
-#ifndef CONFIG_ARCH_S390X
+#ifndef CONFIG_64BIT
 	__asm__ __volatile__ (	"lr\t0,%2\n"
 				"\tlr\t1,%3\n"
 				"\tstsi\t0(%4)\n"
diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
index 4c42065dea88..9e8254f0256c 100644
--- a/drivers/scsi/Kconfig
+++ b/drivers/scsi/Kconfig
@@ -1815,7 +1815,7 @@ config SCSI_SUNESP
 
 config ZFCP
 	tristate "FCP host bus adapter driver for IBM eServer zSeries"
-	depends on ARCH_S390 && QDIO && SCSI
+	depends on S390 && QDIO && SCSI
 	select SCSI_FC_ATTRS
 	help
           If you want to access SCSI devices attached to your IBM eServer
diff --git a/fs/partitions/Kconfig b/fs/partitions/Kconfig
index 656bc43431b9..e227a04261ab 100644
--- a/fs/partitions/Kconfig
+++ b/fs/partitions/Kconfig
@@ -85,7 +85,7 @@ config ATARI_PARTITION
 
 config IBM_PARTITION
 	bool "IBM disk label and partition support"
-	depends on PARTITION_ADVANCED && ARCH_S390
+	depends on PARTITION_ADVANCED && S390
 	help
 	  Say Y here if you would like to be able to read the hard disk
 	  partition table format used by IBM DASD disks operating under CMS.
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 3e1239e4b303..5e9251f65317 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -308,7 +308,7 @@ int proc_pid_status(struct task_struct *task, char * buffer)
 	buffer = task_sig(task, buffer);
 	buffer = task_cap(task, buffer);
 	buffer = cpuset_task_status_allowed(task, buffer);
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
 	buffer = task_show_regs(task, buffer);
 #endif
 	return buffer - orig;
diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h
index f97d92691f17..2861cdc243ad 100644
--- a/include/asm-s390/unistd.h
+++ b/include/asm-s390/unistd.h
@@ -539,7 +539,7 @@ type name(type1 arg1, type2 arg2, type3 arg3, type4 arg4,    \
 #define __ARCH_WANT_SYS_SIGPENDING
 #define __ARCH_WANT_SYS_SIGPROCMASK
 #define __ARCH_WANT_SYS_RT_SIGACTION
-# ifdef CONFIG_ARCH_S390_31
+# ifndef CONFIG_64BIT
 #   define __ARCH_WANT_STAT64
 #   define __ARCH_WANT_SYS_TIME
 # endif
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 60f8bc78a35a..6c5d4c898ccb 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -12,7 +12,7 @@
 #include <linux/config.h>
 #include <linux/smp.h>
 
-#if !defined(CONFIG_ARCH_S390)
+#if !defined(CONFIG_S390)
 
 #include <linux/linkage.h>
 #include <linux/cache.h>
diff --git a/init/Kconfig b/init/Kconfig
index 24e0f7c756c0..ba42f3793a84 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -189,7 +189,7 @@ config AUDIT
 
 config AUDITSYSCALL
 	bool "Enable system-call auditing support"
-	depends on AUDIT && (X86 || PPC || PPC64 || ARCH_S390 || IA64 || UML || SPARC64)
+	depends on AUDIT && (X86 || PPC || PPC64 || S390 || IA64 || UML || SPARC64)
 	default y if SECURITY_SELINUX
 	help
 	  Enable low-overhead system-call auditing infrastructure that
diff --git a/init/do_mounts_rd.c b/init/do_mounts_rd.c
index c10b08a80982..c2683fcd792d 100644
--- a/init/do_mounts_rd.c
+++ b/init/do_mounts_rd.c
@@ -145,7 +145,7 @@ int __init rd_load_image(char *from)
 	int nblocks, i, disk;
 	char *buf = NULL;
 	unsigned short rotate = 0;
-#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES)
+#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES)
 	char rotator[4] = { '|' , '/' , '-' , '\\' };
 #endif
 
@@ -237,7 +237,7 @@ int __init rd_load_image(char *from)
 		}
 		sys_read(in_fd, buf, BLOCK_SIZE);
 		sys_write(out_fd, buf, BLOCK_SIZE);
-#if !defined(CONFIG_ARCH_S390) && !defined(CONFIG_PPC_ISERIES)
+#if !defined(CONFIG_S390) && !defined(CONFIG_PPC_ISERIES)
 		if (!(i % 16)) {
 			printk("%c\b", rotator[rotate & 0x3]);
 			rotate++;
diff --git a/kernel/panic.c b/kernel/panic.c
index aabc5f86fa3f..c5c4ab255834 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -60,7 +60,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 	long i;
 	static char buf[1024];
 	va_list args;
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
         unsigned long caller = (unsigned long) __builtin_return_address(0);
 #endif
 
@@ -125,7 +125,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 		printk(KERN_EMERG "Press Stop-A (L1-A) to return to the boot prom\n");
 	}
 #endif
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
         disabled_wait(caller);
 #endif
 	local_irq_enable();
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 345f4a1d533f..a85047bb5739 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -108,7 +108,7 @@ extern int pwrsw_enabled;
 extern int unaligned_enabled;
 #endif
 
-#ifdef CONFIG_ARCH_S390
+#ifdef CONFIG_S390
 #ifdef CONFIG_MATHEMU
 extern int sysctl_ieee_emulation_warnings;
 #endif
@@ -542,7 +542,7 @@ static ctl_table kern_table[] = {
 		.extra1		= &minolduid,
 		.extra2		= &maxolduid,
 	},
-#ifdef CONFIG_ARCH_S390
+#ifdef CONFIG_S390
 #ifdef CONFIG_MATHEMU
 	{
 		.ctl_name	= KERN_IEEE_EMULATION_WARNINGS,
@@ -644,7 +644,7 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
-#if defined(CONFIG_ARCH_S390)
+#if defined(CONFIG_S390)
 	{
 		.ctl_name	= KERN_SPIN_RETRY,
 		.procname	= "spin_retry",
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1cedc2356b78..80598cfd728c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -32,7 +32,7 @@ config MAGIC_SYSRQ
 config LOG_BUF_SHIFT
 	int "Kernel log buffer size (16 => 64KB, 17 => 128KB)" if DEBUG_KERNEL
 	range 12 21
-	default 17 if ARCH_S390
+	default 17 if S390
 	default 16 if X86_NUMAQ || IA64
 	default 15 if SMP
 	default 14
-- 
cgit v1.2.3


From a1a5ea70a6e9db6332b27fe2d96666e17aa1436b Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Fri, 6 Jan 2006 00:19:29 -0800
Subject: [PATCH] I2O: changed I2O API to create I2O messages in kernel memory

Changed the I2O API to create I2O messages first in kernel memory and then
transfer it at once over the PCI bus instead of sending each quad-word over
the PCI bus.

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/message/i2o/bus-osm.c    |   21 +-
 drivers/message/i2o/device.c     |   51 +-
 drivers/message/i2o/exec-osm.c   |   93 +-
 drivers/message/i2o/i2o_block.c  |  157 +--
 drivers/message/i2o/i2o_config.c |  169 ++--
 drivers/message/i2o/i2o_scsi.c   |   50 +-
 drivers/message/i2o/iop.c        |  296 +++---
 drivers/message/i2o/pci.c        |    1 +
 include/linux/i2o.h              | 1947 ++++++++++++++++++++------------------
 9 files changed, 1450 insertions(+), 1335 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/message/i2o/bus-osm.c b/drivers/message/i2o/bus-osm.c
index 151b228e1cb3..ce039d322fd0 100644
--- a/drivers/message/i2o/bus-osm.c
+++ b/drivers/message/i2o/bus-osm.c
@@ -39,18 +39,18 @@ static struct i2o_class_id i2o_bus_class_id[] = {
  */
 static int i2o_bus_scan(struct i2o_device *dev)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
 		return -ETIMEDOUT;
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.tid,
-	       &msg->u.head[1]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BUS_SCAN << 24 | HOST_TID << 12 | dev->lct_data.
+			tid);
 
-	return i2o_msg_post_wait(dev->iop, m, 60);
+	return i2o_msg_post_wait(dev->iop, msg, 60);
 };
 
 /**
@@ -59,8 +59,9 @@ static int i2o_bus_scan(struct i2o_device *dev)
  *
  *	Returns count.
  */
-static ssize_t i2o_bus_store_scan(struct device *d, struct device_attribute *attr, const char *buf,
-				  size_t count)
+static ssize_t i2o_bus_store_scan(struct device *d,
+				  struct device_attribute *attr,
+				  const char *buf, size_t count)
 {
 	struct i2o_device *i2o_dev = to_i2o_device(d);
 	int rc;
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index 8eb50cdb8ae1..002ae0ed8966 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -35,18 +35,18 @@
 static inline int i2o_device_issue_claim(struct i2o_device *dev, u32 cmd,
 					 u32 type)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid, &msg->u.head[1]);
-	writel(type, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+		cpu_to_le32(cmd << 24 | HOST_TID << 12 | dev->lct_data.tid);
+	msg->body[0] = cpu_to_le32(type);
 
-	return i2o_msg_post_wait(dev->iop, m, 60);
+	return i2o_msg_post_wait(dev->iop, msg, 60);
 }
 
 /**
@@ -419,10 +419,9 @@ int i2o_device_parse_lct(struct i2o_controller *c)
  *	ResultCount, ErrorInfoSize, BlockStatus and BlockSize.
  */
 int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
-			  int oplen, void *reslist, int reslen)
+		   int oplen, void *reslist, int reslen)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	u32 *res32 = (u32 *) reslist;
 	u32 *restmp = (u32 *) reslist;
 	int len = 0;
@@ -437,26 +436,28 @@ int i2o_parm_issue(struct i2o_device *i2o_dev, int cmd, void *oplist,
 	if (i2o_dma_alloc(dev, &res, reslen, GFP_KERNEL))
 		return -ENOMEM;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY) {
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg)) {
 		i2o_dma_free(dev, &res);
-		return -ETIMEDOUT;
+		return PTR_ERR(msg);
 	}
 
 	i = 0;
-	writel(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid,
-	       &msg->u.head[1]);
-	writel(0, &msg->body[i++]);
-	writel(0x4C000000 | oplen, &msg->body[i++]);	/* OperationList */
-	memcpy_toio(&msg->body[i], oplist, oplen);
+	msg->u.head[1] =
+	    cpu_to_le32(cmd << 24 | HOST_TID << 12 | i2o_dev->lct_data.tid);
+	msg->body[i++] = cpu_to_le32(0x00000000);
+	msg->body[i++] = cpu_to_le32(0x4C000000 | oplen);	/* OperationList */
+	memcpy(&msg->body[i], oplist, oplen);
+
 	i += (oplen / 4 + (oplen % 4 ? 1 : 0));
-	writel(0xD0000000 | res.len, &msg->body[i++]);	/* ResultList */
-	writel(res.phys, &msg->body[i++]);
+	msg->body[i++] = cpu_to_le32(0xD0000000 | res.len);	/* ResultList */
+	msg->body[i++] = cpu_to_le32(res.phys);
 
-	writel(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) |
-	       SGL_OFFSET_5, &msg->u.head[0]);
+	msg->u.head[0] =
+	    cpu_to_le32(I2O_MESSAGE_SIZE(i + sizeof(struct i2o_message) / 4) |
+			SGL_OFFSET_5);
 
-	rc = i2o_msg_post_wait_mem(c, m, 10, &res);
+	rc = i2o_msg_post_wait_mem(c, msg, 10, &res);
 
 	/* This only looks like a memory leak - don't "fix" it. */
 	if (rc == -ETIMEDOUT)
diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c
index 9c339a2505b0..71a09332e7c0 100644
--- a/drivers/message/i2o/exec-osm.c
+++ b/drivers/message/i2o/exec-osm.c
@@ -114,13 +114,12 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait)
  *	Returns 0 on success, negative error code on timeout or positive error
  *	code from reply.
  */
-int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long
-			  timeout, struct i2o_dma *dma)
+int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg,
+			  unsigned long timeout, struct i2o_dma *dma)
 {
 	DECLARE_WAIT_QUEUE_HEAD(wq);
 	struct i2o_exec_wait *wait;
 	static u32 tcntxt = 0x80000000;
-	struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m);
 	int rc = 0;
 
 	wait = i2o_exec_wait_alloc();
@@ -138,15 +137,15 @@ int i2o_msg_post_wait_mem(struct i2o_controller *c, u32 m, unsigned long
 	 * We will only use transaction contexts >= 0x80000000 for POST WAIT,
 	 * so we could find a POST WAIT reply easier in the reply handler.
 	 */
-	writel(i2o_exec_driver.context, &msg->u.s.icntxt);
+	msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
 	wait->tcntxt = tcntxt++;
-	writel(wait->tcntxt, &msg->u.s.tcntxt);
+	msg->u.s.tcntxt = cpu_to_le32(wait->tcntxt);
 
 	/*
 	 * Post the message to the controller. At some point later it will
 	 * return. If we time out before it returns then complete will be zero.
 	 */
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	if (!wait->complete) {
 		wait->wq = &wq;
@@ -266,7 +265,8 @@ static int i2o_msg_post_wait_complete(struct i2o_controller *c, u32 m,
  *
  *	Returns number of bytes printed into buffer.
  */
-static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t i2o_exec_show_vendor_id(struct device *d,
+				       struct device_attribute *attr, char *buf)
 {
 	struct i2o_device *dev = to_i2o_device(d);
 	u16 id;
@@ -286,7 +286,9 @@ static ssize_t i2o_exec_show_vendor_id(struct device *d, struct device_attribute
  *
  *	Returns number of bytes printed into buffer.
  */
-static ssize_t i2o_exec_show_product_id(struct device *d, struct device_attribute *attr, char *buf)
+static ssize_t i2o_exec_show_product_id(struct device *d,
+					struct device_attribute *attr,
+					char *buf)
 {
 	struct i2o_device *dev = to_i2o_device(d);
 	u16 id;
@@ -385,23 +387,22 @@ static int i2o_exec_reply(struct i2o_controller *c, u32 m,
 	u32 context;
 
 	if (le32_to_cpu(msg->u.head[0]) & MSG_FAIL) {
+		struct i2o_message __iomem *pmsg;
+		u32 pm;
+
 		/*
 		 * If Fail bit is set we must take the transaction context of
 		 * the preserved message to find the right request again.
 		 */
-		struct i2o_message __iomem *pmsg;
-		u32 pm;
 
 		pm = le32_to_cpu(msg->body[3]);
-
 		pmsg = i2o_msg_in_to_virt(c, pm);
+		context = readl(&pmsg->u.s.tcntxt);
 
 		i2o_report_status(KERN_INFO, "i2o_core", msg);
 
-		context = readl(&pmsg->u.s.tcntxt);
-
 		/* Release the preserved msg */
-		i2o_msg_nop(c, pm);
+		i2o_msg_nop_mfa(c, pm);
 	} else
 		context = le32_to_cpu(msg->u.s.tcntxt);
 
@@ -462,25 +463,26 @@ static void i2o_exec_event(struct i2o_event *evt)
  */
 int i2o_exec_lct_get(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	int i = 0;
 	int rc = -EAGAIN;
 
 	for (i = 1; i <= I2O_LCT_GET_TRIES; i++) {
-		m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-		if (m == I2O_QUEUE_EMPTY)
-			return -ETIMEDOUT;
-
-		writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
-		writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
-		       &msg->u.head[1]);
-		writel(0xffffffff, &msg->body[0]);
-		writel(0x00000000, &msg->body[1]);
-		writel(0xd0000000 | c->dlct.len, &msg->body[2]);
-		writel(c->dlct.phys, &msg->body[3]);
-
-		rc = i2o_msg_post_wait(c, m, I2O_TIMEOUT_LCT_GET);
+		msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+		if (IS_ERR(msg))
+			return PTR_ERR(msg);
+
+		msg->u.head[0] =
+		    cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+		msg->u.head[1] =
+		    cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 |
+				ADAPTER_TID);
+		msg->body[0] = cpu_to_le32(0xffffffff);
+		msg->body[1] = cpu_to_le32(0x00000000);
+		msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len);
+		msg->body[3] = cpu_to_le32(c->dlct.phys);
+
+		rc = i2o_msg_post_wait(c, msg, I2O_TIMEOUT_LCT_GET);
 		if (rc < 0)
 			break;
 
@@ -506,29 +508,28 @@ static int i2o_exec_lct_notify(struct i2o_controller *c, u32 change_ind)
 {
 	i2o_status_block *sb = c->status_block.virt;
 	struct device *dev;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
 	dev = &c->pdev->dev;
 
 	if (i2o_dma_realloc(dev, &c->dlct, sb->expected_lct_size, GFP_KERNEL))
 		return -ENOMEM;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
-
-	writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
-	writel(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_exec_driver.context, &msg->u.s.icntxt);
-	writel(0, &msg->u.s.tcntxt);	/* FIXME */
-	writel(0xffffffff, &msg->body[0]);
-	writel(change_ind, &msg->body[1]);
-	writel(0xd0000000 | c->dlct.len, &msg->body[2]);
-	writel(c->dlct.phys, &msg->body[3]);
-
-	i2o_msg_post(c, m);
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+	msg->u.head[1] = cpu_to_le32(I2O_CMD_LCT_NOTIFY << 24 | HOST_TID << 12 |
+				     ADAPTER_TID);
+	msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+	msg->body[0] = cpu_to_le32(0xffffffff);
+	msg->body[1] = cpu_to_le32(change_ind);
+	msg->body[2] = cpu_to_le32(0xd0000000 | c->dlct.len);
+	msg->body[3] = cpu_to_le32(c->dlct.phys);
+
+	i2o_msg_post(c, msg);
 
 	return 0;
 };
diff --git a/drivers/message/i2o/i2o_block.c b/drivers/message/i2o/i2o_block.c
index f283b5bafdd3..2bd15c70773b 100644
--- a/drivers/message/i2o/i2o_block.c
+++ b/drivers/message/i2o/i2o_block.c
@@ -130,20 +130,20 @@ static int i2o_block_remove(struct device *dev)
  */
 static int i2o_block_device_flush(struct i2o_device *dev)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->lct_data.tid,
-	       &msg->u.head[1]);
-	writel(60 << 16, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BLOCK_CFLUSH << 24 | HOST_TID << 12 | dev->
+			lct_data.tid);
+	msg->body[0] = cpu_to_le32(60 << 16);
 	osm_debug("Flushing...\n");
 
-	return i2o_msg_post_wait(dev->iop, m, 60);
+	return i2o_msg_post_wait(dev->iop, msg, 60);
 };
 
 /**
@@ -181,21 +181,21 @@ static int i2o_block_issue_flush(request_queue_t * queue, struct gendisk *disk,
  */
 static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
-
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
-
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->lct_data.tid,
-	       &msg->u.head[1]);
-	writel(-1, &msg->body[0]);
-	writel(0, &msg->body[1]);
+	struct i2o_message *msg;
+
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BLOCK_MMOUNT << 24 | HOST_TID << 12 | dev->
+			lct_data.tid);
+	msg->body[0] = cpu_to_le32(-1);
+	msg->body[1] = cpu_to_le32(0x00000000);
 	osm_debug("Mounting...\n");
 
-	return i2o_msg_post_wait(dev->iop, m, 2);
+	return i2o_msg_post_wait(dev->iop, msg, 2);
 };
 
 /**
@@ -210,20 +210,20 @@ static int i2o_block_device_mount(struct i2o_device *dev, u32 media_id)
  */
 static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg) == I2O_QUEUE_EMPTY)
+		return PTR_ERR(msg);
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
-	       &msg->u.head[1]);
-	writel(-1, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BLOCK_MLOCK << 24 | HOST_TID << 12 | dev->
+			lct_data.tid);
+	msg->body[0] = cpu_to_le32(-1);
 	osm_debug("Locking...\n");
 
-	return i2o_msg_post_wait(dev->iop, m, 2);
+	return i2o_msg_post_wait(dev->iop, msg, 2);
 };
 
 /**
@@ -238,20 +238,20 @@ static int i2o_block_device_lock(struct i2o_device *dev, u32 media_id)
  */
 static int i2o_block_device_unlock(struct i2o_device *dev, u32 media_id)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(dev->iop, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(dev->iop, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->lct_data.tid,
-	       &msg->u.head[1]);
-	writel(media_id, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BLOCK_MUNLOCK << 24 | HOST_TID << 12 | dev->
+			lct_data.tid);
+	msg->body[0] = cpu_to_le32(media_id);
 	osm_debug("Unlocking...\n");
 
-	return i2o_msg_post_wait(dev->iop, m, 2);
+	return i2o_msg_post_wait(dev->iop, msg, 2);
 };
 
 /**
@@ -267,21 +267,21 @@ static int i2o_block_device_power(struct i2o_block_device *dev, u8 op)
 {
 	struct i2o_device *i2o_dev = dev->i2o_dev;
 	struct i2o_controller *c = i2o_dev->iop;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	int rc;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->lct_data.
-	       tid, &msg->u.head[1]);
-	writel(op << 24, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_BLOCK_POWER << 24 | HOST_TID << 12 | i2o_dev->
+			lct_data.tid);
+	msg->body[0] = cpu_to_le32(op << 24);
 	osm_debug("Power...\n");
 
-	rc = i2o_msg_post_wait(c, m, 60);
+	rc = i2o_msg_post_wait(c, msg, 60);
 	if (!rc)
 		dev->power = op;
 
@@ -331,7 +331,7 @@ static inline void i2o_block_request_free(struct i2o_block_request *ireq)
  */
 static inline int i2o_block_sglist_alloc(struct i2o_controller *c,
 					 struct i2o_block_request *ireq,
-					 u32 __iomem ** mptr)
+					 u32 ** mptr)
 {
 	int nents;
 	enum dma_data_direction direction;
@@ -745,10 +745,9 @@ static int i2o_block_transfer(struct request *req)
 	struct i2o_block_device *dev = req->rq_disk->private_data;
 	struct i2o_controller *c;
 	int tid = dev->i2o_dev->lct_data.tid;
-	struct i2o_message __iomem *msg;
-	u32 __iomem *mptr;
+	struct i2o_message *msg;
+	u32 *mptr;
 	struct i2o_block_request *ireq = req->special;
-	u32 m;
 	u32 tcntxt;
 	u32 sgl_offset = SGL_OFFSET_8;
 	u32 ctl_flags = 0x00000000;
@@ -763,9 +762,9 @@ static int i2o_block_transfer(struct request *req)
 
 	c = dev->i2o_dev->iop;
 
-	m = i2o_msg_get(c, &msg);
-	if (m == I2O_QUEUE_EMPTY) {
-		rc = -EBUSY;
+	msg = i2o_msg_get(c);
+	if (IS_ERR(msg)) {
+		rc = PTR_ERR(msg);
 		goto exit;
 	}
 
@@ -775,8 +774,8 @@ static int i2o_block_transfer(struct request *req)
 		goto nop_msg;
 	}
 
-	writel(i2o_block_driver.context, &msg->u.s.icntxt);
-	writel(tcntxt, &msg->u.s.tcntxt);
+	msg->u.s.icntxt = cpu_to_le32(i2o_block_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(tcntxt);
 
 	mptr = &msg->body[0];
 
@@ -834,11 +833,11 @@ static int i2o_block_transfer(struct request *req)
 
 		sgl_offset = SGL_OFFSET_12;
 
-		writel(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid,
-		       &msg->u.head[1]);
+		msg->u.head[1] =
+		    cpu_to_le32(I2O_CMD_PRIVATE << 24 | HOST_TID << 12 | tid);
 
-		writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++);
-		writel(tid, mptr++);
+		*mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC);
+		*mptr++ = cpu_to_le32(tid);
 
 		/*
 		 * ENABLE_DISCONNECT
@@ -853,22 +852,24 @@ static int i2o_block_transfer(struct request *req)
 			scsi_flags = 0xa0a0000a;
 		}
 
-		writel(scsi_flags, mptr++);
+		*mptr++ = cpu_to_le32(scsi_flags);
 
 		*((u32 *) & cmd[2]) = cpu_to_be32(req->sector * hwsec);
 		*((u16 *) & cmd[7]) = cpu_to_be16(req->nr_sectors * hwsec);
 
-		memcpy_toio(mptr, cmd, 10);
+		memcpy(mptr, cmd, 10);
 		mptr += 4;
-		writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++);
+		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
 	} else
 #endif
 	{
-		writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]);
-		writel(ctl_flags, mptr++);
-		writel(req->nr_sectors << KERNEL_SECTOR_SHIFT, mptr++);
-		writel((u32) (req->sector << KERNEL_SECTOR_SHIFT), mptr++);
-		writel(req->sector >> (32 - KERNEL_SECTOR_SHIFT), mptr++);
+		msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
+		*mptr++ = cpu_to_le32(ctl_flags);
+		*mptr++ = cpu_to_le32(req->nr_sectors << KERNEL_SECTOR_SHIFT);
+		*mptr++ =
+		    cpu_to_le32((u32) (req->sector << KERNEL_SECTOR_SHIFT));
+		*mptr++ =
+		    cpu_to_le32(req->sector >> (32 - KERNEL_SECTOR_SHIFT));
 	}
 
 	if (!i2o_block_sglist_alloc(c, ireq, &mptr)) {
@@ -876,13 +877,13 @@ static int i2o_block_transfer(struct request *req)
 		goto context_remove;
 	}
 
-	writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) |
-	       sgl_offset, &msg->u.head[0]);
+	msg->u.head[0] =
+	    cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset);
 
 	list_add_tail(&ireq->queue, &dev->open_queue);
 	dev->open_queue_depth++;
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	return 0;
 
@@ -890,7 +891,7 @@ static int i2o_block_transfer(struct request *req)
 	i2o_cntxt_list_remove(c, req);
 
       nop_msg:
-	i2o_msg_nop(c, m);
+	i2o_msg_nop(c, msg);
 
       exit:
 	return rc;
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index 3c3a7abebb1b..4fe73d628c5b 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -230,8 +230,7 @@ static int i2o_cfg_swdl(unsigned long arg)
 	struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
 	unsigned char maxfrag = 0, curfrag = 1;
 	struct i2o_dma buffer;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	unsigned int status = 0, swlen = 0, fragsize = 8192;
 	struct i2o_controller *c;
 
@@ -257,31 +256,34 @@ static int i2o_cfg_swdl(unsigned long arg)
 	if (!c)
 		return -ENXIO;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -EBUSY;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
-		i2o_msg_nop(c, m);
+		i2o_msg_nop(c, msg);
 		return -ENOMEM;
 	}
 
 	__copy_from_user(buffer.virt, kxfer.buf, fragsize);
 
-	writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
-	writel(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_config_driver.context, &msg->u.head[2]);
-	writel(0, &msg->u.head[3]);
-	writel((((u32) kxfer.flags) << 24) | (((u32) kxfer.sw_type) << 16) |
-	       (((u32) maxfrag) << 8) | (((u32) curfrag)), &msg->body[0]);
-	writel(swlen, &msg->body[1]);
-	writel(kxfer.sw_id, &msg->body[2]);
-	writel(0xD0000000 | fragsize, &msg->body[3]);
-	writel(buffer.phys, &msg->body[4]);
+	msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SW_DOWNLOAD << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
+	msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+	msg->u.head[3] = cpu_to_le32(0);
+	msg->body[0] =
+	    cpu_to_le32((((u32) kxfer.flags) << 24) | (((u32) kxfer.
+							sw_type) << 16) |
+			(((u32) maxfrag) << 8) | (((u32) curfrag)));
+	msg->body[1] = cpu_to_le32(swlen);
+	msg->body[2] = cpu_to_le32(kxfer.sw_id);
+	msg->body[3] = cpu_to_le32(0xD0000000 | fragsize);
+	msg->body[4] = cpu_to_le32(buffer.phys);
 
 	osm_debug("swdl frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
-	status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
+	status = i2o_msg_post_wait_mem(c, msg, 60, &buffer);
 
 	if (status != -ETIMEDOUT)
 		i2o_dma_free(&c->pdev->dev, &buffer);
@@ -302,8 +304,7 @@ static int i2o_cfg_swul(unsigned long arg)
 	struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
 	unsigned char maxfrag = 0, curfrag = 1;
 	struct i2o_dma buffer;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	unsigned int status = 0, swlen = 0, fragsize = 8192;
 	struct i2o_controller *c;
 	int ret = 0;
@@ -330,30 +331,30 @@ static int i2o_cfg_swul(unsigned long arg)
 	if (!c)
 		return -ENXIO;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -EBUSY;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	if (i2o_dma_alloc(&c->pdev->dev, &buffer, fragsize, GFP_KERNEL)) {
-		i2o_msg_nop(c, m);
+		i2o_msg_nop(c, msg);
 		return -ENOMEM;
 	}
 
-	writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_7, &msg->u.head[0]);
-	writel(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_config_driver.context, &msg->u.head[2]);
-	writel(0, &msg->u.head[3]);
-	writel((u32) kxfer.flags << 24 | (u32) kxfer.
-	       sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag,
-	       &msg->body[0]);
-	writel(swlen, &msg->body[1]);
-	writel(kxfer.sw_id, &msg->body[2]);
-	writel(0xD0000000 | fragsize, &msg->body[3]);
-	writel(buffer.phys, &msg->body[4]);
+	msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_7);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SW_UPLOAD << 24 | HOST_TID << 12 | ADAPTER_TID);
+	msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+	msg->u.head[3] = cpu_to_le32(0);
+	msg->body[0] =
+	    cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.
+			sw_type << 16 | (u32) maxfrag << 8 | (u32) curfrag);
+	msg->body[1] = cpu_to_le32(swlen);
+	msg->body[2] = cpu_to_le32(kxfer.sw_id);
+	msg->body[3] = cpu_to_le32(0xD0000000 | fragsize);
+	msg->body[4] = cpu_to_le32(buffer.phys);
 
 	osm_debug("swul frag %d/%d (size %d)\n", curfrag, maxfrag, fragsize);
-	status = i2o_msg_post_wait_mem(c, m, 60, &buffer);
+	status = i2o_msg_post_wait_mem(c, msg, 60, &buffer);
 
 	if (status != I2O_POST_WAIT_OK) {
 		if (status != -ETIMEDOUT)
@@ -380,8 +381,7 @@ static int i2o_cfg_swdel(unsigned long arg)
 	struct i2o_controller *c;
 	struct i2o_sw_xfer kxfer;
 	struct i2o_sw_xfer __user *pxfer = (struct i2o_sw_xfer __user *)arg;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	unsigned int swlen;
 	int token;
 
@@ -395,21 +395,21 @@ static int i2o_cfg_swdel(unsigned long arg)
 	if (!c)
 		return -ENXIO;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -EBUSY;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_config_driver.context, &msg->u.head[2]);
-	writel(0, &msg->u.head[3]);
-	writel((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16,
-	       &msg->body[0]);
-	writel(swlen, &msg->body[1]);
-	writel(kxfer.sw_id, &msg->body[2]);
+	msg->u.head[0] = cpu_to_le32(SEVEN_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SW_REMOVE << 24 | HOST_TID << 12 | ADAPTER_TID);
+	msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+	msg->u.head[3] = cpu_to_le32(0);
+	msg->body[0] =
+	    cpu_to_le32((u32) kxfer.flags << 24 | (u32) kxfer.sw_type << 16);
+	msg->body[1] = cpu_to_le32(swlen);
+	msg->body[2] = cpu_to_le32(kxfer.sw_id);
 
-	token = i2o_msg_post_wait(c, m, 10);
+	token = i2o_msg_post_wait(c, msg, 10);
 
 	if (token != I2O_POST_WAIT_OK) {
 		osm_info("swdel failed, DetailedStatus = %d\n", token);
@@ -423,25 +423,24 @@ static int i2o_cfg_validate(unsigned long arg)
 {
 	int token;
 	int iop = (int)arg;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	struct i2o_controller *c;
 
 	c = i2o_find_iop(iop);
 	if (!c)
 		return -ENXIO;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -EBUSY;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop,
-	       &msg->u.head[1]);
-	writel(i2o_config_driver.context, &msg->u.head[2]);
-	writel(0, &msg->u.head[3]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_CONFIG_VALIDATE << 24 | HOST_TID << 12 | iop);
+	msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+	msg->u.head[3] = cpu_to_le32(0);
 
-	token = i2o_msg_post_wait(c, m, 10);
+	token = i2o_msg_post_wait(c, msg, 10);
 
 	if (token != I2O_POST_WAIT_OK) {
 		osm_info("Can't validate configuration, ErrorStatus = %d\n",
@@ -454,8 +453,7 @@ static int i2o_cfg_validate(unsigned long arg)
 
 static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	struct i2o_evt_id __user *pdesc = (struct i2o_evt_id __user *)arg;
 	struct i2o_evt_id kdesc;
 	struct i2o_controller *c;
@@ -474,18 +472,19 @@ static int i2o_cfg_evt_reg(unsigned long arg, struct file *fp)
 	if (!d)
 		return -ENODEV;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -EBUSY;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | kdesc.tid,
-	       &msg->u.head[1]);
-	writel(i2o_config_driver.context, &msg->u.head[2]);
-	writel(i2o_cntxt_list_add(c, fp->private_data), &msg->u.head[3]);
-	writel(kdesc.evt_mask, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 |
+			kdesc.tid);
+	msg->u.head[2] = cpu_to_le32(i2o_config_driver.context);
+	msg->u.head[3] = cpu_to_le32(i2o_cntxt_list_add(c, fp->private_data));
+	msg->body[0] = cpu_to_le32(kdesc.evt_mask);
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	return 0;
 }
@@ -537,7 +536,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 	u32 sg_index = 0;
 	i2o_status_block *sb;
 	struct i2o_message *msg;
-	u32 m;
 	unsigned int iop;
 
 	cmd = (struct i2o_cmd_passthru32 __user *)arg;
@@ -553,7 +551,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 		return -ENXIO;
 	}
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
 
 	sb = c->status_block.virt;
 
@@ -595,8 +593,8 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 
 	sg_offset = (msg->u.head[0] >> 4) & 0x0f;
 
-	writel(i2o_config_driver.context, &msg->u.s.icntxt);
-	writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
+	msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply));
 
 	memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
 	if (sg_offset) {
@@ -662,7 +660,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 		}
 	}
 
-	rcode = i2o_msg_post_wait(c, m, 60);
+	rcode = i2o_msg_post_wait(c, msg, 60);
 	if (rcode)
 		goto sg_list_cleanup;
 
@@ -780,8 +778,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 	u32 i = 0;
 	void *p = NULL;
 	i2o_status_block *sb;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	unsigned int iop;
 
 	if (get_user(iop, &cmd->iop) || get_user(user_msg, &cmd->msg))
@@ -793,7 +790,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 		return -ENXIO;
 	}
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
 
 	sb = c->status_block.virt;
 
@@ -830,8 +827,8 @@ static int i2o_cfg_passthru(unsigned long arg)
 
 	sg_offset = (msg->u.head[0] >> 4) & 0x0f;
 
-	writel(i2o_config_driver.context, &msg->u.s.icntxt);
-	writel(i2o_cntxt_list_add(c, reply), &msg->u.s.tcntxt);
+	msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply));
 
 	memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
 	if (sg_offset) {
@@ -894,7 +891,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 		}
 	}
 
-	rcode = i2o_msg_post_wait(c, m, 60);
+	rcode = i2o_msg_post_wait(c, msg, 60);
 	if (rcode)
 		goto sg_list_cleanup;
 
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index 9f1744c3933b..7a784fd60804 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -510,8 +510,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 	struct i2o_controller *c;
 	struct i2o_device *i2o_dev;
 	int tid;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	/*
 	 * ENABLE_DISCONNECT
 	 * SIMPLE_TAG
@@ -519,7 +518,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 	 */
 	u32 scsi_flags = 0x20a00000;
 	u32 sgl_offset;
-	u32 __iomem *mptr;
+	u32 *mptr;
 	u32 cmd = I2O_CMD_SCSI_EXEC << 24;
 	int rc = 0;
 
@@ -576,8 +575,8 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 	 *      throw it back to the scsi layer
 	 */
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY) {
+	msg = i2o_msg_get(c);
+	if (IS_ERR(msg)) {
 		rc = SCSI_MLQUEUE_HOST_BUSY;
 		goto exit;
 	}
@@ -617,16 +616,16 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 		if (sgl_offset == SGL_OFFSET_10)
 			sgl_offset = SGL_OFFSET_12;
 		cmd = I2O_CMD_PRIVATE << 24;
-		writel(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC, mptr++);
-		writel(adpt_flags | tid, mptr++);
+		*mptr++ = cpu_to_le32(I2O_VENDOR_DPT << 16 | I2O_CMD_SCSI_EXEC);
+		*mptr++ = cpu_to_le32(adpt_flags | tid);
 	}
 #endif
 
-	writel(cmd | HOST_TID << 12 | tid, &msg->u.head[1]);
-	writel(i2o_scsi_driver.context, &msg->u.s.icntxt);
+	msg->u.head[1] = cpu_to_le32(cmd | HOST_TID << 12 | tid);
+	msg->u.s.icntxt = cpu_to_le32(i2o_scsi_driver.context);
 
 	/* We want the SCSI control block back */
-	writel(i2o_cntxt_list_add(c, SCpnt), &msg->u.s.tcntxt);
+	msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, SCpnt));
 
 	/* LSI_920_PCI_QUIRK
 	 *
@@ -649,15 +648,15 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 	   }
 	 */
 
-	writel(scsi_flags | SCpnt->cmd_len, mptr++);
+	*mptr++ = cpu_to_le32(scsi_flags | SCpnt->cmd_len);
 
 	/* Write SCSI command into the message - always 16 byte block */
-	memcpy_toio(mptr, SCpnt->cmnd, 16);
+	memcpy(mptr, SCpnt->cmnd, 16);
 	mptr += 4;
 
 	if (sgl_offset != SGL_OFFSET_0) {
 		/* write size of data addressed by SGL */
-		writel(SCpnt->request_bufflen, mptr++);
+		*mptr++ = cpu_to_le32(SCpnt->request_bufflen);
 
 		/* Now fill in the SGList and command */
 		if (SCpnt->use_sg) {
@@ -676,11 +675,11 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 	}
 
 	/* Stick the headers on */
-	writel(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset,
-	       &msg->u.head[0]);
+	msg->u.head[0] =
+	    cpu_to_le32(I2O_MESSAGE_SIZE(mptr - &msg->u.head[0]) | sgl_offset);
 
 	/* Queue the message */
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	osm_debug("Issued %ld\n", SCpnt->serial_number);
 
@@ -688,7 +687,7 @@ static int i2o_scsi_queuecommand(struct scsi_cmnd *SCpnt,
 
       nomem:
 	rc = -ENOMEM;
-	i2o_msg_nop(c, m);
+	i2o_msg_nop(c, msg);
 
       exit:
 	return rc;
@@ -709,8 +708,7 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt)
 {
 	struct i2o_device *i2o_dev;
 	struct i2o_controller *c;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	int tid;
 	int status = FAILED;
 
@@ -720,16 +718,16 @@ static int i2o_scsi_abort(struct scsi_cmnd *SCpnt)
 	c = i2o_dev->iop;
 	tid = i2o_dev->lct_data.tid;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
 		return SCSI_MLQUEUE_HOST_BUSY;
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid,
-	       &msg->u.head[1]);
-	writel(i2o_cntxt_list_get_ptr(c, SCpnt), &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SCSI_ABORT << 24 | HOST_TID << 12 | tid);
+	msg->body[0] = cpu_to_le32(i2o_cntxt_list_get_ptr(c, SCpnt));
 
-	if (i2o_msg_post_wait(c, m, I2O_TIMEOUT_SCSI_SCB_ABORT))
+	if (i2o_msg_post_wait(c, msg, I2O_TIMEOUT_SCSI_SCB_ABORT))
 		status = SUCCESS;
 
 	return status;
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index 4eb53258842e..f86abb42bf89 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -46,27 +46,6 @@ static struct i2o_dma i2o_systab;
 
 static int i2o_hrt_get(struct i2o_controller *c);
 
-/**
- *	i2o_msg_nop - Returns a message which is not used
- *	@c: I2O controller from which the message was created
- *	@m: message which should be returned
- *
- *	If you fetch a message via i2o_msg_get, and can't use it, you must
- *	return the message with this function. Otherwise the message frame
- *	is lost.
- */
-void i2o_msg_nop(struct i2o_controller *c, u32 m)
-{
-	struct i2o_message __iomem *msg = i2o_msg_in_to_virt(c, m);
-
-	writel(THREE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(0, &msg->u.head[2]);
-	writel(0, &msg->u.head[3]);
-	i2o_msg_post(c, m);
-};
-
 /**
  *	i2o_msg_get_wait - obtain an I2O message from the IOP
  *	@c: I2O controller
@@ -81,22 +60,21 @@ void i2o_msg_nop(struct i2o_controller *c, u32 m)
  *	address from the read port (see the i2o spec). If no message is
  *	available returns I2O_QUEUE_EMPTY and msg is leaved untouched.
  */
-u32 i2o_msg_get_wait(struct i2o_controller *c,
-		     struct i2o_message __iomem ** msg, int wait)
+struct i2o_message *i2o_msg_get_wait(struct i2o_controller *c, int wait)
 {
 	unsigned long timeout = jiffies + wait * HZ;
-	u32 m;
+	struct i2o_message *msg;
 
-	while ((m = i2o_msg_get(c, msg)) == I2O_QUEUE_EMPTY) {
+	while (IS_ERR(msg = i2o_msg_get(c))) {
 		if (time_after(jiffies, timeout)) {
 			osm_debug("%s: Timeout waiting for message frame.\n",
 				  c->name);
-			return I2O_QUEUE_EMPTY;
+			return ERR_PTR(-ETIMEDOUT);
 		}
 		schedule_timeout_uninterruptible(1);
 	}
 
-	return m;
+	return msg;
 };
 
 #if BITS_PER_LONG == 64
@@ -301,8 +279,7 @@ struct i2o_device *i2o_iop_find_device(struct i2o_controller *c, u16 tid)
  */
 static int i2o_iop_quiesce(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	i2o_status_block *sb = c->status_block.virt;
 	int rc;
 
@@ -313,16 +290,17 @@ static int i2o_iop_quiesce(struct i2o_controller *c)
 	    (sb->iop_state != ADAPTER_STATE_OPERATIONAL))
 		return 0;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SYS_QUIESCE << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
 
 	/* Long timeout needed for quiesce if lots of devices */
-	if ((rc = i2o_msg_post_wait(c, m, 240)))
+	if ((rc = i2o_msg_post_wait(c, msg, 240)))
 		osm_info("%s: Unable to quiesce (status=%#x).\n", c->name, -rc);
 	else
 		osm_debug("%s: Quiesced.\n", c->name);
@@ -342,8 +320,7 @@ static int i2o_iop_quiesce(struct i2o_controller *c)
  */
 static int i2o_iop_enable(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	i2o_status_block *sb = c->status_block.virt;
 	int rc;
 
@@ -353,16 +330,17 @@ static int i2o_iop_enable(struct i2o_controller *c)
 	if (sb->iop_state != ADAPTER_STATE_READY)
 		return -EINVAL;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SYS_ENABLE << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
 
 	/* How long of a timeout do we need? */
-	if ((rc = i2o_msg_post_wait(c, m, 240)))
+	if ((rc = i2o_msg_post_wait(c, msg, 240)))
 		osm_err("%s: Could not enable (status=%#x).\n", c->name, -rc);
 	else
 		osm_debug("%s: Enabled.\n", c->name);
@@ -413,22 +391,22 @@ static inline void i2o_iop_enable_all(void)
  */
 static int i2o_iop_clear(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	int rc;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	/* Quiesce all IOPs first */
 	i2o_iop_quiesce_all();
 
-	writel(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
+	msg->u.head[0] = cpu_to_le32(FOUR_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_ADAPTER_CLEAR << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
 
-	if ((rc = i2o_msg_post_wait(c, m, 30)))
+	if ((rc = i2o_msg_post_wait(c, msg, 30)))
 		osm_info("%s: Unable to clear (status=%#x).\n", c->name, -rc);
 	else
 		osm_debug("%s: Cleared.\n", c->name);
@@ -446,13 +424,13 @@ static int i2o_iop_clear(struct i2o_controller *c)
  *	Clear and (re)initialize IOP's outbound queue and post the message
  *	frames to the IOP.
  *
- *	Returns 0 on success or a negative errno code on failure.
+ *	Returns 0 on success or negative error code on failure.
  */
 static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
 {
-	volatile u8 *status = c->status.virt;
 	u32 m;
-	struct i2o_message __iomem *msg;
+	volatile u8 *status = c->status.virt;
+	struct i2o_message *msg;
 	ulong timeout;
 	int i;
 
@@ -460,23 +438,24 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
 
 	memset(c->status.virt, 0, 4);
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
-
-	writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6, &msg->u.head[0]);
-	writel(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_exec_driver.context, &msg->u.s.icntxt);
-	writel(0x00000000, &msg->u.s.tcntxt);
-	writel(PAGE_SIZE, &msg->body[0]);
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
+
+	msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_6);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_OUTBOUND_INIT << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
+	msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+	msg->body[0] = cpu_to_le32(PAGE_SIZE);
 	/* Outbound msg frame size in words and Initcode */
-	writel(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80, &msg->body[1]);
-	writel(0xd0000004, &msg->body[2]);
-	writel(i2o_dma_low(c->status.phys), &msg->body[3]);
-	writel(i2o_dma_high(c->status.phys), &msg->body[4]);
+	msg->body[1] = cpu_to_le32(I2O_OUTBOUND_MSG_FRAME_SIZE << 16 | 0x80);
+	msg->body[2] = cpu_to_le32(0xd0000004);
+	msg->body[3] = cpu_to_le32(i2o_dma_low(c->status.phys));
+	msg->body[4] = cpu_to_le32(i2o_dma_high(c->status.phys));
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	timeout = jiffies + I2O_TIMEOUT_INIT_OUTBOUND_QUEUE * HZ;
 	while (*status <= I2O_CMD_IN_PROGRESS) {
@@ -511,34 +490,34 @@ static int i2o_iop_init_outbound_queue(struct i2o_controller *c)
 static int i2o_iop_reset(struct i2o_controller *c)
 {
 	volatile u8 *status = c->status.virt;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	unsigned long timeout;
 	i2o_status_block *sb = c->status_block.virt;
 	int rc = 0;
 
 	osm_debug("%s: Resetting controller\n", c->name);
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	memset(c->status_block.virt, 0, 8);
 
 	/* Quiesce all IOPs first */
 	i2o_iop_quiesce_all();
 
-	writel(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_exec_driver.context, &msg->u.s.icntxt);
-	writel(0, &msg->u.s.tcntxt);	//FIXME: use reasonable transaction context
-	writel(0, &msg->body[0]);
-	writel(0, &msg->body[1]);
-	writel(i2o_dma_low(c->status.phys), &msg->body[2]);
-	writel(i2o_dma_high(c->status.phys), &msg->body[3]);
+	msg->u.head[0] = cpu_to_le32(EIGHT_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_ADAPTER_RESET << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
+	msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+	msg->body[0] = cpu_to_le32(0x00000000);
+	msg->body[1] = cpu_to_le32(0x00000000);
+	msg->body[2] = cpu_to_le32(i2o_dma_low(c->status.phys));
+	msg->body[3] = cpu_to_le32(i2o_dma_high(c->status.phys));
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	/* Wait for a reply */
 	timeout = jiffies + I2O_TIMEOUT_RESET * HZ;
@@ -567,18 +546,15 @@ static int i2o_iop_reset(struct i2o_controller *c)
 		osm_debug("%s: Reset in progress, waiting for reboot...\n",
 			  c->name);
 
-		m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
-		while (m == I2O_QUEUE_EMPTY) {
+		while (IS_ERR(msg = i2o_msg_get_wait(c, I2O_TIMEOUT_RESET))) {
 			if (time_after(jiffies, timeout)) {
 				osm_err("%s: IOP reset timeout.\n", c->name);
-				rc = -ETIMEDOUT;
+				rc = PTR_ERR(msg);
 				goto exit;
 			}
 			schedule_timeout_uninterruptible(1);
-
-			m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_RESET);
 		}
-		i2o_msg_nop(c, m);
+		i2o_msg_nop(c, msg);
 
 		/* from here all quiesce commands are safe */
 		c->no_quiesce = 0;
@@ -686,8 +662,7 @@ static int i2o_iop_activate(struct i2o_controller *c)
  */
 static int i2o_iop_systab_set(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	i2o_status_block *sb = c->status_block.virt;
 	struct device *dev = &c->pdev->dev;
 	struct resource *root;
@@ -735,20 +710,21 @@ static int i2o_iop_systab_set(struct i2o_controller *c)
 		}
 	}
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
 	i2o_systab.phys = dma_map_single(dev, i2o_systab.virt, i2o_systab.len,
 					 PCI_DMA_TODEVICE);
 	if (!i2o_systab.phys) {
-		i2o_msg_nop(c, m);
+		i2o_msg_nop(c, msg);
 		return -ENOMEM;
 	}
 
-	writel(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6, &msg->u.head[0]);
-	writel(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
+	msg->u.head[0] = cpu_to_le32(I2O_MESSAGE_SIZE(12) | SGL_OFFSET_6);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_SYS_TAB_SET << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
 
 	/*
 	 * Provide three SGL-elements:
@@ -760,16 +736,16 @@ static int i2o_iop_systab_set(struct i2o_controller *c)
 	 * same table to everyone. We have to go remap it for them all
 	 */
 
-	writel(c->unit + 2, &msg->body[0]);
-	writel(0, &msg->body[1]);
-	writel(0x54000000 | i2o_systab.len, &msg->body[2]);
-	writel(i2o_systab.phys, &msg->body[3]);
-	writel(0x54000000 | sb->current_mem_size, &msg->body[4]);
-	writel(sb->current_mem_base, &msg->body[5]);
-	writel(0xd4000000 | sb->current_io_size, &msg->body[6]);
-	writel(sb->current_io_base, &msg->body[6]);
+	msg->body[0] = cpu_to_le32(c->unit + 2);
+	msg->body[1] = cpu_to_le32(0x00000000);
+	msg->body[2] = cpu_to_le32(0x54000000 | i2o_systab.len);
+	msg->body[3] = cpu_to_le32(i2o_systab.phys);
+	msg->body[4] = cpu_to_le32(0x54000000 | sb->current_mem_size);
+	msg->body[5] = cpu_to_le32(sb->current_mem_base);
+	msg->body[6] = cpu_to_le32(0xd4000000 | sb->current_io_size);
+	msg->body[6] = cpu_to_le32(sb->current_io_base);
 
-	rc = i2o_msg_post_wait(c, m, 120);
+	rc = i2o_msg_post_wait(c, msg, 120);
 
 	dma_unmap_single(dev, i2o_systab.phys, i2o_systab.len,
 			 PCI_DMA_TODEVICE);
@@ -952,30 +928,30 @@ static int i2o_parse_hrt(struct i2o_controller *c)
  */
 int i2o_status_get(struct i2o_controller *c)
 {
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 	volatile u8 *status_block;
 	unsigned long timeout;
 
 	status_block = (u8 *) c->status_block.virt;
 	memset(c->status_block.virt, 0, sizeof(i2o_status_block));
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(NINE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
-	       &msg->u.head[1]);
-	writel(i2o_exec_driver.context, &msg->u.s.icntxt);
-	writel(0, &msg->u.s.tcntxt);	// FIXME: use resonable transaction context
-	writel(0, &msg->body[0]);
-	writel(0, &msg->body[1]);
-	writel(i2o_dma_low(c->status_block.phys), &msg->body[2]);
-	writel(i2o_dma_high(c->status_block.phys), &msg->body[3]);
-	writel(sizeof(i2o_status_block), &msg->body[4]);	/* always 88 bytes */
+	msg->u.head[0] = cpu_to_le32(NINE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_STATUS_GET << 24 | HOST_TID << 12 |
+			ADAPTER_TID);
+	msg->u.s.icntxt = cpu_to_le32(i2o_exec_driver.context);
+	msg->u.s.tcntxt = cpu_to_le32(0x00000000);
+	msg->body[0] = cpu_to_le32(0x00000000);
+	msg->body[1] = cpu_to_le32(0x00000000);
+	msg->body[2] = cpu_to_le32(i2o_dma_low(c->status_block.phys));
+	msg->body[3] = cpu_to_le32(i2o_dma_high(c->status_block.phys));
+	msg->body[4] = cpu_to_le32(sizeof(i2o_status_block));	/* always 88 bytes */
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	/* Wait for a reply */
 	timeout = jiffies + I2O_TIMEOUT_STATUS_GET * HZ;
@@ -1013,20 +989,20 @@ static int i2o_hrt_get(struct i2o_controller *c)
 	struct device *dev = &c->pdev->dev;
 
 	for (i = 0; i < I2O_HRT_GET_TRIES; i++) {
-		struct i2o_message __iomem *msg;
-		u32 m;
+		struct i2o_message *msg;
 
-		m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-		if (m == I2O_QUEUE_EMPTY)
-			return -ETIMEDOUT;
+		msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+		if (IS_ERR(msg))
+			return PTR_ERR(msg);
 
-		writel(SIX_WORD_MSG_SIZE | SGL_OFFSET_4, &msg->u.head[0]);
-		writel(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 | ADAPTER_TID,
-		       &msg->u.head[1]);
-		writel(0xd0000000 | c->hrt.len, &msg->body[0]);
-		writel(c->hrt.phys, &msg->body[1]);
+		msg->u.head[0] = cpu_to_le32(SIX_WORD_MSG_SIZE | SGL_OFFSET_4);
+		msg->u.head[1] =
+		    cpu_to_le32(I2O_CMD_HRT_GET << 24 | HOST_TID << 12 |
+				ADAPTER_TID);
+		msg->body[0] = cpu_to_le32(0xd0000000 | c->hrt.len);
+		msg->body[1] = cpu_to_le32(c->hrt.phys);
 
-		rc = i2o_msg_post_wait_mem(c, m, 20, &c->hrt);
+		rc = i2o_msg_post_wait_mem(c, msg, 20, &c->hrt);
 
 		if (rc < 0) {
 			osm_err("%s: Unable to get HRT (status=%#x)\n", c->name,
@@ -1056,6 +1032,7 @@ static int i2o_hrt_get(struct i2o_controller *c)
  */
 void i2o_iop_free(struct i2o_controller *c)
 {
+	i2o_pool_free(&c->in_msg);
 	kfree(c);
 };
 
@@ -1080,7 +1057,7 @@ static struct class *i2o_controller_class;
  *	i2o_iop_alloc - Allocate and initialize a i2o_controller struct
  *
  *	Allocate the necessary memory for a i2o_controller struct and
- *	initialize the lists.
+ *	initialize the lists and message mempool.
  *
  *	Returns a pointer to the I2O controller or a negative error code on
  *	failure.
@@ -1089,6 +1066,7 @@ struct i2o_controller *i2o_iop_alloc(void)
 {
 	static int unit = 0;	/* 0 and 1 are NULL IOP and Local Host */
 	struct i2o_controller *c;
+	char poolname[32];
 
 	c = kmalloc(sizeof(*c), GFP_KERNEL);
 	if (!c) {
@@ -1098,11 +1076,20 @@ struct i2o_controller *i2o_iop_alloc(void)
 	}
 	memset(c, 0, sizeof(*c));
 
+	c->unit = unit++;
+	sprintf(c->name, "iop%d", c->unit);
+
+	snprintf(poolname, sizeof(poolname), "i2o_%s_msg_inpool", c->name);
+	if (i2o_pool_alloc
+	    (&c->in_msg, poolname, I2O_INBOUND_MSG_FRAME_SIZE * 4,
+	     I2O_MSG_INPOOL_MIN)) {
+		kfree(c);
+		return ERR_PTR(-ENOMEM);
+	};
+
 	INIT_LIST_HEAD(&c->devices);
 	spin_lock_init(&c->lock);
 	init_MUTEX(&c->lct_lock);
-	c->unit = unit++;
-	sprintf(c->name, "iop%d", c->unit);
 
 	device_initialize(&c->device);
 
@@ -1199,28 +1186,27 @@ int i2o_iop_add(struct i2o_controller *c)
  *	is waited for, or expected. If you do not want further notifications,
  *	call the i2o_event_register again with a evt_mask of 0.
  *
- *	Returns 0 on success or -ETIMEDOUT if no message could be fetched for
- *	sending the request.
+ *	Returns 0 on success or negative error code on failure.
  */
 int i2o_event_register(struct i2o_device *dev, struct i2o_driver *drv,
 		       int tcntxt, u32 evt_mask)
 {
 	struct i2o_controller *c = dev->iop;
-	struct i2o_message __iomem *msg;
-	u32 m;
+	struct i2o_message *msg;
 
-	m = i2o_msg_get_wait(c, &msg, I2O_TIMEOUT_MESSAGE_GET);
-	if (m == I2O_QUEUE_EMPTY)
-		return -ETIMEDOUT;
+	msg = i2o_msg_get_wait(c, I2O_TIMEOUT_MESSAGE_GET);
+	if (IS_ERR(msg))
+		return PTR_ERR(msg);
 
-	writel(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0, &msg->u.head[0]);
-	writel(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev->lct_data.
-	       tid, &msg->u.head[1]);
-	writel(drv->context, &msg->u.s.icntxt);
-	writel(tcntxt, &msg->u.s.tcntxt);
-	writel(evt_mask, &msg->body[0]);
+	msg->u.head[0] = cpu_to_le32(FIVE_WORD_MSG_SIZE | SGL_OFFSET_0);
+	msg->u.head[1] =
+	    cpu_to_le32(I2O_CMD_UTIL_EVT_REGISTER << 24 | HOST_TID << 12 | dev->
+			lct_data.tid);
+	msg->u.s.icntxt = cpu_to_le32(drv->context);
+	msg->u.s.tcntxt = cpu_to_le32(tcntxt);
+	msg->body[0] = cpu_to_le32(evt_mask);
 
-	i2o_msg_post(c, m);
+	i2o_msg_post(c, msg);
 
 	return 0;
 };
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index ee7075fa1ec3..329d482eee81 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -483,4 +483,5 @@ void __exit i2o_pci_exit(void)
 {
 	pci_unregister_driver(&i2o_pci_driver);
 };
+
 MODULE_DEVICE_TABLE(pci, i2o_pci_ids);
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index d79c8a4bc4f8..9e359a981221 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -30,6 +30,7 @@
 #include <linux/string.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>	/* work_struct */
+#include <linux/mempool.h>
 
 #include <asm/io.h>
 #include <asm/semaphore.h>	/* Needed for MUTEX init macros */
@@ -38,1091 +39,1219 @@
 #define I2O_QUEUE_EMPTY		0xffffffff
 
 /*
- *	Message structures
+ *	Cache strategies
  */
-struct i2o_message {
-	union {
-		struct {
-			u8 version_offset;
-			u8 flags;
-			u16 size;
-			u32 target_tid:12;
-			u32 init_tid:12;
-			u32 function:8;
-			u32 icntxt;	/* initiator context */
-			u32 tcntxt;	/* transaction context */
-		} s;
-		u32 head[4];
-	} u;
-	/* List follows */
-	u32 body[0];
-};
 
-/*
- *	Each I2O device entity has one of these. There is one per device.
+/*	The NULL strategy leaves everything up to the controller. This tends to be a
+ *	pessimal but functional choice.
  */
-struct i2o_device {
-	i2o_lct_entry lct_data;	/* Device LCT information */
-
-	struct i2o_controller *iop;	/* Controlling IOP */
-	struct list_head list;	/* node in IOP devices list */
-
-	struct device device;
-
-	struct semaphore lock;	/* device lock */
-};
+#define CACHE_NULL		0
+/*	Prefetch data when reading. We continually attempt to load the next 32 sectors
+ *	into the controller cache.
+ */
+#define CACHE_PREFETCH		1
+/*	Prefetch data when reading. We sometimes attempt to load the next 32 sectors
+ *	into the controller cache. When an I/O is less <= 8K we assume its probably
+ *	not sequential and don't prefetch (default)
+ */
+#define CACHE_SMARTFETCH	2
+/*	Data is written to the cache and then out on to the disk. The I/O must be
+ *	physically on the medium before the write is acknowledged (default without
+ *	NVRAM)
+ */
+#define CACHE_WRITETHROUGH	17
+/*	Data is written to the cache and then out on to the disk. The controller
+ *	is permitted to write back the cache any way it wants. (default if battery
+ *	backed NVRAM is present). It can be useful to set this for swap regardless of
+ *	battery state.
+ */
+#define CACHE_WRITEBACK		18
+/*	Optimise for under powered controllers, especially on RAID1 and RAID0. We
+ *	write large I/O's directly to disk bypassing the cache to avoid the extra
+ *	memory copy hits. Small writes are writeback cached
+ */
+#define CACHE_SMARTBACK		19
+/*	Optimise for under powered controllers, especially on RAID1 and RAID0. We
+ *	write large I/O's directly to disk bypassing the cache to avoid the extra
+ *	memory copy hits. Small writes are writethrough cached. Suitable for devices
+ *	lacking battery backup
+ */
+#define CACHE_SMARTTHROUGH	20
 
 /*
- *	Event structure provided to the event handling function
+ *	Ioctl structures
  */
-struct i2o_event {
-	struct work_struct work;
-	struct i2o_device *i2o_dev;	/* I2O device pointer from which the
-					   event reply was initiated */
-	u16 size;		/* Size of data in 32-bit words */
-	u32 tcntxt;		/* Transaction context used at
-				   registration */
-	u32 event_indicator;	/* Event indicator from reply */
-	u32 data[0];		/* Event data from reply */
-};
+
+#define 	BLKI2OGRSTRAT	_IOR('2', 1, int)
+#define 	BLKI2OGWSTRAT	_IOR('2', 2, int)
+#define 	BLKI2OSRSTRAT	_IOW('2', 3, int)
+#define 	BLKI2OSWSTRAT	_IOW('2', 4, int)
 
 /*
- *	I2O classes which could be handled by the OSM
+ *	I2O Function codes
  */
-struct i2o_class_id {
-	u16 class_id:12;
-};
 
 /*
- *	I2O driver structure for OSMs
+ *	Executive Class
  */
-struct i2o_driver {
-	char *name;		/* OSM name */
-	int context;		/* Low 8 bits of the transaction info */
-	struct i2o_class_id *classes;	/* I2O classes that this OSM handles */
-
-	/* Message reply handler */
-	int (*reply) (struct i2o_controller *, u32, struct i2o_message *);
-
-	/* Event handler */
-	void (*event) (struct i2o_event *);
-
-	struct workqueue_struct *event_queue;	/* Event queue */
-
-	struct device_driver driver;
-
-	/* notification of changes */
-	void (*notify_controller_add) (struct i2o_controller *);
-	void (*notify_controller_remove) (struct i2o_controller *);
-	void (*notify_device_add) (struct i2o_device *);
-	void (*notify_device_remove) (struct i2o_device *);
-
-	struct semaphore lock;
-};
+#define	I2O_CMD_ADAPTER_ASSIGN		0xB3
+#define	I2O_CMD_ADAPTER_READ		0xB2
+#define	I2O_CMD_ADAPTER_RELEASE		0xB5
+#define	I2O_CMD_BIOS_INFO_SET		0xA5
+#define	I2O_CMD_BOOT_DEVICE_SET		0xA7
+#define	I2O_CMD_CONFIG_VALIDATE		0xBB
+#define	I2O_CMD_CONN_SETUP		0xCA
+#define	I2O_CMD_DDM_DESTROY		0xB1
+#define	I2O_CMD_DDM_ENABLE		0xD5
+#define	I2O_CMD_DDM_QUIESCE		0xC7
+#define	I2O_CMD_DDM_RESET		0xD9
+#define	I2O_CMD_DDM_SUSPEND		0xAF
+#define	I2O_CMD_DEVICE_ASSIGN		0xB7
+#define	I2O_CMD_DEVICE_RELEASE		0xB9
+#define	I2O_CMD_HRT_GET			0xA8
+#define	I2O_CMD_ADAPTER_CLEAR		0xBE
+#define	I2O_CMD_ADAPTER_CONNECT		0xC9
+#define	I2O_CMD_ADAPTER_RESET		0xBD
+#define	I2O_CMD_LCT_NOTIFY		0xA2
+#define	I2O_CMD_OUTBOUND_INIT		0xA1
+#define	I2O_CMD_PATH_ENABLE		0xD3
+#define	I2O_CMD_PATH_QUIESCE		0xC5
+#define	I2O_CMD_PATH_RESET		0xD7
+#define	I2O_CMD_STATIC_MF_CREATE	0xDD
+#define	I2O_CMD_STATIC_MF_RELEASE	0xDF
+#define	I2O_CMD_STATUS_GET		0xA0
+#define	I2O_CMD_SW_DOWNLOAD		0xA9
+#define	I2O_CMD_SW_UPLOAD		0xAB
+#define	I2O_CMD_SW_REMOVE		0xAD
+#define	I2O_CMD_SYS_ENABLE		0xD1
+#define	I2O_CMD_SYS_MODIFY		0xC1
+#define	I2O_CMD_SYS_QUIESCE		0xC3
+#define	I2O_CMD_SYS_TAB_SET		0xA3
 
 /*
- *	Contains DMA mapped address information
+ * Utility Class
  */
-struct i2o_dma {
-	void *virt;
-	dma_addr_t phys;
-	size_t len;
-};
+#define I2O_CMD_UTIL_NOP		0x00
+#define I2O_CMD_UTIL_ABORT		0x01
+#define I2O_CMD_UTIL_CLAIM		0x09
+#define I2O_CMD_UTIL_RELEASE		0x0B
+#define I2O_CMD_UTIL_PARAMS_GET		0x06
+#define I2O_CMD_UTIL_PARAMS_SET		0x05
+#define I2O_CMD_UTIL_EVT_REGISTER	0x13
+#define I2O_CMD_UTIL_EVT_ACK		0x14
+#define I2O_CMD_UTIL_CONFIG_DIALOG	0x10
+#define I2O_CMD_UTIL_DEVICE_RESERVE	0x0D
+#define I2O_CMD_UTIL_DEVICE_RELEASE	0x0F
+#define I2O_CMD_UTIL_LOCK		0x17
+#define I2O_CMD_UTIL_LOCK_RELEASE	0x19
+#define I2O_CMD_UTIL_REPLY_FAULT_NOTIFY	0x15
 
 /*
- *	Contains IO mapped address information
+ * SCSI Host Bus Adapter Class
  */
-struct i2o_io {
-	void __iomem *virt;
-	unsigned long phys;
-	unsigned long len;
-};
+#define I2O_CMD_SCSI_EXEC		0x81
+#define I2O_CMD_SCSI_ABORT		0x83
+#define I2O_CMD_SCSI_BUSRESET		0x27
 
 /*
- *	Context queue entry, used for 32-bit context on 64-bit systems
+ * Bus Adapter Class
  */
-struct i2o_context_list_element {
-	struct list_head list;
-	u32 context;
-	void *ptr;
-	unsigned long timestamp;
-};
+#define I2O_CMD_BUS_ADAPTER_RESET	0x85
+#define I2O_CMD_BUS_RESET		0x87
+#define I2O_CMD_BUS_SCAN		0x89
+#define I2O_CMD_BUS_QUIESCE		0x8b
 
 /*
- * Each I2O controller has one of these objects
+ * Random Block Storage Class
  */
-struct i2o_controller {
-	char name[16];
-	int unit;
-	int type;
+#define I2O_CMD_BLOCK_READ		0x30
+#define I2O_CMD_BLOCK_WRITE		0x31
+#define I2O_CMD_BLOCK_CFLUSH		0x37
+#define I2O_CMD_BLOCK_MLOCK		0x49
+#define I2O_CMD_BLOCK_MUNLOCK		0x4B
+#define I2O_CMD_BLOCK_MMOUNT		0x41
+#define I2O_CMD_BLOCK_MEJECT		0x43
+#define I2O_CMD_BLOCK_POWER		0x70
 
-	struct pci_dev *pdev;	/* PCI device */
+#define I2O_CMD_PRIVATE			0xFF
 
-	unsigned int promise:1;	/* Promise controller */
-	unsigned int adaptec:1;	/* DPT / Adaptec controller */
-	unsigned int raptor:1;	/* split bar */
-	unsigned int no_quiesce:1;	/* dont quiesce before reset */
-	unsigned int short_req:1;	/* use small block sizes */
-	unsigned int limit_sectors:1;	/* limit number of sectors / request */
-	unsigned int pae_support:1;	/* controller has 64-bit SGL support */
+/* Command status values  */
 
-	struct list_head devices;	/* list of I2O devices */
-	struct list_head list;	/* Controller list */
+#define I2O_CMD_IN_PROGRESS	0x01
+#define I2O_CMD_REJECTED	0x02
+#define I2O_CMD_FAILED		0x03
+#define I2O_CMD_COMPLETED	0x04
 
-	void __iomem *in_port;	/* Inbout port address */
-	void __iomem *out_port;	/* Outbound port address */
-	void __iomem *irq_status;	/* Interrupt status register address */
-	void __iomem *irq_mask;	/* Interrupt mask register address */
+/* I2O API function return values */
 
-	/* Dynamic LCT related data */
+#define I2O_RTN_NO_ERROR			0
+#define I2O_RTN_NOT_INIT			1
+#define I2O_RTN_FREE_Q_EMPTY			2
+#define I2O_RTN_TCB_ERROR			3
+#define I2O_RTN_TRANSACTION_ERROR		4
+#define I2O_RTN_ADAPTER_ALREADY_INIT		5
+#define I2O_RTN_MALLOC_ERROR			6
+#define I2O_RTN_ADPTR_NOT_REGISTERED		7
+#define I2O_RTN_MSG_REPLY_TIMEOUT		8
+#define I2O_RTN_NO_STATUS			9
+#define I2O_RTN_NO_FIRM_VER			10
+#define	I2O_RTN_NO_LINK_SPEED			11
 
-	struct i2o_dma status;	/* IOP status block */
+/* Reply message status defines for all messages */
 
-	struct i2o_dma hrt;	/* HW Resource Table */
-	i2o_lct *lct;		/* Logical Config Table */
-	struct i2o_dma dlct;	/* Temp LCT */
-	struct semaphore lct_lock;	/* Lock for LCT updates */
-	struct i2o_dma status_block;	/* IOP status block */
+#define I2O_REPLY_STATUS_SUCCESS                    	0x00
+#define I2O_REPLY_STATUS_ABORT_DIRTY                	0x01
+#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER     	0x02
+#define	I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER		0x03
+#define	I2O_REPLY_STATUS_ERROR_DIRTY			0x04
+#define	I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER		0x05
+#define	I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER		0x06
+#define	I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY		0x08
+#define	I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER	0x09
+#define	I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER	0x0A
+#define	I2O_REPLY_STATUS_TRANSACTION_ERROR		0x0B
+#define	I2O_REPLY_STATUS_PROGRESS_REPORT		0x80
 
-	struct i2o_io base;	/* controller messaging unit */
-	struct i2o_io in_queue;	/* inbound message queue Host->IOP */
-	struct i2o_dma out_queue;	/* outbound message queue IOP->Host */
+/* Status codes and Error Information for Parameter functions */
 
-	unsigned int battery:1;	/* Has a battery backup */
-	unsigned int io_alloc:1;	/* An I/O resource was allocated */
-	unsigned int mem_alloc:1;	/* A memory resource was allocated */
+#define I2O_PARAMS_STATUS_SUCCESS		0x00
+#define I2O_PARAMS_STATUS_BAD_KEY_ABORT		0x01
+#define I2O_PARAMS_STATUS_BAD_KEY_CONTINUE   	0x02
+#define I2O_PARAMS_STATUS_BUFFER_FULL		0x03
+#define I2O_PARAMS_STATUS_BUFFER_TOO_SMALL	0x04
+#define I2O_PARAMS_STATUS_FIELD_UNREADABLE	0x05
+#define I2O_PARAMS_STATUS_FIELD_UNWRITEABLE	0x06
+#define I2O_PARAMS_STATUS_INSUFFICIENT_FIELDS	0x07
+#define I2O_PARAMS_STATUS_INVALID_GROUP_ID	0x08
+#define I2O_PARAMS_STATUS_INVALID_OPERATION	0x09
+#define I2O_PARAMS_STATUS_NO_KEY_FIELD		0x0A
+#define I2O_PARAMS_STATUS_NO_SUCH_FIELD		0x0B
+#define I2O_PARAMS_STATUS_NON_DYNAMIC_GROUP	0x0C
+#define I2O_PARAMS_STATUS_OPERATION_ERROR	0x0D
+#define I2O_PARAMS_STATUS_SCALAR_ERROR		0x0E
+#define I2O_PARAMS_STATUS_TABLE_ERROR		0x0F
+#define I2O_PARAMS_STATUS_WRONG_GROUP_TYPE	0x10
 
-	struct resource io_resource;	/* I/O resource allocated to the IOP */
-	struct resource mem_resource;	/* Mem resource allocated to the IOP */
+/* DetailedStatusCode defines for Executive, DDM, Util and Transaction error
+ * messages: Table 3-2 Detailed Status Codes.*/
 
-	struct device device;
-	struct class_device *classdev;	/* I2O controller class device */
-	struct i2o_device *exec;	/* Executive */
-#if BITS_PER_LONG == 64
-	spinlock_t context_list_lock;	/* lock for context_list */
-	atomic_t context_list_counter;	/* needed for unique contexts */
-	struct list_head context_list;	/* list of context id's
-					   and pointers */
-#endif
-	spinlock_t lock;	/* lock for controller
-				   configuration */
+#define I2O_DSC_SUCCESS                        0x0000
+#define I2O_DSC_BAD_KEY                        0x0002
+#define I2O_DSC_TCL_ERROR                      0x0003
+#define I2O_DSC_REPLY_BUFFER_FULL              0x0004
+#define I2O_DSC_NO_SUCH_PAGE                   0x0005
+#define I2O_DSC_INSUFFICIENT_RESOURCE_SOFT     0x0006
+#define I2O_DSC_INSUFFICIENT_RESOURCE_HARD     0x0007
+#define I2O_DSC_CHAIN_BUFFER_TOO_LARGE         0x0009
+#define I2O_DSC_UNSUPPORTED_FUNCTION           0x000A
+#define I2O_DSC_DEVICE_LOCKED                  0x000B
+#define I2O_DSC_DEVICE_RESET                   0x000C
+#define I2O_DSC_INAPPROPRIATE_FUNCTION         0x000D
+#define I2O_DSC_INVALID_INITIATOR_ADDRESS      0x000E
+#define I2O_DSC_INVALID_MESSAGE_FLAGS          0x000F
+#define I2O_DSC_INVALID_OFFSET                 0x0010
+#define I2O_DSC_INVALID_PARAMETER              0x0011
+#define I2O_DSC_INVALID_REQUEST                0x0012
+#define I2O_DSC_INVALID_TARGET_ADDRESS         0x0013
+#define I2O_DSC_MESSAGE_TOO_LARGE              0x0014
+#define I2O_DSC_MESSAGE_TOO_SMALL              0x0015
+#define I2O_DSC_MISSING_PARAMETER              0x0016
+#define I2O_DSC_TIMEOUT                        0x0017
+#define I2O_DSC_UNKNOWN_ERROR                  0x0018
+#define I2O_DSC_UNKNOWN_FUNCTION               0x0019
+#define I2O_DSC_UNSUPPORTED_VERSION            0x001A
+#define I2O_DSC_DEVICE_BUSY                    0x001B
+#define I2O_DSC_DEVICE_NOT_AVAILABLE           0x001C
 
-	void *driver_data[I2O_MAX_DRIVERS];	/* storage for drivers */
-};
+/* DetailedStatusCode defines for Block Storage Operation: Table 6-7 Detailed
+   Status Codes.*/
 
-/*
- * I2O System table entry
- *
- * The system table contains information about all the IOPs in the
- * system.  It is sent to all IOPs so that they can create peer2peer
- * connections between them.
- */
-struct i2o_sys_tbl_entry {
-	u16 org_id;
-	u16 reserved1;
-	u32 iop_id:12;
-	u32 reserved2:20;
-	u16 seg_num:12;
-	u16 i2o_version:4;
-	u8 iop_state;
-	u8 msg_type;
-	u16 frame_size;
-	u16 reserved3;
-	u32 last_changed;
-	u32 iop_capabilities;
-	u32 inbound_low;
-	u32 inbound_high;
-};
+#define I2O_BSA_DSC_SUCCESS               0x0000
+#define I2O_BSA_DSC_MEDIA_ERROR           0x0001
+#define I2O_BSA_DSC_ACCESS_ERROR          0x0002
+#define I2O_BSA_DSC_DEVICE_FAILURE        0x0003
+#define I2O_BSA_DSC_DEVICE_NOT_READY      0x0004
+#define I2O_BSA_DSC_MEDIA_NOT_PRESENT     0x0005
+#define I2O_BSA_DSC_MEDIA_LOCKED          0x0006
+#define I2O_BSA_DSC_MEDIA_FAILURE         0x0007
+#define I2O_BSA_DSC_PROTOCOL_FAILURE      0x0008
+#define I2O_BSA_DSC_BUS_FAILURE           0x0009
+#define I2O_BSA_DSC_ACCESS_VIOLATION      0x000A
+#define I2O_BSA_DSC_WRITE_PROTECTED       0x000B
+#define I2O_BSA_DSC_DEVICE_RESET          0x000C
+#define I2O_BSA_DSC_VOLUME_CHANGED        0x000D
+#define I2O_BSA_DSC_TIMEOUT               0x000E
 
-struct i2o_sys_tbl {
-	u8 num_entries;
-	u8 version;
-	u16 reserved1;
-	u32 change_ind;
-	u32 reserved2;
-	u32 reserved3;
-	struct i2o_sys_tbl_entry iops[0];
-};
+/* FailureStatusCodes, Table 3-3 Message Failure Codes */
 
-extern struct list_head i2o_controllers;
+#define I2O_FSC_TRANSPORT_SERVICE_SUSPENDED             0x81
+#define I2O_FSC_TRANSPORT_SERVICE_TERMINATED            0x82
+#define I2O_FSC_TRANSPORT_CONGESTION                    0x83
+#define I2O_FSC_TRANSPORT_FAILURE                       0x84
+#define I2O_FSC_TRANSPORT_STATE_ERROR                   0x85
+#define I2O_FSC_TRANSPORT_TIME_OUT                      0x86
+#define I2O_FSC_TRANSPORT_ROUTING_FAILURE               0x87
+#define I2O_FSC_TRANSPORT_INVALID_VERSION               0x88
+#define I2O_FSC_TRANSPORT_INVALID_OFFSET                0x89
+#define I2O_FSC_TRANSPORT_INVALID_MSG_FLAGS             0x8A
+#define I2O_FSC_TRANSPORT_FRAME_TOO_SMALL               0x8B
+#define I2O_FSC_TRANSPORT_FRAME_TOO_LARGE               0x8C
+#define I2O_FSC_TRANSPORT_INVALID_TARGET_ID             0x8D
+#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_ID          0x8E
+#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_CONTEXT     0x8F
+#define I2O_FSC_TRANSPORT_UNKNOWN_FAILURE               0xFF
 
-/* Message functions */
-static inline u32 i2o_msg_get(struct i2o_controller *,
-			      struct i2o_message __iomem **);
-extern u32 i2o_msg_get_wait(struct i2o_controller *,
-			    struct i2o_message __iomem **, int);
-static inline void i2o_msg_post(struct i2o_controller *, u32);
-static inline int i2o_msg_post_wait(struct i2o_controller *, u32,
-				    unsigned long);
-extern int i2o_msg_post_wait_mem(struct i2o_controller *, u32, unsigned long,
-				 struct i2o_dma *);
-extern void i2o_msg_nop(struct i2o_controller *, u32);
-static inline void i2o_flush_reply(struct i2o_controller *, u32);
+/* Device Claim Types */
+#define	I2O_CLAIM_PRIMARY					0x01000000
+#define	I2O_CLAIM_MANAGEMENT					0x02000000
+#define	I2O_CLAIM_AUTHORIZED					0x03000000
+#define	I2O_CLAIM_SECONDARY					0x04000000
 
-/* IOP functions */
-extern int i2o_status_get(struct i2o_controller *);
+/* Message header defines for VersionOffset */
+#define I2OVER15	0x0001
+#define I2OVER20	0x0002
 
-extern int i2o_event_register(struct i2o_device *, struct i2o_driver *, int,
-			      u32);
-extern struct i2o_device *i2o_iop_find_device(struct i2o_controller *, u16);
-extern struct i2o_controller *i2o_find_iop(int);
+/* Default is 1.5 */
+#define I2OVERSION	I2OVER15
 
-/* Functions needed for handling 64-bit pointers in 32-bit context */
-#if BITS_PER_LONG == 64
-extern u32 i2o_cntxt_list_add(struct i2o_controller *, void *);
-extern void *i2o_cntxt_list_get(struct i2o_controller *, u32);
-extern u32 i2o_cntxt_list_remove(struct i2o_controller *, void *);
-extern u32 i2o_cntxt_list_get_ptr(struct i2o_controller *, void *);
+#define SGL_OFFSET_0    I2OVERSION
+#define SGL_OFFSET_4    (0x0040 | I2OVERSION)
+#define SGL_OFFSET_5    (0x0050 | I2OVERSION)
+#define SGL_OFFSET_6    (0x0060 | I2OVERSION)
+#define SGL_OFFSET_7    (0x0070 | I2OVERSION)
+#define SGL_OFFSET_8    (0x0080 | I2OVERSION)
+#define SGL_OFFSET_9    (0x0090 | I2OVERSION)
+#define SGL_OFFSET_10   (0x00A0 | I2OVERSION)
+#define SGL_OFFSET_11   (0x00B0 | I2OVERSION)
+#define SGL_OFFSET_12   (0x00C0 | I2OVERSION)
+#define SGL_OFFSET(x)   (((x)<<4) | I2OVERSION)
 
-static inline u32 i2o_ptr_low(void *ptr)
-{
-	return (u32) (u64) ptr;
-};
+/* Transaction Reply Lists (TRL) Control Word structure */
+#define TRL_SINGLE_FIXED_LENGTH		0x00
+#define TRL_SINGLE_VARIABLE_LENGTH	0x40
+#define TRL_MULTIPLE_FIXED_LENGTH	0x80
 
-static inline u32 i2o_ptr_high(void *ptr)
-{
-	return (u32) ((u64) ptr >> 32);
-};
+ /* msg header defines for MsgFlags */
+#define MSG_STATIC	0x0100
+#define MSG_64BIT_CNTXT	0x0200
+#define MSG_MULTI_TRANS	0x1000
+#define MSG_FAIL	0x2000
+#define MSG_FINAL	0x4000
+#define MSG_REPLY	0x8000
 
-static inline u32 i2o_dma_low(dma_addr_t dma_addr)
-{
-	return (u32) (u64) dma_addr;
-};
+ /* minimum size msg */
+#define THREE_WORD_MSG_SIZE	0x00030000
+#define FOUR_WORD_MSG_SIZE	0x00040000
+#define FIVE_WORD_MSG_SIZE	0x00050000
+#define SIX_WORD_MSG_SIZE	0x00060000
+#define SEVEN_WORD_MSG_SIZE	0x00070000
+#define EIGHT_WORD_MSG_SIZE	0x00080000
+#define NINE_WORD_MSG_SIZE	0x00090000
+#define TEN_WORD_MSG_SIZE	0x000A0000
+#define ELEVEN_WORD_MSG_SIZE	0x000B0000
+#define I2O_MESSAGE_SIZE(x)	((x)<<16)
 
-static inline u32 i2o_dma_high(dma_addr_t dma_addr)
-{
-	return (u32) ((u64) dma_addr >> 32);
-};
-#else
-static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr)
-{
-	return (u32) ptr;
-};
+/* special TID assignments */
+#define ADAPTER_TID		0
+#define HOST_TID		1
 
-static inline void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context)
-{
-	return (void *)context;
-};
+/* outbound queue defines */
+#define I2O_MAX_OUTBOUND_MSG_FRAMES	128
+#define I2O_OUTBOUND_MSG_FRAME_SIZE	128	/* in 32-bit words */
 
-static inline u32 i2o_cntxt_list_remove(struct i2o_controller *c, void *ptr)
-{
-	return (u32) ptr;
-};
+/* inbound queue definitions */
+#define I2O_MSG_INPOOL_MIN		32
+#define I2O_INBOUND_MSG_FRAME_SIZE	128	/* in 32-bit words */
 
-static inline u32 i2o_cntxt_list_get_ptr(struct i2o_controller *c, void *ptr)
-{
-	return (u32) ptr;
-};
+#define I2O_POST_WAIT_OK	0
+#define I2O_POST_WAIT_TIMEOUT	-ETIMEDOUT
 
-static inline u32 i2o_ptr_low(void *ptr)
-{
-	return (u32) ptr;
-};
+#define I2O_CONTEXT_LIST_MIN_LENGTH	15
+#define I2O_CONTEXT_LIST_USED		0x01
+#define I2O_CONTEXT_LIST_DELETED	0x02
 
-static inline u32 i2o_ptr_high(void *ptr)
-{
-	return 0;
-};
+/* timeouts */
+#define I2O_TIMEOUT_INIT_OUTBOUND_QUEUE	15
+#define I2O_TIMEOUT_MESSAGE_GET		5
+#define I2O_TIMEOUT_RESET		30
+#define I2O_TIMEOUT_STATUS_GET		5
+#define I2O_TIMEOUT_LCT_GET		360
+#define I2O_TIMEOUT_SCSI_SCB_ABORT	240
 
-static inline u32 i2o_dma_low(dma_addr_t dma_addr)
-{
-	return (u32) dma_addr;
-};
+/* retries */
+#define I2O_HRT_GET_TRIES		3
+#define I2O_LCT_GET_TRIES		3
 
-static inline u32 i2o_dma_high(dma_addr_t dma_addr)
-{
-	return 0;
-};
-#endif
+/* defines for max_sectors and max_phys_segments */
+#define I2O_MAX_SECTORS			1024
+#define I2O_MAX_SECTORS_LIMITED		256
+#define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
 
-/**
- *	i2o_sg_tablesize - Calculate the maximum number of elements in a SGL
- *	@c: I2O controller for which the calculation should be done
- *	@body_size: maximum body size used for message in 32-bit words.
- *
- *	Return the maximum number of SG elements in a SG list.
+/*
+ *	Message structures
  */
-static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size)
-{
-	i2o_status_block *sb = c->status_block.virt;
-	u16 sg_count =
-	    (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) -
-	    body_size;
-
-	if (c->pae_support) {
-		/*
-		 * for 64-bit a SG attribute element must be added and each
-		 * SG element needs 12 bytes instead of 8.
-		 */
-		sg_count -= 2;
-		sg_count /= 3;
-	} else
-		sg_count /= 2;
-
-	if (c->short_req && (sg_count > 8))
-		sg_count = 8;
+struct i2o_message {
+	union {
+		struct {
+			u8 version_offset;
+			u8 flags;
+			u16 size;
+			u32 target_tid:12;
+			u32 init_tid:12;
+			u32 function:8;
+			u32 icntxt;	/* initiator context */
+			u32 tcntxt;	/* transaction context */
+		} s;
+		u32 head[4];
+	} u;
+	/* List follows */
+	u32 body[0];
+};
 
-	return sg_count;
+/* MFA and I2O message used by mempool */
+struct i2o_msg_mfa {
+	u32 mfa;		/* MFA returned by the controller */
+	struct i2o_message msg;	/* I2O message */
 };
 
-/**
- *	i2o_dma_map_single - Map pointer to controller and fill in I2O message.
- *	@c: I2O controller
- *	@ptr: pointer to the data which should be mapped
- *	@size: size of data in bytes
- *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
- *	@sg_ptr: pointer to the SG list inside the I2O message
- *
- *	This function does all necessary DMA handling and also writes the I2O
- *	SGL elements into the I2O message. For details on DMA handling see also
- *	dma_map_single(). The pointer sg_ptr will only be set to the end of the
- *	SG list if the allocation was successful.
- *
- *	Returns DMA address which must be checked for failures using
- *	dma_mapping_error().
+/*
+ *	Each I2O device entity has one of these. There is one per device.
  */
-static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
-					    size_t size,
-					    enum dma_data_direction direction,
-					    u32 __iomem ** sg_ptr)
-{
-	u32 sg_flags;
-	u32 __iomem *mptr = *sg_ptr;
-	dma_addr_t dma_addr;
+struct i2o_device {
+	i2o_lct_entry lct_data;	/* Device LCT information */
 
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		sg_flags = 0xd4000000;
-		break;
-	case DMA_FROM_DEVICE:
-		sg_flags = 0xd0000000;
-		break;
-	default:
-		return 0;
-	}
+	struct i2o_controller *iop;	/* Controlling IOP */
+	struct list_head list;	/* node in IOP devices list */
 
-	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
-	if (!dma_mapping_error(dma_addr)) {
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
-			writel(0x7C020002, mptr++);
-			writel(PAGE_SIZE, mptr++);
-		}
-#endif
+	struct device device;
 
-		writel(sg_flags | size, mptr++);
-		writel(i2o_dma_low(dma_addr), mptr++);
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
-			writel(i2o_dma_high(dma_addr), mptr++);
-#endif
-		*sg_ptr = mptr;
-	}
-	return dma_addr;
+	struct semaphore lock;	/* device lock */
 };
 
-/**
- *	i2o_dma_map_sg - Map a SG List to controller and fill in I2O message.
- *	@c: I2O controller
- *	@sg: SG list to be mapped
- *	@sg_count: number of elements in the SG list
- *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
- *	@sg_ptr: pointer to the SG list inside the I2O message
- *
- *	This function does all necessary DMA handling and also writes the I2O
- *	SGL elements into the I2O message. For details on DMA handling see also
- *	dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG
- *	list if the allocation was successful.
- *
- *	Returns 0 on failure or 1 on success.
+/*
+ *	Event structure provided to the event handling function
  */
-static inline int i2o_dma_map_sg(struct i2o_controller *c,
-				 struct scatterlist *sg, int sg_count,
-				 enum dma_data_direction direction,
-				 u32 __iomem ** sg_ptr)
-{
-	u32 sg_flags;
-	u32 __iomem *mptr = *sg_ptr;
-
-	switch (direction) {
-	case DMA_TO_DEVICE:
-		sg_flags = 0x14000000;
-		break;
-	case DMA_FROM_DEVICE:
-		sg_flags = 0x10000000;
-		break;
-	default:
-		return 0;
-	}
-
-	sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction);
-	if (!sg_count)
-		return 0;
-
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-	if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
-		writel(0x7C020002, mptr++);
-		writel(PAGE_SIZE, mptr++);
-	}
-#endif
-
-	while (sg_count-- > 0) {
-		if (!sg_count)
-			sg_flags |= 0xC0000000;
-		writel(sg_flags | sg_dma_len(sg), mptr++);
-		writel(i2o_dma_low(sg_dma_address(sg)), mptr++);
-#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
-		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
-			writel(i2o_dma_high(sg_dma_address(sg)), mptr++);
-#endif
-		sg++;
-	}
-	*sg_ptr = mptr;
+struct i2o_event {
+	struct work_struct work;
+	struct i2o_device *i2o_dev;	/* I2O device pointer from which the
+					   event reply was initiated */
+	u16 size;		/* Size of data in 32-bit words */
+	u32 tcntxt;		/* Transaction context used at
+				   registration */
+	u32 event_indicator;	/* Event indicator from reply */
+	u32 data[0];		/* Event data from reply */
+};
 
-	return 1;
+/*
+ *	I2O classes which could be handled by the OSM
+ */
+struct i2o_class_id {
+	u16 class_id:12;
 };
 
-/**
- *	i2o_dma_alloc - Allocate DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which should get the DMA buffer
- *	@len: length of the new DMA memory
- *	@gfp_mask: GFP mask
- *
- *	Allocate a coherent DMA memory and write the pointers into addr.
- *
- *	Returns 0 on success or -ENOMEM on failure.
+/*
+ *	I2O driver structure for OSMs
  */
-static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
-				size_t len, gfp_t gfp_mask)
-{
-	struct pci_dev *pdev = to_pci_dev(dev);
-	int dma_64 = 0;
+struct i2o_driver {
+	char *name;		/* OSM name */
+	int context;		/* Low 8 bits of the transaction info */
+	struct i2o_class_id *classes;	/* I2O classes that this OSM handles */
 
-	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
-		dma_64 = 1;
-		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
-			return -ENOMEM;
-	}
+	/* Message reply handler */
+	int (*reply) (struct i2o_controller *, u32, struct i2o_message *);
 
-	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
+	/* Event handler */
+	void (*event) (struct i2o_event *);
 
-	if ((sizeof(dma_addr_t) > 4) && dma_64)
-		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK))
-			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
+	struct workqueue_struct *event_queue;	/* Event queue */
 
-	if (!addr->virt)
-		return -ENOMEM;
+	struct device_driver driver;
 
-	memset(addr->virt, 0, len);
-	addr->len = len;
+	/* notification of changes */
+	void (*notify_controller_add) (struct i2o_controller *);
+	void (*notify_controller_remove) (struct i2o_controller *);
+	void (*notify_device_add) (struct i2o_device *);
+	void (*notify_device_remove) (struct i2o_device *);
 
-	return 0;
+	struct semaphore lock;
 };
 
-/**
- *	i2o_dma_free - Free DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: i2o_dma struct which contains the DMA buffer
- *
- *	Free a coherent DMA memory and set virtual address of addr to NULL.
+/*
+ *	Contains DMA mapped address information
  */
-static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
-{
-	if (addr->virt) {
-		if (addr->phys)
-			dma_free_coherent(dev, addr->len, addr->virt,
-					  addr->phys);
-		else
-			kfree(addr->virt);
-		addr->virt = NULL;
-	}
+struct i2o_dma {
+	void *virt;
+	dma_addr_t phys;
+	size_t len;
 };
 
-/**
- *	i2o_dma_realloc - Realloc DMA memory
- *	@dev: struct device pointer to the PCI device of the I2O controller
- *	@addr: pointer to a i2o_dma struct DMA buffer
- *	@len: new length of memory
- *	@gfp_mask: GFP mask
- *
- *	If there was something allocated in the addr, free it first. If len > 0
- *	than try to allocate it and write the addresses back to the addr
- *	structure. If len == 0 set the virtual address to NULL.
- *
- *	Returns the 0 on success or negative error code on failure.
+/*
+ *	Contains slab cache and mempool information
  */
-static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
-				  size_t len, gfp_t gfp_mask)
-{
-	i2o_dma_free(dev, addr);
-
-	if (len)
-		return i2o_dma_alloc(dev, addr, len, gfp_mask);
-
-	return 0;
+struct i2o_pool {
+	char *name;
+	kmem_cache_t *slab;
+	mempool_t *mempool;
 };
 
-/* I2O driver (OSM) functions */
-extern int i2o_driver_register(struct i2o_driver *);
-extern void i2o_driver_unregister(struct i2o_driver *);
-
-/**
- *	i2o_driver_notify_controller_add - Send notification of added controller
- *					   to a single I2O driver
- *
- *	Send notification of added controller to a single registered driver.
+/*
+ *	Contains IO mapped address information
  */
-static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv,
-						    struct i2o_controller *c)
-{
-	if (drv->notify_controller_add)
-		drv->notify_controller_add(c);
+struct i2o_io {
+	void __iomem *virt;
+	unsigned long phys;
+	unsigned long len;
 };
 
-/**
- *	i2o_driver_notify_controller_remove - Send notification of removed
- *					      controller to a single I2O driver
- *
- *	Send notification of removed controller to a single registered driver.
+/*
+ *	Context queue entry, used for 32-bit context on 64-bit systems
  */
-static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv,
-						       struct i2o_controller *c)
-{
-	if (drv->notify_controller_remove)
-		drv->notify_controller_remove(c);
+struct i2o_context_list_element {
+	struct list_head list;
+	u32 context;
+	void *ptr;
+	unsigned long timestamp;
 };
 
-/**
- *	i2o_driver_notify_device_add - Send notification of added device to a
- *				       single I2O driver
- *
- *	Send notification of added device to a single registered driver.
+/*
+ * Each I2O controller has one of these objects
  */
-static inline void i2o_driver_notify_device_add(struct i2o_driver *drv,
-						struct i2o_device *i2o_dev)
-{
-	if (drv->notify_device_add)
-		drv->notify_device_add(i2o_dev);
+struct i2o_controller {
+	char name[16];
+	int unit;
+	int type;
+
+	struct pci_dev *pdev;	/* PCI device */
+
+	unsigned int promise:1;	/* Promise controller */
+	unsigned int adaptec:1;	/* DPT / Adaptec controller */
+	unsigned int raptor:1;	/* split bar */
+	unsigned int no_quiesce:1;	/* dont quiesce before reset */
+	unsigned int short_req:1;	/* use small block sizes */
+	unsigned int limit_sectors:1;	/* limit number of sectors / request */
+	unsigned int pae_support:1;	/* controller has 64-bit SGL support */
+
+	struct list_head devices;	/* list of I2O devices */
+	struct list_head list;	/* Controller list */
+
+	void __iomem *in_port;	/* Inbout port address */
+	void __iomem *out_port;	/* Outbound port address */
+	void __iomem *irq_status;	/* Interrupt status register address */
+	void __iomem *irq_mask;	/* Interrupt mask register address */
+
+	struct i2o_dma status;	/* IOP status block */
+
+	struct i2o_dma hrt;	/* HW Resource Table */
+	i2o_lct *lct;		/* Logical Config Table */
+	struct i2o_dma dlct;	/* Temp LCT */
+	struct semaphore lct_lock;	/* Lock for LCT updates */
+	struct i2o_dma status_block;	/* IOP status block */
+
+	struct i2o_io base;	/* controller messaging unit */
+	struct i2o_io in_queue;	/* inbound message queue Host->IOP */
+	struct i2o_dma out_queue;	/* outbound message queue IOP->Host */
+
+	struct i2o_pool in_msg;	/* mempool for inbound messages */
+
+	unsigned int battery:1;	/* Has a battery backup */
+	unsigned int io_alloc:1;	/* An I/O resource was allocated */
+	unsigned int mem_alloc:1;	/* A memory resource was allocated */
+
+	struct resource io_resource;	/* I/O resource allocated to the IOP */
+	struct resource mem_resource;	/* Mem resource allocated to the IOP */
+
+	struct device device;
+	struct class_device *classdev;	/* I2O controller class device */
+	struct i2o_device *exec;	/* Executive */
+#if BITS_PER_LONG == 64
+	spinlock_t context_list_lock;	/* lock for context_list */
+	atomic_t context_list_counter;	/* needed for unique contexts */
+	struct list_head context_list;	/* list of context id's
+					   and pointers */
+#endif
+	spinlock_t lock;	/* lock for controller
+				   configuration */
+
+	void *driver_data[I2O_MAX_DRIVERS];	/* storage for drivers */
 };
 
-/**
- *	i2o_driver_notify_device_remove - Send notification of removed device
- *					  to a single I2O driver
+/*
+ * I2O System table entry
  *
- *	Send notification of removed device to a single registered driver.
+ * The system table contains information about all the IOPs in the
+ * system.  It is sent to all IOPs so that they can create peer2peer
+ * connections between them.
  */
-static inline void i2o_driver_notify_device_remove(struct i2o_driver *drv,
-						   struct i2o_device *i2o_dev)
-{
-	if (drv->notify_device_remove)
-		drv->notify_device_remove(i2o_dev);
+struct i2o_sys_tbl_entry {
+	u16 org_id;
+	u16 reserved1;
+	u32 iop_id:12;
+	u32 reserved2:20;
+	u16 seg_num:12;
+	u16 i2o_version:4;
+	u8 iop_state;
+	u8 msg_type;
+	u16 frame_size;
+	u16 reserved3;
+	u32 last_changed;
+	u32 iop_capabilities;
+	u32 inbound_low;
+	u32 inbound_high;
 };
 
-extern void i2o_driver_notify_controller_add_all(struct i2o_controller *);
-extern void i2o_driver_notify_controller_remove_all(struct i2o_controller *);
-extern void i2o_driver_notify_device_add_all(struct i2o_device *);
-extern void i2o_driver_notify_device_remove_all(struct i2o_device *);
+struct i2o_sys_tbl {
+	u8 num_entries;
+	u8 version;
+	u16 reserved1;
+	u32 change_ind;
+	u32 reserved2;
+	u32 reserved3;
+	struct i2o_sys_tbl_entry iops[0];
+};
 
-/* I2O device functions */
-extern int i2o_device_claim(struct i2o_device *);
-extern int i2o_device_claim_release(struct i2o_device *);
+extern struct list_head i2o_controllers;
 
-/* Exec OSM functions */
-extern int i2o_exec_lct_get(struct i2o_controller *);
+/* Message functions */
+static inline struct i2o_message *i2o_msg_get(struct i2o_controller *);
+extern struct i2o_message *i2o_msg_get_wait(struct i2o_controller *, int);
+static inline void i2o_msg_post(struct i2o_controller *, struct i2o_message *);
+static inline int i2o_msg_post_wait(struct i2o_controller *,
+				    struct i2o_message *, unsigned long);
+extern int i2o_msg_post_wait_mem(struct i2o_controller *, struct i2o_message *,
+				 unsigned long, struct i2o_dma *);
+static inline void i2o_flush_reply(struct i2o_controller *, u32);
 
-/* device / driver / kobject conversion functions */
-#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
-#define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
-#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
-#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj))
+/* IOP functions */
+extern int i2o_status_get(struct i2o_controller *);
 
-/**
- *	i2o_msg_get - obtain an I2O message from the IOP
- *	@c: I2O controller
- *	@msg: pointer to a I2O message pointer
- *
- *	This function tries to get a message slot. If no message slot is
- *	available do not wait until one is availabe (see also i2o_msg_get_wait).
- *
- *	On a success the message is returned and the pointer to the message is
- *	set in msg. The returned message is the physical page frame offset
- *	address from the read port (see the i2o spec). If no message is
- *	available returns I2O_QUEUE_EMPTY and msg is leaved untouched.
- */
-static inline u32 i2o_msg_get(struct i2o_controller *c,
-			      struct i2o_message __iomem ** msg)
-{
-	u32 m = readl(c->in_port);
+extern int i2o_event_register(struct i2o_device *, struct i2o_driver *, int,
+			      u32);
+extern struct i2o_device *i2o_iop_find_device(struct i2o_controller *, u16);
+extern struct i2o_controller *i2o_find_iop(int);
 
-	if (m != I2O_QUEUE_EMPTY)
-		*msg = c->in_queue.virt + m;
+/* Functions needed for handling 64-bit pointers in 32-bit context */
+#if BITS_PER_LONG == 64
+extern u32 i2o_cntxt_list_add(struct i2o_controller *, void *);
+extern void *i2o_cntxt_list_get(struct i2o_controller *, u32);
+extern u32 i2o_cntxt_list_remove(struct i2o_controller *, void *);
+extern u32 i2o_cntxt_list_get_ptr(struct i2o_controller *, void *);
 
-	return m;
+static inline u32 i2o_ptr_low(void *ptr)
+{
+	return (u32) (u64) ptr;
 };
 
-/**
- *	i2o_msg_post - Post I2O message to I2O controller
- *	@c: I2O controller to which the message should be send
- *	@m: the message identifier
- *
- *	Post the message to the I2O controller.
- */
-static inline void i2o_msg_post(struct i2o_controller *c, u32 m)
+static inline u32 i2o_ptr_high(void *ptr)
 {
-	writel(m, c->in_port);
+	return (u32) ((u64) ptr >> 32);
 };
 
-/**
- * 	i2o_msg_post_wait - Post and wait a message and wait until return
- *	@c: controller
- *	@m: message to post
- *	@timeout: time in seconds to wait
- *
- * 	This API allows an OSM to post a message and then be told whether or
- *	not the system received a successful reply. If the message times out
- *	then the value '-ETIMEDOUT' is returned.
- *
- *	Returns 0 on success or negative error code on failure.
- */
-static inline int i2o_msg_post_wait(struct i2o_controller *c, u32 m,
-				    unsigned long timeout)
+static inline u32 i2o_dma_low(dma_addr_t dma_addr)
 {
-	return i2o_msg_post_wait_mem(c, m, timeout, NULL);
+	return (u32) (u64) dma_addr;
 };
 
-/**
- *	i2o_flush_reply - Flush reply from I2O controller
- *	@c: I2O controller
- *	@m: the message identifier
- *
- *	The I2O controller must be informed that the reply message is not needed
- *	anymore. If you forget to flush the reply, the message frame can't be
- *	used by the controller anymore and is therefore lost.
- */
-static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
+static inline u32 i2o_dma_high(dma_addr_t dma_addr)
 {
-	writel(m, c->out_port);
+	return (u32) ((u64) dma_addr >> 32);
+};
+#else
+static inline u32 i2o_cntxt_list_add(struct i2o_controller *c, void *ptr)
+{
+	return (u32) ptr;
 };
 
-/**
- *	i2o_out_to_virt - Turn an I2O message to a virtual address
- *	@c: controller
- *	@m: message engine value
- *
- *	Turn a receive message from an I2O controller bus address into
- *	a Linux virtual address. The shared page frame is a linear block
- *	so we simply have to shift the offset. This function does not
- *	work for sender side messages as they are ioremap objects
- *	provided by the I2O controller.
- */
-static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c,
-						      u32 m)
+static inline void *i2o_cntxt_list_get(struct i2o_controller *c, u32 context)
 {
-	BUG_ON(m < c->out_queue.phys
-	       || m >= c->out_queue.phys + c->out_queue.len);
+	return (void *)context;
+};
 
-	return c->out_queue.virt + (m - c->out_queue.phys);
+static inline u32 i2o_cntxt_list_remove(struct i2o_controller *c, void *ptr)
+{
+	return (u32) ptr;
 };
 
-/**
- *	i2o_msg_in_to_virt - Turn an I2O message to a virtual address
- *	@c: controller
- *	@m: message engine value
+static inline u32 i2o_cntxt_list_get_ptr(struct i2o_controller *c, void *ptr)
+{
+	return (u32) ptr;
+};
+
+static inline u32 i2o_ptr_low(void *ptr)
+{
+	return (u32) ptr;
+};
+
+static inline u32 i2o_ptr_high(void *ptr)
+{
+	return 0;
+};
+
+static inline u32 i2o_dma_low(dma_addr_t dma_addr)
+{
+	return (u32) dma_addr;
+};
+
+static inline u32 i2o_dma_high(dma_addr_t dma_addr)
+{
+	return 0;
+};
+#endif
+
+/**
+ *	i2o_sg_tablesize - Calculate the maximum number of elements in a SGL
+ *	@c: I2O controller for which the calculation should be done
+ *	@body_size: maximum body size used for message in 32-bit words.
  *
- *	Turn a send message from an I2O controller bus address into
- *	a Linux virtual address. The shared page frame is a linear block
- *	so we simply have to shift the offset. This function does not
- *	work for receive side messages as they are kmalloc objects
- *	in a different pool.
+ *	Return the maximum number of SG elements in a SG list.
  */
-static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct
-							     i2o_controller *c,
-							     u32 m)
+static inline u16 i2o_sg_tablesize(struct i2o_controller *c, u16 body_size)
 {
-	return c->in_queue.virt + m;
+	i2o_status_block *sb = c->status_block.virt;
+	u16 sg_count =
+	    (sb->inbound_frame_size - sizeof(struct i2o_message) / 4) -
+	    body_size;
+
+	if (c->pae_support) {
+		/*
+		 * for 64-bit a SG attribute element must be added and each
+		 * SG element needs 12 bytes instead of 8.
+		 */
+		sg_count -= 2;
+		sg_count /= 3;
+	} else
+		sg_count /= 2;
+
+	if (c->short_req && (sg_count > 8))
+		sg_count = 8;
+
+	return sg_count;
 };
 
-/*
- *	Endian handling wrapped into the macro - keeps the core code
- *	cleaner.
+/**
+ *	i2o_dma_map_single - Map pointer to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@ptr: pointer to the data which should be mapped
+ *	@size: size of data in bytes
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_single(). The pointer sg_ptr will only be set to the end of the
+ *	SG list if the allocation was successful.
+ *
+ *	Returns DMA address which must be checked for failures using
+ *	dma_mapping_error().
  */
+static inline dma_addr_t i2o_dma_map_single(struct i2o_controller *c, void *ptr,
+					    size_t size,
+					    enum dma_data_direction direction,
+					    u32 ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 *mptr = *sg_ptr;
+	dma_addr_t dma_addr;
 
-#define i2o_raw_writel(val, mem)	__raw_writel(cpu_to_le32(val), mem)
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0xd4000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0xd0000000;
+		break;
+	default:
+		return 0;
+	}
 
-extern int i2o_parm_field_get(struct i2o_device *, int, int, void *, int);
-extern int i2o_parm_table_get(struct i2o_device *, int, int, int, void *, int,
-			      void *, int);
+	dma_addr = dma_map_single(&c->pdev->dev, ptr, size, direction);
+	if (!dma_mapping_error(dma_addr)) {
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+			*mptr++ = cpu_to_le32(0x7C020002);
+			*mptr++ = cpu_to_le32(PAGE_SIZE);
+		}
+#endif
 
-/* debugging and troubleshooting/diagnostic helpers. */
-#define osm_printk(level, format, arg...)  \
-	printk(level "%s: " format, OSM_NAME , ## arg)
+		*mptr++ = cpu_to_le32(sg_flags | size);
+		*mptr++ = cpu_to_le32(i2o_dma_low(dma_addr));
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			*mptr++ = cpu_to_le32(i2o_dma_high(dma_addr));
+#endif
+		*sg_ptr = mptr;
+	}
+	return dma_addr;
+};
 
-#ifdef DEBUG
-#define osm_debug(format, arg...) \
-	osm_printk(KERN_DEBUG, format , ## arg)
-#else
-#define osm_debug(format, arg...) \
-        do { } while (0)
+/**
+ *	i2o_dma_map_sg - Map a SG List to controller and fill in I2O message.
+ *	@c: I2O controller
+ *	@sg: SG list to be mapped
+ *	@sg_count: number of elements in the SG list
+ *	@direction: DMA_TO_DEVICE / DMA_FROM_DEVICE
+ *	@sg_ptr: pointer to the SG list inside the I2O message
+ *
+ *	This function does all necessary DMA handling and also writes the I2O
+ *	SGL elements into the I2O message. For details on DMA handling see also
+ *	dma_map_sg(). The pointer sg_ptr will only be set to the end of the SG
+ *	list if the allocation was successful.
+ *
+ *	Returns 0 on failure or 1 on success.
+ */
+static inline int i2o_dma_map_sg(struct i2o_controller *c,
+				 struct scatterlist *sg, int sg_count,
+				 enum dma_data_direction direction,
+				 u32 ** sg_ptr)
+{
+	u32 sg_flags;
+	u32 *mptr = *sg_ptr;
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		sg_flags = 0x14000000;
+		break;
+	case DMA_FROM_DEVICE:
+		sg_flags = 0x10000000;
+		break;
+	default:
+		return 0;
+	}
+
+	sg_count = dma_map_sg(&c->pdev->dev, sg, sg_count, direction);
+	if (!sg_count)
+		return 0;
+
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+	if ((sizeof(dma_addr_t) > 4) && c->pae_support) {
+		*mptr++ = cpu_to_le32(0x7C020002);
+		*mptr++ = cpu_to_le32(PAGE_SIZE);
+	}
 #endif
 
-#define osm_err(format, arg...)		\
-	osm_printk(KERN_ERR, format , ## arg)
-#define osm_info(format, arg...)		\
-	osm_printk(KERN_INFO, format , ## arg)
-#define osm_warn(format, arg...)		\
-	osm_printk(KERN_WARNING, format , ## arg)
+	while (sg_count-- > 0) {
+		if (!sg_count)
+			sg_flags |= 0xC0000000;
+		*mptr++ = cpu_to_le32(sg_flags | sg_dma_len(sg));
+		*mptr++ = cpu_to_le32(i2o_dma_low(sg_dma_address(sg)));
+#ifdef CONFIG_I2O_EXT_ADAPTEC_DMA64
+		if ((sizeof(dma_addr_t) > 4) && c->pae_support)
+			*mptr++ = cpu_to_le32(i2o_dma_high(sg_dma_address(sg)));
+#endif
+		sg++;
+	}
+	*sg_ptr = mptr;
 
-/* debugging functions */
-extern void i2o_report_status(const char *, const char *, struct i2o_message *);
-extern void i2o_dump_message(struct i2o_message *);
-extern void i2o_dump_hrt(struct i2o_controller *c);
-extern void i2o_debug_state(struct i2o_controller *c);
+	return 1;
+};
 
-/*
- *	Cache strategies
+/**
+ *	i2o_dma_alloc - Allocate DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which should get the DMA buffer
+ *	@len: length of the new DMA memory
+ *	@gfp_mask: GFP mask
+ *
+ *	Allocate a coherent DMA memory and write the pointers into addr.
+ *
+ *	Returns 0 on success or -ENOMEM on failure.
  */
+static inline int i2o_dma_alloc(struct device *dev, struct i2o_dma *addr,
+				size_t len, gfp_t gfp_mask)
+{
+	struct pci_dev *pdev = to_pci_dev(dev);
+	int dma_64 = 0;
 
-/*	The NULL strategy leaves everything up to the controller. This tends to be a
- *	pessimal but functional choice.
- */
-#define CACHE_NULL		0
-/*	Prefetch data when reading. We continually attempt to load the next 32 sectors
- *	into the controller cache.
- */
-#define CACHE_PREFETCH		1
-/*	Prefetch data when reading. We sometimes attempt to load the next 32 sectors
- *	into the controller cache. When an I/O is less <= 8K we assume its probably
- *	not sequential and don't prefetch (default)
- */
-#define CACHE_SMARTFETCH	2
-/*	Data is written to the cache and then out on to the disk. The I/O must be
- *	physically on the medium before the write is acknowledged (default without
- *	NVRAM)
- */
-#define CACHE_WRITETHROUGH	17
-/*	Data is written to the cache and then out on to the disk. The controller
- *	is permitted to write back the cache any way it wants. (default if battery
- *	backed NVRAM is present). It can be useful to set this for swap regardless of
- *	battery state.
- */
-#define CACHE_WRITEBACK		18
-/*	Optimise for under powered controllers, especially on RAID1 and RAID0. We
- *	write large I/O's directly to disk bypassing the cache to avoid the extra
- *	memory copy hits. Small writes are writeback cached
- */
-#define CACHE_SMARTBACK		19
-/*	Optimise for under powered controllers, especially on RAID1 and RAID0. We
- *	write large I/O's directly to disk bypassing the cache to avoid the extra
- *	memory copy hits. Small writes are writethrough cached. Suitable for devices
- *	lacking battery backup
- */
-#define CACHE_SMARTTHROUGH	20
+	if ((sizeof(dma_addr_t) > 4) && (pdev->dma_mask == DMA_64BIT_MASK)) {
+		dma_64 = 1;
+		if (pci_set_dma_mask(pdev, DMA_32BIT_MASK))
+			return -ENOMEM;
+	}
 
-/*
- *	Ioctl structures
- */
+	addr->virt = dma_alloc_coherent(dev, len, &addr->phys, gfp_mask);
 
-#define 	BLKI2OGRSTRAT	_IOR('2', 1, int)
-#define 	BLKI2OGWSTRAT	_IOR('2', 2, int)
-#define 	BLKI2OSRSTRAT	_IOW('2', 3, int)
-#define 	BLKI2OSWSTRAT	_IOW('2', 4, int)
+	if ((sizeof(dma_addr_t) > 4) && dma_64)
+		if (pci_set_dma_mask(pdev, DMA_64BIT_MASK))
+			printk(KERN_WARNING "i2o: unable to set 64-bit DMA");
 
-/*
- *	I2O Function codes
- */
+	if (!addr->virt)
+		return -ENOMEM;
 
-/*
- *	Executive Class
- */
-#define	I2O_CMD_ADAPTER_ASSIGN		0xB3
-#define	I2O_CMD_ADAPTER_READ		0xB2
-#define	I2O_CMD_ADAPTER_RELEASE		0xB5
-#define	I2O_CMD_BIOS_INFO_SET		0xA5
-#define	I2O_CMD_BOOT_DEVICE_SET		0xA7
-#define	I2O_CMD_CONFIG_VALIDATE		0xBB
-#define	I2O_CMD_CONN_SETUP		0xCA
-#define	I2O_CMD_DDM_DESTROY		0xB1
-#define	I2O_CMD_DDM_ENABLE		0xD5
-#define	I2O_CMD_DDM_QUIESCE		0xC7
-#define	I2O_CMD_DDM_RESET		0xD9
-#define	I2O_CMD_DDM_SUSPEND		0xAF
-#define	I2O_CMD_DEVICE_ASSIGN		0xB7
-#define	I2O_CMD_DEVICE_RELEASE		0xB9
-#define	I2O_CMD_HRT_GET			0xA8
-#define	I2O_CMD_ADAPTER_CLEAR		0xBE
-#define	I2O_CMD_ADAPTER_CONNECT		0xC9
-#define	I2O_CMD_ADAPTER_RESET		0xBD
-#define	I2O_CMD_LCT_NOTIFY		0xA2
-#define	I2O_CMD_OUTBOUND_INIT		0xA1
-#define	I2O_CMD_PATH_ENABLE		0xD3
-#define	I2O_CMD_PATH_QUIESCE		0xC5
-#define	I2O_CMD_PATH_RESET		0xD7
-#define	I2O_CMD_STATIC_MF_CREATE	0xDD
-#define	I2O_CMD_STATIC_MF_RELEASE	0xDF
-#define	I2O_CMD_STATUS_GET		0xA0
-#define	I2O_CMD_SW_DOWNLOAD		0xA9
-#define	I2O_CMD_SW_UPLOAD		0xAB
-#define	I2O_CMD_SW_REMOVE		0xAD
-#define	I2O_CMD_SYS_ENABLE		0xD1
-#define	I2O_CMD_SYS_MODIFY		0xC1
-#define	I2O_CMD_SYS_QUIESCE		0xC3
-#define	I2O_CMD_SYS_TAB_SET		0xA3
+	memset(addr->virt, 0, len);
+	addr->len = len;
 
-/*
- * Utility Class
+	return 0;
+};
+
+/**
+ *	i2o_dma_free - Free DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: i2o_dma struct which contains the DMA buffer
+ *
+ *	Free a coherent DMA memory and set virtual address of addr to NULL.
  */
-#define I2O_CMD_UTIL_NOP		0x00
-#define I2O_CMD_UTIL_ABORT		0x01
-#define I2O_CMD_UTIL_CLAIM		0x09
-#define I2O_CMD_UTIL_RELEASE		0x0B
-#define I2O_CMD_UTIL_PARAMS_GET		0x06
-#define I2O_CMD_UTIL_PARAMS_SET		0x05
-#define I2O_CMD_UTIL_EVT_REGISTER	0x13
-#define I2O_CMD_UTIL_EVT_ACK		0x14
-#define I2O_CMD_UTIL_CONFIG_DIALOG	0x10
-#define I2O_CMD_UTIL_DEVICE_RESERVE	0x0D
-#define I2O_CMD_UTIL_DEVICE_RELEASE	0x0F
-#define I2O_CMD_UTIL_LOCK		0x17
-#define I2O_CMD_UTIL_LOCK_RELEASE	0x19
-#define I2O_CMD_UTIL_REPLY_FAULT_NOTIFY	0x15
+static inline void i2o_dma_free(struct device *dev, struct i2o_dma *addr)
+{
+	if (addr->virt) {
+		if (addr->phys)
+			dma_free_coherent(dev, addr->len, addr->virt,
+					  addr->phys);
+		else
+			kfree(addr->virt);
+		addr->virt = NULL;
+	}
+};
 
-/*
- * SCSI Host Bus Adapter Class
+/**
+ *	i2o_dma_realloc - Realloc DMA memory
+ *	@dev: struct device pointer to the PCI device of the I2O controller
+ *	@addr: pointer to a i2o_dma struct DMA buffer
+ *	@len: new length of memory
+ *	@gfp_mask: GFP mask
+ *
+ *	If there was something allocated in the addr, free it first. If len > 0
+ *	than try to allocate it and write the addresses back to the addr
+ *	structure. If len == 0 set the virtual address to NULL.
+ *
+ *	Returns the 0 on success or negative error code on failure.
  */
-#define I2O_CMD_SCSI_EXEC		0x81
-#define I2O_CMD_SCSI_ABORT		0x83
-#define I2O_CMD_SCSI_BUSRESET		0x27
+static inline int i2o_dma_realloc(struct device *dev, struct i2o_dma *addr,
+				  size_t len, gfp_t gfp_mask)
+{
+	i2o_dma_free(dev, addr);
+
+	if (len)
+		return i2o_dma_alloc(dev, addr, len, gfp_mask);
+
+	return 0;
+};
 
 /*
- * Bus Adapter Class
+ *	i2o_pool_alloc - Allocate an slab cache and mempool
+ *	@mempool: pointer to struct i2o_pool to write data into.
+ *	@name: name which is used to identify cache
+ *	@size: size of each object
+ *	@min_nr: minimum number of objects
+ *
+ *	First allocates a slab cache with name and size. Then allocates a
+ *	mempool which uses the slab cache for allocation and freeing.
+ *
+ *	Returns 0 on success or negative error code on failure.
  */
-#define I2O_CMD_BUS_ADAPTER_RESET	0x85
-#define I2O_CMD_BUS_RESET		0x87
-#define I2O_CMD_BUS_SCAN		0x89
-#define I2O_CMD_BUS_QUIESCE		0x8b
+static inline int i2o_pool_alloc(struct i2o_pool *pool, const char *name,
+				 size_t size, int min_nr)
+{
+	pool->name = kmalloc(strlen(name) + 1, GFP_KERNEL);
+	if (!pool->name)
+		goto exit;
+	strcpy(pool->name, name);
+
+	pool->slab =
+	    kmem_cache_create(pool->name, size, 0, SLAB_HWCACHE_ALIGN, NULL,
+			      NULL);
+	if (!pool->slab)
+		goto free_name;
+
+	pool->mempool =
+	    mempool_create(min_nr, mempool_alloc_slab, mempool_free_slab,
+			   pool->slab);
+	if (!pool->mempool)
+		goto free_slab;
+
+	return 0;
+
+      free_slab:
+	kmem_cache_destroy(pool->slab);
+
+      free_name:
+	kfree(pool->name);
+
+      exit:
+	return -ENOMEM;
+};
 
 /*
- * Random Block Storage Class
+ *	i2o_pool_free - Free slab cache and mempool again
+ *	@mempool: pointer to struct i2o_pool which should be freed
+ *
+ *	Note that you have to return all objects to the mempool again before
+ *	calling i2o_pool_free().
  */
-#define I2O_CMD_BLOCK_READ		0x30
-#define I2O_CMD_BLOCK_WRITE		0x31
-#define I2O_CMD_BLOCK_CFLUSH		0x37
-#define I2O_CMD_BLOCK_MLOCK		0x49
-#define I2O_CMD_BLOCK_MUNLOCK		0x4B
-#define I2O_CMD_BLOCK_MMOUNT		0x41
-#define I2O_CMD_BLOCK_MEJECT		0x43
-#define I2O_CMD_BLOCK_POWER		0x70
-
-#define I2O_CMD_PRIVATE			0xFF
+static inline void i2o_pool_free(struct i2o_pool *pool)
+{
+	mempool_destroy(pool->mempool);
+	kmem_cache_destroy(pool->slab);
+	kfree(pool->name);
+};
 
-/* Command status values  */
+/* I2O driver (OSM) functions */
+extern int i2o_driver_register(struct i2o_driver *);
+extern void i2o_driver_unregister(struct i2o_driver *);
 
-#define I2O_CMD_IN_PROGRESS	0x01
-#define I2O_CMD_REJECTED	0x02
-#define I2O_CMD_FAILED		0x03
-#define I2O_CMD_COMPLETED	0x04
+/**
+ *	i2o_driver_notify_controller_add - Send notification of added controller
+ *					   to a single I2O driver
+ *
+ *	Send notification of added controller to a single registered driver.
+ */
+static inline void i2o_driver_notify_controller_add(struct i2o_driver *drv,
+						    struct i2o_controller *c)
+{
+	if (drv->notify_controller_add)
+		drv->notify_controller_add(c);
+};
 
-/* I2O API function return values */
+/**
+ *	i2o_driver_notify_controller_remove - Send notification of removed
+ *					      controller to a single I2O driver
+ *
+ *	Send notification of removed controller to a single registered driver.
+ */
+static inline void i2o_driver_notify_controller_remove(struct i2o_driver *drv,
+						       struct i2o_controller *c)
+{
+	if (drv->notify_controller_remove)
+		drv->notify_controller_remove(c);
+};
 
-#define I2O_RTN_NO_ERROR			0
-#define I2O_RTN_NOT_INIT			1
-#define I2O_RTN_FREE_Q_EMPTY			2
-#define I2O_RTN_TCB_ERROR			3
-#define I2O_RTN_TRANSACTION_ERROR		4
-#define I2O_RTN_ADAPTER_ALREADY_INIT		5
-#define I2O_RTN_MALLOC_ERROR			6
-#define I2O_RTN_ADPTR_NOT_REGISTERED		7
-#define I2O_RTN_MSG_REPLY_TIMEOUT		8
-#define I2O_RTN_NO_STATUS			9
-#define I2O_RTN_NO_FIRM_VER			10
-#define	I2O_RTN_NO_LINK_SPEED			11
+/**
+ *	i2o_driver_notify_device_add - Send notification of added device to a
+ *				       single I2O driver
+ *
+ *	Send notification of added device to a single registered driver.
+ */
+static inline void i2o_driver_notify_device_add(struct i2o_driver *drv,
+						struct i2o_device *i2o_dev)
+{
+	if (drv->notify_device_add)
+		drv->notify_device_add(i2o_dev);
+};
 
-/* Reply message status defines for all messages */
+/**
+ *	i2o_driver_notify_device_remove - Send notification of removed device
+ *					  to a single I2O driver
+ *
+ *	Send notification of removed device to a single registered driver.
+ */
+static inline void i2o_driver_notify_device_remove(struct i2o_driver *drv,
+						   struct i2o_device *i2o_dev)
+{
+	if (drv->notify_device_remove)
+		drv->notify_device_remove(i2o_dev);
+};
 
-#define I2O_REPLY_STATUS_SUCCESS                    	0x00
-#define I2O_REPLY_STATUS_ABORT_DIRTY                	0x01
-#define I2O_REPLY_STATUS_ABORT_NO_DATA_TRANSFER     	0x02
-#define	I2O_REPLY_STATUS_ABORT_PARTIAL_TRANSFER		0x03
-#define	I2O_REPLY_STATUS_ERROR_DIRTY			0x04
-#define	I2O_REPLY_STATUS_ERROR_NO_DATA_TRANSFER		0x05
-#define	I2O_REPLY_STATUS_ERROR_PARTIAL_TRANSFER		0x06
-#define	I2O_REPLY_STATUS_PROCESS_ABORT_DIRTY		0x08
-#define	I2O_REPLY_STATUS_PROCESS_ABORT_NO_DATA_TRANSFER	0x09
-#define	I2O_REPLY_STATUS_PROCESS_ABORT_PARTIAL_TRANSFER	0x0A
-#define	I2O_REPLY_STATUS_TRANSACTION_ERROR		0x0B
-#define	I2O_REPLY_STATUS_PROGRESS_REPORT		0x80
+extern void i2o_driver_notify_controller_add_all(struct i2o_controller *);
+extern void i2o_driver_notify_controller_remove_all(struct i2o_controller *);
+extern void i2o_driver_notify_device_add_all(struct i2o_device *);
+extern void i2o_driver_notify_device_remove_all(struct i2o_device *);
 
-/* Status codes and Error Information for Parameter functions */
+/* I2O device functions */
+extern int i2o_device_claim(struct i2o_device *);
+extern int i2o_device_claim_release(struct i2o_device *);
 
-#define I2O_PARAMS_STATUS_SUCCESS		0x00
-#define I2O_PARAMS_STATUS_BAD_KEY_ABORT		0x01
-#define I2O_PARAMS_STATUS_BAD_KEY_CONTINUE   	0x02
-#define I2O_PARAMS_STATUS_BUFFER_FULL		0x03
-#define I2O_PARAMS_STATUS_BUFFER_TOO_SMALL	0x04
-#define I2O_PARAMS_STATUS_FIELD_UNREADABLE	0x05
-#define I2O_PARAMS_STATUS_FIELD_UNWRITEABLE	0x06
-#define I2O_PARAMS_STATUS_INSUFFICIENT_FIELDS	0x07
-#define I2O_PARAMS_STATUS_INVALID_GROUP_ID	0x08
-#define I2O_PARAMS_STATUS_INVALID_OPERATION	0x09
-#define I2O_PARAMS_STATUS_NO_KEY_FIELD		0x0A
-#define I2O_PARAMS_STATUS_NO_SUCH_FIELD		0x0B
-#define I2O_PARAMS_STATUS_NON_DYNAMIC_GROUP	0x0C
-#define I2O_PARAMS_STATUS_OPERATION_ERROR	0x0D
-#define I2O_PARAMS_STATUS_SCALAR_ERROR		0x0E
-#define I2O_PARAMS_STATUS_TABLE_ERROR		0x0F
-#define I2O_PARAMS_STATUS_WRONG_GROUP_TYPE	0x10
+/* Exec OSM functions */
+extern int i2o_exec_lct_get(struct i2o_controller *);
 
-/* DetailedStatusCode defines for Executive, DDM, Util and Transaction error
- * messages: Table 3-2 Detailed Status Codes.*/
+/* device / driver / kobject conversion functions */
+#define to_i2o_driver(drv) container_of(drv,struct i2o_driver, driver)
+#define to_i2o_device(dev) container_of(dev, struct i2o_device, device)
+#define to_i2o_controller(dev) container_of(dev, struct i2o_controller, device)
+#define kobj_to_i2o_device(kobj) to_i2o_device(container_of(kobj, struct device, kobj))
 
-#define I2O_DSC_SUCCESS                        0x0000
-#define I2O_DSC_BAD_KEY                        0x0002
-#define I2O_DSC_TCL_ERROR                      0x0003
-#define I2O_DSC_REPLY_BUFFER_FULL              0x0004
-#define I2O_DSC_NO_SUCH_PAGE                   0x0005
-#define I2O_DSC_INSUFFICIENT_RESOURCE_SOFT     0x0006
-#define I2O_DSC_INSUFFICIENT_RESOURCE_HARD     0x0007
-#define I2O_DSC_CHAIN_BUFFER_TOO_LARGE         0x0009
-#define I2O_DSC_UNSUPPORTED_FUNCTION           0x000A
-#define I2O_DSC_DEVICE_LOCKED                  0x000B
-#define I2O_DSC_DEVICE_RESET                   0x000C
-#define I2O_DSC_INAPPROPRIATE_FUNCTION         0x000D
-#define I2O_DSC_INVALID_INITIATOR_ADDRESS      0x000E
-#define I2O_DSC_INVALID_MESSAGE_FLAGS          0x000F
-#define I2O_DSC_INVALID_OFFSET                 0x0010
-#define I2O_DSC_INVALID_PARAMETER              0x0011
-#define I2O_DSC_INVALID_REQUEST                0x0012
-#define I2O_DSC_INVALID_TARGET_ADDRESS         0x0013
-#define I2O_DSC_MESSAGE_TOO_LARGE              0x0014
-#define I2O_DSC_MESSAGE_TOO_SMALL              0x0015
-#define I2O_DSC_MISSING_PARAMETER              0x0016
-#define I2O_DSC_TIMEOUT                        0x0017
-#define I2O_DSC_UNKNOWN_ERROR                  0x0018
-#define I2O_DSC_UNKNOWN_FUNCTION               0x0019
-#define I2O_DSC_UNSUPPORTED_VERSION            0x001A
-#define I2O_DSC_DEVICE_BUSY                    0x001B
-#define I2O_DSC_DEVICE_NOT_AVAILABLE           0x001C
+/**
+ *	i2o_out_to_virt - Turn an I2O message to a virtual address
+ *	@c: controller
+ *	@m: message engine value
+ *
+ *	Turn a receive message from an I2O controller bus address into
+ *	a Linux virtual address. The shared page frame is a linear block
+ *	so we simply have to shift the offset. This function does not
+ *	work for sender side messages as they are ioremap objects
+ *	provided by the I2O controller.
+ */
+static inline struct i2o_message *i2o_msg_out_to_virt(struct i2o_controller *c,
+						      u32 m)
+{
+	BUG_ON(m < c->out_queue.phys
+	       || m >= c->out_queue.phys + c->out_queue.len);
 
-/* DetailedStatusCode defines for Block Storage Operation: Table 6-7 Detailed
-   Status Codes.*/
+	return c->out_queue.virt + (m - c->out_queue.phys);
+};
 
-#define I2O_BSA_DSC_SUCCESS               0x0000
-#define I2O_BSA_DSC_MEDIA_ERROR           0x0001
-#define I2O_BSA_DSC_ACCESS_ERROR          0x0002
-#define I2O_BSA_DSC_DEVICE_FAILURE        0x0003
-#define I2O_BSA_DSC_DEVICE_NOT_READY      0x0004
-#define I2O_BSA_DSC_MEDIA_NOT_PRESENT     0x0005
-#define I2O_BSA_DSC_MEDIA_LOCKED          0x0006
-#define I2O_BSA_DSC_MEDIA_FAILURE         0x0007
-#define I2O_BSA_DSC_PROTOCOL_FAILURE      0x0008
-#define I2O_BSA_DSC_BUS_FAILURE           0x0009
-#define I2O_BSA_DSC_ACCESS_VIOLATION      0x000A
-#define I2O_BSA_DSC_WRITE_PROTECTED       0x000B
-#define I2O_BSA_DSC_DEVICE_RESET          0x000C
-#define I2O_BSA_DSC_VOLUME_CHANGED        0x000D
-#define I2O_BSA_DSC_TIMEOUT               0x000E
+/**
+ *	i2o_msg_in_to_virt - Turn an I2O message to a virtual address
+ *	@c: controller
+ *	@m: message engine value
+ *
+ *	Turn a send message from an I2O controller bus address into
+ *	a Linux virtual address. The shared page frame is a linear block
+ *	so we simply have to shift the offset. This function does not
+ *	work for receive side messages as they are kmalloc objects
+ *	in a different pool.
+ */
+static inline struct i2o_message __iomem *i2o_msg_in_to_virt(struct
+							     i2o_controller *c,
+							     u32 m)
+{
+	return c->in_queue.virt + m;
+};
 
-/* FailureStatusCodes, Table 3-3 Message Failure Codes */
+/**
+ *	i2o_msg_get - obtain an I2O message from the IOP
+ *	@c: I2O controller
+ *
+ *	This function tries to get a message frame. If no message frame is
+ *	available do not wait until one is availabe (see also i2o_msg_get_wait).
+ *	The returned pointer to the message frame is not in I/O memory, it is
+ *	allocated from a mempool. But because a MFA is allocated from the
+ *	controller too it is guaranteed that i2o_msg_post() will never fail.
+ *
+ *	On a success a pointer to the message frame is returned. If the message
+ *	queue is empty -EBUSY is returned and if no memory is available -ENOMEM
+ *	is returned.
+ */
+static inline struct i2o_message *i2o_msg_get(struct i2o_controller *c)
+{
+	struct i2o_msg_mfa *mmsg = mempool_alloc(c->in_msg.mempool, GFP_ATOMIC);
+	if (!mmsg)
+		return ERR_PTR(-ENOMEM);
+
+	mmsg->mfa = readl(c->in_port);
+	if (mmsg->mfa == I2O_QUEUE_EMPTY) {
+		mempool_free(mmsg, c->in_msg.mempool);
+		return ERR_PTR(-EBUSY);
+	}
 
-#define I2O_FSC_TRANSPORT_SERVICE_SUSPENDED             0x81
-#define I2O_FSC_TRANSPORT_SERVICE_TERMINATED            0x82
-#define I2O_FSC_TRANSPORT_CONGESTION                    0x83
-#define I2O_FSC_TRANSPORT_FAILURE                       0x84
-#define I2O_FSC_TRANSPORT_STATE_ERROR                   0x85
-#define I2O_FSC_TRANSPORT_TIME_OUT                      0x86
-#define I2O_FSC_TRANSPORT_ROUTING_FAILURE               0x87
-#define I2O_FSC_TRANSPORT_INVALID_VERSION               0x88
-#define I2O_FSC_TRANSPORT_INVALID_OFFSET                0x89
-#define I2O_FSC_TRANSPORT_INVALID_MSG_FLAGS             0x8A
-#define I2O_FSC_TRANSPORT_FRAME_TOO_SMALL               0x8B
-#define I2O_FSC_TRANSPORT_FRAME_TOO_LARGE               0x8C
-#define I2O_FSC_TRANSPORT_INVALID_TARGET_ID             0x8D
-#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_ID          0x8E
-#define I2O_FSC_TRANSPORT_INVALID_INITIATOR_CONTEXT     0x8F
-#define I2O_FSC_TRANSPORT_UNKNOWN_FAILURE               0xFF
+	return &mmsg->msg;
+};
 
-/* Device Claim Types */
-#define	I2O_CLAIM_PRIMARY					0x01000000
-#define	I2O_CLAIM_MANAGEMENT					0x02000000
-#define	I2O_CLAIM_AUTHORIZED					0x03000000
-#define	I2O_CLAIM_SECONDARY					0x04000000
+/**
+ *	i2o_msg_post - Post I2O message to I2O controller
+ *	@c: I2O controller to which the message should be send
+ *	@msg: message returned by i2o_msg_get()
+ *
+ *	Post the message to the I2O controller and return immediately.
+ */
+static inline void i2o_msg_post(struct i2o_controller *c,
+				struct i2o_message *msg)
+{
+	struct i2o_msg_mfa *mmsg;
 
-/* Message header defines for VersionOffset */
-#define I2OVER15	0x0001
-#define I2OVER20	0x0002
+	mmsg = container_of(msg, struct i2o_msg_mfa, msg);
+	memcpy_toio(i2o_msg_in_to_virt(c, mmsg->mfa), msg,
+		    (le32_to_cpu(msg->u.head[0]) >> 16) << 2);
+	writel(mmsg->mfa, c->in_port);
+	mempool_free(mmsg, c->in_msg.mempool);
+};
 
-/* Default is 1.5 */
-#define I2OVERSION	I2OVER15
+/**
+ * 	i2o_msg_post_wait - Post and wait a message and wait until return
+ *	@c: controller
+ *	@m: message to post
+ *	@timeout: time in seconds to wait
+ *
+ * 	This API allows an OSM to post a message and then be told whether or
+ *	not the system received a successful reply. If the message times out
+ *	then the value '-ETIMEDOUT' is returned.
+ *
+ *	Returns 0 on success or negative error code on failure.
+ */
+static inline int i2o_msg_post_wait(struct i2o_controller *c,
+				    struct i2o_message *msg,
+				    unsigned long timeout)
+{
+	return i2o_msg_post_wait_mem(c, msg, timeout, NULL);
+};
 
-#define SGL_OFFSET_0    I2OVERSION
-#define SGL_OFFSET_4    (0x0040 | I2OVERSION)
-#define SGL_OFFSET_5    (0x0050 | I2OVERSION)
-#define SGL_OFFSET_6    (0x0060 | I2OVERSION)
-#define SGL_OFFSET_7    (0x0070 | I2OVERSION)
-#define SGL_OFFSET_8    (0x0080 | I2OVERSION)
-#define SGL_OFFSET_9    (0x0090 | I2OVERSION)
-#define SGL_OFFSET_10   (0x00A0 | I2OVERSION)
-#define SGL_OFFSET_11   (0x00B0 | I2OVERSION)
-#define SGL_OFFSET_12   (0x00C0 | I2OVERSION)
-#define SGL_OFFSET(x)   (((x)<<4) | I2OVERSION)
+/**
+ *	i2o_msg_nop_mfa - Returns a fetched MFA back to the controller
+ *	@c: I2O controller from which the MFA was fetched
+ *	@mfa: MFA which should be returned
+ *
+ *	This function must be used for preserved messages, because i2o_msg_nop()
+ *	also returns the allocated memory back to the msg_pool mempool.
+ */
+static inline void i2o_msg_nop_mfa(struct i2o_controller *c, u32 mfa)
+{
+	struct i2o_message __iomem *msg;
+	u32 nop[3] = {
+		THREE_WORD_MSG_SIZE | SGL_OFFSET_0,
+		I2O_CMD_UTIL_NOP << 24 | HOST_TID << 12 | ADAPTER_TID,
+		0x00000000
+	};
+
+	msg = i2o_msg_in_to_virt(c, mfa);
+	memcpy_toio(msg, nop, sizeof(nop));
+	writel(mfa, c->in_port);
+};
 
-/* Transaction Reply Lists (TRL) Control Word structure */
-#define TRL_SINGLE_FIXED_LENGTH		0x00
-#define TRL_SINGLE_VARIABLE_LENGTH	0x40
-#define TRL_MULTIPLE_FIXED_LENGTH	0x80
+/**
+ *	i2o_msg_nop - Returns a message which is not used
+ *	@c: I2O controller from which the message was created
+ *	@msg: message which should be returned
+ *
+ *	If you fetch a message via i2o_msg_get, and can't use it, you must
+ *	return the message with this function. Otherwise the MFA is lost as well
+ *	as the allocated memory from the mempool.
+ */
+static inline void i2o_msg_nop(struct i2o_controller *c,
+			       struct i2o_message *msg)
+{
+	struct i2o_msg_mfa *mmsg;
+	mmsg = container_of(msg, struct i2o_msg_mfa, msg);
 
- /* msg header defines for MsgFlags */
-#define MSG_STATIC	0x0100
-#define MSG_64BIT_CNTXT	0x0200
-#define MSG_MULTI_TRANS	0x1000
-#define MSG_FAIL	0x2000
-#define MSG_FINAL	0x4000
-#define MSG_REPLY	0x8000
+	i2o_msg_nop_mfa(c, mmsg->mfa);
+	mempool_free(mmsg, c->in_msg.mempool);
+};
 
- /* minimum size msg */
-#define THREE_WORD_MSG_SIZE	0x00030000
-#define FOUR_WORD_MSG_SIZE	0x00040000
-#define FIVE_WORD_MSG_SIZE	0x00050000
-#define SIX_WORD_MSG_SIZE	0x00060000
-#define SEVEN_WORD_MSG_SIZE	0x00070000
-#define EIGHT_WORD_MSG_SIZE	0x00080000
-#define NINE_WORD_MSG_SIZE	0x00090000
-#define TEN_WORD_MSG_SIZE	0x000A0000
-#define ELEVEN_WORD_MSG_SIZE	0x000B0000
-#define I2O_MESSAGE_SIZE(x)	((x)<<16)
+/**
+ *	i2o_flush_reply - Flush reply from I2O controller
+ *	@c: I2O controller
+ *	@m: the message identifier
+ *
+ *	The I2O controller must be informed that the reply message is not needed
+ *	anymore. If you forget to flush the reply, the message frame can't be
+ *	used by the controller anymore and is therefore lost.
+ */
+static inline void i2o_flush_reply(struct i2o_controller *c, u32 m)
+{
+	writel(m, c->out_port);
+};
 
-/* special TID assignments */
-#define ADAPTER_TID		0
-#define HOST_TID		1
+/*
+ *	Endian handling wrapped into the macro - keeps the core code
+ *	cleaner.
+ */
 
-/* outbound queue defines */
-#define I2O_MAX_OUTBOUND_MSG_FRAMES	128
-#define I2O_OUTBOUND_MSG_FRAME_SIZE	128	/* in 32-bit words */
+#define i2o_raw_writel(val, mem)	__raw_writel(cpu_to_le32(val), mem)
 
-#define I2O_POST_WAIT_OK	0
-#define I2O_POST_WAIT_TIMEOUT	-ETIMEDOUT
+extern int i2o_parm_field_get(struct i2o_device *, int, int, void *, int);
+extern int i2o_parm_table_get(struct i2o_device *, int, int, int, void *, int,
+			      void *, int);
 
-#define I2O_CONTEXT_LIST_MIN_LENGTH	15
-#define I2O_CONTEXT_LIST_USED		0x01
-#define I2O_CONTEXT_LIST_DELETED	0x02
+/* debugging and troubleshooting/diagnostic helpers. */
+#define osm_printk(level, format, arg...)  \
+	printk(level "%s: " format, OSM_NAME , ## arg)
 
-/* timeouts */
-#define I2O_TIMEOUT_INIT_OUTBOUND_QUEUE	15
-#define I2O_TIMEOUT_MESSAGE_GET		5
-#define I2O_TIMEOUT_RESET		30
-#define I2O_TIMEOUT_STATUS_GET		5
-#define I2O_TIMEOUT_LCT_GET		360
-#define I2O_TIMEOUT_SCSI_SCB_ABORT	240
+#ifdef DEBUG
+#define osm_debug(format, arg...) \
+	osm_printk(KERN_DEBUG, format , ## arg)
+#else
+#define osm_debug(format, arg...) \
+        do { } while (0)
+#endif
 
-/* retries */
-#define I2O_HRT_GET_TRIES		3
-#define I2O_LCT_GET_TRIES		3
+#define osm_err(format, arg...)		\
+	osm_printk(KERN_ERR, format , ## arg)
+#define osm_info(format, arg...)		\
+	osm_printk(KERN_INFO, format , ## arg)
+#define osm_warn(format, arg...)		\
+	osm_printk(KERN_WARNING, format , ## arg)
 
-/* defines for max_sectors and max_phys_segments */
-#define I2O_MAX_SECTORS			1024
-#define I2O_MAX_SECTORS_LIMITED		256
-#define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
+/* debugging functions */
+extern void i2o_report_status(const char *, const char *, struct i2o_message *);
+extern void i2o_dump_message(struct i2o_message *);
+extern void i2o_dump_hrt(struct i2o_controller *c);
+extern void i2o_debug_state(struct i2o_controller *c);
 
 #endif				/* __KERNEL__ */
 #endif				/* _I2O_H */
-- 
cgit v1.2.3


From 24791bd48f643194d806654b587251b0f92233e8 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Fri, 6 Jan 2006 00:19:31 -0800
Subject: [PATCH] I2O: Remove wrong I2O device class

Removed wrong I2O device class, which was only needed to add sysfs attributes.

Signed-off-by: Markus Lidel <Markus.Lidel@shadowconnect.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/message/i2o/core.h   |   2 +
 drivers/message/i2o/device.c | 144 ++++++++++++++++++-------------------------
 drivers/message/i2o/driver.c |   2 -
 drivers/message/i2o/iop.c    |  34 ++--------
 include/linux/i2o.h          |   1 -
 5 files changed, 68 insertions(+), 115 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/message/i2o/core.h b/drivers/message/i2o/core.h
index 9eefedb16211..9aa9b91170b2 100644
--- a/drivers/message/i2o/core.h
+++ b/drivers/message/i2o/core.h
@@ -33,6 +33,8 @@ extern int __init i2o_pci_init(void);
 extern void __exit i2o_pci_exit(void);
 
 /* device */
+extern struct device_attribute i2o_device_attrs[];
+
 extern void i2o_device_remove(struct i2o_device *);
 extern int i2o_device_parse_lct(struct i2o_controller *);
 
diff --git a/drivers/message/i2o/device.c b/drivers/message/i2o/device.c
index 1db26215937f..a5e260b7a3b6 100644
--- a/drivers/message/i2o/device.c
+++ b/drivers/message/i2o/device.c
@@ -142,8 +142,9 @@ static void i2o_device_release(struct device *dev)
 
 
 /**
- *	i2o_device_class_show_class_id - Displays class id of I2O device
- *	@cd: class device of which the class id should be displayed
+ *	i2o_device_show_class_id - Displays class id of I2O device
+ *	@dev: device of which the class id should be displayed
+ *	@attr: pointer to device attribute
  *	@buf: buffer into which the class id should be printed
  *
  *	Returns the number of bytes which are printed into the buffer.
@@ -159,15 +160,15 @@ static ssize_t i2o_device_show_class_id(struct device *dev,
 }
 
 /**
- *	i2o_device_class_show_tid - Displays TID of I2O device
- *	@cd: class device of which the TID should be displayed
- *	@buf: buffer into which the class id should be printed
+ *	i2o_device_show_tid - Displays TID of I2O device
+ *	@dev: device of which the TID should be displayed
+ *	@attr: pointer to device attribute
+ *	@buf: buffer into which the TID should be printed
  *
  *	Returns the number of bytes which are printed into the buffer.
  */
 static ssize_t i2o_device_show_tid(struct device *dev,
-				   struct device_attribute *attr,
-				   char *buf)
+				   struct device_attribute *attr, char *buf)
 {
 	struct i2o_device *i2o_dev = to_i2o_device(dev);
 
@@ -208,66 +209,6 @@ static struct i2o_device *i2o_device_alloc(void)
 	return dev;
 }
 
-/**
- *	i2o_setup_sysfs_links - Adds attributes to the I2O device
- *	@cd: I2O class device which is added to the I2O device class
- *
- *	This function get called when a I2O device is added to the class. It
- *	creates the attributes for each device and creates user/parent symlink
- *	if necessary.
- *
- *	Returns 0 on success or negative error code on failure.
- */
-static void i2o_setup_sysfs_links(struct i2o_device *i2o_dev)
-{
-	struct i2o_controller *c = i2o_dev->iop;
-	struct i2o_device *tmp;
-
-	/* create user entries for this device */
-	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
-	if (tmp && tmp != i2o_dev)
-		sysfs_create_link(&i2o_dev->device.kobj,
-				  &tmp->device.kobj, "user");
-
-	/* create user entries refering to this device */
-	list_for_each_entry(tmp, &c->devices, list)
-		if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid &&
-		    tmp != i2o_dev)
-			sysfs_create_link(&tmp->device.kobj,
-					  &i2o_dev->device.kobj, "user");
-
-	/* create parent entries for this device */
-	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
-	if (tmp && tmp != i2o_dev)
-		sysfs_create_link(&i2o_dev->device.kobj,
-				  &tmp->device.kobj, "parent");
-
-	/* create parent entries refering to this device */
-	list_for_each_entry(tmp, &c->devices, list)
-		if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid &&
-		    tmp != i2o_dev)
-		sysfs_create_link(&tmp->device.kobj,
-				  &i2o_dev->device.kobj, "parent");
-}
-
-static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev)
-{
-	struct i2o_controller *c = i2o_dev->iop;
-	struct i2o_device *tmp;
-
-	sysfs_remove_link(&i2o_dev->device.kobj, "parent");
-	sysfs_remove_link(&i2o_dev->device.kobj, "user");
-
-	list_for_each_entry(tmp, &c->devices, list) {
-		if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
-			sysfs_remove_link(&tmp->device.kobj, "parent");
-		if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
-			sysfs_remove_link(&tmp->device.kobj, "user");
-	}
-}
-
-
-
 /**
  *	i2o_device_add - allocate a new I2O device and add it to the IOP
  *	@iop: I2O controller where the device is on
@@ -282,33 +223,57 @@ static void i2o_remove_sysfs_links(struct i2o_device *i2o_dev)
 static struct i2o_device *i2o_device_add(struct i2o_controller *c,
 					 i2o_lct_entry * entry)
 {
-	struct i2o_device *dev;
+	struct i2o_device *i2o_dev, *tmp;
 
-	dev = i2o_device_alloc();
-	if (IS_ERR(dev)) {
+	i2o_dev = i2o_device_alloc();
+	if (IS_ERR(i2o_dev)) {
 		printk(KERN_ERR "i2o: unable to allocate i2o device\n");
-		return dev;
+		return i2o_dev;
 	}
 
-	dev->lct_data = *entry;
-	dev->iop = c;
+	i2o_dev->lct_data = *entry;
 
-	snprintf(dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit,
-		 dev->lct_data.tid);
+	snprintf(i2o_dev->device.bus_id, BUS_ID_SIZE, "%d:%03x", c->unit,
+		 i2o_dev->lct_data.tid);
 
-	dev->device.parent = &c->device;
+	i2o_dev->iop = c;
+	i2o_dev->device.parent = &c->device;
 
-	device_register(&dev->device);
+	device_register(&i2o_dev->device);
 
-	list_add_tail(&dev->list, &c->devices);
+	list_add_tail(&i2o_dev->list, &c->devices);
 
-	i2o_setup_sysfs_links(dev);
+	/* create user entries for this device */
+	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.user_tid);
+	if (tmp && (tmp != i2o_dev))
+		sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
+				  "user");
 
-	i2o_driver_notify_device_add_all(dev);
+	/* create user entries refering to this device */
+	list_for_each_entry(tmp, &c->devices, list)
+		if ((tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
+		    && (tmp != i2o_dev))
+		    sysfs_create_link(&tmp->device.kobj,
+				      &i2o_dev->device.kobj, "user");
 
-	pr_debug("i2o: device %s added\n", dev->device.bus_id);
+	/* create parent entries for this device */
+	tmp = i2o_iop_find_device(i2o_dev->iop, i2o_dev->lct_data.parent_tid);
+	if (tmp && (tmp != i2o_dev))
+		sysfs_create_link(&i2o_dev->device.kobj, &tmp->device.kobj,
+				  "parent");
 
-	return dev;
+	/* create parent entries refering to this device */
+	list_for_each_entry(tmp, &c->devices, list)
+		if ((tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
+		    && (tmp != i2o_dev))
+			sysfs_create_link(&tmp->device.kobj,
+					  &i2o_dev->device.kobj, "parent");
+
+	i2o_driver_notify_device_add_all(i2o_dev);
+
+	pr_debug("i2o: device %s added\n", i2o_dev->device.bus_id);
+
+	return i2o_dev;
 }
 
 /**
@@ -321,9 +286,22 @@ static struct i2o_device *i2o_device_add(struct i2o_controller *c,
  */
 void i2o_device_remove(struct i2o_device *i2o_dev)
 {
+	struct i2o_device *tmp;
+	struct i2o_controller *c = i2o_dev->iop;
+
 	i2o_driver_notify_device_remove_all(i2o_dev);
-	i2o_remove_sysfs_links(i2o_dev);
+
+	sysfs_remove_link(&i2o_dev->device.kobj, "parent");
+	sysfs_remove_link(&i2o_dev->device.kobj, "user");
+
+	list_for_each_entry(tmp, &c->devices, list) {
+		if (tmp->lct_data.parent_tid == i2o_dev->lct_data.tid)
+			sysfs_remove_link(&tmp->device.kobj, "parent");
+		if (tmp->lct_data.user_tid == i2o_dev->lct_data.tid)
+			sysfs_remove_link(&tmp->device.kobj, "user");
+	}
 	list_del(&i2o_dev->list);
+
 	device_unregister(&i2o_dev->device);
 }
 
diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index 0fb9c4e2ad4c..25292b36e2d9 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -61,8 +61,6 @@ static int i2o_bus_match(struct device *dev, struct device_driver *drv)
 };
 
 /* I2O bus type */
-extern struct device_attribute i2o_device_attrs[];
-
 struct bus_type i2o_bus_type = {
 	.name = "i2o",
 	.match = i2o_bus_match,
diff --git a/drivers/message/i2o/iop.c b/drivers/message/i2o/iop.c
index f86abb42bf89..7411a0504bf2 100644
--- a/drivers/message/i2o/iop.c
+++ b/drivers/message/i2o/iop.c
@@ -806,7 +806,6 @@ void i2o_iop_remove(struct i2o_controller *c)
 	list_for_each_entry_safe(dev, tmp, &c->devices, list)
 	    i2o_device_remove(dev);
 
-	class_device_unregister(c->classdev);
 	device_del(&c->device);
 
 	/* Ask the IOP to switch to RESET state */
@@ -1050,9 +1049,6 @@ static void i2o_iop_release(struct device *dev)
 	i2o_iop_free(c);
 };
 
-/* I2O controller class */
-static struct class *i2o_controller_class;
-
 /**
  *	i2o_iop_alloc - Allocate and initialize a i2o_controller struct
  *
@@ -1124,36 +1120,29 @@ int i2o_iop_add(struct i2o_controller *c)
 		goto iop_reset;
 	}
 
-	c->classdev = class_device_create(i2o_controller_class, NULL, MKDEV(0,0),
-			&c->device, "iop%d", c->unit);
-	if (IS_ERR(c->classdev)) {
-		osm_err("%s: could not add controller class\n", c->name);
-		goto device_del;
-	}
-
 	osm_info("%s: Activating I2O controller...\n", c->name);
 	osm_info("%s: This may take a few minutes if there are many devices\n",
 		 c->name);
 
 	if ((rc = i2o_iop_activate(c))) {
 		osm_err("%s: could not activate controller\n", c->name);
-		goto class_del;
+		goto device_del;
 	}
 
 	osm_debug("%s: building sys table...\n", c->name);
 
 	if ((rc = i2o_systab_build()))
-		goto class_del;
+		goto device_del;
 
 	osm_debug("%s: online controller...\n", c->name);
 
 	if ((rc = i2o_iop_online(c)))
-		goto class_del;
+		goto device_del;
 
 	osm_debug("%s: getting LCT...\n", c->name);
 
 	if ((rc = i2o_exec_lct_get(c)))
-		goto class_del;
+		goto device_del;
 
 	list_add(&c->list, &i2o_controllers);
 
@@ -1163,9 +1152,6 @@ int i2o_iop_add(struct i2o_controller *c)
 
 	return 0;
 
-      class_del:
-	class_device_unregister(c->classdev);
-
       device_del:
 	device_del(&c->device);
 
@@ -1225,14 +1211,8 @@ static int __init i2o_iop_init(void)
 
 	printk(KERN_INFO OSM_DESCRIPTION " v" OSM_VERSION "\n");
 
-	i2o_controller_class = class_create(THIS_MODULE, "i2o_controller");
-	if (IS_ERR(i2o_controller_class)) {
-		osm_err("can't register class i2o_controller\n");
-		goto exit;
-	}
-
 	if ((rc = i2o_driver_init()))
-		goto class_exit;
+		goto exit;
 
 	if ((rc = i2o_exec_init()))
 		goto driver_exit;
@@ -1248,9 +1228,6 @@ static int __init i2o_iop_init(void)
       driver_exit:
 	i2o_driver_exit();
 
-      class_exit:
-	class_destroy(i2o_controller_class);
-
       exit:
 	return rc;
 }
@@ -1265,7 +1242,6 @@ static void __exit i2o_iop_exit(void)
 	i2o_pci_exit();
 	i2o_exec_exit();
 	i2o_driver_exit();
-	class_destroy(i2o_controller_class);
 };
 
 module_init(i2o_iop_init);
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 9e359a981221..4c18b7711bd9 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -561,7 +561,6 @@ struct i2o_controller {
 	struct resource mem_resource;	/* Mem resource allocated to the IOP */
 
 	struct device device;
-	struct class_device *classdev;	/* I2O controller class device */
 	struct i2o_device *exec;	/* Executive */
 #if BITS_PER_LONG == 64
 	spinlock_t context_list_lock;	/* lock for context_list */
-- 
cgit v1.2.3


From dcceafe25a5f47cf69e5b46b4da6f15186ec8386 Mon Sep 17 00:00:00 2001
From: Markus Lidel <Markus.Lidel@shadowconnect.com>
Date: Fri, 6 Jan 2006 00:19:32 -0800
Subject: [PATCH] I2O: Bugfixes

- Removed some kmalloc's with __GFP_ZERO and replace it with memset()
  because it didn't work properly.

- Fixed returned message frame in i2o_cfg_passthru() which caused raidutils
  to display wrong error message in case a disk was missing.

- Fixed size of printk() in i2o_scsi.c.

- Fixed get_device() and put_device() in probing of the I2O controller.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/message/i2o/driver.c     |  5 +++--
 drivers/message/i2o/i2o_config.c | 29 ++++++++++++++---------------
 drivers/message/i2o/i2o_scsi.c   |  4 ++--
 drivers/message/i2o/pci.c        |  6 +-----
 include/linux/i2o.h              |  2 +-
 5 files changed, 21 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/message/i2o/driver.c b/drivers/message/i2o/driver.c
index 25292b36e2d9..9c631c873dc6 100644
--- a/drivers/message/i2o/driver.c
+++ b/drivers/message/i2o/driver.c
@@ -217,14 +217,15 @@ int i2o_driver_dispatch(struct i2o_controller *c, u32 m)
 		/* cut of header from message size (in 32-bit words) */
 		size = (le32_to_cpu(msg->u.head[0]) >> 16) - 5;
 
-		evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC | __GFP_ZERO);
+		evt = kmalloc(size * 4 + sizeof(*evt), GFP_ATOMIC);
 		if (!evt)
 			return -ENOMEM;
+		memset(evt, 0, size * 4 + sizeof(*evt));
 
 		evt->size = size;
 		evt->tcntxt = le32_to_cpu(msg->u.s.tcntxt);
 		evt->event_indicator = le32_to_cpu(msg->body[0]);
-		memcpy(&evt->tcntxt, &msg->u.s.tcntxt, size * 4);
+		memcpy(&evt->data, &msg->body[1], size * 4);
 
 		list_for_each_entry_safe(dev, tmp, &c->devices, list)
 		    if (dev->lct_data.tid == tid) {
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index 4fe73d628c5b..286fef3240c4 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -36,12 +36,12 @@
 
 #include <asm/uaccess.h>
 
-#include "core.h"
-
 #define SG_TABLESIZE		30
 
-static int i2o_cfg_ioctl(struct inode *inode, struct file *fp, unsigned int cmd,
-			 unsigned long arg);
+extern int i2o_parm_issue(struct i2o_device *, int, void *, int, void *, int);
+
+static int i2o_cfg_ioctl(struct inode *, struct file *, unsigned int,
+			 unsigned long);
 
 static spinlock_t i2o_config_lock;
 
@@ -593,9 +593,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 
 	sg_offset = (msg->u.head[0] >> 4) & 0x0f;
 
-	msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context);
-	msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply));
-
 	memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
 	if (sg_offset) {
 		struct sg_simple_element *sg;
@@ -629,7 +626,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 				goto cleanup;
 			}
 			sg_size = sg[i].flag_count & 0xffffff;
-			p = &(sg_list[sg_index++]);
+			p = &(sg_list[sg_index]);
 			/* Allocate memory for the transfer */
 			if (i2o_dma_alloc
 			    (&c->pdev->dev, p, sg_size,
@@ -640,6 +637,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 				rcode = -ENOMEM;
 				goto sg_list_cleanup;
 			}
+			sg_index++;
 			/* Copy in the user's SG buffer if necessary */
 			if (sg[i].
 			    flag_count & 0x04000000 /*I2O_SGL_FLAGS_DIR */ ) {
@@ -661,8 +659,10 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 	}
 
 	rcode = i2o_msg_post_wait(c, msg, 60);
-	if (rcode)
+	if (rcode) {
+		reply[4] = ((u32) rcode) << 24;
 		goto sg_list_cleanup;
+	}
 
 	if (sg_offset) {
 		u32 msg[I2O_OUTBOUND_MSG_FRAME_SIZE];
@@ -712,6 +712,7 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 		}
 	}
 
+      sg_list_cleanup:
 	/* Copy back the reply to user space */
 	if (reply_size) {
 		// we wrote our own values for context - now restore the user supplied ones
@@ -729,7 +730,6 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
 		}
 	}
 
-      sg_list_cleanup:
 	for (i = 0; i < sg_index; i++)
 		i2o_dma_free(&c->pdev->dev, &sg_list[i]);
 
@@ -827,9 +827,6 @@ static int i2o_cfg_passthru(unsigned long arg)
 
 	sg_offset = (msg->u.head[0] >> 4) & 0x0f;
 
-	msg->u.s.icntxt = cpu_to_le32(i2o_config_driver.context);
-	msg->u.s.tcntxt = cpu_to_le32(i2o_cntxt_list_add(c, reply));
-
 	memset(sg_list, 0, sizeof(sg_list[0]) * SG_TABLESIZE);
 	if (sg_offset) {
 		struct sg_simple_element *sg;
@@ -892,8 +889,10 @@ static int i2o_cfg_passthru(unsigned long arg)
 	}
 
 	rcode = i2o_msg_post_wait(c, msg, 60);
-	if (rcode)
+	if (rcode) {
+		reply[4] = ((u32) rcode) << 24;
 		goto sg_list_cleanup;
+	}
 
 	if (sg_offset) {
 		u32 msg[128];
@@ -943,6 +942,7 @@ static int i2o_cfg_passthru(unsigned long arg)
 		}
 	}
 
+      sg_list_cleanup:
 	/* Copy back the reply to user space */
 	if (reply_size) {
 		// we wrote our own values for context - now restore the user supplied ones
@@ -959,7 +959,6 @@ static int i2o_cfg_passthru(unsigned long arg)
 		}
 	}
 
-      sg_list_cleanup:
 	for (i = 0; i < sg_index; i++)
 		kfree(sg_list[i]);
 
diff --git a/drivers/message/i2o/i2o_scsi.c b/drivers/message/i2o/i2o_scsi.c
index 24061dfd46e4..76b9516b1934 100644
--- a/drivers/message/i2o/i2o_scsi.c
+++ b/drivers/message/i2o/i2o_scsi.c
@@ -309,9 +309,9 @@ static int i2o_scsi_probe(struct device *dev)
 	sysfs_create_link(&i2o_dev->device.kobj, &scsi_dev->sdev_gendev.kobj,
 			  "scsi");
 
-	osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %d\n",
+	osm_info("device added (TID: %03x) channel: %d, id: %d, lun: %ld\n",
 		 i2o_dev->lct_data.tid, channel, le32_to_cpu(id),
-		 (unsigned int)le64_to_cpu(lun));
+		 (long unsigned int)le64_to_cpu(lun));
 
 	return 0;
 };
diff --git a/drivers/message/i2o/pci.c b/drivers/message/i2o/pci.c
index 329d482eee81..c5b656cdea7c 100644
--- a/drivers/message/i2o/pci.c
+++ b/drivers/message/i2o/pci.c
@@ -339,7 +339,7 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
 		       pci_name(pdev));
 
 	c->pdev = pdev;
-	c->device.parent = get_device(&pdev->dev);
+	c->device.parent = &pdev->dev;
 
 	/* Cards that fall apart if you hit them with large I/O loads... */
 	if (pdev->vendor == PCI_VENDOR_ID_NCR && pdev->device == 0x0630) {
@@ -410,8 +410,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
 	if ((rc = i2o_iop_add(c)))
 		goto uninstall;
 
-	get_device(&c->device);
-
 	if (i960)
 		pci_write_config_word(i960, 0x42, 0x03ff);
 
@@ -424,7 +422,6 @@ static int __devinit i2o_pci_probe(struct pci_dev *pdev,
 	i2o_pci_free(c);
 
       free_controller:
-	put_device(c->device.parent);
 	i2o_iop_free(c);
 
       disable:
@@ -454,7 +451,6 @@ static void __devexit i2o_pci_remove(struct pci_dev *pdev)
 
 	printk(KERN_INFO "%s: Controller removed.\n", c->name);
 
-	put_device(c->device.parent);
 	put_device(&c->device);
 };
 
diff --git a/include/linux/i2o.h b/include/linux/i2o.h
index 4c18b7711bd9..9ba806796667 100644
--- a/include/linux/i2o.h
+++ b/include/linux/i2o.h
@@ -384,7 +384,7 @@
 
 /* defines for max_sectors and max_phys_segments */
 #define I2O_MAX_SECTORS			1024
-#define I2O_MAX_SECTORS_LIMITED		256
+#define I2O_MAX_SECTORS_LIMITED		128
 #define I2O_MAX_PHYS_SEGMENTS		MAX_PHYS_SEGMENTS
 
 /*
-- 
cgit v1.2.3


From 45714d65618407bce1fd0271bc58303ce14b0785 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 6 Jan 2006 00:19:36 -0800
Subject: [PATCH] fuse: bump interface version

Change interface version to 7.4.

Following changes will need backward compatibility support, so store the minor
version returned by userspace.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c        | 2 ++
 fs/fuse/fuse_i.h     | 3 +++
 include/linux/fuse.h | 2 +-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 8f873e621f41..e5bc3f8eebd0 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -178,6 +178,8 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
 			fc->conn_error = 1;
 
+		fc->minor = req->misc.init_in_out.minor;
+
 		/* After INIT reply is received other requests can go
 		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
 		   up()s on outstanding_sem.  The last up() is done in
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 0ea5301f86be..2d4835e54c90 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -272,6 +272,9 @@ struct fuse_conn {
 	/** Is create not implemented by fs? */
 	unsigned no_create : 1;
 
+	/** Negotiated minor version */
+	unsigned minor;
+
 	/** Backing dev info */
 	struct backing_dev_info bdi;
 };
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index b76b558b03d4..3c85f1a422cc 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -14,7 +14,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 3
+#define FUSE_KERNEL_MINOR_VERSION 4
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
-- 
cgit v1.2.3


From de5f12025572ef8fcffa4be5453061725acfb754 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 6 Jan 2006 00:19:37 -0800
Subject: [PATCH] fuse: add frsize to statfs reply

Add 'frsize' member to the statfs reply.

I'm not sure if sending f_fsid will ever be needed, but just in case leave
some space at the end of the structure, so less compatibility mess would be
required.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/inode.c      | 5 ++++-
 include/linux/fuse.h | 5 +++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index e69a546844d0..3b928a02af04 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -218,6 +218,7 @@ static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr
 {
 	stbuf->f_type    = FUSE_SUPER_MAGIC;
 	stbuf->f_bsize   = attr->bsize;
+	stbuf->f_frsize  = attr->frsize;
 	stbuf->f_blocks  = attr->blocks;
 	stbuf->f_bfree   = attr->bfree;
 	stbuf->f_bavail  = attr->bavail;
@@ -238,10 +239,12 @@ static int fuse_statfs(struct super_block *sb, struct kstatfs *buf)
 	if (!req)
 		return -EINTR;
 
+	memset(&outarg, 0, sizeof(outarg));
 	req->in.numargs = 0;
 	req->in.h.opcode = FUSE_STATFS;
 	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(outarg);
+	req->out.args[0].size =
+		fc->minor < 4 ? FUSE_COMPAT_STATFS_SIZE : sizeof(outarg);
 	req->out.args[0].value = &outarg;
 	request_send(fc, req);
 	err = req->out.h.error;
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 3c85f1a422cc..9d5177c356cc 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -53,6 +53,9 @@ struct fuse_kstatfs {
 	__u64	ffree;
 	__u32	bsize;
 	__u32	namelen;
+	__u32	frsize;
+	__u32	padding;
+	__u32	spare[6];
 };
 
 #define FATTR_MODE	(1 << 0)
@@ -213,6 +216,8 @@ struct fuse_write_out {
 	__u32	padding;
 };
 
+#define FUSE_COMPAT_STATFS_SIZE 48
+
 struct fuse_statfs_out {
 	struct fuse_kstatfs st;
 };
-- 
cgit v1.2.3


From 1d3d752b471d2a3a1d5e4fe177e5e7d52abb4e4c Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 6 Jan 2006 00:19:40 -0800
Subject: [PATCH] fuse: clean up request size limit checking

Change the way a too large request is handled.  Until now in this case the
device read returned -EINVAL and the operation returned -EIO.

Make it more flexibible by not returning -EINVAL from the read, but restarting
it instead.

Also remove the fixed limit on setxattr data and let the filesystem provide as
large a read buffer as it needs to handle the extended attribute data.

The symbolic link length is already checked by VFS to be less than PATH_MAX,
so the extra check against FUSE_SYMLINK_MAX is not needed.

The check in fuse_create_open() against FUSE_NAME_MAX is not needed, since the
dentry has already been looked up, and hence the name already checked.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c        | 26 ++++++++++++++++----------
 fs/fuse/dir.c        | 14 +-------------
 fs/fuse/fuse_i.h     |  9 ++++++---
 fs/fuse/inode.c      |  2 +-
 include/linux/fuse.h |  8 ++------
 5 files changed, 26 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index e5bc3f8eebd0..1afdffdf80db 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -617,6 +617,7 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
 	struct fuse_copy_state cs;
 	unsigned reqsize;
 
+ restart:
 	spin_lock(&fuse_lock);
 	fc = file->private_data;
 	err = -EPERM;
@@ -632,20 +633,25 @@ static ssize_t fuse_dev_readv(struct file *file, const struct iovec *iov,
 
 	req = list_entry(fc->pending.next, struct fuse_req, list);
 	list_del_init(&req->list);
-	spin_unlock(&fuse_lock);
 
 	in = &req->in;
-	reqsize = req->in.h.len;
-	fuse_copy_init(&cs, 1, req, iov, nr_segs);
-	err = -EINVAL;
-	if (iov_length(iov, nr_segs) >= reqsize) {
-		err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
-		if (!err)
-			err = fuse_copy_args(&cs, in->numargs, in->argpages,
-					     (struct fuse_arg *) in->args, 0);
+	reqsize = in->h.len;
+	/* If request is too large, reply with an error and restart the read */
+	if (iov_length(iov, nr_segs) < reqsize) {
+		req->out.h.error = -EIO;
+		/* SETXATTR is special, since it may contain too large data */
+		if (in->h.opcode == FUSE_SETXATTR)
+			req->out.h.error = -E2BIG;
+		request_end(fc, req);
+		goto restart;
 	}
+	spin_unlock(&fuse_lock);
+	fuse_copy_init(&cs, 1, req, iov, nr_segs);
+	err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
+	if (!err)
+		err = fuse_copy_args(&cs, in->numargs, in->argpages,
+				     (struct fuse_arg *) in->args, 0);
 	fuse_copy_finish(&cs);
-
 	spin_lock(&fuse_lock);
 	req->locked = 0;
 	if (!err && req->interrupted)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index 9a6075de961f..f156392d019e 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -236,10 +236,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, int mode,
 	if (fc->no_create)
 		goto out;
 
-	err = -ENAMETOOLONG;
-	if (entry->d_name.len > FUSE_NAME_MAX)
-		goto out;
-
 	err = -EINTR;
 	req = fuse_get_request(fc);
 	if (!req)
@@ -413,12 +409,7 @@ static int fuse_symlink(struct inode *dir, struct dentry *entry,
 {
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	unsigned len = strlen(link) + 1;
-	struct fuse_req *req;
-
-	if (len > FUSE_SYMLINK_MAX)
-		return -ENAMETOOLONG;
-
-	req = fuse_get_request(fc);
+	struct fuse_req *req = fuse_get_request(fc);
 	if (!req)
 		return -EINTR;
 
@@ -988,9 +979,6 @@ static int fuse_setxattr(struct dentry *entry, const char *name,
 	struct fuse_setxattr_in inarg;
 	int err;
 
-	if (size > FUSE_XATTR_SIZE_MAX)
-		return -E2BIG;
-
 	if (fc->no_setxattr)
 		return -EOPNOTSUPP;
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 2d4835e54c90..17fd368559cd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,6 +21,12 @@
 /** If more requests are outstanding, then the operation will block */
 #define FUSE_MAX_OUTSTANDING 10
 
+/** Maximum size of data in a write request */
+#define FUSE_MAX_WRITE 4096
+
+/** It could be as large as PATH_MAX, but would that have any uses? */
+#define FUSE_NAME_MAX 1024
+
 /** If the FUSE_DEFAULT_PERMISSIONS flag is given, the filesystem
     module will check permissions based on the file mode.  Otherwise no
     permission checking is done in the kernel */
@@ -108,9 +114,6 @@ struct fuse_out {
 	struct fuse_arg args[3];
 };
 
-struct fuse_req;
-struct fuse_conn;
-
 /**
  * A request to the client
  */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3b928a02af04..3580b9e12345 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -485,7 +485,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fc->max_read = d.max_read;
 	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
 		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
-	fc->max_write = FUSE_MAX_IN / 2;
+	fc->max_write = FUSE_MAX_WRITE;
 
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 9d5177c356cc..8f64cc2205b0 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -108,12 +108,8 @@ enum fuse_opcode {
 	FUSE_CREATE        = 35
 };
 
-/* Conservative buffer size for the client */
-#define FUSE_MAX_IN 8192
-
-#define FUSE_NAME_MAX 1024
-#define FUSE_SYMLINK_MAX 4096
-#define FUSE_XATTR_SIZE_MAX 4096
+/* The read buffer is required to be at least 8k, but may be much larger */
+#define FUSE_MIN_READ_BUFFER 8192
 
 struct fuse_entry_out {
 	__u64	nodeid;		/* Inode ID */
-- 
cgit v1.2.3


From 3ec870d524c9150add120475c8ddcfa50574f98e Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <miklos@szeredi.hu>
Date: Fri, 6 Jan 2006 00:19:41 -0800
Subject: [PATCH] fuse: make maximum write data configurable

Make the maximum size of write data configurable by the filesystem.  The
previous fixed 4096 limit only worked on architectures where the page size is
less or equal to this.  This change make writing work on other architectures
too, and also lets the filesystem receive bigger write requests in direct_io
mode.

Normal writes which go through the page cache are still limited to a page
sized chunk per request.

Signed-off-by: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/fuse/dev.c        | 48 ++++++++++++++++++++++++++++++------------------
 fs/fuse/fuse_i.h     |  6 ++----
 fs/fuse/inode.c      |  1 -
 include/linux/fuse.h | 11 +++++++++--
 4 files changed, 41 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 1afdffdf80db..e08ab4702d97 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -148,6 +148,26 @@ void fuse_release_background(struct fuse_req *req)
 	spin_unlock(&fuse_lock);
 }
 
+static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
+{
+	int i;
+	struct fuse_init_out *arg = &req->misc.init_out;
+
+	if (arg->major != FUSE_KERNEL_VERSION)
+		fc->conn_error = 1;
+	else {
+		fc->minor = arg->minor;
+		fc->max_write = arg->minor < 5 ? 4096 : arg->max_write;
+	}
+
+	/* After INIT reply is received other requests can go
+	   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
+	   up()s on outstanding_sem.  The last up() is done in
+	   fuse_putback_request() */
+	for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
+		up(&fc->outstanding_sem);
+}
+
 /*
  * This function is called when a request is finished.  Either a reply
  * has arrived or it was interrupted (and not yet sent) or some error
@@ -172,21 +192,9 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 		up_read(&fc->sbput_sem);
 	}
 	wake_up(&req->waitq);
-	if (req->in.h.opcode == FUSE_INIT) {
-		int i;
-
-		if (req->misc.init_in_out.major != FUSE_KERNEL_VERSION)
-			fc->conn_error = 1;
-
-		fc->minor = req->misc.init_in_out.minor;
-
-		/* After INIT reply is received other requests can go
-		   out.  So do (FUSE_MAX_OUTSTANDING - 1) number of
-		   up()s on outstanding_sem.  The last up() is done in
-		   fuse_putback_request() */
-		for (i = 1; i < FUSE_MAX_OUTSTANDING; i++)
-			up(&fc->outstanding_sem);
-	} else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
+	if (req->in.h.opcode == FUSE_INIT)
+		process_init_reply(fc, req);
+	else if (req->in.h.opcode == FUSE_RELEASE && req->inode == NULL) {
 		/* Special case for failed iget in CREATE */
 		u64 nodeid = req->in.h.nodeid;
 		__fuse_get_request(req);
@@ -359,7 +367,7 @@ void fuse_send_init(struct fuse_conn *fc)
 	/* This is called from fuse_read_super() so there's guaranteed
 	   to be a request available */
 	struct fuse_req *req = do_get_request(fc);
-	struct fuse_init_in_out *arg = &req->misc.init_in_out;
+	struct fuse_init_in *arg = &req->misc.init_in;
 	arg->major = FUSE_KERNEL_VERSION;
 	arg->minor = FUSE_KERNEL_MINOR_VERSION;
 	req->in.h.opcode = FUSE_INIT;
@@ -367,8 +375,12 @@ void fuse_send_init(struct fuse_conn *fc)
 	req->in.args[0].size = sizeof(*arg);
 	req->in.args[0].value = arg;
 	req->out.numargs = 1;
-	req->out.args[0].size = sizeof(*arg);
-	req->out.args[0].value = arg;
+	/* Variable length arguement used for backward compatibility
+	   with interface version < 7.5.  Rest of init_out is zeroed
+	   by do_get_request(), so a short reply is not a problem */
+	req->out.argvar = 1;
+	req->out.args[0].size = sizeof(struct fuse_init_out);
+	req->out.args[0].value = &req->misc.init_out;
 	request_send_background(fc, req);
 }
 
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 17fd368559cd..74c8d098a14a 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -21,9 +21,6 @@
 /** If more requests are outstanding, then the operation will block */
 #define FUSE_MAX_OUTSTANDING 10
 
-/** Maximum size of data in a write request */
-#define FUSE_MAX_WRITE 4096
-
 /** It could be as large as PATH_MAX, but would that have any uses? */
 #define FUSE_NAME_MAX 1024
 
@@ -162,7 +159,8 @@ struct fuse_req {
 	union {
 		struct fuse_forget_in forget_in;
 		struct fuse_release_in release_in;
-		struct fuse_init_in_out init_in_out;
+		struct fuse_init_in init_in;
+		struct fuse_init_out init_out;
 	} misc;
 
 	/** page vector */
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 3580b9e12345..e4541869831e 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -485,7 +485,6 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
 	fc->max_read = d.max_read;
 	if (fc->max_read / PAGE_CACHE_SIZE < fc->bdi.ra_pages)
 		fc->bdi.ra_pages = fc->max_read / PAGE_CACHE_SIZE;
-	fc->max_write = FUSE_MAX_WRITE;
 
 	err = -ENOMEM;
 	root = get_root_inode(sb, d.rootmode);
diff --git a/include/linux/fuse.h b/include/linux/fuse.h
index 8f64cc2205b0..528959c52f1b 100644
--- a/include/linux/fuse.h
+++ b/include/linux/fuse.h
@@ -14,7 +14,7 @@
 #define FUSE_KERNEL_VERSION 7
 
 /** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 4
+#define FUSE_KERNEL_MINOR_VERSION 5
 
 /** The node ID of the root inode */
 #define FUSE_ROOT_ID 1
@@ -244,11 +244,18 @@ struct fuse_access_in {
 	__u32	padding;
 };
 
-struct fuse_init_in_out {
+struct fuse_init_in {
 	__u32	major;
 	__u32	minor;
 };
 
+struct fuse_init_out {
+	__u32	major;
+	__u32	minor;
+	__u32	unused[3];
+	__u32	max_write;
+};
+
 struct fuse_in_header {
 	__u32	len;
 	__u32	opcode;
-- 
cgit v1.2.3


From 742ec650e9b63ea61891455bb6f76bac37025c78 Mon Sep 17 00:00:00 2001
From: Marko Kohtala <marko.kohtala@gmail.com>
Date: Fri, 6 Jan 2006 00:19:44 -0800
Subject: [PATCH] parport: phase fixes

Did not move the parport interface properly into IEEE1284_PH_REV_IDLE phase at
end of data due to comparing bytes with nibbles.  Internal phase
IEEE1284_PH_HBUSY_DNA became unused, so remove it.

Signed-off-by: Marko Kohtala <marko.kohtala@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/media/video/cpia_pp.c  | 30 +++++++++-----------
 drivers/parport/ieee1284_ops.c | 62 ++++++++++++++++++++----------------------
 include/linux/parport.h        |  1 -
 3 files changed, 42 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/cpia_pp.c b/drivers/media/video/cpia_pp.c
index ddf184f95d80..6861d408f1b3 100644
--- a/drivers/media/video/cpia_pp.c
+++ b/drivers/media/video/cpia_pp.c
@@ -170,16 +170,9 @@ static size_t cpia_read_nibble (struct parport *port,
 		/* Does the error line indicate end of data? */
 		if (((i /*& 1*/) == 0) &&
 		    (parport_read_status(port) & PARPORT_STATUS_ERROR)) {
-			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
-				DBG("%s: No more nibble data (%d bytes)\n",
-				port->name, i/2);
-
-			/* Go to reverse idle phase. */
-			parport_frob_control (port,
-					      PARPORT_CONTROL_AUTOFD,
-					      PARPORT_CONTROL_AUTOFD);
-			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-			break;
+			DBG("%s: No more nibble data (%d bytes)\n",
+			    port->name, i/2);
+			goto end_of_data;
 		}
 
 		/* Event 7: Set nAutoFd low. */
@@ -227,18 +220,21 @@ static size_t cpia_read_nibble (struct parport *port,
 			byte = nibble;
 	}
 
-	i /= 2; /* i is now in bytes */
-
 	if (i == len) {
 		/* Read the last nibble without checking data avail. */
-		port = port->physport;
-		if (parport_read_status (port) & PARPORT_STATUS_ERROR)
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+		if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+		end_of_data:
+			/* Go to reverse idle phase. */
+			parport_frob_control (port,
+					      PARPORT_CONTROL_AUTOFD,
+					      PARPORT_CONTROL_AUTOFD);
+			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+		}
 		else
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
 	}
 
-	return i;
+	return i/2;
 }
 
 /* CPiA nonstandard "Nibble Stream" mode (2 nibbles per cycle, instead of 1)
diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c
index ce1e2aad8b10..d6c77658231e 100644
--- a/drivers/parport/ieee1284_ops.c
+++ b/drivers/parport/ieee1284_ops.c
@@ -165,17 +165,7 @@ size_t parport_ieee1284_read_nibble (struct parport *port,
 		/* Does the error line indicate end of data? */
 		if (((i & 1) == 0) &&
 		    (parport_read_status(port) & PARPORT_STATUS_ERROR)) {
-			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
-			DPRINTK (KERN_DEBUG
-				"%s: No more nibble data (%d bytes)\n",
-				port->name, i/2);
-
-			/* Go to reverse idle phase. */
-			parport_frob_control (port,
-					      PARPORT_CONTROL_AUTOFD,
-					      PARPORT_CONTROL_AUTOFD);
-			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-			break;
+			goto end_of_data;
 		}
 
 		/* Event 7: Set nAutoFd low. */
@@ -225,18 +215,25 @@ size_t parport_ieee1284_read_nibble (struct parport *port,
 			byte = nibble;
 	}
 
-	i /= 2; /* i is now in bytes */
-
 	if (i == len) {
 		/* Read the last nibble without checking data avail. */
-		port = port->physport;
-		if (parport_read_status (port) & PARPORT_STATUS_ERROR)
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+		if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+		end_of_data:
+			DPRINTK (KERN_DEBUG
+				"%s: No more nibble data (%d bytes)\n",
+				port->name, i/2);
+
+			/* Go to reverse idle phase. */
+			parport_frob_control (port,
+					      PARPORT_CONTROL_AUTOFD,
+					      PARPORT_CONTROL_AUTOFD);
+			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+		}
 		else
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
 	}
 
-	return i;
+	return i/2;
 #endif /* IEEE1284 support */
 }
 
@@ -256,17 +253,7 @@ size_t parport_ieee1284_read_byte (struct parport *port,
 
 		/* Data available? */
 		if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
-			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
-			DPRINTK (KERN_DEBUG
-				 "%s: No more byte data (%Zd bytes)\n",
-				 port->name, count);
-
-			/* Go to reverse idle phase. */
-			parport_frob_control (port,
-					      PARPORT_CONTROL_AUTOFD,
-					      PARPORT_CONTROL_AUTOFD);
-			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
-			break;
+			goto end_of_data;
 		}
 
 		/* Event 14: Place data bus in high impedance state. */
@@ -318,11 +305,20 @@ size_t parport_ieee1284_read_byte (struct parport *port,
 
 	if (count == len) {
 		/* Read the last byte without checking data avail. */
-		port = port->physport;
-		if (parport_read_status (port) & PARPORT_STATUS_ERROR)
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DNA;
+		if (parport_read_status (port) & PARPORT_STATUS_ERROR) {
+		end_of_data:
+			DPRINTK (KERN_DEBUG
+				 "%s: No more byte data (%Zd bytes)\n",
+				 port->name, count);
+
+			/* Go to reverse idle phase. */
+			parport_frob_control (port,
+					      PARPORT_CONTROL_AUTOFD,
+					      PARPORT_CONTROL_AUTOFD);
+			port->physport->ieee1284.phase = IEEE1284_PH_REV_IDLE;
+		}
 		else
-			port->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
+			port->physport->ieee1284.phase = IEEE1284_PH_HBUSY_DAVAIL;
 	}
 
 	return count;
diff --git a/include/linux/parport.h b/include/linux/parport.h
index d2a4d9e1e6d1..f7ff0b0c4031 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -242,7 +242,6 @@ enum ieee1284_phase {
 	IEEE1284_PH_FWD_IDLE,
 	IEEE1284_PH_TERMINATE,
 	IEEE1284_PH_NEGOTIATION,
-	IEEE1284_PH_HBUSY_DNA,
 	IEEE1284_PH_REV_IDLE,
 	IEEE1284_PH_HBUSY_DAVAIL,
 	IEEE1284_PH_REV_DATA,
-- 
cgit v1.2.3


From 110bee75d2e03d3b4bcc74743dee5a21fe7b43bd Mon Sep 17 00:00:00 2001
From: Marko Kohtala <marko.kohtala@gmail.com>
Date: Fri, 6 Jan 2006 00:19:49 -0800
Subject: [PATCH] parport: DEBUG_PARPORT build fix

Add missing "struct" keyword preventing compilation with DEBUG_PARPORT
defined.  Also add some "const".

Signed-off-by: Marko Kohtala <marko.kohtala@gmail.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/parport_pc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h
index c6f762470879..7e62b3429cdd 100644
--- a/include/linux/parport_pc.h
+++ b/include/linux/parport_pc.h
@@ -85,7 +85,7 @@ extern __inline__ void dump_parport_state (char *str, struct parport *p)
 	unsigned char ecr = inb (ECONTROL (p));
 	unsigned char dcr = inb (CONTROL (p));
 	unsigned char dsr = inb (STATUS (p));
-	static char *ecr_modes[] = {"SPP", "PS2", "PPFIFO", "ECP", "xXx", "yYy", "TST", "CFG"};
+	static const char *const ecr_modes[] = {"SPP", "PS2", "PPFIFO", "ECP", "xXx", "yYy", "TST", "CFG"};
 	const struct parport_pc_private *priv = p->physport->private_data;
 	int i;
 
-- 
cgit v1.2.3


From 81684ee645e15601ec935461d9069a3086179c06 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Fri, 6 Jan 2006 00:19:53 -0800
Subject: [PATCH] include/linux/parport_pc.h: "extern inline" -> "static
 inline"

"extern inline" doesn't make much sense.

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/parport_pc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/parport_pc.h b/include/linux/parport_pc.h
index 7e62b3429cdd..1cc0f6b1a49a 100644
--- a/include/linux/parport_pc.h
+++ b/include/linux/parport_pc.h
@@ -79,7 +79,7 @@ static __inline__ unsigned char parport_pc_read_data(struct parport *p)
 }
 
 #ifdef DEBUG_PARPORT
-extern __inline__ void dump_parport_state (char *str, struct parport *p)
+static inline void dump_parport_state (char *str, struct parport *p)
 {
 	/* here's hoping that reading these ports won't side-effect anything underneath */
 	unsigned char ecr = inb (ECONTROL (p));
-- 
cgit v1.2.3


From f93ea411b73594f7d144855fd34278bcf34a9afc Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 6 Jan 2006 00:19:55 -0800
Subject: [PATCH] jbd: split checkpoint lists

Split the checkpoint list of the transaction into two lists.  In the first
list we keep the buffers that need to be submitted for IO.  In the second
list are kept buffers that were already submitted and we just have to wait
for the IO to complete.  This should simplify a handling of checkpoint
lists a bit and can eventually be also a performance gain.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jbd/checkpoint.c | 418 ++++++++++++++++++++++++++++++----------------------
 include/linux/jbd.h |   8 +-
 2 files changed, 248 insertions(+), 178 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c
index 014a51fd00d7..cb3cef525c3b 100644
--- a/fs/jbd/checkpoint.c
+++ b/fs/jbd/checkpoint.c
@@ -24,29 +24,75 @@
 #include <linux/slab.h>
 
 /*
- * Unlink a buffer from a transaction. 
+ * Unlink a buffer from a transaction checkpoint list.
  *
  * Called with j_list_lock held.
  */
 
-static inline void __buffer_unlink(struct journal_head *jh)
+static void __buffer_unlink_first(struct journal_head *jh)
 {
 	transaction_t *transaction;
 
 	transaction = jh->b_cp_transaction;
-	jh->b_cp_transaction = NULL;
 
 	jh->b_cpnext->b_cpprev = jh->b_cpprev;
 	jh->b_cpprev->b_cpnext = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
+	if (transaction->t_checkpoint_list == jh) {
 		transaction->t_checkpoint_list = jh->b_cpnext;
-	if (transaction->t_checkpoint_list == jh)
-		transaction->t_checkpoint_list = NULL;
+		if (transaction->t_checkpoint_list == jh)
+			transaction->t_checkpoint_list = NULL;
+	}
+}
+
+/*
+ * Unlink a buffer from a transaction checkpoint(io) list.
+ *
+ * Called with j_list_lock held.
+ */
+
+static inline void __buffer_unlink(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+
+	__buffer_unlink_first(jh);
+	if (transaction->t_checkpoint_io_list == jh) {
+		transaction->t_checkpoint_io_list = jh->b_cpnext;
+		if (transaction->t_checkpoint_io_list == jh)
+			transaction->t_checkpoint_io_list = NULL;
+	}
+}
+
+/*
+ * Move a buffer from the checkpoint list to the checkpoint io list
+ *
+ * Called with j_list_lock held
+ */
+
+static inline void __buffer_relink_io(struct journal_head *jh)
+{
+	transaction_t *transaction;
+
+	transaction = jh->b_cp_transaction;
+	__buffer_unlink_first(jh);
+
+	if (!transaction->t_checkpoint_io_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_io_list;
+		jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_io_list = jh;
 }
 
 /*
  * Try to release a checkpointed buffer from its transaction.
- * Returns 1 if we released it.
+ * Returns 1 if we released it and 2 if we also released the
+ * whole transaction.
+ *
  * Requires j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
@@ -57,12 +103,11 @@ static int __try_to_free_cp_buf(struct journal_head *jh)
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) {
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
-		__journal_remove_checkpoint(jh);
+		ret = __journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
 		journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
-		ret = 1;
 	} else {
 		jbd_unlock_bh_state(bh);
 	}
@@ -117,83 +162,53 @@ static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh)
 }
 
 /*
- * Clean up a transaction's checkpoint list.  
- *
- * We wait for any pending IO to complete and make sure any clean
- * buffers are removed from the transaction. 
- *
- * Return 1 if we performed any actions which might have destroyed the
- * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
- * the last checkpoint buffer is cleansed)
+ * Clean up transaction's list of buffers submitted for io.
+ * We wait for any pending IO to complete and remove any clean
+ * buffers. Note that we take the buffers in the opposite ordering
+ * from the one in which they were submitted for IO.
  *
  * Called with j_list_lock held.
  */
-static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
+
+static void __wait_cp_io(journal_t *journal, transaction_t *transaction)
 {
-	struct journal_head *jh, *next_jh, *last_jh;
+	struct journal_head *jh;
 	struct buffer_head *bh;
-	int ret = 0;
-
-	assert_spin_locked(&journal->j_list_lock);
-	jh = transaction->t_checkpoint_list;
-	if (!jh)
-		return 0;
-
-	last_jh = jh->b_cpprev;
-	next_jh = jh;
-	do {
-		jh = next_jh;
+	tid_t this_tid;
+	int released = 0;
+
+	this_tid = transaction->t_tid;
+restart:
+	/* Didn't somebody clean up the transaction in the meanwhile */
+	if (journal->j_checkpoint_transactions != transaction ||
+		transaction->t_tid != this_tid)
+		return;
+	while (!released && transaction->t_checkpoint_io_list) {
+		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
+		if (!jbd_trylock_bh_state(bh)) {
+			jbd_sync_bh(journal, bh);
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		if (buffer_locked(bh)) {
 			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
+			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
 			__brelse(bh);
-			goto out_return_1;
-		}
-
-		/*
-		 * This is foul
-		 */
-		if (!jbd_trylock_bh_state(bh)) {
-			jbd_sync_bh(journal, bh);
-			goto out_return_1;
+			spin_lock(&journal->j_list_lock);
+			goto restart;
 		}
-
-		if (jh->b_transaction != NULL) {
-			transaction_t *t = jh->b_transaction;
-			tid_t tid = t->t_tid;
-
-			spin_unlock(&journal->j_list_lock);
-			jbd_unlock_bh_state(bh);
-			log_start_commit(journal, tid);
-			log_wait_commit(journal, tid);
-			goto out_return_1;
-		}
-
 		/*
-		 * AKPM: I think the buffer_jbddirty test is redundant - it
-		 * shouldn't have NULL b_transaction?
+		 * Now in whatever state the buffer currently is, we know that
+		 * it has been written out and so we can drop it from the list
 		 */
-		next_jh = jh->b_cpnext;
-		if (!buffer_dirty(bh) && !buffer_jbddirty(bh)) {
-			BUFFER_TRACE(bh, "remove from checkpoint");
-			__journal_remove_checkpoint(jh);
-			jbd_unlock_bh_state(bh);
-			journal_remove_journal_head(bh);
-			__brelse(bh);
-			ret = 1;
-		} else {
-			jbd_unlock_bh_state(bh);
-		}
-	} while (jh != last_jh);
-
-	return ret;
-out_return_1:
-	spin_lock(&journal->j_list_lock);
-	return 1;
+		released = __journal_remove_checkpoint(jh);
+		jbd_unlock_bh_state(bh);
+	}
 }
 
 #define NR_BATCH	64
@@ -203,9 +218,7 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
 {
 	int i;
 
-	spin_unlock(&journal->j_list_lock);
 	ll_rw_block(SWRITE, *batch_count, bhs);
-	spin_lock(&journal->j_list_lock);
 	for (i = 0; i < *batch_count; i++) {
 		struct buffer_head *bh = bhs[i];
 		clear_buffer_jwrite(bh);
@@ -221,19 +234,46 @@ __flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count)
  * Return 1 if something happened which requires us to abort the current
  * scan of the checkpoint list.  
  *
- * Called with j_list_lock held.
+ * Called with j_list_lock held and drops it if 1 is returned
  * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it
  */
-static int __flush_buffer(journal_t *journal, struct journal_head *jh,
-			struct buffer_head **bhs, int *batch_count,
-			int *drop_count)
+static int __process_buffer(journal_t *journal, struct journal_head *jh,
+			struct buffer_head **bhs, int *batch_count)
 {
 	struct buffer_head *bh = jh2bh(jh);
 	int ret = 0;
 
-	if (buffer_dirty(bh) && !buffer_locked(bh) && jh->b_jlist == BJ_None) {
-		J_ASSERT_JH(jh, jh->b_transaction == NULL);
+	if (buffer_locked(bh)) {
+		get_bh(bh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		wait_on_buffer(bh);
+		/* the journal_head may have gone by now */
+		BUFFER_TRACE(bh, "brelse");
+		put_bh(bh);
+		ret = 1;
+	}
+	else if (jh->b_transaction != NULL) {
+		transaction_t *t = jh->b_transaction;
+		tid_t tid = t->t_tid;
 
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		log_start_commit(journal, tid);
+		log_wait_commit(journal, tid);
+		ret = 1;
+	}
+	else if (!buffer_dirty(bh)) {
+		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
+		BUFFER_TRACE(bh, "remove from checkpoint");
+		__journal_remove_checkpoint(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		journal_remove_journal_head(bh);
+		put_bh(bh);
+		ret = 1;
+	}
+	else {
 		/*
 		 * Important: we are about to write the buffer, and
 		 * possibly block, while still holding the journal lock.
@@ -246,45 +286,30 @@ static int __flush_buffer(journal_t *journal, struct journal_head *jh,
 		J_ASSERT_BH(bh, !buffer_jwrite(bh));
 		set_buffer_jwrite(bh);
 		bhs[*batch_count] = bh;
+		__buffer_relink_io(jh);
 		jbd_unlock_bh_state(bh);
 		(*batch_count)++;
 		if (*batch_count == NR_BATCH) {
+			spin_unlock(&journal->j_list_lock);
 			__flush_batch(journal, bhs, batch_count);
 			ret = 1;
 		}
-	} else {
-		int last_buffer = 0;
-		if (jh->b_cpnext == jh) {
-			/* We may be about to drop the transaction.  Tell the
-			 * caller that the lists have changed.
-			 */
-			last_buffer = 1;
-		}
-		if (__try_to_free_cp_buf(jh)) {
-			(*drop_count)++;
-			ret = last_buffer;
-		}
 	}
 	return ret;
 }
 
 /*
- * Perform an actual checkpoint.  We don't write out only enough to
- * satisfy the current blocked requests: rather we submit a reasonably
- * sized chunk of the outstanding data to disk at once for
- * efficiency.  __log_wait_for_space() will retry if we didn't free enough.
+ * Perform an actual checkpoint. We take the first transaction on the
+ * list of transactions to be checkpointed and send all its buffers
+ * to disk. We submit larger chunks of data at once.
  * 
- * However, we _do_ take into account the amount requested so that once
- * the IO has been queued, we can return as soon as enough of it has
- * completed to disk.  
- *
  * The journal should be locked before calling this function.
  */
 int log_do_checkpoint(journal_t *journal)
 {
+	transaction_t *transaction;
+	tid_t this_tid;
 	int result;
-	int batch_count = 0;
-	struct buffer_head *bhs[NR_BATCH];
 
 	jbd_debug(1, "Start checkpoint\n");
 
@@ -299,79 +324,70 @@ int log_do_checkpoint(journal_t *journal)
 		return result;
 
 	/*
-	 * OK, we need to start writing disk blocks.  Try to free up a
-	 * quarter of the log in a single checkpoint if we can.
+	 * OK, we need to start writing disk blocks.  Take one transaction
+	 * and write it.
 	 */
+	spin_lock(&journal->j_list_lock);
+	if (!journal->j_checkpoint_transactions)
+		goto out;
+	transaction = journal->j_checkpoint_transactions;
+	this_tid = transaction->t_tid;
+restart:
 	/*
-	 * AKPM: check this code.  I had a feeling a while back that it
-	 * degenerates into a busy loop at unmount time.
+	 * If someone cleaned up this transaction while we slept, we're
+	 * done (maybe it's a new transaction, but it fell at the same
+	 * address).
 	 */
-	spin_lock(&journal->j_list_lock);
-	while (journal->j_checkpoint_transactions) {
-		transaction_t *transaction;
-		struct journal_head *jh, *last_jh, *next_jh;
-		int drop_count = 0;
-		int cleanup_ret, retry = 0;
-		tid_t this_tid;
-
-		transaction = journal->j_checkpoint_transactions;
-		this_tid = transaction->t_tid;
-		jh = transaction->t_checkpoint_list;
-		last_jh = jh->b_cpprev;
-		next_jh = jh;
-		do {
+ 	if (journal->j_checkpoint_transactions == transaction ||
+			transaction->t_tid == this_tid) {
+		int batch_count = 0;
+		struct buffer_head *bhs[NR_BATCH];
+		struct journal_head *jh;
+		int retry = 0;
+
+		while (!retry && transaction->t_checkpoint_list) {
 			struct buffer_head *bh;
 
-			jh = next_jh;
-			next_jh = jh->b_cpnext;
+			jh = transaction->t_checkpoint_list;
 			bh = jh2bh(jh);
 			if (!jbd_trylock_bh_state(bh)) {
 				jbd_sync_bh(journal, bh);
-				spin_lock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-			retry = __flush_buffer(journal, jh, bhs, &batch_count, &drop_count);
-			if (cond_resched_lock(&journal->j_list_lock)) {
+			retry = __process_buffer(journal, jh, bhs,
+						&batch_count);
+			if (!retry &&
+			    lock_need_resched(&journal->j_list_lock)) {
+				spin_unlock(&journal->j_list_lock);
 				retry = 1;
 				break;
 			}
-		} while (jh != last_jh && !retry);
+		}
 
 		if (batch_count) {
+			if (!retry) {
+				spin_unlock(&journal->j_list_lock);
+				retry = 1;
+			}
 			__flush_batch(journal, bhs, &batch_count);
-			retry = 1;
 		}
 
+		if (retry) {
+			spin_lock(&journal->j_list_lock);
+			goto restart;
+		}
 		/*
-		 * If someone cleaned up this transaction while we slept, we're
-		 * done
-		 */
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
-		if (retry)
-			continue;
-		/*
-		 * Maybe it's a new transaction, but it fell at the same
-		 * address
-		 */
-		if (transaction->t_tid != this_tid)
-			continue;
-		/*
-		 * We have walked the whole transaction list without
-		 * finding anything to write to disk.  We had better be
-		 * able to make some progress or we are in trouble. 
+		 * Now we have cleaned up the first transaction's checkpoint
+		 * list.  Let's clean up the second one.
 		 */
-		cleanup_ret = __cleanup_transaction(journal, transaction);
-		J_ASSERT(drop_count != 0 || cleanup_ret != 0);
-		if (journal->j_checkpoint_transactions != transaction)
-			break;
+		__wait_cp_io(journal, transaction);
 	}
+out:
 	spin_unlock(&journal->j_list_lock);
 	result = cleanup_journal_tail(journal);
 	if (result < 0)
 		return result;
-
 	return 0;
 }
 
@@ -455,6 +471,53 @@ int cleanup_journal_tail(journal_t *journal)
 
 /* Checkpoint list management */
 
+/*
+ * journal_clean_one_cp_list
+ *
+ * Find all the written-back checkpoint buffers in the given list and release them.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ * Returns number of bufers reaped (for debug)
+ */
+
+static int journal_clean_one_cp_list(struct journal_head *jh, int *released)
+{
+	struct journal_head *last_jh;
+	struct journal_head *next_jh = jh;
+	int ret, freed = 0;
+
+	*released = 0;
+	if (!jh)
+		return 0;
+
+ 	last_jh = jh->b_cpprev;
+	do {
+		jh = next_jh;
+		next_jh = jh->b_cpnext;
+		/* Use trylock because of the ranking */
+		if (jbd_trylock_bh_state(jh2bh(jh))) {
+			ret = __try_to_free_cp_buf(jh);
+			if (ret) {
+				freed++;
+				if (ret == 2) {
+					*released = 1;
+					return freed;
+				}
+			}
+		}
+		/*
+		 * This function only frees up some memory if possible so we
+		 * dont have an obligation to finish processing. Bail out if
+		 * preemption requested:
+		 */
+		if (need_resched())
+			return freed;
+	} while (jh != last_jh);
+
+	return freed;
+}
+
 /*
  * journal_clean_checkpoint_list
  *
@@ -462,46 +525,38 @@ int cleanup_journal_tail(journal_t *journal)
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
- * Returns number of bufers reaped (for debug)
+ * Returns number of buffers reaped (for debug)
  */
 
 int __journal_clean_checkpoint_list(journal_t *journal)
 {
 	transaction_t *transaction, *last_transaction, *next_transaction;
-	int ret = 0;
+	int ret = 0, released;
 
 	transaction = journal->j_checkpoint_transactions;
-	if (transaction == 0)
+	if (!transaction)
 		goto out;
 
 	last_transaction = transaction->t_cpprev;
 	next_transaction = transaction;
 	do {
-		struct journal_head *jh;
-
 		transaction = next_transaction;
 		next_transaction = transaction->t_cpnext;
-		jh = transaction->t_checkpoint_list;
-		if (jh) {
-			struct journal_head *last_jh = jh->b_cpprev;
-			struct journal_head *next_jh = jh;
-
-			do {
-				jh = next_jh;
-				next_jh = jh->b_cpnext;
-				/* Use trylock because of the ranknig */
-				if (jbd_trylock_bh_state(jh2bh(jh)))
-					ret += __try_to_free_cp_buf(jh);
-				/*
-				 * This function only frees up some memory
-				 * if possible so we dont have an obligation
-				 * to finish processing. Bail out if preemption
-				 * requested:
-				 */
-				if (need_resched())
-					goto out;
-			} while (jh != last_jh);
-		}
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_list, &released);
+		if (need_resched())
+			goto out;
+		if (released)
+			continue;
+		/*
+		 * It is essential that we are as careful as in the case of
+		 * t_checkpoint_list with removing the buffer from the list as
+		 * we can possibly see not yet submitted buffers on io_list
+		 */
+		ret += journal_clean_one_cp_list(transaction->
+				t_checkpoint_io_list, &released);
+		if (need_resched())
+			goto out;
 	} while (transaction != last_transaction);
 out:
 	return ret;
@@ -516,18 +571,22 @@ out:
  * buffer updates committed in that transaction have safely been stored
  * elsewhere on disk.  To achieve this, all of the buffers in a
  * transaction need to be maintained on the transaction's checkpoint
- * list until they have been rewritten, at which point this function is
+ * lists until they have been rewritten, at which point this function is
  * called to remove the buffer from the existing transaction's
- * checkpoint list.  
+ * checkpoint lists.
+ *
+ * The function returns 1 if it frees the transaction, 0 otherwise.
  *
  * This function is called with the journal locked.
  * This function is called with j_list_lock held.
+ * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
 
-void __journal_remove_checkpoint(struct journal_head *jh)
+int __journal_remove_checkpoint(struct journal_head *jh)
 {
 	transaction_t *transaction;
 	journal_t *journal;
+	int ret = 0;
 
 	JBUFFER_TRACE(jh, "entry");
 
@@ -538,8 +597,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	journal = transaction->t_journal;
 
 	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
 
-	if (transaction->t_checkpoint_list != NULL)
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
 	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
@@ -565,8 +626,10 @@ void __journal_remove_checkpoint(struct journal_head *jh)
 	/* Just in case anybody was waiting for more transactions to be
            checkpointed... */
 	wake_up(&journal->j_wait_logspace);
+	ret = 1;
 out:
 	JBUFFER_TRACE(jh, "exit");
+	return ret;
 }
 
 /*
@@ -628,6 +691,7 @@ void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
 	J_ASSERT(transaction->t_shadow_list == NULL);
 	J_ASSERT(transaction->t_log_list == NULL);
 	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
 	J_ASSERT(transaction->t_updates == 0);
 	J_ASSERT(journal->j_committing_transaction != transaction);
 	J_ASSERT(journal->j_running_transaction != transaction);
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index dcde7adfdce5..558cb4c26ec9 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -497,6 +497,12 @@ struct transaction_s
 	 */
 	struct journal_head	*t_checkpoint_list;
 
+	/*
+	 * Doubly-linked circular list of all buffers submitted for IO while
+	 * checkpointing. [j_list_lock]
+	 */
+	struct journal_head	*t_checkpoint_io_list;
+
 	/*
 	 * Doubly-linked circular list of temporary buffers currently undergoing
 	 * IO in the log [j_list_lock]
@@ -843,7 +849,7 @@ extern void journal_commit_transaction(journal_t *);
 
 /* Checkpoint list management */
 int __journal_clean_checkpoint_list(journal_t *journal);
-void __journal_remove_checkpoint(struct journal_head *);
+int __journal_remove_checkpoint(struct journal_head *);
 void __journal_insert_checkpoint(struct journal_head *, transaction_t *);
 
 /* Buffer IO */
-- 
cgit v1.2.3


From a334de28665b14f0a33df82699fa9a78cfeedf31 Mon Sep 17 00:00:00 2001
From: David Shaw <dshaw@jabberwocky.com>
Date: Fri, 6 Jan 2006 00:19:58 -0800
Subject: [PATCH] knfsd: check error status from vfs_getattr and i_op->fsync

Both vfs_getattr and i_op->fsync return error statuses which nfsd was
largely ignoring.  This as noticed when exporting directories using fuse.

This patch cleans up most of the offences, which involves moving the call
to vfs_getattr out of the xdr encoding routines (where it is too late to
report an error) into the main NFS procedure handling routines.

There is still a called to vfs_gettattr (related to the ACL code) where the
status is ignored, and called to nfsd_sync_dir don't check return status
either.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/nfsd/nfs3proc.c        | 11 +++++++++--
 fs/nfsd/nfs3xdr.c         | 47 ++++++++++++++++++++++++----------------------
 fs/nfsd/nfsxdr.c          | 48 +++++++++++++++++++++++------------------------
 fs/nfsd/vfs.c             | 20 +++++++++++++-------
 include/linux/nfsd/xdr.h  |  3 +++
 include/linux/nfsd/xdr3.h |  1 +
 6 files changed, 75 insertions(+), 55 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 041380fe667b..6d2dfed1de08 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -56,13 +56,20 @@ static int
 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle  *argp,
 					   struct nfsd3_attrstat *resp)
 {
-	int	nfserr;
+	int	err, nfserr;
 
 	dprintk("nfsd: GETATTR(3)  %s\n",
-				SVCFH_fmt(&argp->fh));
+		SVCFH_fmt(&argp->fh));
 
 	fh_copy(&resp->fh, &argp->fh);
 	nfserr = fh_verify(rqstp, &resp->fh, 0, MAY_NOP);
+	if (nfserr)
+		RETURN_STATUS(nfserr);
+
+	err = vfs_getattr(resp->fh.fh_export->ex_mnt,
+			  resp->fh.fh_dentry, &resp->stat);
+	nfserr = nfserrno(err);
+
 	RETURN_STATUS(nfserr);
 }
 
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 9147b8524d05..243d94b9653a 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -154,37 +154,34 @@ decode_sattr3(u32 *p, struct iattr *iap)
 }
 
 static inline u32 *
-encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+	      struct kstat *stat)
 {
-	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
 	struct dentry	*dentry = fhp->fh_dentry;
-	struct kstat stat;
 	struct timespec time;
 
-	vfs_getattr(mnt, dentry, &stat);
-
-	*p++ = htonl(nfs3_ftypes[(stat.mode & S_IFMT) >> 12]);
-	*p++ = htonl((u32) stat.mode);
-	*p++ = htonl((u32) stat.nlink);
-	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
-	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
-	if (S_ISLNK(stat.mode) && stat.size > NFS3_MAXPATHLEN) {
+	*p++ = htonl(nfs3_ftypes[(stat->mode & S_IFMT) >> 12]);
+	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
+	if (S_ISLNK(stat->mode) && stat->size > NFS3_MAXPATHLEN) {
 		p = xdr_encode_hyper(p, (u64) NFS3_MAXPATHLEN);
 	} else {
-		p = xdr_encode_hyper(p, (u64) stat.size);
+		p = xdr_encode_hyper(p, (u64) stat->size);
 	}
-	p = xdr_encode_hyper(p, ((u64)stat.blocks) << 9);
-	*p++ = htonl((u32) MAJOR(stat.rdev));
-	*p++ = htonl((u32) MINOR(stat.rdev));
+	p = xdr_encode_hyper(p, ((u64)stat->blocks) << 9);
+	*p++ = htonl((u32) MAJOR(stat->rdev));
+	*p++ = htonl((u32) MINOR(stat->rdev));
 	if (is_fsid(fhp, rqstp->rq_reffh))
 		p = xdr_encode_hyper(p, (u64) fhp->fh_export->ex_fsid);
 	else
-		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat.dev));
-	p = xdr_encode_hyper(p, (u64) stat.ino);
-	p = encode_time3(p, &stat.atime);
+		p = xdr_encode_hyper(p, (u64) huge_encode_dev(stat->dev));
+	p = xdr_encode_hyper(p, (u64) stat->ino);
+	p = encode_time3(p, &stat->atime);
 	lease_get_mtime(dentry->d_inode, &time); 
 	p = encode_time3(p, &time);
-	p = encode_time3(p, &stat.ctime);
+	p = encode_time3(p, &stat->ctime);
 
 	return p;
 }
@@ -232,8 +229,14 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 {
 	struct dentry *dentry = fhp->fh_dentry;
 	if (dentry && dentry->d_inode != NULL) {
-		*p++ = xdr_one;		/* attributes follow */
-		return encode_fattr3(rqstp, p, fhp);
+	        int err;
+		struct kstat stat;
+
+		err = vfs_getattr(fhp->fh_export->ex_mnt, dentry, &stat);
+		if (!err) {
+			*p++ = xdr_one;		/* attributes follow */
+			return encode_fattr3(rqstp, p, fhp, &stat);
+		}
 	}
 	*p++ = xdr_zero;
 	return p;
@@ -616,7 +619,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd3_attrstat *resp)
 {
 	if (resp->status == 0)
-		p = encode_fattr3(rqstp, p, &resp->fh);
+		p = encode_fattr3(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index b45999ff33e6..aa7bb41b293d 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -152,46 +152,44 @@ decode_sattr(u32 *p, struct iattr *iap)
 }
 
 static inline u32 *
-encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
+encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp,
+	     struct kstat *stat)
 {
-	struct vfsmount *mnt = fhp->fh_export->ex_mnt;
 	struct dentry	*dentry = fhp->fh_dentry;
-	struct kstat stat;
 	int type;
 	struct timespec time;
 
-	vfs_getattr(mnt, dentry, &stat);
-	type = (stat.mode & S_IFMT);
+	type = (stat->mode & S_IFMT);
 
 	*p++ = htonl(nfs_ftypes[type >> 12]);
-	*p++ = htonl((u32) stat.mode);
-	*p++ = htonl((u32) stat.nlink);
-	*p++ = htonl((u32) nfsd_ruid(rqstp, stat.uid));
-	*p++ = htonl((u32) nfsd_rgid(rqstp, stat.gid));
+	*p++ = htonl((u32) stat->mode);
+	*p++ = htonl((u32) stat->nlink);
+	*p++ = htonl((u32) nfsd_ruid(rqstp, stat->uid));
+	*p++ = htonl((u32) nfsd_rgid(rqstp, stat->gid));
 
-	if (S_ISLNK(type) && stat.size > NFS_MAXPATHLEN) {
+	if (S_ISLNK(type) && stat->size > NFS_MAXPATHLEN) {
 		*p++ = htonl(NFS_MAXPATHLEN);
 	} else {
-		*p++ = htonl((u32) stat.size);
+		*p++ = htonl((u32) stat->size);
 	}
-	*p++ = htonl((u32) stat.blksize);
+	*p++ = htonl((u32) stat->blksize);
 	if (S_ISCHR(type) || S_ISBLK(type))
-		*p++ = htonl(new_encode_dev(stat.rdev));
+		*p++ = htonl(new_encode_dev(stat->rdev));
 	else
 		*p++ = htonl(0xffffffff);
-	*p++ = htonl((u32) stat.blocks);
+	*p++ = htonl((u32) stat->blocks);
 	if (is_fsid(fhp, rqstp->rq_reffh))
 		*p++ = htonl((u32) fhp->fh_export->ex_fsid);
 	else
-		*p++ = htonl(new_encode_dev(stat.dev));
-	*p++ = htonl((u32) stat.ino);
-	*p++ = htonl((u32) stat.atime.tv_sec);
-	*p++ = htonl(stat.atime.tv_nsec ? stat.atime.tv_nsec / 1000 : 0);
+		*p++ = htonl(new_encode_dev(stat->dev));
+	*p++ = htonl((u32) stat->ino);
+	*p++ = htonl((u32) stat->atime.tv_sec);
+	*p++ = htonl(stat->atime.tv_nsec ? stat->atime.tv_nsec / 1000 : 0);
 	lease_get_mtime(dentry->d_inode, &time); 
 	*p++ = htonl((u32) time.tv_sec);
 	*p++ = htonl(time.tv_nsec ? time.tv_nsec / 1000 : 0); 
-	*p++ = htonl((u32) stat.ctime.tv_sec);
-	*p++ = htonl(stat.ctime.tv_nsec ? stat.ctime.tv_nsec / 1000 : 0);
+	*p++ = htonl((u32) stat->ctime.tv_sec);
+	*p++ = htonl(stat->ctime.tv_nsec ? stat->ctime.tv_nsec / 1000 : 0);
 
 	return p;
 }
@@ -199,7 +197,9 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 /* Helper function for NFSv2 ACL code */
 u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp)
 {
-	return encode_fattr(rqstp, p, fhp);
+	struct kstat stat;
+	vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat);
+	return encode_fattr(rqstp, p, fhp, &stat);
 }
 
 /*
@@ -394,7 +394,7 @@ int
 nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_attrstat *resp)
 {
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
@@ -403,7 +403,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_diropres *resp)
 {
 	p = encode_fh(p, &resp->fh);
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	return xdr_ressize_check(rqstp, p);
 }
 
@@ -428,7 +428,7 @@ int
 nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p,
 					struct nfsd_readres *resp)
 {
-	p = encode_fattr(rqstp, p, &resp->fh);
+	p = encode_fattr(rqstp, p, &resp->fh, &resp->stat);
 	*p++ = htonl(resp->count);
 	xdr_ressize_check(rqstp, p);
 
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index af7c3c3074b0..f83ab4cf4265 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -717,27 +717,33 @@ nfsd_close(struct file *filp)
  * As this calls fsync (not fdatasync) there is no need for a write_inode
  * after it.
  */
-static inline void nfsd_dosync(struct file *filp, struct dentry *dp,
-			       struct file_operations *fop)
+static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
+			      struct file_operations *fop)
 {
 	struct inode *inode = dp->d_inode;
 	int (*fsync) (struct file *, struct dentry *, int);
+	int err = nfs_ok;
 
 	filemap_fdatawrite(inode->i_mapping);
 	if (fop && (fsync = fop->fsync))
-		fsync(filp, dp, 0);
+		err=fsync(filp, dp, 0);
 	filemap_fdatawait(inode->i_mapping);
+
+	return nfserrno(err);
 }
 	
 
-static void
+static int
 nfsd_sync(struct file *filp)
 {
+        int err;
 	struct inode *inode = filp->f_dentry->d_inode;
 	dprintk("nfsd: sync file %s\n", filp->f_dentry->d_name.name);
 	down(&inode->i_sem);
-	nfsd_dosync(filp, filp->f_dentry, filp->f_op);
+	err=nfsd_dosync(filp, filp->f_dentry, filp->f_op);
 	up(&inode->i_sem);
+
+	return err;
 }
 
 void
@@ -962,7 +968,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
 
 			if (inode->i_state & I_DIRTY) {
 				dprintk("nfsd: write sync %d\n", current->pid);
-				nfsd_sync(file);
+				err=nfsd_sync(file);
 			}
 #if 0
 			wake_up(&inode->i_wait);
@@ -1066,7 +1072,7 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
 		return err;
 	if (EX_ISSYNC(fhp->fh_export)) {
 		if (file->f_op && file->f_op->fsync) {
-			nfsd_sync(file);
+			err = nfsd_sync(file);
 		} else {
 			err = nfserr_notsupp;
 		}
diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h
index 130d4f588a37..3f4f7142bbe3 100644
--- a/include/linux/nfsd/xdr.h
+++ b/include/linux/nfsd/xdr.h
@@ -88,10 +88,12 @@ struct nfsd_readdirargs {
 
 struct nfsd_attrstat {
 	struct svc_fh		fh;
+	struct kstat		stat;
 };
 
 struct nfsd_diropres  {
 	struct svc_fh		fh;
+	struct kstat		stat;
 };
 
 struct nfsd_readlinkres {
@@ -101,6 +103,7 @@ struct nfsd_readlinkres {
 struct nfsd_readres {
 	struct svc_fh		fh;
 	unsigned long		count;
+	struct kstat		stat;
 };
 
 struct nfsd_readdirres {
diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h
index 3c2a71b43bac..a4322741f8b9 100644
--- a/include/linux/nfsd/xdr3.h
+++ b/include/linux/nfsd/xdr3.h
@@ -126,6 +126,7 @@ struct nfsd3_setaclargs {
 struct nfsd3_attrstat {
 	__u32			status;
 	struct svc_fh		fh;
+	struct kstat            stat;
 };
 
 /* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */
-- 
cgit v1.2.3


From 6da487dcc0c6f4c827779687a20016efeffc4d60 Mon Sep 17 00:00:00 2001
From: Alasdair G Kergon <agk@redhat.com>
Date: Fri, 6 Jan 2006 00:20:07 -0800
Subject: [PATCH] device-mapper ioctl: add skip lock_fs flag

Add ioctl DM_SKIP_LOCKFS_FLAG for userspace to request that lock_fs is
bypassed when suspending a device.

There's no change to the behaviour of existing code that doesn't know about
the new flag.

Signed-off-by: Alasdair G Kergon <agk@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/dm-ioctl.c    | 11 +++++++++--
 include/linux/dm-ioctl.h | 11 ++++++++---
 2 files changed, 17 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index dbc07afd4462..561bda5011e0 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -693,14 +693,18 @@ static int dev_rename(struct dm_ioctl *param, size_t param_size)
 static int do_suspend(struct dm_ioctl *param)
 {
 	int r = 0;
+	int do_lockfs = 1;
 	struct mapped_device *md;
 
 	md = find_device(param);
 	if (!md)
 		return -ENXIO;
 
+	if (param->flags & DM_SKIP_LOCKFS_FLAG)
+		do_lockfs = 0;
+
 	if (!dm_suspended(md))
-		r = dm_suspend(md, 1);
+		r = dm_suspend(md, do_lockfs);
 
 	if (!r)
 		r = __dev_status(md, param);
@@ -712,6 +716,7 @@ static int do_suspend(struct dm_ioctl *param)
 static int do_resume(struct dm_ioctl *param)
 {
 	int r = 0;
+	int do_lockfs = 1;
 	struct hash_cell *hc;
 	struct mapped_device *md;
 	struct dm_table *new_map;
@@ -737,8 +742,10 @@ static int do_resume(struct dm_ioctl *param)
 	/* Do we need to load a new map ? */
 	if (new_map) {
 		/* Suspend if it isn't already suspended */
+		if (param->flags & DM_SKIP_LOCKFS_FLAG)
+			do_lockfs = 0;
 		if (!dm_suspended(md))
-			dm_suspend(md, 1);
+			dm_suspend(md, do_lockfs);
 
 		r = dm_swap_table(md, new_map);
 		if (r) {
diff --git a/include/linux/dm-ioctl.h b/include/linux/dm-ioctl.h
index f5eb6b6cd109..fa75ba0d635e 100644
--- a/include/linux/dm-ioctl.h
+++ b/include/linux/dm-ioctl.h
@@ -272,9 +272,9 @@ typedef char ioctl_struct[308];
 #define DM_TARGET_MSG	 _IOWR(DM_IOCTL, DM_TARGET_MSG_CMD, struct dm_ioctl)
 
 #define DM_VERSION_MAJOR	4
-#define DM_VERSION_MINOR	4
+#define DM_VERSION_MINOR	5
 #define DM_VERSION_PATCHLEVEL	0
-#define DM_VERSION_EXTRA	"-ioctl (2005-01-12)"
+#define DM_VERSION_EXTRA	"-ioctl (2005-10-04)"
 
 /* Status bits */
 #define DM_READONLY_FLAG	(1 << 0) /* In/Out */
@@ -301,8 +301,13 @@ typedef char ioctl_struct[308];
 #define DM_BUFFER_FULL_FLAG	(1 << 8) /* Out */
 
 /*
- * Set this to improve performance when you aren't going to use open_count
+ * Set this to improve performance when you aren't going to use open_count.
  */
 #define DM_SKIP_BDGET_FLAG	(1 << 9) /* In */
 
+/*
+ * Set this to avoid attempting to freeze any filesystem when suspending.
+ */
+#define DM_SKIP_LOCKFS_FLAG	(1 << 10) /* In */
+
 #endif				/* _LINUX_DM_IOCTL_H */
-- 
cgit v1.2.3


From 17999be4aa408e7ff3b9d32c735649676567a3cd Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:12 -0800
Subject: [PATCH] md: improve raid1 "IO Barrier" concept

raid1 needs to put up a barrier to new requests while it does resync or other
background recovery.  The code for this is currently open-coded, slighty
obscure by its use of two waitqueues, and not documented.

This patch gathers all the related code into 4 functions, and includes a
comment which (hopefully) explains what is happening.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c         | 167 ++++++++++++++++++++++++---------------------
 include/linux/raid/raid1.h |   4 +-
 2 files changed, 91 insertions(+), 80 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 229d7b204297..f5204149ab65 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -51,6 +51,8 @@ static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
 
 static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
@@ -160,20 +162,13 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
 static inline void free_r1bio(r1bio_t *r1_bio)
 {
-	unsigned long flags;
-
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
 
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	allow_barrier(conf);
 
 	put_all_bios(conf, r1_bio);
 	mempool_free(r1_bio, conf->r1bio_pool);
@@ -182,22 +177,10 @@ static inline void free_r1bio(r1bio_t *r1_bio)
 static inline void put_buf(r1bio_t *r1_bio)
 {
 	conf_t *conf = mddev_to_conf(r1_bio->mddev);
-	unsigned long flags;
 
 	mempool_free(r1_bio, conf->r1buf_pool);
 
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!conf->barrier)
-		BUG();
-	--conf->barrier;
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	lower_barrier(conf);
 }
 
 static void reschedule_retry(r1bio_t *r1_bio)
@@ -210,6 +193,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
 	list_add(&r1_bio->retry_list, &conf->retry_list);
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
+	wake_up(&conf->wait_barrier);
 	md_wakeup_thread(mddev->thread);
 }
 
@@ -593,30 +577,83 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no backgroup IO happening,  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * backgroup IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happeing.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-	
-	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-		if (conf->nr_pending)
-			BUG();
+
+	/* Wait until no block IO is waiting */
+	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+
+	/* block any new IO from starting */
+	conf->barrier++;
+
+	/* No wait for all pending IO to complete */
+	wait_event_lock_irq(conf->wait_barrier,
+			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->barrier--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+	spin_lock_irq(&conf->resync_lock);
+	if (conf->barrier) {
+		conf->nr_waiting++;
+		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+				    conf->resync_lock,
+				    raid1_unplug(conf->mddev->queue));
+		conf->nr_waiting--;
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, raid1_unplug(conf->mddev->queue));
-	conf->next_resync = sect;
+	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->nr_pending--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
 {
@@ -678,10 +715,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 */
 	md_write_start(mddev, bio); /* wait on superblock update early */
 
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
+	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -909,13 +943,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, 	raid1_unplug(conf->mddev->queue));
-	spin_unlock_irq(&conf->resync_lock);
-
-	if (conf->barrier) BUG();
-	if (waitqueue_active(&conf->wait_idle)) BUG();
+	wait_barrier(conf);
+	allow_barrier(conf);
 
 	mempool_destroy(conf->r1buf_pool);
 	conf->r1buf_pool = NULL;
@@ -1317,12 +1346,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return sync_blocks;
 	}
 	/*
-	 * If there is non-resync activity waiting for us then
-	 * put in a delay to throttle resync.
+	 * If there is non-resync activity waiting for a turn,
+	 * and resync is going fast enough,
+	 * then let it though before starting on this new sync request.
 	 */
-	if (!go_faster && waitqueue_active(&conf->wait_resume))
+	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	device_barrier(conf, sector_nr + RESYNC_SECTORS);
+
+	raise_barrier(conf);
+
+	conf->next_resync = sector_nr;
 
 	/*
 	 * If reconstructing, and >1 working disc,
@@ -1355,10 +1388,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 	r1_bio = mempool_alloc(conf->r1buf_pool, GFP_NOIO);
 
-	spin_lock_irq(&conf->resync_lock);
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
-
 	r1_bio->mddev = mddev;
 	r1_bio->sector = sector_nr;
 	r1_bio->state = 0;
@@ -1542,8 +1571,7 @@ static int run(mddev_t *mddev)
 		mddev->recovery_cp = MaxSector;
 
 	spin_lock_init(&conf->resync_lock);
-	init_waitqueue_head(&conf->wait_idle);
-	init_waitqueue_head(&conf->wait_resume);
+	init_waitqueue_head(&conf->wait_barrier);
 
 	bio_list_init(&conf->pending_bio_list);
 	bio_list_init(&conf->flushing_bio_list);
@@ -1714,11 +1742,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	}
 	memset(newmirrors, 0, sizeof(struct mirror_info)*raid_disks);
 
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier++;
-	wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-			    conf->resync_lock, raid1_unplug(mddev->queue));
-	spin_unlock_irq(&conf->resync_lock);
+	raise_barrier(conf);
 
 	/* ok, everything is stopped */
 	oldpool = conf->r1bio_pool;
@@ -1738,12 +1762,7 @@ static int raid1_reshape(mddev_t *mddev, int raid_disks)
 	conf->raid_disks = mddev->raid_disks = raid_disks;
 
 	conf->last_used = 0; /* just make sure it is in-range */
-	spin_lock_irq(&conf->resync_lock);
-	conf->barrier--;
-	spin_unlock_irq(&conf->resync_lock);
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
+	lower_barrier(conf);
 
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
@@ -1758,18 +1777,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 
 	switch(state) {
 	case 1:
-		spin_lock_irq(&conf->resync_lock);
-		conf->barrier++;
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, raid1_unplug(mddev->queue));
-		spin_unlock_irq(&conf->resync_lock);
+		raise_barrier(conf);
 		break;
 	case 0:
-		spin_lock_irq(&conf->resync_lock);
-		conf->barrier--;
-		spin_unlock_irq(&conf->resync_lock);
-		wake_up(&conf->wait_resume);
-		wake_up(&conf->wait_idle);
+		lower_barrier(conf);
 		break;
 	}
 	if (mddev->thread) {
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index 292b98f2b408..c55674252533 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -45,6 +45,7 @@ struct r1_private_data_s {
 
 	spinlock_t		resync_lock;
 	int			nr_pending;
+	int			nr_waiting;
 	int			barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -52,8 +53,7 @@ struct r1_private_data_s {
 					    * Cleared when a sync completes.
 					    */
 
-	wait_queue_head_t	wait_idle;
-	wait_queue_head_t	wait_resume;
+	wait_queue_head_t	wait_barrier;
 
 	struct pool_info	*poolinfo;
 
-- 
cgit v1.2.3


From 0a27ec96b6fb1abf867e36d7b0b681d67588767a Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:13 -0800
Subject: [PATCH] md: improve raid10 "IO Barrier" concept

raid10 needs to put up a barrier to new requests while it does resync or other
background recovery.  The code for this is currently open-coded, slighty
obscure by its use of two waitqueues, and not documented.

This patch gathers all the related code into 4 functions, and includes a
comment which (hopefully) explains what is happening.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid10.c         | 135 ++++++++++++++++++++++++++------------------
 include/linux/raid/raid10.h |   4 +-
 2 files changed, 81 insertions(+), 58 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 713dc9c2c730..50bd7b152f28 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -47,6 +47,9 @@
 
 static void unplug_slaves(mddev_t *mddev);
 
+static void allow_barrier(conf_t *conf);
+static void lower_barrier(conf_t *conf);
+
 static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	conf_t *conf = data;
@@ -175,20 +178,13 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 static inline void free_r10bio(r10bio_t *r10_bio)
 {
-	unsigned long flags;
-
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 
 	/*
 	 * Wake up any possible resync thread that waits for the device
 	 * to go idle.
 	 */
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	allow_barrier(conf);
 
 	put_all_bios(conf, r10_bio);
 	mempool_free(r10_bio, conf->r10bio_pool);
@@ -197,22 +193,10 @@ static inline void free_r10bio(r10bio_t *r10_bio)
 static inline void put_buf(r10bio_t *r10_bio)
 {
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
-	unsigned long flags;
 
 	mempool_free(r10_bio, conf->r10buf_pool);
 
-	spin_lock_irqsave(&conf->resync_lock, flags);
-	if (!conf->barrier)
-		BUG();
-	--conf->barrier;
-	wake_up(&conf->wait_resume);
-	wake_up(&conf->wait_idle);
-
-	if (!--conf->nr_pending) {
-		wake_up(&conf->wait_idle);
-		wake_up(&conf->wait_resume);
-	}
-	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	lower_barrier(conf);
 }
 
 static void reschedule_retry(r10bio_t *r10_bio)
@@ -640,30 +624,82 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
 	return ret;
 }
 
-/*
- * Throttle resync depth, so that we can both get proper overlapping of
- * requests, but are still able to handle normal requests quickly.
+/* Barriers....
+ * Sometimes we need to suspend IO while we do something else,
+ * either some resync/recovery, or reconfigure the array.
+ * To do this we raise a 'barrier'.
+ * The 'barrier' is a counter that can be raised multiple times
+ * to count how many activities are happening which preclude
+ * normal IO.
+ * We can only raise the barrier if there is no pending IO.
+ * i.e. if nr_pending == 0.
+ * We choose only to raise the barrier if no-one is waiting for the
+ * barrier to go down.  This means that as soon as an IO request
+ * is ready, no other operations which require a barrier will start
+ * until the IO request has had a chance.
+ *
+ * So: regular IO calls 'wait_barrier'.  When that returns there
+ *    is no backgroup IO happening,  It must arrange to call
+ *    allow_barrier when it has finished its IO.
+ * backgroup IO calls must call raise_barrier.  Once that returns
+ *    there is no normal IO happeing.  It must arrange to call
+ *    lower_barrier when the particular background IO completes.
  */
 #define RESYNC_DEPTH 32
 
-static void device_barrier(conf_t *conf, sector_t sect)
+static void raise_barrier(conf_t *conf)
 {
 	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_idle, !waitqueue_active(&conf->wait_resume),
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-
-	if (!conf->barrier++) {
-		wait_event_lock_irq(conf->wait_idle, !conf->nr_pending,
-				    conf->resync_lock, unplug_slaves(conf->mddev));
-		if (conf->nr_pending)
-			BUG();
+
+	/* Wait until no block IO is waiting */
+	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	/* block any new IO from starting */
+	conf->barrier++;
+
+	/* No wait for all pending IO to complete */
+	wait_event_lock_irq(conf->wait_barrier,
+			    !conf->nr_pending && conf->barrier < RESYNC_DEPTH,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void lower_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->barrier--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
+static void wait_barrier(conf_t *conf)
+{
+	spin_lock_irq(&conf->resync_lock);
+	if (conf->barrier) {
+		conf->nr_waiting++;
+		wait_event_lock_irq(conf->wait_barrier, !conf->barrier,
+				    conf->resync_lock,
+				    raid10_unplug(conf->mddev->queue));
+		conf->nr_waiting--;
 	}
-	wait_event_lock_irq(conf->wait_resume, conf->barrier < RESYNC_DEPTH,
-			    conf->resync_lock, unplug_slaves(conf->mddev));
-	conf->next_resync = sect;
+	conf->nr_pending++;
 	spin_unlock_irq(&conf->resync_lock);
 }
 
+static void allow_barrier(conf_t *conf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&conf->resync_lock, flags);
+	conf->nr_pending--;
+	spin_unlock_irqrestore(&conf->resync_lock, flags);
+	wake_up(&conf->wait_barrier);
+}
+
 static int make_request(request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
@@ -719,10 +755,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
 	 */
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, );
-	conf->nr_pending++;
-	spin_unlock_irq(&conf->resync_lock);
+	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
 	disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -897,13 +930,8 @@ static void print_conf(conf_t *conf)
 
 static void close_sync(conf_t *conf)
 {
-	spin_lock_irq(&conf->resync_lock);
-	wait_event_lock_irq(conf->wait_resume, !conf->barrier,
-			    conf->resync_lock, 	unplug_slaves(conf->mddev));
-	spin_unlock_irq(&conf->resync_lock);
-
-	if (conf->barrier) BUG();
-	if (waitqueue_active(&conf->wait_idle)) BUG();
+	wait_barrier(conf);
+	allow_barrier(conf);
 
 	mempool_destroy(conf->r10buf_pool);
 	conf->r10buf_pool = NULL;
@@ -1395,9 +1423,10 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 * If there is non-resync activity waiting for us then
 	 * put in a delay to throttle resync.
 	 */
-	if (!go_faster && waitqueue_active(&conf->wait_resume))
+	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	device_barrier(conf, sector_nr + RESYNC_SECTORS);
+	raise_barrier(conf);
+	conf->next_resync = sector_nr;
 
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
@@ -1427,7 +1456,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 
 				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 				spin_lock_irq(&conf->resync_lock);
-				conf->nr_pending++;
 				if (rb2) conf->barrier++;
 				spin_unlock_irq(&conf->resync_lock);
 				atomic_set(&r10_bio->remaining, 0);
@@ -1500,10 +1528,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		int count = 0;
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
-		spin_lock_irq(&conf->resync_lock);
-		conf->nr_pending++;
-		spin_unlock_irq(&conf->resync_lock);
-
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
 
@@ -1713,8 +1737,7 @@ static int run(mddev_t *mddev)
 	INIT_LIST_HEAD(&conf->retry_list);
 
 	spin_lock_init(&conf->resync_lock);
-	init_waitqueue_head(&conf->wait_idle);
-	init_waitqueue_head(&conf->wait_resume);
+	init_waitqueue_head(&conf->wait_barrier);
 
 	/* need to check that every block has at least one working mirror */
 	if (!enough(conf)) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index 60708789c8f9..08317b77802b 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -39,11 +39,11 @@ struct r10_private_data_s {
 
 	spinlock_t		resync_lock;
 	int nr_pending;
+	int nr_waiting;
 	int barrier;
 	sector_t		next_resync;
 
-	wait_queue_head_t	wait_idle;
-	wait_queue_head_t	wait_resume;
+	wait_queue_head_t	wait_barrier;
 
 	mempool_t *r10bio_pool;
 	mempool_t *r10buf_pool;
-- 
cgit v1.2.3


From 6ff8d8ec06690f4011a6c3ad9e0759b9094f0601 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:15 -0800
Subject: [PATCH] md: allow dirty raid[456] arrays to be started at boot

See patch to md.txt for more details

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/md.txt      | 24 ++++++++++++++++++++++++
 drivers/md/md.c           |  4 ++++
 drivers/md/raid5.c        | 15 +++++++++++----
 drivers/md/raid6main.c    | 13 +++++++++----
 include/linux/raid/md_k.h |  1 +
 5 files changed, 49 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index 23e6cce40f9c..1dd0fb6021cf 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -51,6 +51,30 @@ superblock can be autodetected and run at boot time.
 The kernel parameter "raid=partitionable" (or "raid=part") means
 that all auto-detected arrays are assembled as partitionable.
 
+Boot time assembly of degraded/dirty arrays
+-------------------------------------------
+
+If a raid5 or raid6 array is both dirty and degraded, it could have
+undetectable data corruption.  This is because the fact that it is
+'dirty' means that the parity cannot be trusted, and the fact that it
+is degraded means that some datablocks are missing and cannot reliably
+be reconstructed (due to no parity).
+
+For this reason, md will normally refuse to start such an array.  This
+requires the sysadmin to take action to explicitly start the array
+desipite possible corruption.  This is normally done with
+   mdadm --assemble --force ....
+
+This option is not really available if the array has the root
+filesystem on it.  In order to support this booting from such an
+array, md supports a module parameter "start_dirty_degraded" which,
+when set to 1, bypassed the checks and will allows dirty degraded
+arrays to be started.
+
+So, to boot with a root filesystem of a dirty degraded raid[56], use
+
+   md-mod.start_dirty_degraded=1
+
 
 Superblock formats
 ------------------
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 8175a2a222da..b4fb7247b3ed 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1937,6 +1937,7 @@ static void md_safemode_timeout(unsigned long data)
 	md_wakeup_thread(mddev->thread);
 }
 
+static int start_dirty_degraded;
 
 static int do_md_run(mddev_t * mddev)
 {
@@ -2048,6 +2049,7 @@ static int do_md_run(mddev_t * mddev)
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
 	mddev->barriers_work = 1;
+	mddev->ok_start_degraded = start_dirty_degraded;
 
 	if (start_readonly)
 		mddev->ro = 2; /* read-only, but switch on first write */
@@ -4509,6 +4511,8 @@ static int set_ro(const char *val, struct kernel_param *kp)
 }
 
 module_param_call(start_ro, set_ro, get_ro, NULL, 0600);
+module_param(start_dirty_degraded, int, 0644);
+
 
 EXPORT_SYMBOL(register_md_personality);
 EXPORT_SYMBOL(unregister_md_personality);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 334ff7a07283..53a0f2ce76c8 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1904,10 +1904,17 @@ static int run(mddev_t *mddev)
 
 	if (mddev->degraded == 1 &&
 	    mddev->recovery_cp != MaxSector) {
-		printk(KERN_ERR 
-			"raid5: cannot start dirty degraded array for %s\n",
-			mdname(mddev));
-		goto abort;
+		if (mddev->ok_start_degraded)
+			printk(KERN_WARNING
+			       "raid5: starting dirty degraded array: %s"
+			       "- data corruption possible.\n",
+			       mdname(mddev));
+		else {
+			printk(KERN_ERR
+			       "raid5: cannot start dirty degraded array for %s\n",
+			       mdname(mddev));
+			goto abort;
+		}
 	}
 
 	{
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 0000d162d198..9ac6dcd55127 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1929,13 +1929,18 @@ static int run(mddev_t *mddev)
 		goto abort;
 	}
 
-#if 0				/* FIX: For now */
 	if (mddev->degraded > 0 &&
 	    mddev->recovery_cp != MaxSector) {
-		printk(KERN_ERR "raid6: cannot start dirty degraded array for %s\n", mdname(mddev));
-		goto abort;
+		if (mddev->ok_start_degraded)
+			printk(KERN_WARNING "raid6: starting dirty degraded array:%s"
+			       "- data corruption possible.\n",
+			       mdname(mddev));
+		else {
+			printk(KERN_ERR "raid6: cannot start dirty degraded array"
+			       " for %s\n", mdname(mddev));
+			goto abort;
+		}
 	}
-#endif
 
 	{
 		mddev->thread = md_register_thread(raid6d, mddev, "%s_raid6");
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 46629a275ba9..1dd587b5975a 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -183,6 +183,7 @@ struct mddev_s
 	sector_t			resync_mismatches; /* count of sectors where
 							    * parity/replica mismatch found
 							    */
+	int				ok_start_degraded;
 	/* recovery/resync flags 
 	 * NEEDED:   we might need to start a resync/recover
 	 * RUNNING:  a thread is running, or about to be started
-- 
cgit v1.2.3


From 6cce3b23f6f8e974c00af7a9b88f1d413ba368a8 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:16 -0800
Subject: [PATCH] md: write intent bitmap support for raid10

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/md.c             |  10 ++-
 drivers/md/raid10.c         | 178 ++++++++++++++++++++++++++++++++++++++------
 include/linux/raid/raid10.h |   9 ++-
 3 files changed, 171 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index ee199d462520..64e7da3701a5 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -714,9 +714,10 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
 		if (sb->state & (1<<MD_SB_BITMAP_PRESENT) &&
 		    mddev->bitmap_file == NULL) {
-			if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6) {
+			if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+			    && mddev->level != 10) {
 				/* FIXME use a better test */
-				printk(KERN_WARNING "md: bitmaps only support for raid1\n");
+				printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
 				return -EINVAL;
 			}
 			mddev->bitmap_offset = mddev->default_bitmap_offset;
@@ -1037,8 +1038,9 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 
 		if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) &&
 		    mddev->bitmap_file == NULL ) {
-			if (mddev->level != 1) {
-				printk(KERN_WARNING "md: bitmaps only supported for raid1\n");
+			if (mddev->level != 1 && mddev->level != 5 && mddev->level != 6
+			    && mddev->level != 10) {
+				printk(KERN_WARNING "md: bitmaps not supported for this level.\n");
 				return -EINVAL;
 			}
 			mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 50bd7b152f28..8f58a447d9f0 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -18,7 +18,9 @@
  * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
+#include "dm-bio-list.h"
 #include <linux/raid/raid10.h>
+#include <linux/raid/bitmap.h>
 
 /*
  * RAID10 provides a combination of RAID0 and RAID1 functionality.
@@ -306,9 +308,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
+	if (!uptodate) {
 		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-	else
+		/* an I/O failed, we can't clear the bitmap */
+		set_bit(R10BIO_Degraded, &r10_bio->state);
+	} else
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -328,6 +332,11 @@ static int raid10_end_write_request(struct bio *bio, unsigned int bytes_done, in
 	 * already.
 	 */
 	if (atomic_dec_and_test(&r10_bio->remaining)) {
+		/* clear the bitmap if all writes complete successfully */
+		bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector,
+				r10_bio->sectors,
+				!test_bit(R10BIO_Degraded, &r10_bio->state),
+				0);
 		md_write_end(r10_bio->mddev);
 		raid_end_bio_io(r10_bio);
 	}
@@ -486,8 +495,9 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	rcu_read_lock();
 	/*
 	 * Check if we can balance. We can balance on the whole
-	 * device if no resync is going on, or below the resync window.
-	 * We take the first readable disk when above the resync window.
+	 * device if no resync is going on (recovery is ok), or below
+	 * the resync window. We take the first readable disk when
+	 * above the resync window.
 	 */
 	if (conf->mddev->recovery_cp < MaxSector
 	    && (this_sector + sectors >= conf->next_resync)) {
@@ -591,7 +601,10 @@ static void unplug_slaves(mddev_t *mddev)
 
 static void raid10_unplug(request_queue_t *q)
 {
+	mddev_t *mddev = q->queuedata;
+
 	unplug_slaves(q->queuedata);
+	md_wakeup_thread(mddev->thread);
 }
 
 static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
@@ -647,12 +660,13 @@ static int raid10_issue_flush(request_queue_t *q, struct gendisk *disk,
  */
 #define RESYNC_DEPTH 32
 
-static void raise_barrier(conf_t *conf)
+static void raise_barrier(conf_t *conf, int force)
 {
+	BUG_ON(force && !conf->barrier);
 	spin_lock_irq(&conf->resync_lock);
 
-	/* Wait until no block IO is waiting */
-	wait_event_lock_irq(conf->wait_barrier, !conf->nr_waiting,
+	/* Wait until no block IO is waiting (unless 'force') */
+	wait_event_lock_irq(conf->wait_barrier, force || !conf->nr_waiting,
 			    conf->resync_lock,
 			    raid10_unplug(conf->mddev->queue));
 
@@ -710,6 +724,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	int i;
 	int chunk_sects = conf->chunk_mask + 1;
 	const int rw = bio_data_dir(bio);
+	struct bio_list bl;
+	unsigned long flags;
 
 	if (unlikely(bio_barrier(bio))) {
 		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
@@ -767,6 +783,7 @@ static int make_request(request_queue_t *q, struct bio * bio)
 
 	r10_bio->mddev = mddev;
 	r10_bio->sector = bio->bi_sector;
+	r10_bio->state = 0;
 
 	if (rw == READ) {
 		/*
@@ -811,13 +828,16 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		    !test_bit(Faulty, &rdev->flags)) {
 			atomic_inc(&rdev->nr_pending);
 			r10_bio->devs[i].bio = bio;
-		} else
+		} else {
 			r10_bio->devs[i].bio = NULL;
+			set_bit(R10BIO_Degraded, &r10_bio->state);
+		}
 	}
 	rcu_read_unlock();
 
-	atomic_set(&r10_bio->remaining, 1);
+	atomic_set(&r10_bio->remaining, 0);
 
+	bio_list_init(&bl);
 	for (i = 0; i < conf->copies; i++) {
 		struct bio *mbio;
 		int d = r10_bio->devs[i].devnum;
@@ -835,13 +855,14 @@ static int make_request(request_queue_t *q, struct bio * bio)
 		mbio->bi_private = r10_bio;
 
 		atomic_inc(&r10_bio->remaining);
-		generic_make_request(mbio);
+		bio_list_add(&bl, mbio);
 	}
 
-	if (atomic_dec_and_test(&r10_bio->remaining)) {
-		md_write_end(mddev);
-		raid_end_bio_io(r10_bio);
-	}
+	bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0);
+	spin_lock_irqsave(&conf->device_lock, flags);
+	bio_list_merge(&conf->pending_bio_list, &bl);
+	blk_plug_device(mddev->queue);
+	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	return 0;
 }
@@ -999,7 +1020,12 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 	if (!enough(conf))
 		return 0;
 
-	for (mirror=0; mirror < mddev->raid_disks; mirror++)
+	if (rdev->saved_raid_disk >= 0 &&
+	    conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
+		mirror = rdev->saved_raid_disk;
+	else
+		mirror = 0;
+	for ( ; mirror < mddev->raid_disks; mirror++)
 		if ( !(p=conf->mirrors+mirror)->rdev) {
 
 			blk_queue_stack_limits(mddev->queue,
@@ -1015,6 +1041,8 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
 			p->head_position = 0;
 			rdev->raid_disk = mirror;
 			found = 1;
+			if (rdev->saved_raid_disk != mirror)
+				conf->fullsync = 1;
 			rcu_assign_pointer(p->rdev, rdev);
 			break;
 		}
@@ -1282,6 +1310,26 @@ static void raid10d(mddev_t *mddev)
 	for (;;) {
 		char b[BDEVNAME_SIZE];
 		spin_lock_irqsave(&conf->device_lock, flags);
+
+		if (conf->pending_bio_list.head) {
+			bio = bio_list_get(&conf->pending_bio_list);
+			blk_remove_plug(mddev->queue);
+			spin_unlock_irqrestore(&conf->device_lock, flags);
+			/* flush any pending bitmap writes to disk before proceeding w/ I/O */
+			if (bitmap_unplug(mddev->bitmap) != 0)
+				printk("%s: bitmap file write failed!\n", mdname(mddev));
+
+			while (bio) { /* submit pending writes */
+				struct bio *next = bio->bi_next;
+				bio->bi_next = NULL;
+				generic_make_request(bio);
+				bio = next;
+			}
+			unplug = 1;
+
+			continue;
+		}
+
 		if (list_empty(head))
 			break;
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
@@ -1388,6 +1436,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	sector_t max_sector, nr_sectors;
 	int disk;
 	int i;
+	int max_sync;
+	int sync_blocks;
 
 	sector_t sectors_skipped = 0;
 	int chunks_skipped = 0;
@@ -1401,6 +1451,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
 		max_sector = mddev->resync_max_sectors;
 	if (sector_nr >= max_sector) {
+		/* If we aborted, we need to abort the
+		 * sync on the 'current' bitmap chucks (there can
+		 * be several when recovering multiple devices).
+		 * as we may have started syncing it but not finished.
+		 * We can find the current address in
+		 * mddev->curr_resync, but for recovery,
+		 * we need to convert that to several
+		 * virtual addresses.
+		 */
+		if (mddev->curr_resync < max_sector) { /* aborted */
+			if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+				bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
+						&sync_blocks, 1);
+			else for (i=0; i<conf->raid_disks; i++) {
+				sector_t sect =
+					raid10_find_virt(conf, mddev->curr_resync, i);
+				bitmap_end_sync(mddev->bitmap, sect,
+						&sync_blocks, 1);
+			}
+		} else /* completed sync */
+			conf->fullsync = 0;
+
+		bitmap_close_sync(mddev->bitmap);
 		close_sync(conf);
 		*skipped = 1;
 		return sectors_skipped;
@@ -1425,8 +1498,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 */
 	if (!go_faster && conf->nr_waiting)
 		msleep_interruptible(1000);
-	raise_barrier(conf);
-	conf->next_resync = sector_nr;
 
 	/* Again, very different code for resync and recovery.
 	 * Both must result in an r10bio with a list of bios that
@@ -1443,6 +1514,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	 * end_sync_write if we will want to write.
 	 */
 
+	max_sync = RESYNC_PAGES << (PAGE_SHIFT-9);
 	if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		/* recovery... the complicated one */
 		int i, j, k;
@@ -1451,13 +1523,29 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		for (i=0 ; i<conf->raid_disks; i++)
 			if (conf->mirrors[i].rdev &&
 			    !test_bit(In_sync, &conf->mirrors[i].rdev->flags)) {
+				int still_degraded = 0;
 				/* want to reconstruct this device */
 				r10bio_t *rb2 = r10_bio;
+				sector_t sect = raid10_find_virt(conf, sector_nr, i);
+				int must_sync;
+				/* Unless we are doing a full sync, we only need
+				 * to recover the block if it is set in the bitmap
+				 */
+				must_sync = bitmap_start_sync(mddev->bitmap, sect,
+							      &sync_blocks, 1);
+				if (sync_blocks < max_sync)
+					max_sync = sync_blocks;
+				if (!must_sync &&
+				    !conf->fullsync) {
+					/* yep, skip the sync_blocks here, but don't assume
+					 * that there will never be anything to do here
+					 */
+					chunks_skipped = -1;
+					continue;
+				}
 
 				r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
-				spin_lock_irq(&conf->resync_lock);
-				if (rb2) conf->barrier++;
-				spin_unlock_irq(&conf->resync_lock);
+				raise_barrier(conf, rb2 != NULL);
 				atomic_set(&r10_bio->remaining, 0);
 
 				r10_bio->master_bio = (struct bio*)rb2;
@@ -1465,8 +1553,21 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 					atomic_inc(&rb2->remaining);
 				r10_bio->mddev = mddev;
 				set_bit(R10BIO_IsRecover, &r10_bio->state);
-				r10_bio->sector = raid10_find_virt(conf, sector_nr, i);
+				r10_bio->sector = sect;
+
 				raid10_find_phys(conf, r10_bio);
+				/* Need to check if this section will still be
+				 * degraded
+				 */
+				for (j=0; j<conf->copies;j++) {
+					int d = r10_bio->devs[j].devnum;
+					if (conf->mirrors[d].rdev == NULL ||
+					    test_bit(Faulty, &conf->mirrors[d].rdev->flags))
+						still_degraded = 1;
+				}
+				must_sync = bitmap_start_sync(mddev->bitmap, sect,
+							      &sync_blocks, still_degraded);
+
 				for (j=0; j<conf->copies;j++) {
 					int d = r10_bio->devs[j].devnum;
 					if (conf->mirrors[d].rdev &&
@@ -1526,10 +1627,22 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	} else {
 		/* resync. Schedule a read for every block at this virt offset */
 		int count = 0;
+
+		if (!bitmap_start_sync(mddev->bitmap, sector_nr,
+				       &sync_blocks, mddev->degraded) &&
+		    !conf->fullsync && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
+			/* We can skip this block */
+			*skipped = 1;
+			return sync_blocks + sectors_skipped;
+		}
+		if (sync_blocks < max_sync)
+			max_sync = sync_blocks;
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
 
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
+		raise_barrier(conf, 0);
+		conf->next_resync = sector_nr;
 
 		r10_bio->master_bio = NULL;
 		r10_bio->sector = sector_nr;
@@ -1582,6 +1695,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	}
 
 	nr_sectors = 0;
+	if (sector_nr + max_sync < max_sector)
+		max_sector = sector_nr + max_sync;
 	do {
 		struct page *page;
 		int len = PAGE_SIZE;
@@ -1821,6 +1936,26 @@ static int stop(mddev_t *mddev)
 	return 0;
 }
 
+static void raid10_quiesce(mddev_t *mddev, int state)
+{
+	conf_t *conf = mddev_to_conf(mddev);
+
+	switch(state) {
+	case 1:
+		raise_barrier(conf, 0);
+		break;
+	case 0:
+		lower_barrier(conf);
+		break;
+	}
+	if (mddev->thread) {
+		if (mddev->bitmap)
+			mddev->thread->timeout = mddev->bitmap->daemon_sleep * HZ;
+		else
+			mddev->thread->timeout = MAX_SCHEDULE_TIMEOUT;
+		md_wakeup_thread(mddev->thread);
+	}
+}
 
 static mdk_personality_t raid10_personality =
 {
@@ -1835,6 +1970,7 @@ static mdk_personality_t raid10_personality =
 	.hot_remove_disk= raid10_remove_disk,
 	.spare_active	= raid10_spare_active,
 	.sync_request	= sync_request,
+	.quiesce	= raid10_quiesce,
 };
 
 static int __init raid_init(void)
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index 08317b77802b..b660cbf628d8 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -35,13 +35,19 @@ struct r10_private_data_s {
 	sector_t chunk_mask;
 
 	struct list_head	retry_list;
-	/* for use when syncing mirrors: */
+	/* queue pending writes and submit them on unplug */
+	struct bio_list		pending_bio_list;
+
 
 	spinlock_t		resync_lock;
 	int nr_pending;
 	int nr_waiting;
 	int barrier;
 	sector_t		next_resync;
+	int			fullsync;  /* set to 1 if a full sync is needed,
+					    * (fresh device added).
+					    * Cleared when a sync completes.
+					    */
 
 	wait_queue_head_t	wait_barrier;
 
@@ -100,4 +106,5 @@ struct r10bio_s {
 #define	R10BIO_Uptodate	0
 #define	R10BIO_IsSync	1
 #define	R10BIO_IsRecover 2
+#define	R10BIO_Degraded 3
 #endif
-- 
cgit v1.2.3


From ca65b73bd9c301d243df93780f7b26579e6c9204 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:17 -0800
Subject: [PATCH] md: fix raid6 resync check/repair code

raid6 currently does not check the P/Q syndromes when doing a resync, it just
calculates the correct value and writes it.  Doing the check can reduce writes
(often to 0) for a resync, and it is needed to properly implement the

  echo check > sync_action

operation.

This patch implements the appropriate checks and tidies up some related code.

It also allows raid6 user-requested resync to bypass the intent bitmap.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid6main.c     | 182 ++++++++++++++++++++++++++-------------------
 include/linux/raid/raid5.h |   2 +
 2 files changed, 108 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 304455d236f9..52e8796bb8ac 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -805,7 +805,7 @@ static void compute_parity(struct stripe_head *sh, int method)
 }
 
 /* Compute one missing block */
-static void compute_block_1(struct stripe_head *sh, int dd_idx)
+static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero)
 {
 	raid6_conf_t *conf = sh->raid_conf;
 	int i, count, disks = conf->raid_disks;
@@ -821,7 +821,7 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx)
 		compute_parity(sh, UPDATE_PARITY);
 	} else {
 		ptr[0] = page_address(sh->dev[dd_idx].page);
-		memset(ptr[0], 0, STRIPE_SIZE);
+		if (!nozero) memset(ptr[0], 0, STRIPE_SIZE);
 		count = 1;
 		for (i = disks ; i--; ) {
 			if (i == dd_idx || i == qd_idx)
@@ -838,7 +838,8 @@ static void compute_block_1(struct stripe_head *sh, int dd_idx)
 		}
 		if (count != 1)
 			xor_block(count, STRIPE_SIZE, ptr);
-		set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+		if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
+		else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags);
 	}
 }
 
@@ -871,7 +872,7 @@ static void compute_block_2(struct stripe_head *sh, int dd_idx1, int dd_idx2)
 			return;
 		} else {
 			/* We're missing D+Q; recompute D from P */
-			compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1);
+			compute_block_1(sh, (dd_idx1 == qd_idx) ? dd_idx2 : dd_idx1, 0);
 			compute_parity(sh, UPDATE_PARITY); /* Is this necessary? */
 			return;
 		}
@@ -982,6 +983,12 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 }
 
 
+static int page_is_zero(struct page *p)
+{
+	char *a = page_address(p);
+	return ((*(u32*)a) == 0 &&
+		memcmp(a, a+4, STRIPE_SIZE-4)==0);
+}
 /*
  * handle_stripe - do things to a stripe.
  *
@@ -1000,7 +1007,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
  *
  */
 
-static void handle_stripe(struct stripe_head *sh)
+static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 {
 	raid6_conf_t *conf = sh->raid_conf;
 	int disks = conf->raid_disks;
@@ -1228,7 +1235,7 @@ static void handle_stripe(struct stripe_head *sh)
 				if (uptodate == disks-1) {
 					PRINTK("Computing stripe %llu block %d\n",
 					       (unsigned long long)sh->sector, i);
-					compute_block_1(sh, i);
+					compute_block_1(sh, i, 0);
 					uptodate++;
 				} else if ( uptodate == disks-2 && failed >= 2 ) {
 					/* Computing 2-failure is *very* expensive; only do it if failed >= 2 */
@@ -1323,7 +1330,7 @@ static void handle_stripe(struct stripe_head *sh)
 				/* We have failed blocks and need to compute them */
 				switch ( failed ) {
 				case 0:	BUG();
-				case 1: compute_block_1(sh, failed_num[0]); break;
+				case 1: compute_block_1(sh, failed_num[0], 0); break;
 				case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break;
 				default: BUG();	/* This request should have been failed? */
 				}
@@ -1338,12 +1345,10 @@ static void handle_stripe(struct stripe_head *sh)
 					       (unsigned long long)sh->sector, i);
 					locked++;
 					set_bit(R5_Wantwrite, &sh->dev[i].flags);
-#if 0 /**** FIX: I don't understand the logic here... ****/
-					if (!test_bit(R5_Insync, &sh->dev[i].flags)
-					    || ((i==pd_idx || i==qd_idx) && failed == 0)) /* FIX? */
-						set_bit(STRIPE_INSYNC, &sh->state);
-#endif
 				}
+			/* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */
+			set_bit(STRIPE_INSYNC, &sh->state);
+
 			if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
 				atomic_dec(&conf->preread_active_stripes);
 				if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD)
@@ -1356,79 +1361,97 @@ static void handle_stripe(struct stripe_head *sh)
 	 * Any reads will already have been scheduled, so we just see if enough data
 	 * is available
 	 */
-	if (syncing && locked == 0 &&
-	    !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 2) {
-		set_bit(STRIPE_HANDLE, &sh->state);
-#if 0 /* RAID-6: Don't support CHECK PARITY yet */
-		if (failed == 0) {
-			char *pagea;
-			if (uptodate != disks)
-				BUG();
-			compute_parity(sh, CHECK_PARITY);
-			uptodate--;
-			pagea = page_address(sh->dev[pd_idx].page);
-			if ((*(u32*)pagea) == 0 &&
-			    !memcmp(pagea, pagea+4, STRIPE_SIZE-4)) {
-				/* parity is correct (on disc, not in buffer any more) */
-				set_bit(STRIPE_INSYNC, &sh->state);
-			}
-		}
-#endif
-		if (!test_bit(STRIPE_INSYNC, &sh->state)) {
-			int failed_needupdate[2];
-			struct r5dev *adev, *bdev;
-
-			if ( failed < 1 )
-				failed_num[0] = pd_idx;
-			if ( failed < 2 )
-				failed_num[1] = (failed_num[0] == qd_idx) ? pd_idx : qd_idx;
+	if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) {
+		int update_p = 0, update_q = 0;
+		struct r5dev *dev;
 
-			failed_needupdate[0] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[0]].flags);
-			failed_needupdate[1] = !test_bit(R5_UPTODATE, &sh->dev[failed_num[1]].flags);
+		set_bit(STRIPE_HANDLE, &sh->state);
 
-			PRINTK("sync: failed=%d num=%d,%d fnu=%u%u\n",
-			       failed, failed_num[0], failed_num[1], failed_needupdate[0], failed_needupdate[1]);
+		BUG_ON(failed>2);
+		BUG_ON(uptodate < disks);
+		/* Want to check and possibly repair P and Q.
+		 * However there could be one 'failed' device, in which
+		 * case we can only check one of them, possibly using the
+		 * other to generate missing data
+		 */
 
-#if 0  /* RAID-6: This code seems to require that CHECK_PARITY destroys the uptodateness of the parity */
-			/* should be able to compute the missing block(s) and write to spare */
-			if ( failed_needupdate[0] ^ failed_needupdate[1] ) {
-				if (uptodate+1 != disks)
-					BUG();
-				compute_block_1(sh, failed_needupdate[0] ? failed_num[0] : failed_num[1]);
-				uptodate++;
-			} else if ( failed_needupdate[0] & failed_needupdate[1] ) {
-				if (uptodate+2 != disks)
-					BUG();
-				compute_block_2(sh, failed_num[0], failed_num[1]);
-				uptodate += 2;
+		/* If !tmp_page, we cannot do the calculations,
+		 * but as we have set STRIPE_HANDLE, we will soon be called
+		 * by stripe_handle with a tmp_page - just wait until then.
+		 */
+		if (tmp_page) {
+			if (failed == q_failed) {
+				/* The only possible failed device holds 'Q', so it makes
+				 * sense to check P (If anything else were failed, we would
+				 * have used P to recreate it).
+				 */
+				compute_block_1(sh, pd_idx, 1);
+				if (!page_is_zero(sh->dev[pd_idx].page)) {
+					compute_block_1(sh,pd_idx,0);
+					update_p = 1;
+				}
+			}
+			if (!q_failed && failed < 2) {
+				/* q is not failed, and we didn't use it to generate
+				 * anything, so it makes sense to check it
+				 */
+				memcpy(page_address(tmp_page),
+				       page_address(sh->dev[qd_idx].page),
+				       STRIPE_SIZE);
+				compute_parity(sh, UPDATE_PARITY);
+				if (memcmp(page_address(tmp_page),
+					   page_address(sh->dev[qd_idx].page),
+					   STRIPE_SIZE)!= 0) {
+					clear_bit(STRIPE_INSYNC, &sh->state);
+					update_q = 1;
+				}
+			}
+			if (update_p || update_q) {
+				conf->mddev->resync_mismatches += STRIPE_SECTORS;
+				if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery))
+					/* don't try to repair!! */
+					update_p = update_q = 0;
 			}
-#else
-			compute_block_2(sh, failed_num[0], failed_num[1]);
-			uptodate += failed_needupdate[0] + failed_needupdate[1];
-#endif
 
-			if (uptodate != disks)
-				BUG();
+			/* now write out any block on a failed drive,
+			 * or P or Q if they need it
+			 */
 
-			PRINTK("Marking for sync stripe %llu blocks %d,%d\n",
-			       (unsigned long long)sh->sector, failed_num[0], failed_num[1]);
+			if (failed == 2) {
+				dev = &sh->dev[failed_num[1]];
+				locked++;
+				set_bit(R5_LOCKED, &dev->flags);
+				set_bit(R5_Wantwrite, &dev->flags);
+				set_bit(R5_Syncio, &dev->flags);
+			}
+			if (failed >= 1) {
+				dev = &sh->dev[failed_num[0]];
+				locked++;
+				set_bit(R5_LOCKED, &dev->flags);
+				set_bit(R5_Wantwrite, &dev->flags);
+				set_bit(R5_Syncio, &dev->flags);
+			}
 
-			/**** FIX: Should we really do both of these unconditionally? ****/
-			adev = &sh->dev[failed_num[0]];
-			locked += !test_bit(R5_LOCKED, &adev->flags);
-			set_bit(R5_LOCKED, &adev->flags);
-			set_bit(R5_Wantwrite, &adev->flags);
-			bdev = &sh->dev[failed_num[1]];
-			locked += !test_bit(R5_LOCKED, &bdev->flags);
-			set_bit(R5_LOCKED, &bdev->flags);
+			if (update_p) {
+				dev = &sh->dev[pd_idx];
+				locked ++;
+				set_bit(R5_LOCKED, &dev->flags);
+				set_bit(R5_Wantwrite, &dev->flags);
+				set_bit(R5_Syncio, &dev->flags);
+			}
+			if (update_q) {
+				dev = &sh->dev[qd_idx];
+				locked++;
+				set_bit(R5_LOCKED, &dev->flags);
+				set_bit(R5_Wantwrite, &dev->flags);
+				set_bit(R5_Syncio, &dev->flags);
+			}
 			clear_bit(STRIPE_DEGRADED, &sh->state);
-			set_bit(R5_Wantwrite, &bdev->flags);
 
 			set_bit(STRIPE_INSYNC, &sh->state);
-			set_bit(R5_Syncio, &adev->flags);
-			set_bit(R5_Syncio, &bdev->flags);
 		}
 	}
+
 	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
 		md_done_sync(conf->mddev, STRIPE_SECTORS,1);
 		clear_bit(STRIPE_SYNCING, &sh->state);
@@ -1664,7 +1687,7 @@ static int make_request (request_queue_t *q, struct bio * bi)
 			}
 			finish_wait(&conf->wait_for_overlap, &w);
 			raid6_plug_device(conf);
-			handle_stripe(sh);
+			handle_stripe(sh, NULL);
 			release_stripe(sh);
 		} else {
 			/* cannot get stripe for read-ahead, just give-up */
@@ -1728,6 +1751,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 		return rv;
 	}
 	if (!bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 1) &&
+	    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
 	    !conf->fullsync && sync_blocks >= STRIPE_SECTORS) {
 		/* we can skip this block, and probably more */
 		sync_blocks /= STRIPE_SECTORS;
@@ -1765,7 +1789,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 	clear_bit(STRIPE_INSYNC, &sh->state);
 	spin_unlock(&sh->lock);
 
-	handle_stripe(sh);
+	handle_stripe(sh, NULL);
 	release_stripe(sh);
 
 	return STRIPE_SECTORS;
@@ -1821,7 +1845,7 @@ static void raid6d (mddev_t *mddev)
 		spin_unlock_irq(&conf->device_lock);
 
 		handled++;
-		handle_stripe(sh);
+		handle_stripe(sh, conf->spare_page);
 		release_stripe(sh);
 
 		spin_lock_irq(&conf->device_lock);
@@ -1860,6 +1884,10 @@ static int run(mddev_t *mddev)
 		goto abort;
 	memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
 
+	conf->spare_page = alloc_page(GFP_KERNEL);
+	if (!conf->spare_page)
+		goto abort;
+
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
 	init_waitqueue_head(&conf->wait_for_overlap);
@@ -1996,6 +2024,8 @@ static int run(mddev_t *mddev)
 abort:
 	if (conf) {
 		print_raid6_conf(conf);
+		if (conf->spare_page)
+			page_cache_release(conf->spare_page);
 		if (conf->stripe_hashtbl)
 			free_pages((unsigned long) conf->stripe_hashtbl,
 							HASH_PAGES_ORDER);
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index f025ba6fb14c..e9c1c0d4f90b 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -228,6 +228,8 @@ struct raid5_private_data {
 					    * Cleared when a sync completes.
 					    */
 
+	struct page 		*spare_page; /* Used when checking P/Q in raid6 */
+
 	/*
 	 * Free stripes pool
 	 */
-- 
cgit v1.2.3


From ddaf22abaa831763e75775e6d4c7693504237997 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:19 -0800
Subject: [PATCH] md: attempt to auto-correct read errors in raid1

On a read-error we suspend the array, then synchronously read the block from
other arrays until we find one where we can read it.  Then we try writing the
good data back everywhere and make sure it works.  If any write or subsequent
read fails, only then do we fail the device out of the array.

To be able to suspend the array, we need to also keep track of how many
requests are queued for handling by raid1d.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/md.c            |   1 +
 drivers/md/raid1.c         | 115 +++++++++++++++++++++++++++++++++++++++++----
 include/linux/raid/raid1.h |   3 ++
 3 files changed, 109 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 64e7da3701a5..1364a1c97e6f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -461,6 +461,7 @@ int sync_page_io(struct block_device *bdev, sector_t sector, int size,
 	bio_put(bio);
 	return ret;
 }
+EXPORT_SYMBOL(sync_page_io);
 
 static int read_disk_sb(mdk_rdev_t * rdev, int size)
 {
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c618015f07f6..b3856db8d6c2 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -191,6 +191,7 @@ static void reschedule_retry(r1bio_t *r1_bio)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&r1_bio->retry_list, &conf->retry_list);
+	conf->nr_queued ++;
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	wake_up(&conf->wait_barrier);
@@ -245,9 +246,9 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
-	else
+	update_head_pos(mirror, r1_bio);
+
+	if (uptodate || conf->working_disks <= 1) {
 		/*
 		 * Set R1BIO_Uptodate in our master bio, so that
 		 * we will return a good error code for to the higher
@@ -259,14 +260,8 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
 		 */
 		set_bit(R1BIO_Uptodate, &r1_bio->state);
 
-	update_head_pos(mirror, r1_bio);
-
-	/*
-	 * we have only one bio on the read side
-	 */
-	if (uptodate)
 		raid_end_bio_io(r1_bio);
-	else {
+	} else {
 		/*
 		 * oops, read error:
 		 */
@@ -653,6 +648,32 @@ static void allow_barrier(conf_t *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+static void freeze_array(conf_t *conf)
+{
+	/* stop syncio and normal IO and wait for everything to
+	 * go quite.
+	 * We increment barrier and nr_waiting, and then
+	 * wait until barrier+nr_pending match nr_queued+2
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier++;
+	conf->nr_waiting++;
+	wait_event_lock_irq(conf->wait_barrier,
+			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->resync_lock,
+			    raid1_unplug(conf->mddev->queue));
+	spin_unlock_irq(&conf->resync_lock);
+}
+static void unfreeze_array(conf_t *conf)
+{
+	/* reverse the effect of the freeze */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier--;
+	conf->nr_waiting--;
+	wake_up(&conf->wait_barrier);
+	spin_unlock_irq(&conf->resync_lock);
+}
+
 
 /* duplicate the data pages for behind I/O */
 static struct page **alloc_behind_pages(struct bio *bio)
@@ -1196,6 +1217,7 @@ static void raid1d(mddev_t *mddev)
 			break;
 		r1_bio = list_entry(head->prev, r1bio_t, retry_list);
 		list_del(head->prev);
+		conf->nr_queued--;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r1_bio->mddev;
@@ -1235,6 +1257,74 @@ static void raid1d(mddev_t *mddev)
 				}
 		} else {
 			int disk;
+
+			/* we got a read error. Maybe the drive is bad.  Maybe just
+			 * the block and we can fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices.  When we find one, we re-write
+			 * and check it that fixes the read error.
+			 * This is all done synchronously while the array is
+			 * frozen
+			 */
+			sector_t sect = r1_bio->sector;
+			int sectors = r1_bio->sectors;
+			freeze_array(conf);
+			while(sectors) {
+				int s = sectors;
+				int d = r1_bio->read_disk;
+				int success = 0;
+
+				if (s > (PAGE_SIZE>>9))
+					s = PAGE_SIZE >> 9;
+
+				do {
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags) &&
+					    sync_page_io(rdev->bdev,
+							 sect + rdev->data_offset,
+							 s<<9,
+							 conf->tmppage, READ))
+						success = 1;
+					else {
+						d++;
+						if (d == conf->raid_disks)
+							d = 0;
+					}
+				} while (!success && d != r1_bio->read_disk);
+
+				if (success) {
+					/* write it back and re-read */
+					while (d != r1_bio->read_disk) {
+						if (d==0)
+							d = conf->raid_disks;
+						d--;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, WRITE) == 0 ||
+							    sync_page_io(rdev->bdev,
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, READ) == 0) {
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+							}
+						}
+					}
+				} else {
+					/* Cannot read from anywhere -- bye bye array */
+					md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
+					break;
+				}
+				sectors -= s;
+				sect += s;
+			}
+
+
+			unfreeze_array(conf);
+
 			bio = r1_bio->bios[r1_bio->read_disk];
 			if ((disk=read_balance(conf, r1_bio)) == -1) {
 				printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -1529,6 +1619,10 @@ static int run(mddev_t *mddev)
 
 	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
 
+	conf->tmppage = alloc_page(GFP_KERNEL);
+	if (!conf->tmppage)
+		goto out_no_mem;
+
 	conf->poolinfo = kmalloc(sizeof(*conf->poolinfo), GFP_KERNEL);
 	if (!conf->poolinfo)
 		goto out_no_mem;
@@ -1635,6 +1729,7 @@ out_free_conf:
 		if (conf->r1bio_pool)
 			mempool_destroy(conf->r1bio_pool);
 		kfree(conf->mirrors);
+		__free_page(conf->tmppage);
 		kfree(conf->poolinfo);
 		kfree(conf);
 		mddev->private = NULL;
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index c55674252533..cbe4238d3f9f 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -46,6 +46,7 @@ struct r1_private_data_s {
 	spinlock_t		resync_lock;
 	int			nr_pending;
 	int			nr_waiting;
+	int			nr_queued;
 	int			barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -57,6 +58,8 @@ struct r1_private_data_s {
 
 	struct pool_info	*poolinfo;
 
+	struct page		*tmppage;
+
 	mempool_t *r1bio_pool;
 	mempool_t *r1buf_pool;
 };
-- 
cgit v1.2.3


From cf30a473a02901fe4db37abc0b0fa26dd5ba3f72 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:23 -0800
Subject: [PATCH] md: handle errors when read-only

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid1.c         | 18 +++++++++++-------
 include/linux/raid/raid1.h |  7 +++++++
 2 files changed, 18 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 14a8fe0349c7..a8bc93d6ff63 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -154,7 +154,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio)
 
 	for (i = 0; i < conf->raid_disks; i++) {
 		struct bio **bio = r1_bio->bios + i;
-		if (*bio)
+		if (*bio && *bio != IO_BLOCKED)
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -419,11 +419,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 		new_disk = 0;
 
 		for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+		     r1_bio->bios[new_disk] == IO_BLOCKED ||
 		     !rdev || !test_bit(In_sync, &rdev->flags)
 			     || test_bit(WriteMostly, &rdev->flags);
 		     rdev = rcu_dereference(conf->mirrors[++new_disk].rdev)) {
 
-			if (rdev && test_bit(In_sync, &rdev->flags))
+			if (rdev && test_bit(In_sync, &rdev->flags) &&
+				r1_bio->bios[new_disk] != IO_BLOCKED)
 				wonly_disk = new_disk;
 
 			if (new_disk == conf->raid_disks - 1) {
@@ -437,11 +439,13 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 
 	/* make sure the disk is operational */
 	for (rdev = rcu_dereference(conf->mirrors[new_disk].rdev);
+	     r1_bio->bios[new_disk] == IO_BLOCKED ||
 	     !rdev || !test_bit(In_sync, &rdev->flags) ||
 		     test_bit(WriteMostly, &rdev->flags);
 	     rdev = rcu_dereference(conf->mirrors[new_disk].rdev)) {
 
-		if (rdev && test_bit(In_sync, &rdev->flags))
+		if (rdev && test_bit(In_sync, &rdev->flags) &&
+		    r1_bio->bios[new_disk] != IO_BLOCKED)
 			wonly_disk = new_disk;
 
 		if (new_disk <= 0)
@@ -478,7 +482,7 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio)
 
 		rdev = rcu_dereference(conf->mirrors[disk].rdev);
 
-		if (!rdev ||
+		if (!rdev || r1_bio->bios[disk] == IO_BLOCKED ||
 		    !test_bit(In_sync, &rdev->flags) ||
 		    test_bit(WriteMostly, &rdev->flags))
 			continue;
@@ -1335,7 +1339,7 @@ static void raid1d(mddev_t *mddev)
 			sector_t sect = r1_bio->sector;
 			int sectors = r1_bio->sectors;
 			freeze_array(conf);
-			while(sectors) {
+			if (mddev->ro == 0) while(sectors) {
 				int s = sectors;
 				int d = r1_bio->read_disk;
 				int success = 0;
@@ -1388,7 +1392,6 @@ static void raid1d(mddev_t *mddev)
 				sect += s;
 			}
 
-
 			unfreeze_array(conf);
 
 			bio = r1_bio->bios[r1_bio->read_disk];
@@ -1399,7 +1402,8 @@ static void raid1d(mddev_t *mddev)
 				       (unsigned long long)r1_bio->sector);
 				raid_end_bio_io(r1_bio);
 			} else {
-				r1_bio->bios[r1_bio->read_disk] = NULL;
+				r1_bio->bios[r1_bio->read_disk] =
+					mddev->ro ? IO_BLOCKED : NULL;
 				r1_bio->read_disk = disk;
 				bio_put(bio);
 				bio = bio_clone(r1_bio->master_bio, GFP_NOIO);
diff --git a/include/linux/raid/raid1.h b/include/linux/raid/raid1.h
index cbe4238d3f9f..9d5494aaac0f 100644
--- a/include/linux/raid/raid1.h
+++ b/include/linux/raid/raid1.h
@@ -109,6 +109,13 @@ struct r1bio_s {
 	/* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/
 };
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error.  To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
 /* bits for r1bio.state */
 #define	R1BIO_Uptodate	0
 #define	R1BIO_IsSync	1
-- 
cgit v1.2.3


From 9910f16af35419a5382fa7850eecc220103036fa Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:24 -0800
Subject: [PATCH] md: fix up some rdev rcu locking in raid5/6

There is this "FIXME" comment with a typo in it!!  that been annoying me for
days, so I just had to remove it.

conf->disks[i].rdev should only be accessed if
  - we know we hold a reference or
  - the mddev->reconfig_sem is down or
  - we have a rcu_readlock

handle_stripe was referencing rdev in three places without any of these.  For
the first two, get an rcu_readlock.  For the last, the same access
(md_sync_acct call) is made a little later after the rdev has been claimed
under and rcu_readlock, if R5_Syncio is set.  So just use that access...
However R5_Syncio isn't really needed as the 'syncing' variable contains the
same information.  So use that instead.

Issues, comment, and fix are identical in raid5 and raid6.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid5.c         | 16 ++++++++--------
 drivers/md/raid6main.c     | 19 ++++++++-----------
 include/linux/raid/raid5.h |  1 -
 3 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 0d016a844ec6..0222ba1a6d35 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -961,11 +961,11 @@ static void handle_stripe(struct stripe_head *sh)
 	syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	/* Now to look around and see what can be done */
 
+	rcu_read_lock();
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
 		dev = &sh->dev[i];
 		clear_bit(R5_Insync, &dev->flags);
-		clear_bit(R5_Syncio, &dev->flags);
 
 		PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1004,7 +1004,7 @@ static void handle_stripe(struct stripe_head *sh)
 				non_overwrite++;
 		}
 		if (dev->written) written++;
-		rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
 			clear_bit(R5_ReadError, &dev->flags);
@@ -1017,6 +1017,7 @@ static void handle_stripe(struct stripe_head *sh)
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
+	rcu_read_unlock();
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 		" to_write=%d failed=%d failed_num=%d\n",
 		locked, uptodate, to_read, to_write, failed, failed_num);
@@ -1028,10 +1029,13 @@ static void handle_stripe(struct stripe_head *sh)
 			int bitmap_end = 0;
 
 			if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
-				mdk_rdev_t *rdev = conf->disks[i].rdev;
+				mdk_rdev_t *rdev;
+				rcu_read_lock();
+				rdev = rcu_dereference(conf->disks[i].rdev);
 				if (rdev && test_bit(In_sync, &rdev->flags))
 					/* multiple read failures in one stripe */
 					md_error(conf->mddev, rdev);
+				rcu_read_unlock();
 			}
 
 			spin_lock_irq(&conf->device_lock);
@@ -1180,9 +1184,6 @@ static void handle_stripe(struct stripe_head *sh)
 					locked++;
 					PRINTK("Reading block %d (sync=%d)\n", 
 						i, syncing);
-					if (syncing)
-						md_sync_acct(conf->disks[i].rdev->bdev,
-							     STRIPE_SECTORS);
 				}
 			}
 		}
@@ -1326,7 +1327,6 @@ static void handle_stripe(struct stripe_head *sh)
 			clear_bit(STRIPE_DEGRADED, &sh->state);
 			locked++;
 			set_bit(STRIPE_INSYNC, &sh->state);
-			set_bit(R5_Syncio, &dev->flags);
 		}
 	}
 	if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -1392,7 +1392,7 @@ static void handle_stripe(struct stripe_head *sh)
 		rcu_read_unlock();
  
 		if (rdev) {
-			if (test_bit(R5_Syncio, &sh->dev[i].flags))
+			if (syncing)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 7a51553d8be5..b5b7a8d0b165 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1060,11 +1060,11 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 	syncing = test_bit(STRIPE_SYNCING, &sh->state);
 	/* Now to look around and see what can be done */
 
+	rcu_read_lock();
 	for (i=disks; i--; ) {
 		mdk_rdev_t *rdev;
 		dev = &sh->dev[i];
 		clear_bit(R5_Insync, &dev->flags);
-		clear_bit(R5_Syncio, &dev->flags);
 
 		PRINTK("check %d: state 0x%lx read %p write %p written %p\n",
 			i, dev->flags, dev->toread, dev->towrite, dev->written);
@@ -1103,7 +1103,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 				non_overwrite++;
 		}
 		if (dev->written) written++;
-		rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+		rdev = rcu_dereference(conf->disks[i].rdev);
 		if (!rdev || !test_bit(In_sync, &rdev->flags)) {
 			/* The ReadError flag will just be confusing now */
 			clear_bit(R5_ReadError, &dev->flags);
@@ -1117,6 +1117,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 		} else
 			set_bit(R5_Insync, &dev->flags);
 	}
+	rcu_read_unlock();
 	PRINTK("locked=%d uptodate=%d to_read=%d"
 	       " to_write=%d failed=%d failed_num=%d,%d\n",
 	       locked, uptodate, to_read, to_write, failed,
@@ -1129,10 +1130,13 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 			int bitmap_end = 0;
 
 			if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
-				mdk_rdev_t *rdev = conf->disks[i].rdev;
+				mdk_rdev_t *rdev;
+				rcu_read_lock();
+				rdev = rcu_dereference(conf->disks[i].rdev);
 				if (rdev && test_bit(In_sync, &rdev->flags))
 					/* multiple read failures in one stripe */
 					md_error(conf->mddev, rdev);
+				rcu_read_unlock();
 			}
 
 			spin_lock_irq(&conf->device_lock);
@@ -1307,9 +1311,6 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 					locked++;
 					PRINTK("Reading block %d (sync=%d)\n",
 						i, syncing);
-					if (syncing)
-						md_sync_acct(conf->disks[i].rdev->bdev,
-							     STRIPE_SECTORS);
 				}
 			}
 		}
@@ -1463,14 +1464,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 				locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
-				set_bit(R5_Syncio, &dev->flags);
 			}
 			if (failed >= 1) {
 				dev = &sh->dev[failed_num[0]];
 				locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
-				set_bit(R5_Syncio, &dev->flags);
 			}
 
 			if (update_p) {
@@ -1478,14 +1477,12 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 				locked ++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
-				set_bit(R5_Syncio, &dev->flags);
 			}
 			if (update_q) {
 				dev = &sh->dev[qd_idx];
 				locked++;
 				set_bit(R5_LOCKED, &dev->flags);
 				set_bit(R5_Wantwrite, &dev->flags);
-				set_bit(R5_Syncio, &dev->flags);
 			}
 			clear_bit(STRIPE_DEGRADED, &sh->state);
 
@@ -1557,7 +1554,7 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 		rcu_read_unlock();
 
 		if (rdev) {
-			if (test_bit(R5_Syncio, &sh->dev[i].flags))
+			if (syncing)
 				md_sync_acct(rdev->bdev, STRIPE_SECTORS);
 
 			bi->bi_bdev = rdev->bdev;
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index e9c1c0d4f90b..28fcd7533ac4 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -152,7 +152,6 @@ struct stripe_head {
 #define	R5_Insync	3	/* rdev && rdev->in_sync at start */
 #define	R5_Wantread	4	/* want to schedule a read */
 #define	R5_Wantwrite	5
-#define	R5_Syncio	6	/* this io need to be accounted as resync io */
 #define	R5_Overlap	7	/* There is a pending overlapping request on this block */
 #define	R5_ReadError	8	/* seen a read error here recently */
 #define	R5_ReWrite	9	/* have tried to over-write the readerror */
-- 
cgit v1.2.3


From 4443ae10ca15d07922ceda622f03db8865fa3d13 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:28 -0800
Subject: [PATCH] md: auto-correct correctable read errors in raid10

Largely just a cross-port from raid1.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid10.c         | 127 ++++++++++++++++++++++++++++++++++++++------
 include/linux/raid/raid10.h |   2 +
 2 files changed, 114 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 1fa70c34b7d2..64bb4ddc6798 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -209,6 +209,7 @@ static void reschedule_retry(r10bio_t *r10_bio)
 
 	spin_lock_irqsave(&conf->device_lock, flags);
 	list_add(&r10_bio->retry_list, &conf->retry_list);
+	conf->nr_queued ++;
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 
 	md_wakeup_thread(mddev->thread);
@@ -254,9 +255,9 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 	/*
 	 * this branch is our 'one mirror IO has finished' event handler:
 	 */
-	if (!uptodate)
-		md_error(r10_bio->mddev, conf->mirrors[dev].rdev);
-	else
+	update_head_pos(slot, r10_bio);
+
+	if (uptodate) {
 		/*
 		 * Set R10BIO_Uptodate in our master bio, so that
 		 * we will return a good error code to the higher
@@ -267,15 +268,8 @@ static int raid10_end_read_request(struct bio *bio, unsigned int bytes_done, int
 		 * wait for the 'master' bio.
 		 */
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-
-	update_head_pos(slot, r10_bio);
-
-	/*
-	 * we have only one bio on the read side
-	 */
-	if (uptodate)
 		raid_end_bio_io(r10_bio);
-	else {
+	} else {
 		/*
 		 * oops, read error:
 		 */
@@ -714,6 +708,33 @@ static void allow_barrier(conf_t *conf)
 	wake_up(&conf->wait_barrier);
 }
 
+static void freeze_array(conf_t *conf)
+{
+	/* stop syncio and normal IO and wait for everything to
+	 * go quite.
+	 * We increment barrier and nr_waiting, and then
+	 * wait until barrier+nr_pending match nr_queued+2
+	 */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier++;
+	conf->nr_waiting++;
+	wait_event_lock_irq(conf->wait_barrier,
+			    conf->barrier+conf->nr_pending == conf->nr_queued+2,
+			    conf->resync_lock,
+			    raid10_unplug(conf->mddev->queue));
+	spin_unlock_irq(&conf->resync_lock);
+}
+
+static void unfreeze_array(conf_t *conf)
+{
+	/* reverse the effect of the freeze */
+	spin_lock_irq(&conf->resync_lock);
+	conf->barrier--;
+	conf->nr_waiting--;
+	wake_up(&conf->wait_barrier);
+	spin_unlock_irq(&conf->resync_lock);
+}
+
 static int make_request(request_queue_t *q, struct bio * bio)
 {
 	mddev_t *mddev = q->queuedata;
@@ -1338,6 +1359,7 @@ static void raid10d(mddev_t *mddev)
 			break;
 		r10_bio = list_entry(head->prev, r10bio_t, retry_list);
 		list_del(head->prev);
+		conf->nr_queued--;
 		spin_unlock_irqrestore(&conf->device_lock, flags);
 
 		mddev = r10_bio->mddev;
@@ -1350,6 +1372,78 @@ static void raid10d(mddev_t *mddev)
 			unplug = 1;
 		} else {
 			int mirror;
+			/* we got a read error. Maybe the drive is bad.  Maybe just
+			 * the block and we can fix it.
+			 * We freeze all other IO, and try reading the block from
+			 * other devices.  When we find one, we re-write
+			 * and check it that fixes the read error.
+			 * This is all done synchronously while the array is
+			 * frozen.
+			 */
+			int sect = 0; /* Offset from r10_bio->sector */
+			int sectors = r10_bio->sectors;
+			freeze_array(conf);
+			if (mddev->ro == 0) while(sectors) {
+				int s = sectors;
+				int sl = r10_bio->read_slot;
+				int success = 0;
+
+				if (s > (PAGE_SIZE>>9))
+					s = PAGE_SIZE >> 9;
+
+				do {
+					int d = r10_bio->devs[sl].devnum;
+					rdev = conf->mirrors[d].rdev;
+					if (rdev &&
+					    test_bit(In_sync, &rdev->flags) &&
+					    sync_page_io(rdev->bdev,
+							 r10_bio->devs[sl].addr +
+							 sect + rdev->data_offset,
+							 s<<9,
+							 conf->tmppage, READ))
+						success = 1;
+					else {
+						sl++;
+						if (sl == conf->copies)
+							sl = 0;
+					}
+				} while (!success && sl != r10_bio->read_slot);
+
+				if (success) {
+					/* write it back and re-read */
+					while (sl != r10_bio->read_slot) {
+						int d;
+						if (sl==0)
+							sl = conf->copies;
+						sl--;
+						d = r10_bio->devs[sl].devnum;
+						rdev = conf->mirrors[d].rdev;
+						if (rdev &&
+						    test_bit(In_sync, &rdev->flags)) {
+							if (sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, WRITE) == 0 ||
+							    sync_page_io(rdev->bdev,
+									 r10_bio->devs[sl].addr +
+									 sect + rdev->data_offset,
+									 s<<9, conf->tmppage, READ) == 0) {
+								/* Well, this device is dead */
+								md_error(mddev, rdev);
+							}
+						}
+					}
+				} else {
+					/* Cannot read from anywhere -- bye bye array */
+					md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
+					break;
+				}
+				sectors -= s;
+				sect += s;
+			}
+
+			unfreeze_array(conf);
+
 			bio = r10_bio->devs[r10_bio->read_slot].bio;
 			r10_bio->devs[r10_bio->read_slot].bio = NULL;
 			bio_put(bio);
@@ -1793,22 +1887,24 @@ static int run(mddev_t *mddev)
 	 * bookkeeping area. [whatever we allocate in run(),
 	 * should be freed in stop()]
 	 */
-	conf = kmalloc(sizeof(conf_t), GFP_KERNEL);
+	conf = kzalloc(sizeof(conf_t), GFP_KERNEL);
 	mddev->private = conf;
 	if (!conf) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 			mdname(mddev));
 		goto out;
 	}
-	memset(conf, 0, sizeof(*conf));
-	conf->mirrors = kmalloc(sizeof(struct mirror_info)*mddev->raid_disks,
+	conf->mirrors = kzalloc(sizeof(struct mirror_info)*mddev->raid_disks,
 				 GFP_KERNEL);
 	if (!conf->mirrors) {
 		printk(KERN_ERR "raid10: couldn't allocate memory for %s\n",
 		       mdname(mddev));
 		goto out_free_conf;
 	}
-	memset(conf->mirrors, 0, sizeof(struct mirror_info)*mddev->raid_disks);
+
+	conf->tmppage = alloc_page(GFP_KERNEL);
+	if (!conf->tmppage)
+		goto out_free_conf;
 
 	conf->near_copies = nc;
 	conf->far_copies = fc;
@@ -1918,6 +2014,7 @@ static int run(mddev_t *mddev)
 out_free_conf:
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
+	put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf);
 	mddev->private = NULL;
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index b660cbf628d8..dfa528385e3f 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -42,6 +42,7 @@ struct r10_private_data_s {
 	spinlock_t		resync_lock;
 	int nr_pending;
 	int nr_waiting;
+	int nr_queued;
 	int barrier;
 	sector_t		next_resync;
 	int			fullsync;  /* set to 1 if a full sync is needed,
@@ -53,6 +54,7 @@ struct r10_private_data_s {
 
 	mempool_t *r10bio_pool;
 	mempool_t *r10buf_pool;
+	struct page		*tmppage;
 };
 
 typedef struct r10_private_data_s conf_t;
-- 
cgit v1.2.3


From 0eb3ff12aa8a12538ef681dc83f4361636a0699f Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:29 -0800
Subject: [PATCH] md: raid10 read-error handling - resync and read-only

Add in correct read-error handling for resync and read-only situations.

When read-only, we don't over-write, so we need to mark the failed drive in
the r10_bio so we don't re-try it.  During resync, we always read all blocks,
so if there is a read error, we simply over-write it with the good block that
we found (assuming we found one).

Note that the recovery case still isn't handled in an interesting way.  There
is nothing useful to do for the 2-copies case.  If there are 3 or more copies,
then we could try reading from one of the non-missing copies, but this is a
bit complicated and very rarely would be used, so I'm leaving it for now.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid10.c         | 56 ++++++++++++++++++++++++++++-----------------
 include/linux/raid/raid10.h |  7 ++++++
 2 files changed, 42 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 64bb4ddc6798..3f8df2ecbae3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
 
 	for (i = 0; i < conf->copies; i++) {
 		struct bio **bio = & r10_bio->devs[i].bio;
-		if (*bio)
+		if (*bio && *bio != IO_BLOCKED)
 			bio_put(*bio);
 		*bio = NULL;
 	}
@@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 		disk = r10_bio->devs[slot].devnum;
 
 		while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+		       r10_bio->devs[slot].bio == IO_BLOCKED ||
 		       !test_bit(In_sync, &rdev->flags)) {
 			slot++;
 			if (slot == conf->copies) {
@@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 	slot = 0;
 	disk = r10_bio->devs[slot].devnum;
 	while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+	       r10_bio->devs[slot].bio == IO_BLOCKED ||
 	       !test_bit(In_sync, &rdev->flags)) {
 		slot ++;
 		if (slot == conf->copies) {
@@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
 
 
 		if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+		    r10_bio->devs[nslot].bio == IO_BLOCKED ||
 		    !test_bit(In_sync, &rdev->flags))
 			continue;
 
@@ -1104,7 +1107,6 @@ abort:
 
 static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 {
-	int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
 	r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
 	conf_t *conf = mddev_to_conf(r10_bio->mddev);
 	int i,d;
@@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 		BUG();
 	update_head_pos(i, r10_bio);
 	d = r10_bio->devs[i].devnum;
-	if (!uptodate)
+
+	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+		set_bit(R10BIO_Uptodate, &r10_bio->state);
+	else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
 		md_error(r10_bio->mddev,
 			 conf->mirrors[d].rdev);
 
@@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 	fbio = r10_bio->devs[i].bio;
 
 	/* now find blocks with errors */
-	for (i=first+1 ; i < conf->copies ; i++) {
-		int vcnt, j, d;
+	for (i=0 ; i < conf->copies ; i++) {
+		int  j, d;
+		int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
 
-		if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
-			continue;
-		/* We know that the bi_io_vec layout is the same for
-		 * both 'first' and 'i', so we just compare them.
-		 * All vec entries are PAGE_SIZE;
-		 */
 		tbio = r10_bio->devs[i].bio;
-		vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
-		for (j = 0; j < vcnt; j++)
-			if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
-				   page_address(tbio->bi_io_vec[j].bv_page),
-				   PAGE_SIZE))
-				break;
-		if (j == vcnt)
+
+		if (tbio->bi_end_io != end_sync_read)
+			continue;
+		if (i == first)
 			continue;
-		mddev->resync_mismatches += r10_bio->sectors;
+		if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+			/* We know that the bi_io_vec layout is the same for
+			 * both 'first' and 'i', so we just compare them.
+			 * All vec entries are PAGE_SIZE;
+			 */
+			for (j = 0; j < vcnt; j++)
+				if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+					   page_address(tbio->bi_io_vec[j].bv_page),
+					   PAGE_SIZE))
+					break;
+			if (j == vcnt)
+				continue;
+			mddev->resync_mismatches += r10_bio->sectors;
+		}
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
 			/* Don't fix anything. */
 			continue;
@@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
 
 	atomic_inc(&conf->mirrors[d].rdev->nr_pending);
 	md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-	generic_make_request(wbio);
+	if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+		generic_make_request(wbio);
+	else
+		bio_endio(wbio, wbio->bi_size, -EIO);
 }
 
 
@@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev)
 			unfreeze_array(conf);
 
 			bio = r10_bio->devs[r10_bio->read_slot].bio;
-			r10_bio->devs[r10_bio->read_slot].bio = NULL;
+			r10_bio->devs[r10_bio->read_slot].bio =
+				mddev->ro ? IO_BLOCKED : NULL;
 			bio_put(bio);
 			mirror = read_balance(conf, r10_bio);
 			if (mirror == -1) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h
index dfa528385e3f..b1103298a8c2 100644
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -104,6 +104,13 @@ struct r10bio_s {
 	} devs[0];
 };
 
+/* when we get a read error on a read-only array, we redirect to another
+ * device without failing the first device, or trying to over-write to
+ * correct the read error.  To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate 'bios' pointer
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
 /* bits for r10bio.state */
 #define	R10BIO_Uptodate	0
 #define	R10BIO_IsSync	1
-- 
cgit v1.2.3


From fccddba060f2b4916a30aa27acc3d03b01bb981e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:33 -0800
Subject: [PATCH] md: tidy up raid5/6 hash table code

- replace open-coded hash chain with hlist macros

- Fix hash-table size at one page - it is already quite generous, so there
  will never be a need to use multiple pages, so no need for __get_free_pages

No functional change.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/raid5.c         | 40 ++++++++++++++--------------------------
 drivers/md/raid6main.c     | 46 +++++++++++++++++-----------------------------
 include/linux/raid/raid5.h |  4 ++--
 3 files changed, 33 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9fc50487e2ed..6e4db95cebb1 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -35,12 +35,10 @@
 #define STRIPE_SHIFT		(PAGE_SHIFT - 9)
 #define STRIPE_SECTORS		(STRIPE_SIZE>>9)
 #define	IO_THRESHOLD		1
-#define HASH_PAGES		1
-#define HASH_PAGES_ORDER	0
-#define NR_HASH			(HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
+#define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
 
-#define stripe_hash(conf, sect)	((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])
+#define stripe_hash(conf, sect)	(&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
 
 /* bio's attached to a stripe+device for I/O are linked together in bi_sector
  * order without overlap.  There may be several bio's per stripe+device, and
@@ -113,29 +111,21 @@ static void release_stripe(struct stripe_head *sh)
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
-static void remove_hash(struct stripe_head *sh)
+static inline void remove_hash(struct stripe_head *sh)
 {
 	PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
 
-	if (sh->hash_pprev) {
-		if (sh->hash_next)
-			sh->hash_next->hash_pprev = sh->hash_pprev;
-		*sh->hash_pprev = sh->hash_next;
-		sh->hash_pprev = NULL;
-	}
+	hlist_del_init(&sh->hash);
 }
 
-static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
+static inline void insert_hash(raid5_conf_t *conf, struct stripe_head *sh)
 {
-	struct stripe_head **shp = &stripe_hash(conf, sh->sector);
+	struct hlist_head *hp = stripe_hash(conf, sh->sector);
 
 	PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
 
 	CHECK_DEVLOCK();
-	if ((sh->hash_next = *shp) != NULL)
-		(*shp)->hash_pprev = &sh->hash_next;
-	*shp = sh;
-	sh->hash_pprev = shp;
+	hlist_add_head(&sh->hash, hp);
 }
 
 
@@ -228,10 +218,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i
 static struct stripe_head *__find_stripe(raid5_conf_t *conf, sector_t sector)
 {
 	struct stripe_head *sh;
+	struct hlist_node *hn;
 
 	CHECK_DEVLOCK();
 	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
-	for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)
+	hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash)
 		if (sh->sector == sector)
 			return sh;
 	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
@@ -1835,9 +1826,8 @@ static int run(mddev_t *mddev)
 
 	conf->mddev = mddev;
 
-	if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
+	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
-	memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
 
 	spin_lock_init(&conf->device_lock);
 	init_waitqueue_head(&conf->wait_for_stripe);
@@ -1972,9 +1962,7 @@ static int run(mddev_t *mddev)
 abort:
 	if (conf) {
 		print_raid5_conf(conf);
-		if (conf->stripe_hashtbl)
-			free_pages((unsigned long) conf->stripe_hashtbl,
-							HASH_PAGES_ORDER);
+		kfree(conf->stripe_hashtbl);
 		kfree(conf);
 	}
 	mddev->private = NULL;
@@ -1991,7 +1979,7 @@ static int stop(mddev_t *mddev)
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
 	shrink_stripes(conf);
-	free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
+	kfree(conf->stripe_hashtbl);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	sysfs_remove_group(&mddev->kobj, &raid5_attrs_group);
 	kfree(conf);
@@ -2019,12 +2007,12 @@ static void print_sh (struct stripe_head *sh)
 static void printall (raid5_conf_t *conf)
 {
 	struct stripe_head *sh;
+	struct hlist_node *hn;
 	int i;
 
 	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < NR_HASH; i++) {
-		sh = conf->stripe_hashtbl[i];
-		for (; sh; sh = sh->hash_next) {
+		hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
 			if (sh->raid_conf != conf)
 				continue;
 			print_sh(sh);
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 4062fc16ac2b..79b5244f44f4 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -40,12 +40,10 @@
 #define STRIPE_SHIFT		(PAGE_SHIFT - 9)
 #define STRIPE_SECTORS		(STRIPE_SIZE>>9)
 #define	IO_THRESHOLD		1
-#define HASH_PAGES		1
-#define HASH_PAGES_ORDER	0
-#define NR_HASH			(HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *))
+#define NR_HASH			(PAGE_SIZE / sizeof(struct hlist_head))
 #define HASH_MASK		(NR_HASH - 1)
 
-#define stripe_hash(conf, sect)	((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK])
+#define stripe_hash(conf, sect)	(&((conf)->stripe_hashtbl[((sect) >> STRIPE_SHIFT) & HASH_MASK]))
 
 /* bio's attached to a stripe+device for I/O are linked together in bi_sector
  * order without overlap.  There may be several bio's per stripe+device, and
@@ -132,29 +130,21 @@ static void release_stripe(struct stripe_head *sh)
 	spin_unlock_irqrestore(&conf->device_lock, flags);
 }
 
-static void remove_hash(struct stripe_head *sh)
+static inline void remove_hash(struct stripe_head *sh)
 {
 	PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector);
 
-	if (sh->hash_pprev) {
-		if (sh->hash_next)
-			sh->hash_next->hash_pprev = sh->hash_pprev;
-		*sh->hash_pprev = sh->hash_next;
-		sh->hash_pprev = NULL;
-	}
+	hlist_del_init(&sh->hash);
 }
 
-static __inline__ void insert_hash(raid6_conf_t *conf, struct stripe_head *sh)
+static inline void insert_hash(raid6_conf_t *conf, struct stripe_head *sh)
 {
-	struct stripe_head **shp = &stripe_hash(conf, sh->sector);
+	struct hlist_head *hp = stripe_hash(conf, sh->sector);
 
 	PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector);
 
 	CHECK_DEVLOCK();
-	if ((sh->hash_next = *shp) != NULL)
-		(*shp)->hash_pprev = &sh->hash_next;
-	*shp = sh;
-	sh->hash_pprev = shp;
+	hlist_add_head(&sh->hash, hp);
 }
 
 
@@ -247,10 +237,11 @@ static inline void init_stripe(struct stripe_head *sh, sector_t sector, int pd_i
 static struct stripe_head *__find_stripe(raid6_conf_t *conf, sector_t sector)
 {
 	struct stripe_head *sh;
+	struct hlist_node *hn;
 
 	CHECK_DEVLOCK();
 	PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector);
-	for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next)
+	hlist_for_each_entry (sh, hn,  stripe_hash(conf, sector), hash)
 		if (sh->sector == sector)
 			return sh;
 	PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector);
@@ -1931,17 +1922,15 @@ static int run(mddev_t *mddev)
 		return -EIO;
 	}
 
-	mddev->private = kmalloc (sizeof (raid6_conf_t)
-				  + mddev->raid_disks * sizeof(struct disk_info),
-				  GFP_KERNEL);
+	mddev->private = kzalloc(sizeof (raid6_conf_t)
+				 + mddev->raid_disks * sizeof(struct disk_info),
+				 GFP_KERNEL);
 	if ((conf = mddev->private) == NULL)
 		goto abort;
-	memset (conf, 0, sizeof (*conf) + mddev->raid_disks * sizeof(struct disk_info) );
 	conf->mddev = mddev;
 
-	if ((conf->stripe_hashtbl = (struct stripe_head **) __get_free_pages(GFP_ATOMIC, HASH_PAGES_ORDER)) == NULL)
+	if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
 		goto abort;
-	memset(conf->stripe_hashtbl, 0, HASH_PAGES * PAGE_SIZE);
 
 	conf->spare_page = alloc_page(GFP_KERNEL);
 	if (!conf->spare_page)
@@ -2085,9 +2074,7 @@ abort:
 		print_raid6_conf(conf);
 		if (conf->spare_page)
 			put_page(conf->spare_page);
-		if (conf->stripe_hashtbl)
-			free_pages((unsigned long) conf->stripe_hashtbl,
-							HASH_PAGES_ORDER);
+		kfree(conf->stripe_hashtbl);
 		kfree(conf);
 	}
 	mddev->private = NULL;
@@ -2104,7 +2091,7 @@ static int stop (mddev_t *mddev)
 	md_unregister_thread(mddev->thread);
 	mddev->thread = NULL;
 	shrink_stripes(conf);
-	free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER);
+	kfree(conf->stripe_hashtbl);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	kfree(conf);
 	mddev->private = NULL;
@@ -2131,12 +2118,13 @@ static void print_sh (struct seq_file *seq, struct stripe_head *sh)
 static void printall (struct seq_file *seq, raid6_conf_t *conf)
 {
 	struct stripe_head *sh;
+	struct hlist_node *hn;
 	int i;
 
 	spin_lock_irq(&conf->device_lock);
 	for (i = 0; i < NR_HASH; i++) {
 		sh = conf->stripe_hashtbl[i];
-		for (; sh; sh = sh->hash_next) {
+		hlist_for_each_entry(sh, hn, &conf->stripe_hashtbl[i], hash) {
 			if (sh->raid_conf != conf)
 				continue;
 			print_sh(seq, sh);
diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h
index 28fcd7533ac4..394da8207b34 100644
--- a/include/linux/raid/raid5.h
+++ b/include/linux/raid/raid5.h
@@ -126,7 +126,7 @@
  */
 
 struct stripe_head {
-	struct stripe_head	*hash_next, **hash_pprev; /* hash pointers */
+	struct hlist_node	hash;
 	struct list_head	lru;			/* inactive_list or handle_list */
 	struct raid5_private_data	*raid_conf;
 	sector_t		sector;			/* sector of this row */
@@ -204,7 +204,7 @@ struct disk_info {
 };
 
 struct raid5_private_data {
-	struct stripe_head	**stripe_hashtbl;
+	struct hlist_head	*stripe_hashtbl;
 	mddev_t			*mddev;
 	struct disk_info	*spare;
 	int			chunk_size, level, algorithm;
-- 
cgit v1.2.3


From 2604b703b6b3db80e3c75ce472a54dfd0b7bf9f4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:36 -0800
Subject: [PATCH] md: remove personality numbering from md

md supports multiple different RAID level, each being implemented by a
'personality' (which is often in a separate module).

These personalities have fairly artificial 'numbers'.  The numbers
are use to:
 1- provide an index into an array where the various personalities
    are recorded
 2- identify the module (via an alias) which implements are particular
    personality.

Neither of these uses really justify the existence of personality numbers.
The array can be replaced by a linked list which is searched (array lookup
only happens very rarely).  Module identification can be done using an alias
based on level rather than 'personality' number.

The current 'raid5' modules support two level (4 and 5) but only one
personality.  This slight awkwardness (which was handled in the mapping from
level to personality) can be better handled by allowing raid5 to register 2
personalities.

With this change in place, the core md module does not need to have an
exhaustive list of all possible personalities, so other personalities can be
added independently.

This patch also moves the check for chunksize being non-zero into the ->run
routines for the personalities that need it, rather than having it in core-md.
 This has a side effect of allowing 'faulty' and 'linear' not to have a
chunk-size set.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/faulty.c       |  8 +++--
 drivers/md/linear.c       | 10 +++---
 drivers/md/md.c           | 79 +++++++++++++++++------------------------------
 drivers/md/multipath.c    | 11 +++----
 drivers/md/raid0.c        | 14 ++++++---
 drivers/md/raid1.c        |  9 +++---
 drivers/md/raid10.c       | 16 +++++-----
 drivers/md/raid5.c        | 34 +++++++++++++++++---
 drivers/md/raid6main.c    | 10 +++---
 include/linux/raid/md.h   |  4 +--
 include/linux/raid/md_k.h | 63 ++++++-------------------------------
 init/do_mounts_md.c       | 22 ++++++-------
 12 files changed, 125 insertions(+), 155 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index 0248f8e7eac0..f12e83086897 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -316,9 +316,10 @@ static int stop(mddev_t *mddev)
 	return 0;
 }
 
-static mdk_personality_t faulty_personality =
+static struct mdk_personality faulty_personality =
 {
 	.name		= "faulty",
+	.level		= LEVEL_FAULTY,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -329,15 +330,16 @@ static mdk_personality_t faulty_personality =
 
 static int __init raid_init(void)
 {
-	return register_md_personality(FAULTY, &faulty_personality);
+	return register_md_personality(&faulty_personality);
 }
 
 static void raid_exit(void)
 {
-	unregister_md_personality(FAULTY);
+	unregister_md_personality(&faulty_personality);
 }
 
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-10"); /* faulty */
+MODULE_ALIAS("md-level--5");
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index f46c98d05b44..79dee8159217 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -351,9 +351,10 @@ static void linear_status (struct seq_file *seq, mddev_t *mddev)
 }
 
 
-static mdk_personality_t linear_personality=
+static struct mdk_personality linear_personality =
 {
 	.name		= "linear",
+	.level		= LEVEL_LINEAR,
 	.owner		= THIS_MODULE,
 	.make_request	= linear_make_request,
 	.run		= linear_run,
@@ -363,16 +364,17 @@ static mdk_personality_t linear_personality=
 
 static int __init linear_init (void)
 {
-	return register_md_personality (LINEAR, &linear_personality);
+	return register_md_personality (&linear_personality);
 }
 
 static void linear_exit (void)
 {
-	unregister_md_personality (LINEAR);
+	unregister_md_personality (&linear_personality);
 }
 
 
 module_init(linear_init);
 module_exit(linear_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("md-personality-1"); /* LINEAR */
+MODULE_ALIAS("md-personality-1"); /* LINEAR - degrecated*/
+MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index a6a066fc92e3..07f180f95b47 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -68,7 +68,7 @@
 static void autostart_arrays (int part);
 #endif
 
-static mdk_personality_t *pers[MAX_PERSONALITY];
+static LIST_HEAD(pers_list);
 static DEFINE_SPINLOCK(pers_lock);
 
 /*
@@ -303,6 +303,15 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 	return NULL;
 }
 
+static struct mdk_personality *find_pers(int level)
+{
+	struct mdk_personality *pers;
+	list_for_each_entry(pers, &pers_list, list)
+		if (pers->level == level)
+			return pers;
+	return NULL;
+}
+
 static inline sector_t calc_dev_sboffset(struct block_device *bdev)
 {
 	sector_t size = bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
@@ -1744,7 +1753,7 @@ static void analyze_sbs(mddev_t * mddev)
 static ssize_t
 level_show(mddev_t *mddev, char *page)
 {
-	mdk_personality_t *p = mddev->pers;
+	struct mdk_personality *p = mddev->pers;
 	if (p == NULL && mddev->raid_disks == 0)
 		return 0;
 	if (mddev->level >= 0)
@@ -1960,11 +1969,12 @@ static int start_dirty_degraded;
 
 static int do_md_run(mddev_t * mddev)
 {
-	int pnum, err;
+	int err;
 	int chunk_size;
 	struct list_head *tmp;
 	mdk_rdev_t *rdev;
 	struct gendisk *disk;
+	struct mdk_personality *pers;
 	char b[BDEVNAME_SIZE];
 
 	if (list_empty(&mddev->disks))
@@ -1981,20 +1991,8 @@ static int do_md_run(mddev_t * mddev)
 		analyze_sbs(mddev);
 
 	chunk_size = mddev->chunk_size;
-	pnum = level_to_pers(mddev->level);
 
-	if ((pnum != MULTIPATH) && (pnum != RAID1)) {
-		if (!chunk_size) {
-			/*
-			 * 'default chunksize' in the old md code used to
-			 * be PAGE_SIZE, baaad.
-			 * we abort here to be on the safe side. We don't
-			 * want to continue the bad practice.
-			 */
-			printk(KERN_ERR 
-				"no chunksize specified, see 'man raidtab'\n");
-			return -EINVAL;
-		}
+	if (chunk_size) {
 		if (chunk_size > MAX_CHUNK_SIZE) {
 			printk(KERN_ERR "too big chunk_size: %d > %d\n",
 				chunk_size, MAX_CHUNK_SIZE);
@@ -2030,10 +2028,7 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 #ifdef CONFIG_KMOD
-	if (!pers[pnum])
-	{
-		request_module("md-personality-%d", pnum);
-	}
+	request_module("md-level-%d", mddev->level);
 #endif
 
 	/*
@@ -2055,14 +2050,14 @@ static int do_md_run(mddev_t * mddev)
 		return -ENOMEM;
 
 	spin_lock(&pers_lock);
-	if (!pers[pnum] || !try_module_get(pers[pnum]->owner)) {
+	pers = find_pers(mddev->level);
+	if (!pers || !try_module_get(pers->owner)) {
 		spin_unlock(&pers_lock);
-		printk(KERN_WARNING "md: personality %d is not loaded!\n",
-		       pnum);
+		printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
+		       mddev->level);
 		return -EINVAL;
 	}
-
-	mddev->pers = pers[pnum];
+	mddev->pers = pers;
 	spin_unlock(&pers_lock);
 
 	mddev->recovery = 0;
@@ -3701,15 +3696,14 @@ static int md_seq_show(struct seq_file *seq, void *v)
 	struct list_head *tmp2;
 	mdk_rdev_t *rdev;
 	struct mdstat_info *mi = seq->private;
-	int i;
 	struct bitmap *bitmap;
 
 	if (v == (void*)1) {
+		struct mdk_personality *pers;
 		seq_printf(seq, "Personalities : ");
 		spin_lock(&pers_lock);
-		for (i = 0; i < MAX_PERSONALITY; i++)
-			if (pers[i])
-				seq_printf(seq, "[%s] ", pers[i]->name);
+		list_for_each_entry(pers, &pers_list, list)
+			seq_printf(seq, "[%s] ", pers->name);
 
 		spin_unlock(&pers_lock);
 		seq_printf(seq, "\n");
@@ -3870,35 +3864,20 @@ static struct file_operations md_seq_fops = {
 	.poll		= mdstat_poll,
 };
 
-int register_md_personality(int pnum, mdk_personality_t *p)
+int register_md_personality(struct mdk_personality *p)
 {
-	if (pnum >= MAX_PERSONALITY) {
-		printk(KERN_ERR
-		       "md: tried to install personality %s as nr %d, but max is %lu\n",
-		       p->name, pnum, MAX_PERSONALITY-1);
-		return -EINVAL;
-	}
-
 	spin_lock(&pers_lock);
-	if (pers[pnum]) {
-		spin_unlock(&pers_lock);
-		return -EBUSY;
-	}
-
-	pers[pnum] = p;
-	printk(KERN_INFO "md: %s personality registered as nr %d\n", p->name, pnum);
+	list_add_tail(&p->list, &pers_list);
+	printk(KERN_INFO "md: %s personality registered for level %d\n", p->name, p->level);
 	spin_unlock(&pers_lock);
 	return 0;
 }
 
-int unregister_md_personality(int pnum)
+int unregister_md_personality(struct mdk_personality *p)
 {
-	if (pnum >= MAX_PERSONALITY)
-		return -EINVAL;
-
-	printk(KERN_INFO "md: %s personality unregistered\n", pers[pnum]->name);
+	printk(KERN_INFO "md: %s personality unregistered\n", p->name);
 	spin_lock(&pers_lock);
-	pers[pnum] = NULL;
+	list_del_init(&p->list);
 	spin_unlock(&pers_lock);
 	return 0;
 }
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 97a56aaaef6d..d4d838e3f9f8 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -35,9 +35,6 @@
 #define	NR_RESERVED_BUFS	32
 
 
-static mdk_personality_t multipath_personality;
-
-
 static void *mp_pool_alloc(gfp_t gfp_flags, void *data)
 {
 	struct multipath_bh *mpb;
@@ -553,9 +550,10 @@ static int multipath_stop (mddev_t *mddev)
 	return 0;
 }
 
-static mdk_personality_t multipath_personality=
+static struct mdk_personality multipath_personality =
 {
 	.name		= "multipath",
+	.level		= LEVEL_MULTIPATH,
 	.owner		= THIS_MODULE,
 	.make_request	= multipath_make_request,
 	.run		= multipath_run,
@@ -568,15 +566,16 @@ static mdk_personality_t multipath_personality=
 
 static int __init multipath_init (void)
 {
-	return register_md_personality (MULTIPATH, &multipath_personality);
+	return register_md_personality (&multipath_personality);
 }
 
 static void __exit multipath_exit (void)
 {
-	unregister_md_personality (MULTIPATH);
+	unregister_md_personality (&multipath_personality);
 }
 
 module_init(multipath_init);
 module_exit(multipath_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
+MODULE_ALIAS("md-level--4");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index b4eaa67fabde..7fb69e29391b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -275,7 +275,11 @@ static int raid0_run (mddev_t *mddev)
 	mdk_rdev_t *rdev;
 	struct list_head *tmp;
 
-	printk("%s: setting max_sectors to %d, segment boundary to %d\n",
+	if (mddev->chunk_size == 0) {
+		printk(KERN_ERR "md/raid0: non-zero chunk size required.\n");
+		return -EINVAL;
+	}
+	printk(KERN_INFO "%s: setting max_sectors to %d, segment boundary to %d\n",
 	       mdname(mddev),
 	       mddev->chunk_size >> 9,
 	       (mddev->chunk_size>>1)-1);
@@ -507,9 +511,10 @@ static void raid0_status (struct seq_file *seq, mddev_t *mddev)
 	return;
 }
 
-static mdk_personality_t raid0_personality=
+static struct mdk_personality raid0_personality=
 {
 	.name		= "raid0",
+	.level		= 0,
 	.owner		= THIS_MODULE,
 	.make_request	= raid0_make_request,
 	.run		= raid0_run,
@@ -519,15 +524,16 @@ static mdk_personality_t raid0_personality=
 
 static int __init raid0_init (void)
 {
-	return register_md_personality (RAID0, &raid0_personality);
+	return register_md_personality (&raid0_personality);
 }
 
 static void raid0_exit (void)
 {
-	unregister_md_personality (RAID0);
+	unregister_md_personality (&raid0_personality);
 }
 
 module_init(raid0_init);
 module_exit(raid0_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-2"); /* RAID0 */
+MODULE_ALIAS("md-level-0");
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index c42ef1c99fa0..6e0f59ed3d80 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -47,7 +47,6 @@
  */
 #define	NR_RAID1_BIOS 256
 
-static mdk_personality_t raid1_personality;
 
 static void unplug_slaves(mddev_t *mddev);
 
@@ -2036,9 +2035,10 @@ static void raid1_quiesce(mddev_t *mddev, int state)
 }
 
 
-static mdk_personality_t raid1_personality =
+static struct mdk_personality raid1_personality =
 {
 	.name		= "raid1",
+	.level		= 1,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -2056,15 +2056,16 @@ static mdk_personality_t raid1_personality =
 
 static int __init raid_init(void)
 {
-	return register_md_personality(RAID1, &raid1_personality);
+	return register_md_personality(&raid1_personality);
 }
 
 static void raid_exit(void)
 {
-	unregister_md_personality(RAID1);
+	unregister_md_personality(&raid1_personality);
 }
 
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
+MODULE_ALIAS("md-level-1");
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 253322ae9195..f23d52c5df94 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1883,11 +1883,11 @@ static int run(mddev_t *mddev)
 	int nc, fc;
 	sector_t stride, size;
 
-	if (mddev->level != 10) {
-		printk(KERN_ERR "raid10: %s: raid level not set correctly... (%d)\n",
-		       mdname(mddev), mddev->level);
-		goto out;
+	if (mddev->chunk_size == 0) {
+		printk(KERN_ERR "md/raid10: non-zero chunk size required.\n");
+		return -EINVAL;
 	}
+
 	nc = mddev->layout & 255;
 	fc = (mddev->layout >> 8) & 255;
 	if ((nc*fc) <2 || (nc*fc) > mddev->raid_disks ||
@@ -2072,9 +2072,10 @@ static void raid10_quiesce(mddev_t *mddev, int state)
 	}
 }
 
-static mdk_personality_t raid10_personality =
+static struct mdk_personality raid10_personality =
 {
 	.name		= "raid10",
+	.level		= 10,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -2090,15 +2091,16 @@ static mdk_personality_t raid10_personality =
 
 static int __init raid_init(void)
 {
-	return register_md_personality(RAID10, &raid10_personality);
+	return register_md_personality(&raid10_personality);
 }
 
 static void raid_exit(void)
 {
-	unregister_md_personality(RAID10);
+	unregister_md_personality(&raid10_personality);
 }
 
 module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-9"); /* RAID10 */
+MODULE_ALIAS("md-level-10");
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6e4db95cebb1..b0cfd3ca9ca0 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2187,9 +2187,10 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 	}
 }
 
-static mdk_personality_t raid5_personality=
+static struct mdk_personality raid5_personality =
 {
 	.name		= "raid5",
+	.level		= 5,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -2204,17 +2205,40 @@ static mdk_personality_t raid5_personality=
 	.quiesce	= raid5_quiesce,
 };
 
-static int __init raid5_init (void)
+static struct mdk_personality raid4_personality =
 {
-	return register_md_personality (RAID5, &raid5_personality);
+	.name		= "raid4",
+	.level		= 4,
+	.owner		= THIS_MODULE,
+	.make_request	= make_request,
+	.run		= run,
+	.stop		= stop,
+	.status		= status,
+	.error_handler	= error,
+	.hot_add_disk	= raid5_add_disk,
+	.hot_remove_disk= raid5_remove_disk,
+	.spare_active	= raid5_spare_active,
+	.sync_request	= sync_request,
+	.resize		= raid5_resize,
+	.quiesce	= raid5_quiesce,
+};
+
+static int __init raid5_init(void)
+{
+	register_md_personality(&raid5_personality);
+	register_md_personality(&raid4_personality);
+	return 0;
 }
 
-static void raid5_exit (void)
+static void raid5_exit(void)
 {
-	unregister_md_personality (RAID5);
+	unregister_md_personality(&raid5_personality);
+	unregister_md_personality(&raid4_personality);
 }
 
 module_init(raid5_init);
 module_exit(raid5_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-4"); /* RAID5 */
+MODULE_ALIAS("md-level-5");
+MODULE_ALIAS("md-level-4");
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 79b5244f44f4..950e5fa6e1f2 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -2304,9 +2304,10 @@ static void raid6_quiesce(mddev_t *mddev, int state)
 	}
 }
 
-static mdk_personality_t raid6_personality=
+static struct mdk_personality raid6_personality =
 {
 	.name		= "raid6",
+	.level		= 6,
 	.owner		= THIS_MODULE,
 	.make_request	= make_request,
 	.run		= run,
@@ -2321,7 +2322,7 @@ static mdk_personality_t raid6_personality=
 	.quiesce	= raid6_quiesce,
 };
 
-static int __init raid6_init (void)
+static int __init raid6_init(void)
 {
 	int e;
 
@@ -2329,15 +2330,16 @@ static int __init raid6_init (void)
 	if ( e )
 		return e;
 
-	return register_md_personality (RAID6, &raid6_personality);
+	return register_md_personality(&raid6_personality);
 }
 
 static void raid6_exit (void)
 {
-	unregister_md_personality (RAID6);
+	unregister_md_personality(&raid6_personality);
 }
 
 module_init(raid6_init);
 module_exit(raid6_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-8"); /* RAID6 */
+MODULE_ALIAS("md-level-6");
diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h
index 13e7c4b62367..b6e0bcad84e1 100644
--- a/include/linux/raid/md.h
+++ b/include/linux/raid/md.h
@@ -71,8 +71,8 @@
  */
 #define MD_PATCHLEVEL_VERSION           3
 
-extern int register_md_personality (int p_num, mdk_personality_t *p);
-extern int unregister_md_personality (int p_num);
+extern int register_md_personality (struct mdk_personality *p);
+extern int unregister_md_personality (struct mdk_personality *p);
 extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev),
 				mddev_t *mddev, const char *name);
 extern void md_unregister_thread (mdk_thread_t *thread);
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 1dd587b5975a..e559fb701aa1 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -18,62 +18,19 @@
 /* and dm-bio-list.h is not under include/linux because.... ??? */
 #include "../../../drivers/md/dm-bio-list.h"
 
-#define MD_RESERVED       0UL
-#define LINEAR            1UL
-#define RAID0             2UL
-#define RAID1             3UL
-#define RAID5             4UL
-#define TRANSLUCENT       5UL
-#define HSM               6UL
-#define MULTIPATH         7UL
-#define RAID6		  8UL
-#define	RAID10		  9UL
-#define FAULTY		  10UL
-#define MAX_PERSONALITY   11UL
-
 #define	LEVEL_MULTIPATH		(-4)
 #define	LEVEL_LINEAR		(-1)
 #define	LEVEL_FAULTY		(-5)
 
+/* we need a value for 'no level specified' and 0
+ * means 'raid0', so we need something else.  This is
+ * for internal use only
+ */
+#define	LEVEL_NONE		(-1000000)
+
 #define MaxSector (~(sector_t)0)
 #define MD_THREAD_NAME_MAX 14
 
-static inline int pers_to_level (int pers)
-{
-	switch (pers) {
-		case FAULTY:		return LEVEL_FAULTY;
-		case MULTIPATH:		return LEVEL_MULTIPATH;
-		case HSM:		return -3;
-		case TRANSLUCENT:	return -2;
-		case LINEAR:		return LEVEL_LINEAR;
-		case RAID0:		return 0;
-		case RAID1:		return 1;
-		case RAID5:		return 5;
-		case RAID6:		return 6;
-		case RAID10:		return 10;
-	}
-	BUG();
-	return MD_RESERVED;
-}
-
-static inline int level_to_pers (int level)
-{
-	switch (level) {
-		case LEVEL_FAULTY: return FAULTY;
-		case LEVEL_MULTIPATH: return MULTIPATH;
-		case -3: return HSM;
-		case -2: return TRANSLUCENT;
-		case LEVEL_LINEAR: return LINEAR;
-		case 0: return RAID0;
-		case 1: return RAID1;
-		case 4:
-		case 5: return RAID5;
-		case 6: return RAID6;
-		case 10: return RAID10;
-	}
-	return MD_RESERVED;
-}
-
 typedef struct mddev_s mddev_t;
 typedef struct mdk_rdev_s mdk_rdev_t;
 
@@ -140,12 +97,10 @@ struct mdk_rdev_s
 					 */
 };
 
-typedef struct mdk_personality_s mdk_personality_t;
-
 struct mddev_s
 {
 	void				*private;
-	mdk_personality_t		*pers;
+	struct mdk_personality		*pers;
 	dev_t				unit;
 	int				md_minor;
 	struct list_head 		disks;
@@ -266,9 +221,11 @@ static inline void md_sync_acct(struct block_device *bdev, unsigned long nr_sect
         atomic_add(nr_sectors, &bdev->bd_contains->bd_disk->sync_io);
 }
 
-struct mdk_personality_s
+struct mdk_personality
 {
 	char *name;
+	int level;
+	struct list_head list;
 	struct module *owner;
 	int (*make_request)(request_queue_t *q, struct bio *bio);
 	int (*run)(mddev_t *mddev);
diff --git a/init/do_mounts_md.c b/init/do_mounts_md.c
index 3fbc3555ce96..f6f36806f84a 100644
--- a/init/do_mounts_md.c
+++ b/init/do_mounts_md.c
@@ -17,7 +17,7 @@ static int __initdata raid_noautodetect, raid_autopart;
 static struct {
 	int minor;
 	int partitioned;
-	int pers;
+	int level;
 	int chunk;
 	char *device_names;
 } md_setup_args[MAX_MD_DEVS] __initdata;
@@ -47,7 +47,7 @@ extern int mdp_major;
  */
 static int __init md_setup(char *str)
 {
-	int minor, level, factor, fault, pers, partitioned = 0;
+	int minor, level, factor, fault, partitioned = 0;
 	char *pername = "";
 	char *str1;
 	int ent;
@@ -78,7 +78,7 @@ static int __init md_setup(char *str)
 	}
 	if (ent >= md_setup_ents)
 		md_setup_ents++;
-	switch (get_option(&str, &level)) {	/* RAID Personality */
+	switch (get_option(&str, &level)) {	/* RAID level */
 	case 2: /* could be 0 or -1.. */
 		if (level == 0 || level == LEVEL_LINEAR) {
 			if (get_option(&str, &factor) != 2 ||	/* Chunk Size */
@@ -86,16 +86,12 @@ static int __init md_setup(char *str)
 				printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
 				return 0;
 			}
-			md_setup_args[ent].pers = level;
+			md_setup_args[ent].level = level;
 			md_setup_args[ent].chunk = 1 << (factor+12);
-			if (level ==  LEVEL_LINEAR) {
-				pers = LINEAR;
+			if (level ==  LEVEL_LINEAR)
 				pername = "linear";
-			} else {
-				pers = RAID0;
+			else
 				pername = "raid0";
-			}
-			md_setup_args[ent].pers = pers;
 			break;
 		}
 		/* FALL THROUGH */
@@ -103,7 +99,7 @@ static int __init md_setup(char *str)
 		str = str1;
 		/* FALL THROUGH */
 	case 0:
-		md_setup_args[ent].pers = 0;
+		md_setup_args[ent].level = LEVEL_NONE;
 		pername="super-block";
 	}
 
@@ -190,10 +186,10 @@ static void __init md_setup_drive(void)
 			continue;
 		}
 
-		if (md_setup_args[ent].pers) {
+		if (md_setup_args[ent].level != LEVEL_NONE) {
 			/* non-persistent */
 			mdu_array_info_t ainfo;
-			ainfo.level = pers_to_level(md_setup_args[ent].pers);
+			ainfo.level = md_setup_args[ent].level;
 			ainfo.size = 0;
 			ainfo.nr_disks =0;
 			ainfo.raid_disks =0;
-- 
cgit v1.2.3


From 1345b1d8adbdeceb1c871d9a4af5e2a700b341c6 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:40 -0800
Subject: [PATCH] md: define and use safe_put_page for md

md sometimes call put_page on NULL pointers (treating it like kfree).  This is
not safe, so define and use a 'safe_put_page' which checks for NULL.

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/bitmap.c       | 3 +--
 drivers/md/raid1.c        | 8 ++++----
 drivers/md/raid10.c       | 8 ++++----
 drivers/md/raid6main.c    | 3 +--
 include/linux/raid/md_k.h | 5 +++++
 5 files changed, 15 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index abe415f0c039..ee4a3424a8a3 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -626,8 +626,7 @@ static void bitmap_file_unmap(struct bitmap *bitmap)
 	kfree(map);
 	kfree(attr);
 
-	if (sb_page)
-		put_page(sb_page);
+	safe_put_page(sb_page);
 }
 
 static void bitmap_stop_daemon(struct bitmap *bitmap);
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 39c10a65683d..feea4eeca1d9 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -136,7 +136,7 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data)
 out_free_pages:
 	for (i=0; i < RESYNC_PAGES ; i++)
 		for (j=0 ; j < pi->raid_disks; j++)
-			put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
+			safe_put_page(r1_bio->bios[j]->bi_io_vec[i].bv_page);
 	j = -1;
 out_free_bio:
 	while ( ++j < pi->raid_disks )
@@ -156,7 +156,7 @@ static void r1buf_pool_free(void *__r1_bio, void *data)
 			if (j == 0 ||
 			    r1bio->bios[j]->bi_io_vec[i].bv_page !=
 			    r1bio->bios[0]->bi_io_vec[i].bv_page)
-				put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
+				safe_put_page(r1bio->bios[j]->bi_io_vec[i].bv_page);
 		}
 	for (i=0 ; i < pi->raid_disks; i++)
 		bio_put(r1bio->bios[i]);
@@ -381,7 +381,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 			/* free extra copy of the data pages */
 			int i = bio->bi_vcnt;
 			while (i--)
-				put_page(bio->bi_io_vec[i].bv_page);
+				safe_put_page(bio->bi_io_vec[i].bv_page);
 		}
 		/* clear the bitmap if all writes complete successfully */
 		bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector,
@@ -1907,7 +1907,7 @@ out_free_conf:
 		if (conf->r1bio_pool)
 			mempool_destroy(conf->r1bio_pool);
 		kfree(conf->mirrors);
-		put_page(conf->tmppage);
+		safe_put_page(conf->tmppage);
 		kfree(conf->poolinfo);
 		kfree(conf);
 		mddev->private = NULL;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 9647ebb0983a..fb952000fae2 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -132,10 +132,10 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data)
 
 out_free_pages:
 	for ( ; i > 0 ; i--)
-		put_page(bio->bi_io_vec[i-1].bv_page);
+		safe_put_page(bio->bi_io_vec[i-1].bv_page);
 	while (j--)
 		for (i = 0; i < RESYNC_PAGES ; i++)
-			put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
+			safe_put_page(r10_bio->devs[j].bio->bi_io_vec[i].bv_page);
 	j = -1;
 out_free_bio:
 	while ( ++j < nalloc )
@@ -155,7 +155,7 @@ static void r10buf_pool_free(void *__r10_bio, void *data)
 		struct bio *bio = r10bio->devs[j].bio;
 		if (bio) {
 			for (i = 0; i < RESYNC_PAGES; i++) {
-				put_page(bio->bi_io_vec[i].bv_page);
+				safe_put_page(bio->bi_io_vec[i].bv_page);
 				bio->bi_io_vec[i].bv_page = NULL;
 			}
 			bio_put(bio);
@@ -2042,7 +2042,7 @@ static int run(mddev_t *mddev)
 out_free_conf:
 	if (conf->r10bio_pool)
 		mempool_destroy(conf->r10bio_pool);
-	put_page(conf->tmppage);
+	safe_put_page(conf->tmppage);
 	kfree(conf->mirrors);
 	kfree(conf);
 	mddev->private = NULL;
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 950e5fa6e1f2..06b32bd671a3 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -2072,8 +2072,7 @@ static int run(mddev_t *mddev)
 abort:
 	if (conf) {
 		print_raid6_conf(conf);
-		if (conf->spare_page)
-			put_page(conf->spare_page);
+		safe_put_page(conf->spare_page);
 		kfree(conf->stripe_hashtbl);
 		kfree(conf);
 	}
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index e559fb701aa1..12b3203e3419 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -324,5 +324,10 @@ do {									\
 	__wait_event_lock_irq(wq, condition, lock, cmd);		\
 } while (0)
 
+static inline void safe_put_page(struct page *p)
+{
+	if (p) put_page(p);
+}
+
 #endif
 
-- 
cgit v1.2.3


From 2989ddbd6e1d9638a188311b896362c4bf7b7c25 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:43 -0800
Subject: [PATCH] md: make a couple of names in md.c static

.. because they aren't used outside md.c

Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/md/md.c           | 4 ++--
 include/linux/raid/md_k.h | 2 --
 2 files changed, 2 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index c1613854f38d..3cf089349e77 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -144,7 +144,7 @@ static int start_readonly;
  *  start array, stop array, error, add device, remove device,
  *  start build, activate spare
  */
-DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
+static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
 static atomic_t md_event_count;
 void md_new_event(mddev_t *mddev)
 {
@@ -279,7 +279,7 @@ static inline void mddev_unlock(mddev_t * mddev)
 	md_wakeup_thread(mddev->thread);
 }
 
-mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
+static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 {
 	mdk_rdev_t * rdev;
 	struct list_head *tmp;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 12b3203e3419..0fb5af6d622d 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -263,8 +263,6 @@ static inline char * mdname (mddev_t * mddev)
 	return mddev->gendisk ? mddev->gendisk->disk_name : "mdX";
 }
 
-extern mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr);
-
 /*
  * iterates through some rdev ringlist. It's safe to remove the
  * current 'rdev'. Dont touch 'tmp' though.
-- 
cgit v1.2.3


From d9d166c2a9d5d01af34396793950aa695883eed4 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:51 -0800
Subject: [PATCH] md: allow array level to be set textually via sysfs

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/md.txt      |  8 +++++++
 drivers/md/faulty.c       |  1 +
 drivers/md/linear.c       |  3 ++-
 drivers/md/md.c           | 61 +++++++++++++++++++++++++++++++++++++----------
 drivers/md/multipath.c    |  1 +
 drivers/md/raid0.c        |  1 +
 drivers/md/raid1.c        |  1 +
 drivers/md/raid10.c       |  1 +
 drivers/md/raid5.c        |  2 ++
 drivers/md/raid6main.c    |  1 +
 include/linux/raid/md_k.h |  1 +
 11 files changed, 67 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index c5512afd5917..fd43fd2cad2f 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -189,6 +189,14 @@ All md devices contain:
      1.2 (newer format in varying locations) or "none" indicating that
      the kernel isn't managing metadata at all.
 
+  level
+     The raid 'level' for this array.  The name will often (but not
+     always) be the same as the name of the module that implements the
+     level.  To be auto-loaded the module must have an alias
+        md-$LEVEL  e.g. md-raid5
+     This can be written only while the array is being assembled, not
+     after it is started.
+
 As component devices are added to an md array, they appear in the 'md'
 directory as new directories named
       dev-XXX
diff --git a/drivers/md/faulty.c b/drivers/md/faulty.c
index f12e83086897..a7a5ab554338 100644
--- a/drivers/md/faulty.c
+++ b/drivers/md/faulty.c
@@ -342,4 +342,5 @@ module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-10"); /* faulty */
+MODULE_ALIAS("md-faulty");
 MODULE_ALIAS("md-level--5");
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 79dee8159217..777585458c85 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -376,5 +376,6 @@ static void linear_exit (void)
 module_init(linear_init);
 module_exit(linear_exit);
 MODULE_LICENSE("GPL");
-MODULE_ALIAS("md-personality-1"); /* LINEAR - degrecated*/
+MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
+MODULE_ALIAS("md-linear");
 MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ecc0166ba779..594d8c312e6a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -303,12 +303,15 @@ static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
 	return NULL;
 }
 
-static struct mdk_personality *find_pers(int level)
+static struct mdk_personality *find_pers(int level, char *clevel)
 {
 	struct mdk_personality *pers;
-	list_for_each_entry(pers, &pers_list, list)
-		if (pers->level == level)
+	list_for_each_entry(pers, &pers_list, list) {
+		if (level != LEVEL_NONE && pers->level == level)
 			return pers;
+		if (strcmp(pers->name, clevel)==0)
+			return pers;
+	}
 	return NULL;
 }
 
@@ -715,6 +718,7 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->ctime = sb->ctime;
 		mddev->utime = sb->utime;
 		mddev->level = sb->level;
+		mddev->clevel[0] = 0;
 		mddev->layout = sb->layout;
 		mddev->raid_disks = sb->raid_disks;
 		mddev->size = sb->size;
@@ -1051,6 +1055,7 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
 		mddev->ctime = le64_to_cpu(sb->ctime) & ((1ULL << 32)-1);
 		mddev->utime = le64_to_cpu(sb->utime) & ((1ULL << 32)-1);
 		mddev->level = le32_to_cpu(sb->level);
+		mddev->clevel[0] = 0;
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
 		mddev->size = le64_to_cpu(sb->size)/2;
@@ -1774,15 +1779,36 @@ static ssize_t
 level_show(mddev_t *mddev, char *page)
 {
 	struct mdk_personality *p = mddev->pers;
-	if (p == NULL && mddev->raid_disks == 0)
-		return 0;
-	if (mddev->level >= 0)
-		return sprintf(page, "raid%d\n", mddev->level);
-	else
+	if (p)
 		return sprintf(page, "%s\n", p->name);
+	else if (mddev->clevel[0])
+		return sprintf(page, "%s\n", mddev->clevel);
+	else if (mddev->level != LEVEL_NONE)
+		return sprintf(page, "%d\n", mddev->level);
+	else
+		return 0;
+}
+
+static ssize_t
+level_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	int rv = len;
+	if (mddev->pers)
+		return -EBUSY;
+	if (len == 0)
+		return 0;
+	if (len >= sizeof(mddev->clevel))
+		return -ENOSPC;
+	strncpy(mddev->clevel, buf, len);
+	if (mddev->clevel[len-1] == '\n')
+		len--;
+	mddev->clevel[len] = 0;
+	mddev->level = LEVEL_NONE;
+	return rv;
 }
 
-static struct md_sysfs_entry md_level = __ATTR_RO(level);
+static struct md_sysfs_entry md_level =
+__ATTR(level, 0644, level_show, level_store);
 
 static ssize_t
 raid_disks_show(mddev_t *mddev, char *page)
@@ -2158,7 +2184,10 @@ static int do_md_run(mddev_t * mddev)
 	}
 
 #ifdef CONFIG_KMOD
-	request_module("md-level-%d", mddev->level);
+	if (mddev->level != LEVEL_NONE)
+		request_module("md-level-%d", mddev->level);
+	else if (mddev->clevel[0])
+		request_module("md-%s", mddev->clevel);
 #endif
 
 	/*
@@ -2180,15 +2209,21 @@ static int do_md_run(mddev_t * mddev)
 		return -ENOMEM;
 
 	spin_lock(&pers_lock);
-	pers = find_pers(mddev->level);
+	pers = find_pers(mddev->level, mddev->clevel);
 	if (!pers || !try_module_get(pers->owner)) {
 		spin_unlock(&pers_lock);
-		printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
-		       mddev->level);
+		if (mddev->level != LEVEL_NONE)
+			printk(KERN_WARNING "md: personality for level %d is not loaded!\n",
+			       mddev->level);
+		else
+			printk(KERN_WARNING "md: personality for level %s is not loaded!\n",
+			       mddev->clevel);
 		return -EINVAL;
 	}
 	mddev->pers = pers;
 	spin_unlock(&pers_lock);
+	mddev->level = pers->level;
+	strlcpy(mddev->clevel, pers->name, sizeof(mddev->clevel));
 
 	mddev->recovery = 0;
 	mddev->resync_max_sectors = mddev->size << 1; /* may be over-ridden by personality */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d4d838e3f9f8..e6aa309a66d7 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -578,4 +578,5 @@ module_init(multipath_init);
 module_exit(multipath_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-7"); /* MULTIPATH */
+MODULE_ALIAS("md-multipath");
 MODULE_ALIAS("md-level--4");
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 7fb69e29391b..abbca150202b 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -536,4 +536,5 @@ module_init(raid0_init);
 module_exit(raid0_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-2"); /* RAID0 */
+MODULE_ALIAS("md-raid0");
 MODULE_ALIAS("md-level-0");
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 7d4465f93064..181c9616d5f1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -2092,4 +2092,5 @@ module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-3"); /* RAID1 */
+MODULE_ALIAS("md-raid1");
 MODULE_ALIAS("md-level-1");
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8e6f6dfddb2b..201dc7168a5f 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2117,4 +2117,5 @@ module_init(raid_init);
 module_exit(raid_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-9"); /* RAID10 */
+MODULE_ALIAS("md-raid10");
 MODULE_ALIAS("md-level-10");
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index b0cfd3ca9ca0..9cc844f455bf 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2240,5 +2240,7 @@ module_init(raid5_init);
 module_exit(raid5_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-4"); /* RAID5 */
+MODULE_ALIAS("md-raid5");
+MODULE_ALIAS("md-raid4");
 MODULE_ALIAS("md-level-5");
 MODULE_ALIAS("md-level-4");
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 06b32bd671a3..84dd875bb2f6 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -2341,4 +2341,5 @@ module_init(raid6_init);
 module_exit(raid6_exit);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("md-personality-8"); /* RAID6 */
+MODULE_ALIAS("md-raid6");
 MODULE_ALIAS("md-level-6");
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 0fb5af6d622d..686463115438 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -119,6 +119,7 @@ struct mddev_s
 	int				chunk_size;
 	time_t				ctime, utime;
 	int				level, layout;
+	char				clevel[16];
 	int				raid_disks;
 	int				max_disks;
 	sector_t			size; /* used size of component devices */
-- 
cgit v1.2.3


From 4dbcdc751cb25ffca3a8374cbc5ab6de961cc545 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:20:52 -0800
Subject: [PATCH] md: count corrected read errors per drive

Store this total in superblock (As appropriate), and make it available to
userspace via sysfs.

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/md.txt      | 11 +++++++++++
 drivers/md/md.c           | 27 ++++++++++++++++++++++++++-
 drivers/md/raid1.c        |  2 ++
 drivers/md/raid10.c       | 11 ++++++++---
 drivers/md/raid5.c        |  3 +++
 drivers/md/raid6main.c    |  3 +++
 include/linux/raid/md_k.h |  4 ++++
 7 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index fd43fd2cad2f..a3eadf8e1701 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -222,6 +222,17 @@ Each directory contains:
 			 of being recoverred to
 	This list make grow in future.
 
+      errors
+	An approximate count of read errors that have been detected on
+	this device but have not caused the device to be evicted from
+	the array (either because they were corrected or because they
+	happened while the array was read-only).  When using version-1
+	metadata, this value persists across restarts of the array.
+
+	This value can be written while assembling an array thus
+	providing an ongoing count for arrays with metadata managed by
+	userspace.
+
 
 An active md device will also contain and entry for each active device
 in the array.  These are named
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 594d8c312e6a..32a4e2311e43 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1000,6 +1000,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
 	}
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
+	atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read));
 
 	rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256;
 	bmask = queue_hardsect_size(rdev->bdev->bd_disk->queue)-1;
@@ -1139,6 +1140,8 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
 	else
 		sb->resync_offset = cpu_to_le64(0);
 
+	sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors);
+
 	if (mddev->bitmap && mddev->bitmap_file == NULL) {
 		sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset);
 		sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
@@ -1592,9 +1595,30 @@ super_show(mdk_rdev_t *rdev, char *page)
 }
 static struct rdev_sysfs_entry rdev_super = __ATTR_RO(super);
 
+static ssize_t
+errors_show(mdk_rdev_t *rdev, char *page)
+{
+	return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
+}
+
+static ssize_t
+errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+{
+	char *e;
+	unsigned long n = simple_strtoul(buf, &e, 10);
+	if (*buf && (*e == 0 || *e == '\n')) {
+		atomic_set(&rdev->corrected_errors, n);
+		return len;
+	}
+	return -EINVAL;
+}
+static struct rdev_sysfs_entry rdev_errors =
+__ATTR(errors, 0644, errors_show, errors_store);
+
 static struct attribute *rdev_default_attrs[] = {
 	&rdev_state.attr,
 	&rdev_super.attr,
+	&rdev_errors.attr,
 	NULL,
 };
 static ssize_t
@@ -1674,6 +1698,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi
 	rdev->data_offset = 0;
 	atomic_set(&rdev->nr_pending, 0);
 	atomic_set(&rdev->read_errors, 0);
+	atomic_set(&rdev->corrected_errors, 0);
 
 	size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
 	if (!size) {
@@ -4729,7 +4754,7 @@ static int set_ro(const char *val, struct kernel_param *kp)
 	int num = simple_strtoul(val, &e, 10);
 	if (*val && (*e == '\0' || *e == '\n')) {
 		start_readonly = num;
-		return 0;;
+		return 0;
 	}
 	return -EINVAL;
 }
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 181c9616d5f1..a06ff91f27e2 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -1265,6 +1265,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
 					if (r1_bio->bios[d]->bi_end_io != end_sync_read)
 						continue;
 					rdev = conf->mirrors[d].rdev;
+					atomic_add(s, &rdev->corrected_errors);
 					if (sync_page_io(rdev->bdev,
 							 sect + rdev->data_offset,
 							 s<<9,
@@ -1463,6 +1464,7 @@ static void raid1d(mddev_t *mddev)
 							d = conf->raid_disks;
 						d--;
 						rdev = conf->mirrors[d].rdev;
+						atomic_add(s, &rdev->corrected_errors);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
 							if (sync_page_io(rdev->bdev,
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 201dc7168a5f..9e658e519a27 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1122,9 +1122,13 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 
 	if (test_bit(BIO_UPTODATE, &bio->bi_flags))
 		set_bit(R10BIO_Uptodate, &r10_bio->state);
-	else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
-		md_error(r10_bio->mddev,
-			 conf->mirrors[d].rdev);
+	else {
+		atomic_add(r10_bio->sectors,
+			   &conf->mirrors[d].rdev->corrected_errors);
+		if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
+			md_error(r10_bio->mddev,
+				 conf->mirrors[d].rdev);
+	}
 
 	/* for reconstruct, we always reschedule after a read.
 	 * for resync, only after all reads
@@ -1430,6 +1434,7 @@ static void raid10d(mddev_t *mddev)
 						sl--;
 						d = r10_bio->devs[sl].devnum;
 						rdev = conf->mirrors[d].rdev;
+						atomic_add(s, &rdev->corrected_errors);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
 							if (sync_page_io(rdev->bdev,
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9cc844f455bf..54f4a9847e38 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -1400,6 +1400,9 @@ static void handle_stripe(struct stripe_head *sh)
 			bi->bi_io_vec[0].bv_offset = 0;
 			bi->bi_size = STRIPE_SIZE;
 			bi->bi_next = NULL;
+			if (rw == WRITE &&
+			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			generic_make_request(bi);
 		} else {
 			if (rw == 1)
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 84dd875bb2f6..8c823d686a60 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1562,6 +1562,9 @@ static void handle_stripe(struct stripe_head *sh, struct page *tmp_page)
 			bi->bi_io_vec[0].bv_offset = 0;
 			bi->bi_size = STRIPE_SIZE;
 			bi->bi_next = NULL;
+			if (rw == WRITE &&
+			    test_bit(R5_ReWrite, &sh->dev[i].flags))
+				atomic_add(STRIPE_SECTORS, &rdev->corrected_errors);
 			generic_make_request(bi);
 		} else {
 			if (rw == 1)
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 686463115438..68b929c079ab 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -95,6 +95,10 @@ struct mdk_rdev_s
 	atomic_t	read_errors;	/* number of consecutive read errors that
 					 * we have tried to ignore.
 					 */
+	atomic_t	corrected_errors; /* number of corrected read errors,
+					   * for reporting to userspace and storing
+					   * in superblock.
+					   */
 };
 
 struct mddev_s
-- 
cgit v1.2.3


From 88202a0c84e1951d6630d1d557d4801a8cc5b5ef Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Fri, 6 Jan 2006 00:21:36 -0800
Subject: [PATCH] md: allow sync-speed to be controlled per-device

Also export current (average) speed and status in sysfs.

Signed-off-by: Neil Brown <neilb@suse.de>
Acked-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/md.txt      |  22 ++++++++++
 drivers/md/md.c           | 110 +++++++++++++++++++++++++++++++++++++++++++---
 include/linux/raid/md_k.h |   4 ++
 3 files changed, 131 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/md.txt b/Documentation/md.txt
index b8d172b254f7..03a13c462cf2 100644
--- a/Documentation/md.txt
+++ b/Documentation/md.txt
@@ -207,6 +207,28 @@ All md devices contain:
      available.  It will then appear at md/dev-XXX (depending on the
      name of the device) and further configuration is then possible.
 
+   sync_speed_min
+   sync_speed_max
+     This are similar to /proc/sys/dev/raid/speed_limit_{min,max}
+     however they only apply to the particular array.
+     If no value has been written to these, of if the word 'system'
+     is written, then the system-wide value is used.  If a value,
+     in kibibytes-per-second is written, then it is used.
+     When the files are read, they show the currently active value
+     followed by "(local)" or "(system)" depending on whether it is
+     a locally set or system-wide value.
+
+   sync_completed
+     This shows the number of sectors that have been completed of
+     whatever the current sync_action is, followed by the number of
+     sectors in total that could need to be processed.  The two
+     numbers are separated by a '/'  thus effectively showing one
+     value, a fraction of the process that is complete.
+
+   sync_speed
+     This shows the current actual speed, in K/sec, of the current
+     sync_action.  It is averaged over the last 30 seconds.
+
 
 As component devices are added to an md array, they appear in the 'md'
 directory as new directories named
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 825e235b791b..1b76fb29fb70 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -81,10 +81,22 @@ static DEFINE_SPINLOCK(pers_lock);
  * idle IO detection.
  *
  * you can change it via /proc/sys/dev/raid/speed_limit_min and _max.
+ * or /sys/block/mdX/md/sync_speed_{min,max}
  */
 
 static int sysctl_speed_limit_min = 1000;
 static int sysctl_speed_limit_max = 200000;
+static inline int speed_min(mddev_t *mddev)
+{
+	return mddev->sync_speed_min ?
+		mddev->sync_speed_min : sysctl_speed_limit_min;
+}
+
+static inline int speed_max(mddev_t *mddev)
+{
+	return mddev->sync_speed_max ?
+		mddev->sync_speed_max : sysctl_speed_limit_max;
+}
 
 static struct ctl_table_header *raid_table_header;
 
@@ -2197,6 +2209,90 @@ md_scan_mode = __ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
 static struct md_sysfs_entry
 md_mismatches = __ATTR_RO(mismatch_cnt);
 
+static ssize_t
+sync_min_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%d (%s)\n", speed_min(mddev),
+		       mddev->sync_speed_min ? "local": "system");
+}
+
+static ssize_t
+sync_min_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	int min;
+	char *e;
+	if (strncmp(buf, "system", 6)==0) {
+		mddev->sync_speed_min = 0;
+		return len;
+	}
+	min = simple_strtoul(buf, &e, 10);
+	if (buf == e || (*e && *e != '\n') || min <= 0)
+		return -EINVAL;
+	mddev->sync_speed_min = min;
+	return len;
+}
+
+static struct md_sysfs_entry md_sync_min =
+__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
+
+static ssize_t
+sync_max_show(mddev_t *mddev, char *page)
+{
+	return sprintf(page, "%d (%s)\n", speed_max(mddev),
+		       mddev->sync_speed_max ? "local": "system");
+}
+
+static ssize_t
+sync_max_store(mddev_t *mddev, const char *buf, size_t len)
+{
+	int max;
+	char *e;
+	if (strncmp(buf, "system", 6)==0) {
+		mddev->sync_speed_max = 0;
+		return len;
+	}
+	max = simple_strtoul(buf, &e, 10);
+	if (buf == e || (*e && *e != '\n') || max <= 0)
+		return -EINVAL;
+	mddev->sync_speed_max = max;
+	return len;
+}
+
+static struct md_sysfs_entry md_sync_max =
+__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
+
+
+static ssize_t
+sync_speed_show(mddev_t *mddev, char *page)
+{
+	unsigned long resync, dt, db;
+	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+	dt = ((jiffies - mddev->resync_mark) / HZ);
+	if (!dt) dt++;
+	db = resync - (mddev->resync_mark_cnt);
+	return sprintf(page, "%ld\n", db/dt/2); /* K/sec */
+}
+
+static struct md_sysfs_entry
+md_sync_speed = __ATTR_RO(sync_speed);
+
+static ssize_t
+sync_completed_show(mddev_t *mddev, char *page)
+{
+	unsigned long max_blocks, resync;
+
+	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
+		max_blocks = mddev->resync_max_sectors;
+	else
+		max_blocks = mddev->size << 1;
+
+	resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active));
+	return sprintf(page, "%lu / %lu\n", resync, max_blocks);
+}
+
+static struct md_sysfs_entry
+md_sync_completed = __ATTR_RO(sync_completed);
+
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
 	&md_raid_disks.attr,
@@ -2210,6 +2306,10 @@ static struct attribute *md_default_attrs[] = {
 static struct attribute *md_redundancy_attrs[] = {
 	&md_scan_mode.attr,
 	&md_mismatches.attr,
+	&md_sync_min.attr,
+	&md_sync_max.attr,
+	&md_sync_speed.attr,
+	&md_sync_completed.attr,
 	NULL,
 };
 static struct attribute_group md_redundancy_group = {
@@ -4433,10 +4533,10 @@ static void md_do_sync(mddev_t *mddev)
 
 	printk(KERN_INFO "md: syncing RAID array %s\n", mdname(mddev));
 	printk(KERN_INFO "md: minimum _guaranteed_ reconstruction speed:"
-		" %d KB/sec/disc.\n", sysctl_speed_limit_min);
+		" %d KB/sec/disc.\n", speed_min(mddev));
 	printk(KERN_INFO "md: using maximum available idle IO bandwidth "
 	       "(but not more than %d KB/sec) for reconstruction.\n",
-	       sysctl_speed_limit_max);
+	       speed_max(mddev));
 
 	is_mddev_idle(mddev); /* this also initializes IO event counters */
 	/* we don't use the checkpoint if there's a bitmap */
@@ -4477,7 +4577,7 @@ static void md_do_sync(mddev_t *mddev)
 
 		skipped = 0;
 		sectors = mddev->pers->sync_request(mddev, j, &skipped,
-					    currspeed < sysctl_speed_limit_min);
+					    currspeed < speed_min(mddev));
 		if (sectors == 0) {
 			set_bit(MD_RECOVERY_ERR, &mddev->recovery);
 			goto out;
@@ -4542,8 +4642,8 @@ static void md_do_sync(mddev_t *mddev)
 		currspeed = ((unsigned long)(io_sectors-mddev->resync_mark_cnt))/2
 			/((jiffies-mddev->resync_mark)/HZ +1) +1;
 
-		if (currspeed > sysctl_speed_limit_min) {
-			if ((currspeed > sysctl_speed_limit_max) ||
+		if (currspeed > speed_min(mddev)) {
+			if ((currspeed > speed_max(mddev)) ||
 					!is_mddev_idle(mddev)) {
 				msleep(500);
 				goto repeat;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 68b929c079ab..617b9506c760 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -143,6 +143,10 @@ struct mddev_s
 	sector_t			resync_mismatches; /* count of sectors where
 							    * parity/replica mismatch found
 							    */
+	/* if zero, use the system-wide default */
+	int				sync_speed_min;
+	int				sync_speed_max;
+
 	int				ok_start_degraded;
 	/* recovery/resync flags 
 	 * NEEDED:   we might need to start a resync/recover
-- 
cgit v1.2.3


From 9b847548663ef1039dd49f0eb4463d001e596bc3 Mon Sep 17 00:00:00 2001
From: Jens Axboe <axboe@suse.de>
Date: Fri, 6 Jan 2006 09:28:07 +0100
Subject: [PATCH] Suspend support for libata

This patch adds suspend patch to libata, and ata_piix in particular. For
most low level drivers, they should just need to add the 4 hooks to
work. As I can only test ata_piix, I didn't enable it for more
though.

Suspend support is the single most important feature on a notebook, and
most new notebooks have sata drives. It's quite embarrassing that we
_still_ do not support this. Right now, it's perfectly possible to
suspend the drive in mid-transfer.

Signed-off-by: Jens Axboe <axboe@suse.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/scsi/ata_piix.c    |   4 ++
 drivers/scsi/libata-core.c | 114 +++++++++++++++++++++++++++++++++++++++++++++
 drivers/scsi/libata-scsi.c |  16 +++++++
 drivers/scsi/scsi_sysfs.c  |  31 ++++++++++++
 include/linux/ata.h        |   2 +
 include/linux/libata.h     |   8 ++++
 include/scsi/scsi_host.h   |   6 +++
 7 files changed, 181 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/scsi/ata_piix.c b/drivers/scsi/ata_piix.c
index 0ea27873b9ff..f79630340028 100644
--- a/drivers/scsi/ata_piix.c
+++ b/drivers/scsi/ata_piix.c
@@ -166,6 +166,8 @@ static struct pci_driver piix_pci_driver = {
 	.id_table		= piix_pci_tbl,
 	.probe			= piix_init_one,
 	.remove			= ata_pci_remove_one,
+	.suspend		= ata_pci_device_suspend,
+	.resume			= ata_pci_device_resume,
 };
 
 static struct scsi_host_template piix_sht = {
@@ -186,6 +188,8 @@ static struct scsi_host_template piix_sht = {
 	.slave_configure	= ata_scsi_slave_config,
 	.bios_param		= ata_std_bios_param,
 	.ordered_flush		= 1,
+	.resume			= ata_scsi_device_resume,
+	.suspend		= ata_scsi_device_suspend,
 };
 
 static const struct ata_port_operations piix_pata_ops = {
diff --git a/drivers/scsi/libata-core.c b/drivers/scsi/libata-core.c
index 9ea102587914..9c66d4059399 100644
--- a/drivers/scsi/libata-core.c
+++ b/drivers/scsi/libata-core.c
@@ -4154,6 +4154,96 @@ err_out:
  *	Inherited from caller.
  */
 
+/*
+ * Execute a 'simple' command, that only consists of the opcode 'cmd' itself,
+ * without filling any other registers
+ */
+static int ata_do_simple_cmd(struct ata_port *ap, struct ata_device *dev,
+			     u8 cmd)
+{
+	struct ata_taskfile tf;
+	int err;
+
+	ata_tf_init(ap, &tf, dev->devno);
+
+	tf.command = cmd;
+	tf.flags |= ATA_TFLAG_DEVICE;
+	tf.protocol = ATA_PROT_NODATA;
+
+	err = ata_exec_internal(ap, dev, &tf, DMA_NONE, NULL, 0);
+	if (err)
+		printk(KERN_ERR "%s: ata command failed: %d\n",
+				__FUNCTION__, err);
+
+	return err;
+}
+
+static int ata_flush_cache(struct ata_port *ap, struct ata_device *dev)
+{
+	u8 cmd;
+
+	if (!ata_try_flush_cache(dev))
+		return 0;
+
+	if (ata_id_has_flush_ext(dev->id))
+		cmd = ATA_CMD_FLUSH_EXT;
+	else
+		cmd = ATA_CMD_FLUSH;
+
+	return ata_do_simple_cmd(ap, dev, cmd);
+}
+
+static int ata_standby_drive(struct ata_port *ap, struct ata_device *dev)
+{
+	return ata_do_simple_cmd(ap, dev, ATA_CMD_STANDBYNOW1);
+}
+
+static int ata_start_drive(struct ata_port *ap, struct ata_device *dev)
+{
+	return ata_do_simple_cmd(ap, dev, ATA_CMD_IDLEIMMEDIATE);
+}
+
+/**
+ *	ata_device_resume - wakeup a previously suspended devices
+ *
+ *	Kick the drive back into action, by sending it an idle immediate
+ *	command and making sure its transfer mode matches between drive
+ *	and host.
+ *
+ */
+int ata_device_resume(struct ata_port *ap, struct ata_device *dev)
+{
+	if (ap->flags & ATA_FLAG_SUSPENDED) {
+		ap->flags &= ~ATA_FLAG_SUSPENDED;
+		ata_set_mode(ap);
+	}
+	if (!ata_dev_present(dev))
+		return 0;
+	if (dev->class == ATA_DEV_ATA)
+		ata_start_drive(ap, dev);
+
+	return 0;
+}
+
+/**
+ *	ata_device_suspend - prepare a device for suspend
+ *
+ *	Flush the cache on the drive, if appropriate, then issue a
+ *	standbynow command.
+ *
+ */
+int ata_device_suspend(struct ata_port *ap, struct ata_device *dev)
+{
+	if (!ata_dev_present(dev))
+		return 0;
+	if (dev->class == ATA_DEV_ATA)
+		ata_flush_cache(ap, dev);
+
+	ata_standby_drive(ap, dev);
+	ap->flags |= ATA_FLAG_SUSPENDED;
+	return 0;
+}
+
 int ata_port_start (struct ata_port *ap)
 {
 	struct device *dev = ap->host_set->dev;
@@ -4902,6 +4992,23 @@ int pci_test_config_bits(struct pci_dev *pdev, const struct pci_bits *bits)
 
 	return (tmp == bits->val) ? 1 : 0;
 }
+
+int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	pci_save_state(pdev);
+	pci_disable_device(pdev);
+	pci_set_power_state(pdev, PCI_D3hot);
+	return 0;
+}
+
+int ata_pci_device_resume(struct pci_dev *pdev)
+{
+	pci_set_power_state(pdev, PCI_D0);
+	pci_restore_state(pdev);
+	pci_enable_device(pdev);
+	pci_set_master(pdev);
+	return 0;
+}
 #endif /* CONFIG_PCI */
 
 
@@ -5005,4 +5112,11 @@ EXPORT_SYMBOL_GPL(ata_pci_host_stop);
 EXPORT_SYMBOL_GPL(ata_pci_init_native_mode);
 EXPORT_SYMBOL_GPL(ata_pci_init_one);
 EXPORT_SYMBOL_GPL(ata_pci_remove_one);
+EXPORT_SYMBOL_GPL(ata_pci_device_suspend);
+EXPORT_SYMBOL_GPL(ata_pci_device_resume);
 #endif /* CONFIG_PCI */
+
+EXPORT_SYMBOL_GPL(ata_device_suspend);
+EXPORT_SYMBOL_GPL(ata_device_resume);
+EXPORT_SYMBOL_GPL(ata_scsi_device_suspend);
+EXPORT_SYMBOL_GPL(ata_scsi_device_resume);
diff --git a/drivers/scsi/libata-scsi.c b/drivers/scsi/libata-scsi.c
index e0439be4b573..c1ebede14a48 100644
--- a/drivers/scsi/libata-scsi.c
+++ b/drivers/scsi/libata-scsi.c
@@ -396,6 +396,22 @@ void ata_dump_status(unsigned id, struct ata_taskfile *tf)
 	}
 }
 
+int ata_scsi_device_resume(struct scsi_device *sdev)
+{
+	struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+	struct ata_device *dev = &ap->device[sdev->id];
+
+	return ata_device_resume(ap, dev);
+}
+
+int ata_scsi_device_suspend(struct scsi_device *sdev)
+{
+	struct ata_port *ap = (struct ata_port *) &sdev->host->hostdata[0];
+	struct ata_device *dev = &ap->device[sdev->id];
+
+	return ata_device_suspend(ap, dev);
+}
+
 /**
  *	ata_to_sense_error - convert ATA error to SCSI error
  *	@id: ATA device number
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 15842b1f0f4a..ea7f3a433572 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -263,9 +263,40 @@ static int scsi_bus_match(struct device *dev, struct device_driver *gendrv)
 	return (sdp->inq_periph_qual == SCSI_INQ_PQ_CON)? 1: 0;
 }
 
+static int scsi_bus_suspend(struct device * dev, pm_message_t state)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct scsi_host_template *sht = sdev->host->hostt;
+	int err;
+
+	err = scsi_device_quiesce(sdev);
+	if (err)
+		return err;
+
+	if (sht->suspend)
+		err = sht->suspend(sdev);
+
+	return err;
+}
+
+static int scsi_bus_resume(struct device * dev)
+{
+	struct scsi_device *sdev = to_scsi_device(dev);
+	struct scsi_host_template *sht = sdev->host->hostt;
+	int err = 0;
+
+	if (sht->resume)
+		err = sht->resume(sdev);
+
+	scsi_device_resume(sdev);
+	return err;
+}
+
 struct bus_type scsi_bus_type = {
         .name		= "scsi",
         .match		= scsi_bus_match,
+	.suspend	= scsi_bus_suspend,
+	.resume		= scsi_bus_resume,
 };
 
 int scsi_sysfs_register(void)
diff --git a/include/linux/ata.h b/include/linux/ata.h
index d2873b732bb1..3eb80c391b39 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -141,6 +141,8 @@ enum {
 	ATA_CMD_PACKET		= 0xA0,
 	ATA_CMD_VERIFY		= 0x40,
 	ATA_CMD_VERIFY_EXT	= 0x42,
+ 	ATA_CMD_STANDBYNOW1	= 0xE0,
+ 	ATA_CMD_IDLEIMMEDIATE	= 0xE1,
 	ATA_CMD_INIT_DEV_PARAMS	= 0x91,
 
 	/* SETFEATURES stuff */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index e828e172ccbf..cdab75c209a0 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -124,6 +124,8 @@ enum {
 	ATA_FLAG_DEBUGMSG	= (1 << 10),
 	ATA_FLAG_NO_ATAPI	= (1 << 11), /* No ATAPI support */
 
+	ATA_FLAG_SUSPENDED	= (1 << 12), /* port is suspended */
+
 	ATA_QCFLAG_ACTIVE	= (1 << 1), /* cmd not yet ack'd to scsi lyer */
 	ATA_QCFLAG_SG		= (1 << 3), /* have s/g table? */
 	ATA_QCFLAG_SINGLE	= (1 << 4), /* no s/g, just a single buffer */
@@ -436,6 +438,8 @@ extern void ata_std_ports(struct ata_ioports *ioaddr);
 extern int ata_pci_init_one (struct pci_dev *pdev, struct ata_port_info **port_info,
 			     unsigned int n_ports);
 extern void ata_pci_remove_one (struct pci_dev *pdev);
+extern int ata_pci_device_suspend(struct pci_dev *pdev, pm_message_t state);
+extern int ata_pci_device_resume(struct pci_dev *pdev);
 #endif /* CONFIG_PCI */
 extern int ata_device_add(const struct ata_probe_ent *ent);
 extern void ata_host_set_remove(struct ata_host_set *host_set);
@@ -445,6 +449,10 @@ extern int ata_scsi_queuecmd(struct scsi_cmnd *cmd, void (*done)(struct scsi_cmn
 extern int ata_scsi_error(struct Scsi_Host *host);
 extern int ata_scsi_release(struct Scsi_Host *host);
 extern unsigned int ata_host_intr(struct ata_port *ap, struct ata_queued_cmd *qc);
+extern int ata_scsi_device_resume(struct scsi_device *);
+extern int ata_scsi_device_suspend(struct scsi_device *);
+extern int ata_device_resume(struct ata_port *, struct ata_device *);
+extern int ata_device_suspend(struct ata_port *, struct ata_device *);
 extern int ata_ratelimit(void);
 
 /*
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 6cbb1982ed03..6297885a35e7 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -295,6 +295,12 @@ struct scsi_host_template {
 	 */
 	int (*proc_info)(struct Scsi_Host *, char *, char **, off_t, int, int);
 
+	/*
+	 * suspend support
+	 */
+	int (*resume)(struct scsi_device *);
+	int (*suspend)(struct scsi_device *);
+
 	/*
 	 * Name of proc directory
 	 */
-- 
cgit v1.2.3


From 22905f775dd6a8b73be99826dcad07ceec00244b Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 16 Nov 2005 15:07:01 -0800
Subject: identify multipage ->writepages() calls

 NFS needs to be able to distinguish between single-page ->writepage() calls and
 multipage ->writepages() calls.

 For the single-page writepage calls NFS can kick off the I/O within the
 context of ->writepage().

 For multipage ->writepages calls, nfs_writepage() will leave the I/O pending
 and nfs_writepages() will kick off the I/O when it all has been queued up
 within NFS.

 Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
 Signed-off-by: Andrew Morton <akpm@osdl.org>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/writeback.h |  9 +++++----
 mm/page-writeback.c       | 10 ++++++++--
 2 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 64a36ba43b2f..b096159086e8 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -53,10 +53,11 @@ struct writeback_control {
 	loff_t start;
 	loff_t end;
 
-	unsigned nonblocking:1;			/* Don't get stuck on request queues */
-	unsigned encountered_congestion:1;	/* An output: a queue is full */
-	unsigned for_kupdate:1;			/* A kupdate writeback */
-	unsigned for_reclaim:1;			/* Invoked from the page allocator */
+	unsigned nonblocking:1;		/* Don't get stuck on request queues */
+	unsigned encountered_congestion:1; /* An output: a queue is full */
+	unsigned for_kupdate:1;		/* A kupdate writeback */
+	unsigned for_reclaim:1;		/* Invoked from the page allocator */
+	unsigned for_writepages:1;	/* This is a writepages() call */
 };
 
 /*
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0166ea15c9ee..5240e426c1f7 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -550,11 +550,17 @@ void __init page_writeback_init(void)
 
 int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
+	int ret;
+
 	if (wbc->nr_to_write <= 0)
 		return 0;
+	wbc->for_writepages = 1;
 	if (mapping->a_ops->writepages)
-		return mapping->a_ops->writepages(mapping, wbc);
-	return generic_writepages(mapping, wbc);
+		ret =  mapping->a_ops->writepages(mapping, wbc);
+	else
+		ret = generic_writepages(mapping, wbc);
+	wbc->for_writepages = 0;
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3


From abbcf28f23d53e8ec56a91f3528743913fa2694a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:03 +0100
Subject: SUNRPC: Yet more RPC cleanups

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  3 ++-
 net/sunrpc/clnt.c            | 32 +++++++++++++++++---------------
 net/sunrpc/pmap_clnt.c       |  8 +++-----
 net/sunrpc/sched.c           | 31 ++++++++++++-------------------
 4 files changed, 34 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 4d77e90d0b30..4c4b2dc8aca5 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -233,6 +233,7 @@ struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent);
 void		rpc_init_task(struct rpc_task *, struct rpc_clnt *,
 					rpc_action exitfunc, int flags);
 void		rpc_release_task(struct rpc_task *);
+void		rpc_exit_task(struct rpc_task *);
 void		rpc_killall_tasks(struct rpc_clnt *);
 int		rpc_execute(struct rpc_task *);
 void		rpc_run_child(struct rpc_task *parent, struct rpc_task *child,
@@ -259,7 +260,7 @@ void		rpc_destroy_mempool(void);
 static inline void rpc_exit(struct rpc_task *task, int status)
 {
 	task->tk_status = status;
-	task->tk_action = NULL;
+	task->tk_action = rpc_exit_task;
 }
 
 #ifdef RPC_DEBUG
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 61c3abeaccae..6ab4cbd8a901 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -511,7 +511,7 @@ rpc_call_setup(struct rpc_task *task, struct rpc_message *msg, int flags)
 	if (task->tk_status == 0)
 		task->tk_action = call_start;
 	else
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 }
 
 void
@@ -892,7 +892,7 @@ call_transmit(struct rpc_task *task)
 	if (task->tk_status < 0)
 		return;
 	if (!task->tk_msg.rpc_proc->p_decode) {
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 		rpc_wake_up_task(task);
 	}
 	return;
@@ -1039,13 +1039,14 @@ call_decode(struct rpc_task *task)
 				sizeof(req->rq_rcv_buf)) != 0);
 
 	/* Verify the RPC header */
-	if (!(p = call_verify(task))) {
-		if (task->tk_action == NULL)
-			return;
-		goto out_retry;
+	p = call_verify(task);
+	if (IS_ERR(p)) {
+		if (p == ERR_PTR(-EAGAIN))
+			goto out_retry;
+		return;
 	}
 
-	task->tk_action = NULL;
+	task->tk_action = rpc_exit_task;
 
 	if (decode)
 		task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
@@ -1138,7 +1139,7 @@ call_verify(struct rpc_task *task)
 
 	if ((n = ntohl(*p++)) != RPC_REPLY) {
 		printk(KERN_WARNING "call_verify: not an RPC reply: %x\n", n);
-		goto out_retry;
+		goto out_garbage;
 	}
 	if ((n = ntohl(*p++)) != RPC_MSG_ACCEPTED) {
 		if (--len < 0)
@@ -1168,7 +1169,7 @@ call_verify(struct rpc_task *task)
 							task->tk_pid);
 			rpcauth_invalcred(task);
 			task->tk_action = call_refresh;
-			return NULL;
+			goto out_retry;
 		case RPC_AUTH_BADCRED:
 		case RPC_AUTH_BADVERF:
 			/* possibly garbled cred/verf? */
@@ -1178,7 +1179,7 @@ call_verify(struct rpc_task *task)
 			dprintk("RPC: %4d call_verify: retry garbled creds\n",
 							task->tk_pid);
 			task->tk_action = call_bind;
-			return NULL;
+			goto out_retry;
 		case RPC_AUTH_TOOWEAK:
 			printk(KERN_NOTICE "call_verify: server requires stronger "
 			       "authentication.\n");
@@ -1193,7 +1194,7 @@ call_verify(struct rpc_task *task)
 	}
 	if (!(p = rpcauth_checkverf(task, p))) {
 		printk(KERN_WARNING "call_verify: auth check failed\n");
-		goto out_retry;		/* bad verifier, retry */
+		goto out_garbage;		/* bad verifier, retry */
 	}
 	len = p - (u32 *)iov->iov_base - 1;
 	if (len < 0)
@@ -1230,23 +1231,24 @@ call_verify(struct rpc_task *task)
 		/* Also retry */
 	}
 
-out_retry:
+out_garbage:
 	task->tk_client->cl_stats->rpcgarbage++;
 	if (task->tk_garb_retry) {
 		task->tk_garb_retry--;
 		dprintk("RPC %s: retrying %4d\n", __FUNCTION__, task->tk_pid);
 		task->tk_action = call_bind;
-		return NULL;
+out_retry:
+		return ERR_PTR(-EAGAIN);
 	}
 	printk(KERN_WARNING "RPC %s: retry failed, exit EIO\n", __FUNCTION__);
 out_eio:
 	error = -EIO;
 out_err:
 	rpc_exit(task, error);
-	return NULL;
+	return ERR_PTR(error);
 out_overflow:
 	printk(KERN_WARNING "RPC %s: server reply was truncated.\n", __FUNCTION__);
-	goto out_retry;
+	goto out_garbage;
 }
 
 static int rpcproc_encode_null(void *rqstp, u32 *data, void *obj)
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index a398575f94b8..cad4568fbbe2 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -90,8 +90,7 @@ bailout:
 	map->pm_binding = 0;
 	rpc_wake_up(&map->pm_bindwait);
 	spin_unlock(&pmap_lock);
-	task->tk_status = -EIO;
-	task->tk_action = NULL;
+	rpc_exit(task, -EIO);
 }
 
 #ifdef CONFIG_ROOT_NFS
@@ -138,11 +137,10 @@ pmap_getport_done(struct rpc_task *task)
 			task->tk_pid, task->tk_status, clnt->cl_port);
 	if (task->tk_status < 0) {
 		/* Make the calling task exit with an error */
-		task->tk_action = NULL;
+		task->tk_action = rpc_exit_task;
 	} else if (clnt->cl_port == 0) {
 		/* Program not registered */
-		task->tk_status = -EACCES;
-		task->tk_action = NULL;
+		rpc_exit(task, -EACCES);
 	} else {
 		/* byte-swap port number first */
 		clnt->cl_port = htons(clnt->cl_port);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 54e60a657500..3fcf7b0e1f6c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -555,28 +555,22 @@ __rpc_atrun(struct rpc_task *task)
 }
 
 /*
- * Helper that calls task->tk_exit if it exists and then returns
- * true if we should exit __rpc_execute.
+ * Helper that calls task->tk_exit if it exists
  */
-static inline int __rpc_do_exit(struct rpc_task *task)
+void rpc_exit_task(struct rpc_task *task)
 {
+	task->tk_action = NULL;
 	if (task->tk_exit != NULL) {
-		lock_kernel();
 		task->tk_exit(task);
-		unlock_kernel();
-		/* If tk_action is non-null, we should restart the call */
 		if (task->tk_action != NULL) {
-			if (!RPC_ASSASSINATED(task)) {
-				/* Release RPC slot and buffer memory */
-				xprt_release(task);
-				rpc_free(task);
-				return 0;
-			}
-			printk(KERN_ERR "RPC: dead task tried to walk away.\n");
+			WARN_ON(RPC_ASSASSINATED(task));
+			/* Always release the RPC slot and buffer memory */
+			xprt_release(task);
+			rpc_free(task);
 		}
 	}
-	return 1;
 }
+EXPORT_SYMBOL(rpc_exit_task);
 
 static int rpc_wait_bit_interruptible(void *word)
 {
@@ -631,12 +625,11 @@ static int __rpc_execute(struct rpc_task *task)
 		 * by someone else.
 		 */
 		if (!RPC_IS_QUEUED(task)) {
-			if (task->tk_action != NULL) {
-				lock_kernel();
-				task->tk_action(task);
-				unlock_kernel();
-			} else if (__rpc_do_exit(task))
+			if (task->tk_action == NULL)
 				break;
+			lock_kernel();
+			task->tk_action(task);
+			unlock_kernel();
 		}
 
 		/*
-- 
cgit v1.2.3


From 963d8fe53339128ee46a7701f2e36305f0ccff8c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:04 +0100
Subject: RPC: Clean up RPC task structure

 Shrink the RPC task structure. Instead of storing separate pointers
 for task->tk_exit and task->tk_release, put them in a structure.

 Also pass the user data pointer as a parameter instead of passing it via
 task->tk_calldata. This enables us to nest callbacks.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c          |  38 ++++++++-------
 fs/lockd/svc4proc.c          |  15 +++---
 fs/lockd/svclock.c           |  14 ++++--
 fs/lockd/svcproc.c           |  14 ++++--
 fs/nfs/direct.c              |   1 -
 fs/nfs/nfs3proc.c            |  44 +++++++++++-------
 fs/nfs/nfs4proc.c            | 107 +++++++++++++++++++++++++------------------
 fs/nfs/proc.c                |  28 +++++++----
 fs/nfs/read.c                |  10 ++--
 fs/nfs/unlink.c              |  19 ++++----
 fs/nfs/write.c               |  21 +++------
 fs/nfsd/nfs4callback.c       |  10 ++--
 include/linux/lockd/lockd.h  |   2 +-
 include/linux/nfs_fs.h       |  12 +++--
 include/linux/sunrpc/clnt.h  |   3 +-
 include/linux/sunrpc/sched.h |  20 +++++---
 net/sunrpc/clnt.c            |  15 +++---
 net/sunrpc/sched.c           |  53 +++++++++++----------
 18 files changed, 241 insertions(+), 185 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index c5a33648e9fd..816333cd377b 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -26,11 +26,12 @@
 static int	nlmclnt_test(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_lock(struct nlm_rqst *, struct file_lock *);
 static int	nlmclnt_unlock(struct nlm_rqst *, struct file_lock *);
-static void	nlmclnt_unlock_callback(struct rpc_task *);
-static void	nlmclnt_cancel_callback(struct rpc_task *);
 static int	nlm_stat_to_errno(u32 stat);
 static void	nlmclnt_locks_init_private(struct file_lock *fl, struct nlm_host *host);
 
+static const struct rpc_call_ops nlmclnt_unlock_ops;
+static const struct rpc_call_ops nlmclnt_cancel_ops;
+
 /*
  * Cookie counter for NLM requests
  */
@@ -399,8 +400,7 @@ in_grace_period:
 /*
  * Generic NLM call, async version.
  */
-int
-nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+int nlmsvc_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -419,13 +419,12 @@ nlmsvc_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
 	msg.rpc_proc = &clnt->cl_procinfo[proc];
 
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
+        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
 
 	return status;
 }
 
-static int
-nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
+static int nlmclnt_async_call(struct nlm_rqst *req, u32 proc, const struct rpc_call_ops *tk_ops)
 {
 	struct nlm_host	*host = req->a_host;
 	struct rpc_clnt	*clnt;
@@ -448,7 +447,7 @@ nlmclnt_async_call(struct nlm_rqst *req, u32 proc, rpc_action callback)
 	/* Increment host refcount */
 	nlm_get_host(host);
         /* bootstrap and kick off the async RPC call */
-        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, callback, req);
+        status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, tk_ops, req);
 	if (status < 0)
 		nlm_release_host(host);
 	return status;
@@ -664,7 +663,7 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 
 	if (req->a_flags & RPC_TASK_ASYNC) {
 		status = nlmclnt_async_call(req, NLMPROC_UNLOCK,
-					nlmclnt_unlock_callback);
+					&nlmclnt_unlock_ops);
 		/* Hrmf... Do the unlock early since locks_remove_posix()
 		 * really expects us to free the lock synchronously */
 		do_vfs_lock(fl);
@@ -692,10 +691,9 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
 	return -ENOLCK;
 }
 
-static void
-nlmclnt_unlock_callback(struct rpc_task *task)
+static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*req = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*req = data;
 	int		status = req->a_res.status;
 
 	if (RPC_ASSASSINATED(task))
@@ -722,6 +720,10 @@ die:
 	rpc_restart_call(task);
 }
 
+static const struct rpc_call_ops nlmclnt_unlock_ops = {
+	.rpc_call_done = nlmclnt_unlock_callback,
+};
+
 /*
  * Cancel a blocked lock request.
  * We always use an async RPC call for this in order not to hang a
@@ -750,8 +752,7 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
 
 	nlmclnt_setlockargs(req, fl);
 
-	status = nlmclnt_async_call(req, NLMPROC_CANCEL,
-					nlmclnt_cancel_callback);
+	status = nlmclnt_async_call(req, NLMPROC_CANCEL, &nlmclnt_cancel_ops);
 	if (status < 0) {
 		nlmclnt_release_lockargs(req);
 		kfree(req);
@@ -765,10 +766,9 @@ nlmclnt_cancel(struct nlm_host *host, struct file_lock *fl)
 	return status;
 }
 
-static void
-nlmclnt_cancel_callback(struct rpc_task *task)
+static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*req = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*req = data;
 
 	if (RPC_ASSASSINATED(task))
 		goto die;
@@ -807,6 +807,10 @@ retry_cancel:
 	rpc_delay(task, 30 * HZ);
 }
 
+static const struct rpc_call_ops nlmclnt_cancel_ops = {
+	.rpc_call_done = nlmclnt_cancel_callback,
+};
+
 /*
  * Convert an NLM status code to a generic kernel errno
  */
diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c
index 489670e21769..4063095d849e 100644
--- a/fs/lockd/svc4proc.c
+++ b/fs/lockd/svc4proc.c
@@ -22,7 +22,8 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 static u32	nlm4svc_callback(struct svc_rqst *, u32, struct nlm_res *);
-static void	nlm4svc_callback_exit(struct rpc_task *);
+
+static const struct rpc_call_ops nlm4svc_callback_ops;
 
 /*
  * Obtain client and file from arguments
@@ -470,7 +471,6 @@ nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res  *argp,
 }
 
 
-
 /*
  * This is the generic lockd callback for async RPC calls
  */
@@ -494,7 +494,7 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, nlm4svc_callback_exit) < 0)
+	if (nlmsvc_async_call(call, proc, &nlm4svc_callback_ops) < 0)
 		goto error;
 
 	return rpc_success;
@@ -504,10 +504,9 @@ nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	return rpc_system_err;
 }
 
-static void
-nlm4svc_callback_exit(struct rpc_task *task)
+static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*call = data;
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: %4d callback failed (errno = %d)\n",
@@ -517,6 +516,10 @@ nlm4svc_callback_exit(struct rpc_task *task)
 	kfree(call);
 }
 
+static const struct rpc_call_ops nlm4svc_callback_ops = {
+	.rpc_call_done = nlm4svc_callback_exit,
+};
+
 /*
  * NLM Server procedures.
  */
diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index 49f959796b66..87d09a0d8f64 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -41,7 +41,8 @@
 
 static void	nlmsvc_insert_block(struct nlm_block *block, unsigned long);
 static int	nlmsvc_remove_block(struct nlm_block *block);
-static void	nlmsvc_grant_callback(struct rpc_task *task);
+
+static const struct rpc_call_ops nlmsvc_grant_ops;
 
 /*
  * The list of blocked locks to retry
@@ -562,7 +563,7 @@ callback:
 	/* Call the client */
 	nlm_get_host(block->b_call.a_host);
 	if (nlmsvc_async_call(&block->b_call, NLMPROC_GRANTED_MSG,
-						nlmsvc_grant_callback) < 0)
+						&nlmsvc_grant_ops) < 0)
 		nlm_release_host(block->b_call.a_host);
 	up(&file->f_sema);
 }
@@ -575,10 +576,9 @@ callback:
  * chain once more in order to have it removed by lockd itself (which can
  * then sleep on the file semaphore without disrupting e.g. the nfs client).
  */
-static void
-nlmsvc_grant_callback(struct rpc_task *task)
+static void nlmsvc_grant_callback(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst		*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst		*call = data;
 	struct nlm_block	*block;
 	unsigned long		timeout;
 	struct sockaddr_in	*peer_addr = RPC_PEERADDR(task->tk_client);
@@ -614,6 +614,10 @@ nlmsvc_grant_callback(struct rpc_task *task)
 	nlm_release_host(call->a_host);
 }
 
+static const struct rpc_call_ops nlmsvc_grant_ops = {
+	.rpc_call_done = nlmsvc_grant_callback,
+};
+
 /*
  * We received a GRANT_RES callback. Try to find the corresponding
  * block.
diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c
index 757e344cf200..3bc437e0cf5b 100644
--- a/fs/lockd/svcproc.c
+++ b/fs/lockd/svcproc.c
@@ -23,7 +23,8 @@
 #define NLMDBG_FACILITY		NLMDBG_CLIENT
 
 static u32	nlmsvc_callback(struct svc_rqst *, u32, struct nlm_res *);
-static void	nlmsvc_callback_exit(struct rpc_task *);
+
+static const struct rpc_call_ops nlmsvc_callback_ops;
 
 #ifdef CONFIG_LOCKD_V4
 static u32
@@ -518,7 +519,7 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	call->a_host  = host;
 	memcpy(&call->a_args, resp, sizeof(*resp));
 
-	if (nlmsvc_async_call(call, proc, nlmsvc_callback_exit) < 0)
+	if (nlmsvc_async_call(call, proc, &nlmsvc_callback_ops) < 0)
 		goto error;
 
 	return rpc_success;
@@ -528,10 +529,9 @@ nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_res *resp)
 	return rpc_system_err;
 }
 
-static void
-nlmsvc_callback_exit(struct rpc_task *task)
+static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-	struct nlm_rqst	*call = (struct nlm_rqst *) task->tk_calldata;
+	struct nlm_rqst	*call = data;
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: %4d callback failed (errno = %d)\n",
@@ -541,6 +541,10 @@ nlmsvc_callback_exit(struct rpc_task *task)
 	kfree(call);
 }
 
+static const struct rpc_call_ops nlmsvc_callback_ops = {
+	.rpc_call_done = nlmsvc_callback_exit,
+};
+
 /*
  * NLM Server procedures.
  */
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 079228817603..a834423942c7 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -269,7 +269,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 
 		data->task.tk_cookie = (unsigned long) inode;
 		data->task.tk_calldata = data;
-		data->task.tk_release = nfs_readdata_release;
 		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 92c870d19ccd..c172a7584646 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -732,19 +732,23 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void
-nfs3_read_done(struct rpc_task *task)
+static void nfs3_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
 	/* Call back common NFS readpage processing */
 	if (task->tk_status >= 0)
 		nfs_refresh_inode(data->inode, &data->fattr);
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_read_ops = {
+	.rpc_call_done = nfs3_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs3_proc_read_setup(struct nfs_read_data *data)
 {
@@ -762,23 +766,26 @@ nfs3_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs3_write_done(struct rpc_task *task)
+static void nfs3_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data;
+	struct nfs_write_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
-	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_write_ops = {
+	.rpc_call_done = nfs3_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -806,23 +813,26 @@ nfs3_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs3_commit_done(struct rpc_task *task)
+static void nfs3_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data;
+	struct nfs_write_data *data = calldata;
 
 	if (nfs3_async_handle_jukebox(task))
 		return;
-	data = (struct nfs_write_data *)task->tk_calldata;
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_commit_done(task);
+	nfs_commit_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs3_commit_ops = {
+	.rpc_call_done = nfs3_commit_done,
+	.rpc_release = nfs_commit_release,
+};
+
 static void
 nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 {
@@ -840,7 +850,7 @@ nfs3_proc_commit_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs3_commit_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs3_commit_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index f988a9417b13..3d5d3c07d621 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -196,14 +196,12 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 
 /* Helper for asynchronous RPC calls */
 static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
-		rpc_action tk_exit, void *calldata)
+		const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	struct rpc_task *task;
 
-	if (!(task = rpc_new_task(clnt, tk_exit, RPC_TASK_ASYNC)))
+	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
 		return -ENOMEM;
-
-	task->tk_calldata = calldata;
 	task->tk_action = tk_begin;
 	rpc_execute(task);
 	return 0;
@@ -867,10 +865,10 @@ struct nfs4_closedata {
 	struct nfs_fattr fattr;
 };
 
-static void nfs4_free_closedata(struct nfs4_closedata *calldata)
+static void nfs4_free_closedata(void *data)
 {
-	struct nfs4_state *state = calldata->state;
-	struct nfs4_state_owner *sp = state->owner;
+	struct nfs4_closedata *calldata = data;
+	struct nfs4_state_owner *sp = calldata->state->owner;
 
 	nfs4_put_open_state(calldata->state);
 	nfs_free_seqid(calldata->arg.seqid);
@@ -878,9 +876,9 @@ static void nfs4_free_closedata(struct nfs4_closedata *calldata)
 	kfree(calldata);
 }
 
-static void nfs4_close_done(struct rpc_task *task)
+static void nfs4_close_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
@@ -904,7 +902,6 @@ static void nfs4_close_done(struct rpc_task *task)
 			}
 	}
 	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
-	nfs4_free_closedata(calldata);
 }
 
 static void nfs4_close_begin(struct rpc_task *task)
@@ -918,10 +915,8 @@ static void nfs4_close_begin(struct rpc_task *task)
 		.rpc_cred = state->owner->so_cred,
 	};
 	int mode = 0, old_mode;
-	int status;
 
-	status = nfs_wait_on_sequence(calldata->arg.seqid, task);
-	if (status != 0)
+	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
 	/* Recalculate the new open mode in case someone reopened the file
 	 * while we were waiting in line to be scheduled.
@@ -937,9 +932,8 @@ static void nfs4_close_begin(struct rpc_task *task)
 	spin_unlock(&calldata->inode->i_lock);
 	spin_unlock(&state->owner->so_lock);
 	if (mode == old_mode || test_bit(NFS_DELEGATED_STATE, &state->flags)) {
-		nfs4_free_closedata(calldata);
-		task->tk_exit = NULL;
-		rpc_exit(task, 0);
+		/* Note: exit _without_ calling nfs4_close_done */
+		task->tk_action = NULL;
 		return;
 	}
 	nfs_fattr_init(calldata->res.fattr);
@@ -949,6 +943,11 @@ static void nfs4_close_begin(struct rpc_task *task)
 	rpc_call_setup(task, &msg, 0);
 }
 
+static const struct rpc_call_ops nfs4_close_ops = {
+	.rpc_call_done = nfs4_close_done,
+	.rpc_release = nfs4_free_closedata,
+};
+
 /* 
  * It is possible for data to be read/written from a mem-mapped file 
  * after the sys_close call (which hits the vfs layer as a flush).
@@ -982,7 +981,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->res.server = server;
 
 	status = nfs4_call_async(server->client, nfs4_close_begin,
-			nfs4_close_done, calldata);
+			&nfs4_close_ops, calldata);
 	if (status == 0)
 		goto out;
 
@@ -2125,10 +2124,9 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 	return err;
 }
 
-static void
-nfs4_read_done(struct rpc_task *task)
+static void nfs4_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 	struct inode *inode = data->inode;
 
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2138,9 +2136,14 @@ nfs4_read_done(struct rpc_task *task)
 	if (task->tk_status > 0)
 		renew_lease(NFS_SERVER(inode), data->timestamp);
 	/* Call back common NFS readpage processing */
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_read_ops = {
+	.rpc_call_done = nfs4_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs4_proc_read_setup(struct nfs_read_data *data)
 {
@@ -2160,14 +2163,13 @@ nfs4_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs4_write_done(struct rpc_task *task)
+static void nfs4_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2179,9 +2181,14 @@ nfs4_write_done(struct rpc_task *task)
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	}
 	/* Call back common NFS writeback processing */
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_write_ops = {
+	.rpc_call_done = nfs4_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -2214,14 +2221,13 @@ nfs4_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs4_commit_done(struct rpc_task *task)
+static void nfs4_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 	struct inode *inode = data->inode;
 	
 	if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
@@ -2231,9 +2237,14 @@ nfs4_commit_done(struct rpc_task *task)
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(inode, data->res.fattr);
 	/* Call back common NFS writeback processing */
-	nfs_commit_done(task);
+	nfs_commit_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs4_commit_ops = {
+	.rpc_call_done = nfs4_commit_done,
+	.rpc_release = nfs_commit_release,
+};
+
 static void
 nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 {
@@ -2255,7 +2266,7 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs4_commit_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs4_commit_ops, data);
 	rpc_call_setup(task, &msg, 0);	
 }
 
@@ -2263,11 +2274,10 @@ nfs4_proc_commit_setup(struct nfs_write_data *data, int how)
  * nfs4_proc_async_renew(): This is not one of the nfs_rpc_ops; it is a special
  * standalone procedure for queueing an asynchronous RENEW.
  */
-static void
-renew_done(struct rpc_task *task)
+static void nfs4_renew_done(struct rpc_task *task, void *data)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
-	unsigned long timestamp = (unsigned long)task->tk_calldata;
+	unsigned long timestamp = (unsigned long)data;
 
 	if (task->tk_status < 0) {
 		switch (task->tk_status) {
@@ -2284,6 +2294,10 @@ renew_done(struct rpc_task *task)
 	spin_unlock(&clp->cl_lock);
 }
 
+static const struct rpc_call_ops nfs4_renew_ops = {
+	.rpc_call_done = nfs4_renew_done,
+};
+
 int
 nfs4_proc_async_renew(struct nfs4_client *clp)
 {
@@ -2294,7 +2308,7 @@ nfs4_proc_async_renew(struct nfs4_client *clp)
 	};
 
 	return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_SOFT,
-			renew_done, (void *)jiffies);
+			&nfs4_renew_ops, (void *)jiffies);
 }
 
 int
@@ -2866,15 +2880,16 @@ static void nfs4_locku_release_calldata(struct nfs4_unlockdata *calldata)
 	}
 }
 
-static void nfs4_locku_complete(struct nfs4_unlockdata *calldata)
+static void nfs4_locku_complete(void *data)
 {
+	struct nfs4_unlockdata *calldata = data;
 	complete(&calldata->completion);
 	nfs4_locku_release_calldata(calldata);
 }
 
-static void nfs4_locku_done(struct rpc_task *task)
+static void nfs4_locku_done(struct rpc_task *task, void *data)
 {
-	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+	struct nfs4_unlockdata *calldata = data;
 
 	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
 	switch (task->tk_status) {
@@ -2890,10 +2905,8 @@ static void nfs4_locku_done(struct rpc_task *task)
 		default:
 			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
 				rpc_restart_call(task);
-				return;
 			}
 	}
-	nfs4_locku_complete(calldata);
 }
 
 static void nfs4_locku_begin(struct rpc_task *task)
@@ -2911,14 +2924,18 @@ static void nfs4_locku_begin(struct rpc_task *task)
 	if (status != 0)
 		return;
 	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
-		nfs4_locku_complete(calldata);
-		task->tk_exit = NULL;
-		rpc_exit(task, 0);
+		/* Note: exit _without_ running nfs4_locku_done */
+		task->tk_action = NULL;
 		return;
 	}
 	rpc_call_setup(task, &msg, 0);
 }
 
+static const struct rpc_call_ops nfs4_locku_ops = {
+	.rpc_call_done = nfs4_locku_done,
+	.rpc_release = nfs4_locku_complete,
+};
+
 static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
 	struct nfs4_unlockdata *calldata;
@@ -2963,7 +2980,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	init_completion(&calldata->completion);
 
 	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
-			nfs4_locku_done, calldata);
+			&nfs4_locku_ops, calldata);
 	if (status == 0)
 		wait_for_completion_interruptible(&calldata->completion);
 	do_vfs_lock(request->fl_file, request);
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e1e3ca5d746b..6145e82b45e8 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -547,10 +547,9 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
 
 extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int);
 
-static void
-nfs_read_done(struct rpc_task *task)
+static void nfs_read_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 
 	if (task->tk_status >= 0) {
 		nfs_refresh_inode(data->inode, data->res.fattr);
@@ -560,9 +559,14 @@ nfs_read_done(struct rpc_task *task)
 		if (data->args.offset + data->args.count >= data->res.fattr->size)
 			data->res.eof = 1;
 	}
-	nfs_readpage_result(task);
+	nfs_readpage_result(task, calldata);
 }
 
+static const struct rpc_call_ops nfs_read_ops = {
+	.rpc_call_done = nfs_read_done,
+	.rpc_release = nfs_readdata_release,
+};
+
 static void
 nfs_proc_read_setup(struct nfs_read_data *data)
 {
@@ -580,20 +584,24 @@ nfs_proc_read_setup(struct nfs_read_data *data)
 	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs_read_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_read_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
-static void
-nfs_write_done(struct rpc_task *task)
+static void nfs_write_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data *data = calldata;
 
 	if (task->tk_status >= 0)
 		nfs_post_op_update_inode(data->inode, data->res.fattr);
-	nfs_writeback_done(task);
+	nfs_writeback_done(task, calldata);
 }
 
+static const struct rpc_call_ops nfs_write_ops = {
+	.rpc_call_done = nfs_write_done,
+	.rpc_release = nfs_writedata_release,
+};
+
 static void
 nfs_proc_write_setup(struct nfs_write_data *data, int how)
 {
@@ -614,7 +622,7 @@ nfs_proc_write_setup(struct nfs_write_data *data, int how)
 	flags = (how & FLUSH_SYNC) ? 0 : RPC_TASK_ASYNC;
 
 	/* Finalize the task. */
-	rpc_init_task(task, NFS_CLIENT(inode), nfs_write_done, flags);
+	rpc_init_task(task, NFS_CLIENT(inode), flags, &nfs_write_ops, data);
 	rpc_call_setup(task, &msg, 0);
 }
 
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 5f20eafba8ec..21486242c3d3 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -42,9 +42,8 @@ mempool_t *nfs_rdata_mempool;
 
 #define MIN_POOL_READ	(32)
 
-void nfs_readdata_release(struct rpc_task *task)
+void nfs_readdata_release(void *data)
 {
-        struct nfs_read_data   *data = (struct nfs_read_data *)task->tk_calldata;
         nfs_readdata_free(data);
 }
 
@@ -220,9 +219,6 @@ static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
 	NFS_PROTO(inode)->read_setup(data);
 
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_readdata_release;
 
 	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 			data->task.tk_pid,
@@ -452,9 +448,9 @@ static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
  */
-void nfs_readpage_result(struct rpc_task *task)
+void nfs_readpage_result(struct rpc_task *task, void *calldata)
 {
-	struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
+	struct nfs_read_data *data = calldata;
 	struct nfs_readargs *argp = &data->args;
 	struct nfs_readres *resp = &data->res;
 	int status = task->tk_status;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index d639d172d568..1494484ba86d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -116,10 +116,9 @@ nfs_async_unlink_init(struct rpc_task *task)
  *
  * Do the directory attribute update.
  */
-static void
-nfs_async_unlink_done(struct rpc_task *task)
+static void nfs_async_unlink_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	struct dentry		*dir = data->dir;
 	struct inode		*dir_i;
 
@@ -141,13 +140,17 @@ nfs_async_unlink_done(struct rpc_task *task)
  * We need to call nfs_put_unlinkdata as a 'tk_release' task since the
  * rpc_task would be freed too.
  */
-static void
-nfs_async_unlink_release(struct rpc_task *task)
+static void nfs_async_unlink_release(void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	nfs_put_unlinkdata(data);
 }
 
+static const struct rpc_call_ops nfs_unlink_ops = {
+	.rpc_call_done = nfs_async_unlink_done,
+	.rpc_release = nfs_async_unlink_release,
+};
+
 /**
  * nfs_async_unlink - asynchronous unlinking of a file
  * @dentry: dentry to unlink
@@ -179,10 +182,8 @@ nfs_async_unlink(struct dentry *dentry)
 	data->count = 1;
 
 	task = &data->task;
-	rpc_init_task(task, clnt, nfs_async_unlink_done , RPC_TASK_ASYNC);
-	task->tk_calldata = data;
+	rpc_init_task(task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data);
 	task->tk_action = nfs_async_unlink_init;
-	task->tk_release = nfs_async_unlink_release;
 
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 95d00f9132d0..80bc4ea1b824 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -104,9 +104,8 @@ static inline void nfs_commit_free(struct nfs_write_data *p)
 	mempool_free(p, nfs_commit_mempool);
 }
 
-static void nfs_writedata_release(struct rpc_task *task)
+void nfs_writedata_release(void *wdata)
 {
-	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_writedata_free(wdata);
 }
 
@@ -871,9 +870,6 @@ static void nfs_write_rpcsetup(struct nfs_page *req,
 
 	data->task.tk_priority = flush_task_priority(how);
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_writedata_release;
 
 	dprintk("NFS: %4d initiated write call (req %s/%Ld, %u bytes @ offset %Lu)\n",
 		data->task.tk_pid,
@@ -1131,9 +1127,9 @@ static void nfs_writeback_done_full(struct nfs_write_data *data, int status)
 /*
  * This function is called when the WRITE call is complete.
  */
-void nfs_writeback_done(struct rpc_task *task)
+void nfs_writeback_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = (struct nfs_write_data *) task->tk_calldata;
+	struct nfs_write_data	*data = calldata;
 	struct nfs_writeargs	*argp = &data->args;
 	struct nfs_writeres	*resp = &data->res;
 
@@ -1200,9 +1196,8 @@ void nfs_writeback_done(struct rpc_task *task)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-static void nfs_commit_release(struct rpc_task *task)
+void nfs_commit_release(void *wdata)
 {
-	struct nfs_write_data	*wdata = (struct nfs_write_data *)task->tk_calldata;
 	nfs_commit_free(wdata);
 }
 
@@ -1238,9 +1233,6 @@ static void nfs_commit_rpcsetup(struct list_head *head,
 
 	data->task.tk_priority = flush_task_priority(how);
 	data->task.tk_cookie = (unsigned long)inode;
-	data->task.tk_calldata = data;
-	/* Release requests */
-	data->task.tk_release = nfs_commit_release;
 	
 	dprintk("NFS: %4d initiated commit call\n", data->task.tk_pid);
 }
@@ -1277,10 +1269,9 @@ nfs_commit_list(struct list_head *head, int how)
 /*
  * COMMIT call returned
  */
-void
-nfs_commit_done(struct rpc_task *task)
+void nfs_commit_done(struct rpc_task *task, void *calldata)
 {
-	struct nfs_write_data	*data = (struct nfs_write_data *)task->tk_calldata;
+	struct nfs_write_data	*data = calldata;
 	struct nfs_page		*req;
 	int res = 0;
 
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index 583c0710e45e..cf92008f219a 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -53,7 +53,7 @@
 #define NFSPROC4_CB_COMPOUND 1
 
 /* declarations */
-static void nfs4_cb_null(struct rpc_task *task);
+static const struct rpc_call_ops nfs4_cb_null_ops;
 
 /* Index of predefined Linux callback client operations */
 
@@ -447,7 +447,7 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	msg.rpc_cred = nfsd4_lookupcred(clp,0);
 	if (IS_ERR(msg.rpc_cred))
 		goto out_rpciod;
-	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, nfs4_cb_null, NULL);
+	status = rpc_call_async(clnt, &msg, RPC_TASK_ASYNC, &nfs4_cb_null_ops, NULL);
 	put_rpccred(msg.rpc_cred);
 
 	if (status != 0) {
@@ -469,7 +469,7 @@ out_err:
 }
 
 static void
-nfs4_cb_null(struct rpc_task *task)
+nfs4_cb_null(struct rpc_task *task, void *dummy)
 {
 	struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp;
 	struct nfs4_callback *cb = &clp->cl_callback;
@@ -488,6 +488,10 @@ out:
 	put_nfs4_client(clp);
 }
 
+static const struct rpc_call_ops nfs4_cb_null_ops = {
+	.rpc_call_done = nfs4_cb_null,
+};
+
 /*
  * called with dp->dl_count inc'ed.
  * nfs4_lock_state() may or may not have been called.
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index 16d4e5a08e1d..95c8fea293ba 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -172,7 +172,7 @@ extern struct nlm_host *nlm_find_client(void);
 /*
  * Server-side lock handling
  */
-int		  nlmsvc_async_call(struct nlm_rqst *, u32, rpc_action);
+int		  nlmsvc_async_call(struct nlm_rqst *, u32, const struct rpc_call_ops *);
 u32		  nlmsvc_lock(struct svc_rqst *, struct nlm_file *,
 					struct nlm_lock *, int, struct nlm_cookie *);
 u32		  nlmsvc_unlock(struct nlm_file *, struct nlm_lock *);
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 2516adeccecf..4dff705d2ff2 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -406,10 +406,12 @@ extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *task);
+extern void nfs_writeback_done(struct rpc_task *task, void *data);
+extern void nfs_writedata_release(void *data);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-extern void nfs_commit_done(struct rpc_task *);
+extern void nfs_commit_done(struct rpc_task *, void *data);
+extern void nfs_commit_release(void *data);
 #endif
 
 /*
@@ -481,7 +483,9 @@ static inline void nfs_writedata_free(struct nfs_write_data *p)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
 		struct list_head *, unsigned);
-extern void nfs_readpage_result(struct rpc_task *);
+extern void nfs_readpage_result(struct rpc_task *, void *);
+extern void  nfs_readdata_release(void *data);
+
 
 /*
  * Allocate and free nfs_read_data structures
@@ -501,8 +505,6 @@ static inline void nfs_readdata_free(struct nfs_read_data *p)
 	mempool_free(p, nfs_rdata_mempool);
 }
 
-extern void  nfs_readdata_release(struct rpc_task *task);
-
 /*
  * linux/fs/nfs3proc.c
  */
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index ab151bbb66df..b0ab959eca65 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -126,7 +126,8 @@ int		rpc_register(u32, u32, int, unsigned short, int *);
 void		rpc_call_setup(struct rpc_task *, struct rpc_message *, int);
 
 int		rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg,
-			       int flags, rpc_action callback, void *clntdata);
+			       int flags, const struct rpc_call_ops *tk_ops,
+			       void *calldata);
 int		rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg,
 			      int flags);
 void		rpc_restart_call(struct rpc_task *);
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 4c4b2dc8aca5..581d8cdc3b86 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -27,6 +27,7 @@ struct rpc_message {
 	struct rpc_cred *	rpc_cred;	/* Credentials */
 };
 
+struct rpc_call_ops;
 struct rpc_wait_queue;
 struct rpc_wait {
 	struct list_head	list;		/* wait queue links */
@@ -61,13 +62,12 @@ struct rpc_task {
 	 * timeout_fn   to be executed by timer bottom half
 	 * callback	to be executed after waking up
 	 * action	next procedure for async tasks
-	 * exit		exit async task and report to caller
+	 * tk_ops	caller callbacks
 	 */
 	void			(*tk_timeout_fn)(struct rpc_task *);
 	void			(*tk_callback)(struct rpc_task *);
 	void			(*tk_action)(struct rpc_task *);
-	void			(*tk_exit)(struct rpc_task *);
-	void			(*tk_release)(struct rpc_task *);
+	const struct rpc_call_ops *tk_ops;
 	void *			tk_calldata;
 
 	/*
@@ -111,6 +111,12 @@ struct rpc_task {
 
 typedef void			(*rpc_action)(struct rpc_task *);
 
+struct rpc_call_ops {
+	void (*rpc_call_done)(struct rpc_task *, void *);
+	void (*rpc_release)(void *);
+};
+
+
 /*
  * RPC task flags
  */
@@ -228,10 +234,12 @@ struct rpc_wait_queue {
 /*
  * Function prototypes
  */
-struct rpc_task *rpc_new_task(struct rpc_clnt *, rpc_action, int flags);
+struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
+				const struct rpc_call_ops *ops, void *data);
 struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent);
-void		rpc_init_task(struct rpc_task *, struct rpc_clnt *,
-					rpc_action exitfunc, int flags);
+void		rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
+				int flags, const struct rpc_call_ops *ops,
+				void *data);
 void		rpc_release_task(struct rpc_task *);
 void		rpc_exit_task(struct rpc_task *);
 void		rpc_killall_tasks(struct rpc_clnt *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 6ab4cbd8a901..8b2f75bc006d 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -374,10 +374,14 @@ out:
  * Default callback for async RPC calls
  */
 static void
-rpc_default_callback(struct rpc_task *task)
+rpc_default_callback(struct rpc_task *task, void *data)
 {
 }
 
+static const struct rpc_call_ops rpc_default_ops = {
+	.rpc_call_done = rpc_default_callback,
+};
+
 /*
  *	Export the signal mask handling for synchronous code that
  *	sleeps on RPC calls
@@ -432,7 +436,7 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 	BUG_ON(flags & RPC_TASK_ASYNC);
 
 	status = -ENOMEM;
-	task = rpc_new_task(clnt, NULL, flags);
+	task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
 	if (task == NULL)
 		goto out;
 
@@ -459,7 +463,7 @@ out:
  */
 int
 rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
-	       rpc_action callback, void *data)
+	       const struct rpc_call_ops *tk_ops, void *data)
 {
 	struct rpc_task	*task;
 	sigset_t	oldset;
@@ -472,12 +476,9 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
 	flags |= RPC_TASK_ASYNC;
 
 	/* Create/initialize a new RPC task */
-	if (!callback)
-		callback = rpc_default_callback;
 	status = -ENOMEM;
-	if (!(task = rpc_new_task(clnt, callback, flags)))
+	if (!(task = rpc_new_task(clnt, flags, tk_ops, data)))
 		goto out;
-	task->tk_calldata = data;
 
 	/* Mask signals on GSS_AUTH upcalls */
 	rpc_task_sigmask(task, &oldset);		
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 3fcf7b0e1f6c..8d6233d3248b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -555,13 +555,13 @@ __rpc_atrun(struct rpc_task *task)
 }
 
 /*
- * Helper that calls task->tk_exit if it exists
+ * Helper that calls task->tk_ops->rpc_call_done if it exists
  */
 void rpc_exit_task(struct rpc_task *task)
 {
 	task->tk_action = NULL;
-	if (task->tk_exit != NULL) {
-		task->tk_exit(task);
+	if (task->tk_ops->rpc_call_done != NULL) {
+		task->tk_ops->rpc_call_done(task, task->tk_calldata);
 		if (task->tk_action != NULL) {
 			WARN_ON(RPC_ASSASSINATED(task));
 			/* Always release the RPC slot and buffer memory */
@@ -747,7 +747,7 @@ rpc_free(struct rpc_task *task)
 /*
  * Creation and deletion of RPC task structures
  */
-void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action callback, int flags)
+void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	memset(task, 0, sizeof(*task));
 	init_timer(&task->tk_timer);
@@ -755,7 +755,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
 	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
-	task->tk_exit   = callback;
+	task->tk_ops = tk_ops;
+	task->tk_calldata = calldata;
 
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
@@ -784,6 +785,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, rpc_action call
 	list_add_tail(&task->tk_task, &all_tasks);
 	spin_unlock(&rpc_sched_lock);
 
+	BUG_ON(task->tk_ops == NULL);
+
 	dprintk("RPC: %4d new task procpid %d\n", task->tk_pid,
 				current->pid);
 }
@@ -794,8 +797,7 @@ rpc_alloc_task(void)
 	return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
-static void
-rpc_default_free_task(struct rpc_task *task)
+static void rpc_free_task(struct rpc_task *task)
 {
 	dprintk("RPC: %4d freeing task\n", task->tk_pid);
 	mempool_free(task, rpc_task_mempool);
@@ -806,8 +808,7 @@ rpc_default_free_task(struct rpc_task *task)
  * clean up after an allocation failure, as the client may
  * have specified "oneshot".
  */
-struct rpc_task *
-rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
+struct rpc_task *rpc_new_task(struct rpc_clnt *clnt, int flags, const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	struct rpc_task	*task;
 
@@ -815,10 +816,7 @@ rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
 	if (!task)
 		goto cleanup;
 
-	rpc_init_task(task, clnt, callback, flags);
-
-	/* Replace tk_release */
-	task->tk_release = rpc_default_free_task;
+	rpc_init_task(task, clnt, flags, tk_ops, calldata);
 
 	dprintk("RPC: %4d allocated task\n", task->tk_pid);
 	task->tk_flags |= RPC_TASK_DYNAMIC;
@@ -838,6 +836,8 @@ cleanup:
 
 void rpc_release_task(struct rpc_task *task)
 {
+	const struct rpc_call_ops *tk_ops = task->tk_ops;
+	void *calldata = task->tk_calldata;
 	dprintk("RPC: %4d release task\n", task->tk_pid);
 
 #ifdef RPC_DEBUG
@@ -869,8 +869,10 @@ void rpc_release_task(struct rpc_task *task)
 #ifdef RPC_DEBUG
 	task->tk_magic = 0;
 #endif
-	if (task->tk_release)
-		task->tk_release(task);
+	if (task->tk_flags & RPC_TASK_DYNAMIC)
+		rpc_free_task(task);
+	if (tk_ops->rpc_release)
+		tk_ops->rpc_release(calldata);
 }
 
 /**
@@ -883,12 +885,11 @@ void rpc_release_task(struct rpc_task *task)
  *
  * Caller must hold childq.lock
  */
-static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
+static inline struct rpc_task *rpc_find_parent(struct rpc_task *child, struct rpc_task *parent)
 {
-	struct rpc_task	*task, *parent;
+	struct rpc_task	*task;
 	struct list_head *le;
 
-	parent = (struct rpc_task *) child->tk_calldata;
 	task_for_each(task, le, &childq.tasks[0])
 		if (task == parent)
 			return parent;
@@ -896,18 +897,22 @@ static inline struct rpc_task *rpc_find_parent(struct rpc_task *child)
 	return NULL;
 }
 
-static void rpc_child_exit(struct rpc_task *child)
+static void rpc_child_exit(struct rpc_task *child, void *calldata)
 {
 	struct rpc_task	*parent;
 
 	spin_lock_bh(&childq.lock);
-	if ((parent = rpc_find_parent(child)) != NULL) {
+	if ((parent = rpc_find_parent(child, calldata)) != NULL) {
 		parent->tk_status = child->tk_status;
 		__rpc_wake_up_task(parent);
 	}
 	spin_unlock_bh(&childq.lock);
 }
 
+static const struct rpc_call_ops rpc_child_ops = {
+	.rpc_call_done = rpc_child_exit,
+};
+
 /*
  * Note: rpc_new_task releases the client after a failure.
  */
@@ -916,11 +921,9 @@ rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
 {
 	struct rpc_task	*task;
 
-	task = rpc_new_task(clnt, NULL, RPC_TASK_ASYNC | RPC_TASK_CHILD);
+	task = rpc_new_task(clnt, RPC_TASK_ASYNC | RPC_TASK_CHILD, &rpc_child_ops, parent);
 	if (!task)
 		goto fail;
-	task->tk_exit = rpc_child_exit;
-	task->tk_calldata = parent;
 	return task;
 
 fail:
@@ -1056,7 +1059,7 @@ void rpc_show_tasks(void)
 		return;
 	}
 	printk("-pid- proc flgs status -client- -prog- --rqstp- -timeout "
-		"-rpcwait -action- --exit--\n");
+		"-rpcwait -action- ---ops--\n");
 	alltask_for_each(t, le, &all_tasks) {
 		const char *rpc_waitq = "none";
 
@@ -1071,7 +1074,7 @@ void rpc_show_tasks(void)
 			(t->tk_client ? t->tk_client->cl_prog : 0),
 			t->tk_rqstp, t->tk_timeout,
 			rpc_waitq,
-			t->tk_action, t->tk_exit);
+			t->tk_action, t->tk_ops);
 	}
 	spin_unlock(&rpc_sched_lock);
 }
-- 
cgit v1.2.3


From 4ce70ada1ff1d0b80916ec9ec5764ce44a50a54f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:05 +0100
Subject: SUNRPC: Further cleanups

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c            | 21 +++++++++++----------
 fs/nfs/unlink.c              | 13 +++++--------
 include/linux/sunrpc/sched.h |  1 +
 net/sunrpc/sched.c           | 10 ++++++++++
 4 files changed, 27 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3d5d3c07d621..368b75b3bcba 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -195,14 +195,13 @@ static void update_changeattr(struct inode *inode, struct nfs4_change_info *cinf
 }
 
 /* Helper for asynchronous RPC calls */
-static int nfs4_call_async(struct rpc_clnt *clnt, rpc_action tk_begin,
+static int nfs4_call_async(struct rpc_clnt *clnt,
 		const struct rpc_call_ops *tk_ops, void *calldata)
 {
 	struct rpc_task *task;
 
 	if (!(task = rpc_new_task(clnt, RPC_TASK_ASYNC, tk_ops, calldata)))
 		return -ENOMEM;
-	task->tk_action = tk_begin;
 	rpc_execute(task);
 	return 0;
 }
@@ -882,6 +881,8 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	struct nfs4_state *state = calldata->state;
 	struct nfs_server *server = NFS_SERVER(calldata->inode);
 
+	if (RPC_ASSASSINATED(task))
+		return;
         /* hmm. we are done with the inode, and in the process of freeing
 	 * the state_owner. we keep this around to process errors
 	 */
@@ -904,9 +905,9 @@ static void nfs4_close_done(struct rpc_task *task, void *data)
 	nfs_refresh_inode(calldata->inode, calldata->res.fattr);
 }
 
-static void nfs4_close_begin(struct rpc_task *task)
+static void nfs4_close_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs4_closedata *calldata = (struct nfs4_closedata *)task->tk_calldata;
+	struct nfs4_closedata *calldata = data;
 	struct nfs4_state *state = calldata->state;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE],
@@ -944,6 +945,7 @@ static void nfs4_close_begin(struct rpc_task *task)
 }
 
 static const struct rpc_call_ops nfs4_close_ops = {
+	.rpc_call_prepare = nfs4_close_prepare,
 	.rpc_call_done = nfs4_close_done,
 	.rpc_release = nfs4_free_closedata,
 };
@@ -980,8 +982,7 @@ int nfs4_do_close(struct inode *inode, struct nfs4_state *state)
 	calldata->res.fattr = &calldata->fattr;
 	calldata->res.server = server;
 
-	status = nfs4_call_async(server->client, nfs4_close_begin,
-			&nfs4_close_ops, calldata);
+	status = nfs4_call_async(server->client, &nfs4_close_ops, calldata);
 	if (status == 0)
 		goto out;
 
@@ -2909,9 +2910,9 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 	}
 }
 
-static void nfs4_locku_begin(struct rpc_task *task)
+static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 {
-	struct nfs4_unlockdata *calldata = (struct nfs4_unlockdata *)task->tk_calldata;
+	struct nfs4_unlockdata *calldata = data;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKU],
 		.rpc_argp       = &calldata->arg,
@@ -2932,6 +2933,7 @@ static void nfs4_locku_begin(struct rpc_task *task)
 }
 
 static const struct rpc_call_ops nfs4_locku_ops = {
+	.rpc_call_prepare = nfs4_locku_prepare,
 	.rpc_call_done = nfs4_locku_done,
 	.rpc_release = nfs4_locku_complete,
 };
@@ -2979,8 +2981,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	atomic_set(&calldata->refcount, 2);
 	init_completion(&calldata->completion);
 
-	status = nfs4_call_async(NFS_SERVER(inode)->client, nfs4_locku_begin,
-			&nfs4_locku_ops, calldata);
+	status = nfs4_call_async(NFS_SERVER(inode)->client, &nfs4_locku_ops, calldata);
 	if (status == 0)
 		wait_for_completion_interruptible(&calldata->completion);
 	do_vfs_lock(request->fl_file, request);
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 1494484ba86d..a65c7b53d558 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -87,10 +87,9 @@ nfs_copy_dname(struct dentry *dentry, struct nfs_unlinkdata *data)
  * We delay initializing RPC info until after the call to dentry_iput()
  * in order to minimize races against rename().
  */
-static void
-nfs_async_unlink_init(struct rpc_task *task)
+static void nfs_async_unlink_init(struct rpc_task *task, void *calldata)
 {
-	struct nfs_unlinkdata	*data = (struct nfs_unlinkdata *)task->tk_calldata;
+	struct nfs_unlinkdata	*data = calldata;
 	struct dentry		*dir = data->dir;
 	struct rpc_message	msg = {
 		.rpc_cred	= data->cred,
@@ -147,6 +146,7 @@ static void nfs_async_unlink_release(void *calldata)
 }
 
 static const struct rpc_call_ops nfs_unlink_ops = {
+	.rpc_call_prepare = nfs_async_unlink_init,
 	.rpc_call_done = nfs_async_unlink_done,
 	.rpc_release = nfs_async_unlink_release,
 };
@@ -160,7 +160,6 @@ nfs_async_unlink(struct dentry *dentry)
 {
 	struct dentry	*dir = dentry->d_parent;
 	struct nfs_unlinkdata	*data;
-	struct rpc_task	*task;
 	struct rpc_clnt	*clnt = NFS_CLIENT(dir->d_inode);
 	int		status = -ENOMEM;
 
@@ -181,15 +180,13 @@ nfs_async_unlink(struct dentry *dentry)
 	nfs_deletes = data;
 	data->count = 1;
 
-	task = &data->task;
-	rpc_init_task(task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data);
-	task->tk_action = nfs_async_unlink_init;
+	rpc_init_task(&data->task, clnt, RPC_TASK_ASYNC, &nfs_unlink_ops, data);
 
 	spin_lock(&dentry->d_lock);
 	dentry->d_flags |= DCACHE_NFSFS_RENAMED;
 	spin_unlock(&dentry->d_lock);
 
-	rpc_sleep_on(&nfs_delete_queue, task, NULL, NULL);
+	rpc_sleep_on(&nfs_delete_queue, &data->task, NULL, NULL);
 	status = 0;
  out:
 	return status;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 581d8cdc3b86..ac1326fc3e1a 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -112,6 +112,7 @@ struct rpc_task {
 typedef void			(*rpc_action)(struct rpc_task *);
 
 struct rpc_call_ops {
+	void (*rpc_call_prepare)(struct rpc_task *, void *);
 	void (*rpc_call_done)(struct rpc_task *, void *);
 	void (*rpc_release)(void *);
 };
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 8d6233d3248b..2d74a1672028 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -554,6 +554,14 @@ __rpc_atrun(struct rpc_task *task)
 	rpc_wake_up_task(task);
 }
 
+/*
+ * Helper to call task->tk_ops->rpc_call_prepare
+ */
+static void rpc_prepare_task(struct rpc_task *task)
+{
+	task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
+}
+
 /*
  * Helper that calls task->tk_ops->rpc_call_done if it exists
  */
@@ -756,6 +764,8 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
 	task->tk_ops = tk_ops;
+	if (tk_ops->rpc_call_prepare != NULL)
+		task->tk_action = rpc_prepare_task;
 	task->tk_calldata = calldata;
 
 	/* Initialize retry counters */
-- 
cgit v1.2.3


From 44c288732fdbd7e38460d156a40d29590bf93bce Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:06 +0100
Subject: NFSv4: stateful NFSv4 RPC call interface

 The NFSv4 model requires us to complete all RPC calls that might
 establish state on the server whether or not the user wants to
 interrupt it. We may also need to schedule new work (including
 new RPC calls) in order to cancel the new state.

 The asynchronous RPC model will allow us to ensure that RPC calls
 always complete, but in order to allow for "synchronous" RPC, we
 want to add the ability to wait for completion.
 The waits are, of course, interruptible.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c              |  1 -
 include/linux/sunrpc/sched.h | 21 ++++++++++--
 net/sunrpc/sched.c           | 78 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 78 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index a834423942c7..ae2be0744191 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -268,7 +268,6 @@ static void nfs_direct_read_schedule(struct nfs_direct_req *dreq,
 		NFS_PROTO(inode)->read_setup(data);
 
 		data->task.tk_cookie = (unsigned long) inode;
-		data->task.tk_calldata = data;
 		data->complete = nfs_direct_read_result;
 
 		lock_kernel();
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ac1326fc3e1a..94b0afa4ab05 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -42,6 +42,7 @@ struct rpc_task {
 #ifdef RPC_DEBUG
 	unsigned long		tk_magic;	/* 0xf00baa */
 #endif
+	atomic_t		tk_count;	/* Reference count */
 	struct list_head	tk_task;	/* global list of tasks */
 	struct rpc_clnt *	tk_client;	/* RPC client */
 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
@@ -78,7 +79,6 @@ struct rpc_task {
 	struct timer_list	tk_timer;	/* kernel timer */
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
 	unsigned short		tk_flags;	/* misc flags */
-	unsigned char		tk_active   : 1;/* Task has been activated */
 	unsigned char		tk_priority : 2;/* Task priority */
 	unsigned long		tk_runstate;	/* Task run status */
 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
@@ -136,7 +136,6 @@ struct rpc_call_ops {
 #define RPC_IS_SWAPPER(t)	((t)->tk_flags & RPC_TASK_SWAPPER)
 #define RPC_DO_ROOTOVERRIDE(t)	((t)->tk_flags & RPC_TASK_ROOTCREDS)
 #define RPC_ASSASSINATED(t)	((t)->tk_flags & RPC_TASK_KILLED)
-#define RPC_IS_ACTIVATED(t)	((t)->tk_active)
 #define RPC_DO_CALLBACK(t)	((t)->tk_callback != NULL)
 #define RPC_IS_SOFT(t)		((t)->tk_flags & RPC_TASK_SOFT)
 #define RPC_TASK_UNINTERRUPTIBLE(t) ((t)->tk_flags & RPC_TASK_NOINTR)
@@ -145,6 +144,7 @@ struct rpc_call_ops {
 #define RPC_TASK_QUEUED		1
 #define RPC_TASK_WAKEUP		2
 #define RPC_TASK_HAS_TIMER	3
+#define RPC_TASK_ACTIVE		4
 
 #define RPC_IS_RUNNING(t)	(test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
 #define rpc_set_running(t)	(set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate))
@@ -175,6 +175,15 @@ struct rpc_call_ops {
 		smp_mb__after_clear_bit(); \
 	} while (0)
 
+#define RPC_IS_ACTIVATED(t)	(test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
+#define rpc_set_active(t)	(set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
+#define rpc_clear_active(t)	\
+	do { \
+		smp_mb__before_clear_bit(); \
+		clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \
+		smp_mb__after_clear_bit(); \
+	} while(0)
+
 /*
  * Task priorities.
  * Note: if you change these, you must also change
@@ -237,6 +246,8 @@ struct rpc_wait_queue {
  */
 struct rpc_task *rpc_new_task(struct rpc_clnt *, int flags,
 				const struct rpc_call_ops *ops, void *data);
+struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+				const struct rpc_call_ops *ops, void *data);
 struct rpc_task *rpc_new_child(struct rpc_clnt *, struct rpc_task *parent);
 void		rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
 				int flags, const struct rpc_call_ops *ops,
@@ -260,6 +271,7 @@ void *		rpc_malloc(struct rpc_task *, size_t);
 int		rpciod_up(void);
 void		rpciod_down(void);
 void		rpciod_wake_up(void);
+int		__rpc_wait_for_completion_task(struct rpc_task *task, int (*)(void *));
 #ifdef RPC_DEBUG
 void		rpc_show_tasks(void);
 #endif
@@ -272,6 +284,11 @@ static inline void rpc_exit(struct rpc_task *task, int status)
 	task->tk_action = rpc_exit_task;
 }
 
+static inline int rpc_wait_for_completion_task(struct rpc_task *task)
+{
+	return __rpc_wait_for_completion_task(task, NULL);
+}
+
 #ifdef RPC_DEBUG
 static inline const char * rpc_qname(struct rpc_wait_queue *q)
 {
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 2d74a1672028..82d158dad16d 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -264,6 +264,35 @@ void rpc_init_wait_queue(struct rpc_wait_queue *queue, const char *qname)
 }
 EXPORT_SYMBOL(rpc_init_wait_queue);
 
+static int rpc_wait_bit_interruptible(void *word)
+{
+	if (signal_pending(current))
+		return -ERESTARTSYS;
+	schedule();
+	return 0;
+}
+
+/*
+ * Mark an RPC call as having completed by clearing the 'active' bit
+ */
+static inline void rpc_mark_complete_task(struct rpc_task *task)
+{
+	rpc_clear_active(task);
+	wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
+}
+
+/*
+ * Allow callers to wait for completion of an RPC call
+ */
+int __rpc_wait_for_completion_task(struct rpc_task *task, int (*action)(void *))
+{
+	if (action == NULL)
+		action = rpc_wait_bit_interruptible;
+	return wait_on_bit(&task->tk_runstate, RPC_TASK_ACTIVE,
+			action, TASK_INTERRUPTIBLE);
+}
+EXPORT_SYMBOL(__rpc_wait_for_completion_task);
+
 /*
  * Make an RPC task runnable.
  *
@@ -299,10 +328,7 @@ static void rpc_make_runnable(struct rpc_task *task)
 static inline void
 rpc_schedule_run(struct rpc_task *task)
 {
-	/* Don't run a child twice! */
-	if (RPC_IS_ACTIVATED(task))
-		return;
-	task->tk_active = 1;
+	rpc_set_active(task);
 	rpc_make_runnable(task);
 }
 
@@ -324,8 +350,7 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 	}
 
 	/* Mark the task as being activated if so needed */
-	if (!RPC_IS_ACTIVATED(task))
-		task->tk_active = 1;
+	rpc_set_active(task);
 
 	__rpc_add_wait_queue(q, task);
 
@@ -580,14 +605,6 @@ void rpc_exit_task(struct rpc_task *task)
 }
 EXPORT_SYMBOL(rpc_exit_task);
 
-static int rpc_wait_bit_interruptible(void *word)
-{
-	if (signal_pending(current))
-		return -ERESTARTSYS;
-	schedule();
-	return 0;
-}
-
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
  */
@@ -680,6 +697,8 @@ static int __rpc_execute(struct rpc_task *task)
 	dprintk("RPC: %4d exit() = %d\n", task->tk_pid, task->tk_status);
 	status = task->tk_status;
 
+	/* Wake up anyone who is waiting for task completion */
+	rpc_mark_complete_task(task);
 	/* Release all resources associated with the task */
 	rpc_release_task(task);
 	return status;
@@ -697,9 +716,7 @@ static int __rpc_execute(struct rpc_task *task)
 int
 rpc_execute(struct rpc_task *task)
 {
-	BUG_ON(task->tk_active);
-
-	task->tk_active = 1;
+	rpc_set_active(task);
 	rpc_set_running(task);
 	return __rpc_execute(task);
 }
@@ -761,6 +778,7 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
 	init_timer(&task->tk_timer);
 	task->tk_timer.data     = (unsigned long) task;
 	task->tk_timer.function = (void (*)(unsigned long)) rpc_run_timer;
+	atomic_set(&task->tk_count, 1);
 	task->tk_client = clnt;
 	task->tk_flags  = flags;
 	task->tk_ops = tk_ops;
@@ -848,11 +866,13 @@ void rpc_release_task(struct rpc_task *task)
 {
 	const struct rpc_call_ops *tk_ops = task->tk_ops;
 	void *calldata = task->tk_calldata;
-	dprintk("RPC: %4d release task\n", task->tk_pid);
 
 #ifdef RPC_DEBUG
 	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
 #endif
+	if (!atomic_dec_and_test(&task->tk_count))
+		return;
+	dprintk("RPC: %4d release task\n", task->tk_pid);
 
 	/* Remove from global task list */
 	spin_lock(&rpc_sched_lock);
@@ -860,7 +880,6 @@ void rpc_release_task(struct rpc_task *task)
 	spin_unlock(&rpc_sched_lock);
 
 	BUG_ON (RPC_IS_QUEUED(task));
-	task->tk_active = 0;
 
 	/* Synchronously delete any running timer */
 	rpc_delete_timer(task);
@@ -885,6 +904,27 @@ void rpc_release_task(struct rpc_task *task)
 		tk_ops->rpc_release(calldata);
 }
 
+/**
+ * rpc_run_task - Allocate a new RPC task, then run rpc_execute against it
+ * @clnt - pointer to RPC client
+ * @flags - RPC flags
+ * @ops - RPC call ops
+ * @data - user call data
+ */
+struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
+					const struct rpc_call_ops *ops,
+					void *data)
+{
+	struct rpc_task *task;
+	task = rpc_new_task(clnt, flags, ops, data);
+	if (task == NULL)
+		return ERR_PTR(-ENOMEM);
+	atomic_inc(&task->tk_count);
+	rpc_execute(task);
+	return task;
+}
+EXPORT_SYMBOL(rpc_run_task);
+
 /**
  * rpc_find_parent - find the parent of a child task.
  * @child: child task
-- 
cgit v1.2.3


From cdd4e68b5f0ed12c64b3e2be83655d2a47588a74 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:12 +0100
Subject: NFSv4: Make open_confirm() asynchronous too

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 101 +++++++++++++++++++++++++++++++++++++-----------
 fs/nfs/nfs4xdr.c        |   7 +---
 include/linux/nfs_xdr.h |   2 +-
 3 files changed, 81 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index aed8701c1a36..8154f2579469 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -57,7 +57,8 @@
 #define NFS4_POLL_RETRY_MIN	(1*HZ)
 #define NFS4_POLL_RETRY_MAX	(15*HZ)
 
-static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid);
+struct nfs4_opendata;
+static int _nfs4_proc_open_confirm(struct nfs4_opendata *data);
 static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
 static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *);
 static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry);
@@ -198,6 +199,8 @@ struct nfs4_opendata {
 	atomic_t count;
 	struct nfs_openargs o_arg;
 	struct nfs_openres o_res;
+	struct nfs_open_confirmargs c_arg;
+	struct nfs_open_confirmres c_res;
 	struct nfs_fattr f_attr;
 	struct nfs_fattr dir_attr;
 	struct dentry *dentry;
@@ -249,6 +252,9 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
 		p->o_arg.u.attrs = &p->attrs;
 		memcpy(&p->attrs, attrs, sizeof(p->attrs));
 	}
+	p->c_arg.fh = &p->o_res.fh;
+	p->c_arg.stateid = &p->o_res.stateid;
+	p->c_arg.seqid = p->o_arg.seqid;
 	return p;
 err_free:
 	kfree(p);
@@ -433,8 +439,7 @@ static int _nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state
 	if (status != 0)
 		goto out_free;
 	if(opendata->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM) {
-		status = _nfs4_proc_open_confirm(server->client, NFS_FH(inode),
-				sp, &opendata->o_res.stateid, opendata->o_arg.seqid);
+		status = _nfs4_proc_open_confirm(opendata);
 		if (status != 0)
 			goto out_free;
 	}
@@ -472,28 +477,79 @@ int nfs4_open_delegation_recall(struct dentry *dentry, struct nfs4_state *state)
 	return err;
 }
 
-static int _nfs4_proc_open_confirm(struct rpc_clnt *clnt, const struct nfs_fh *fh, struct nfs4_state_owner *sp, nfs4_stateid *stateid, struct nfs_seqid *seqid)
+static void nfs4_open_confirm_prepare(struct rpc_task *task, void *calldata)
 {
-	struct nfs_open_confirmargs arg = {
-		.fh             = fh,
-		.seqid          = seqid,
-		.stateid	= *stateid,
-	};
-	struct nfs_open_confirmres res;
-	struct 	rpc_message msg = {
-		.rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
-		.rpc_argp       = &arg,
-		.rpc_resp       = &res,
-		.rpc_cred	= sp->so_cred,
+	struct nfs4_opendata *data = calldata;
+	struct  rpc_message msg = {
+		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_CONFIRM],
+		.rpc_argp = &data->c_arg,
+		.rpc_resp = &data->c_res,
+		.rpc_cred = data->owner->so_cred,
 	};
+	rpc_call_setup(task, &msg, 0);
+}
+
+static void nfs4_open_confirm_done(struct rpc_task *task, void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+
+	data->rpc_status = task->tk_status;
+	if (RPC_ASSASSINATED(task))
+		return;
+	if (data->rpc_status == 0)
+		memcpy(data->o_res.stateid.data, data->c_res.stateid.data,
+				sizeof(data->o_res.stateid.data));
+	nfs_increment_open_seqid(data->rpc_status, data->c_arg.seqid);
+	nfs_confirm_seqid(&data->owner->so_seqid, data->rpc_status);
+}
+
+static void nfs4_open_confirm_release(void *calldata)
+{
+	struct nfs4_opendata *data = calldata;
+	struct nfs4_state *state = NULL;
+
+	/* If this request hasn't been cancelled, do nothing */
+	if (data->cancelled == 0)
+		goto out_free;
+	/* In case of error, no cleanup! */
+	if (data->rpc_status != 0)
+		goto out_free;
+	nfs_confirm_seqid(&data->owner->so_seqid, 0);
+	state = nfs4_opendata_to_nfs4_state(data);
+	if (state != NULL)
+		nfs4_close_state(state, data->o_arg.open_flags);
+out_free:
+	nfs4_opendata_free(data);
+}
+
+static const struct rpc_call_ops nfs4_open_confirm_ops = {
+	.rpc_call_prepare = nfs4_open_confirm_prepare,
+	.rpc_call_done = nfs4_open_confirm_done,
+	.rpc_release = nfs4_open_confirm_release,
+};
+
+/*
+ * Note: On error, nfs4_proc_open_confirm will free the struct nfs4_opendata
+ */
+static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
+{
+	struct nfs_server *server = NFS_SERVER(data->dir->d_inode);
+	struct rpc_task *task;
 	int status;
 
-	status = rpc_call_sync(clnt, &msg, RPC_TASK_NOINTR);
-	/* Confirm the sequence as being established */
-	nfs_confirm_seqid(&sp->so_seqid, status);
-	nfs_increment_open_seqid(status, seqid);
-	if (status >= 0)
-		memcpy(stateid, &res.stateid, sizeof(*stateid));
+	atomic_inc(&data->count);
+	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_open_confirm_ops, data);
+	if (IS_ERR(task)) {
+		nfs4_opendata_free(data);
+		return PTR_ERR(task);
+	}
+	status = nfs4_wait_for_completion_rpc_task(task);
+	if (status != 0) {
+		data->cancelled = 1;
+		smp_wmb();
+	} else
+		status = data->rpc_status;
+	rpc_release_task(task);
 	return status;
 }
 
@@ -602,8 +658,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
 	} else
 		nfs_refresh_inode(dir, o_res->dir_attr);
 	if(o_res->rflags & NFS4_OPEN_RESULT_CONFIRM) {
-		status = _nfs4_proc_open_confirm(server->client, &o_res->fh,
-				data->owner, &o_res->stateid, o_arg->seqid);
+		status = _nfs4_proc_open_confirm(data);
 		if (status != 0)
 			return status;
 	}
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index db2bcf722f91..2ba9906f2a51 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -964,9 +964,9 @@ static int encode_open_confirm(struct xdr_stream *xdr, const struct nfs_open_con
 {
 	uint32_t *p;
 
-	RESERVE_SPACE(8+sizeof(arg->stateid.data));
+	RESERVE_SPACE(8+sizeof(arg->stateid->data));
 	WRITE32(OP_OPEN_CONFIRM);
-	WRITEMEM(arg->stateid.data, sizeof(arg->stateid.data));
+	WRITEMEM(arg->stateid->data, sizeof(arg->stateid->data));
 	WRITE32(arg->seqid->sequence->counter);
 
 	return 0;
@@ -1535,9 +1535,6 @@ static int nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct n
 	};
 	int status;
 
-	status = nfs_wait_on_sequence(args->seqid, req->rq_task);
-	if (status != 0)
-		goto out;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 40718669b9c8..518cfa5cd024 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -137,7 +137,7 @@ struct nfs_openres {
  */
 struct nfs_open_confirmargs {
 	const struct nfs_fh *	fh;
-	nfs4_stateid            stateid;
+	nfs4_stateid *		stateid;
 	struct nfs_seqid *	seqid;
 };
 
-- 
cgit v1.2.3


From 911d1aaf26fc4d771174d98fcab710a44e2a5fa0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:16 +0100
Subject: NFSv4: locking XDR cleanup

 Get rid of some unnecessary intermediate structures

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4proc.c       | 192 ++++++++++++++++++++----------------------------
 fs/nfs/nfs4xdr.c        | 131 +++++++++++++++++++--------------
 include/linux/nfs_xdr.h |  52 ++++++-------
 3 files changed, 179 insertions(+), 196 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 3ecb7da220f5..857125705b6f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2935,43 +2935,17 @@ nfs4_set_lock_task_retry(unsigned long timeout)
 	return timeout;
 }
 
-static inline int
-nfs4_lck_type(int cmd, struct file_lock *request)
-{
-	/* set lock type */
-	switch (request->fl_type) {
-		case F_RDLCK:
-			return IS_SETLKW(cmd) ? NFS4_READW_LT : NFS4_READ_LT;
-		case F_WRLCK:
-			return IS_SETLKW(cmd) ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
-		case F_UNLCK:
-			return NFS4_WRITE_LT; 
-	}
-	BUG();
-	return 0;
-}
-
-static inline uint64_t
-nfs4_lck_length(struct file_lock *request)
-{
-	if (request->fl_end == OFFSET_MAX)
-		return ~(uint64_t)0;
-	return request->fl_end - request->fl_start + 1;
-}
-
 static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock *request)
 {
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_client *clp = server->nfs4_state;
-	struct nfs_lockargs arg = {
+	struct nfs_lockt_args arg = {
 		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
+		.fl = request,
 	};
-	struct nfs_lockres res = {
-		.server = server,
+	struct nfs_lockt_res res = {
+		.denied = request,
 	};
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCKT],
@@ -2979,36 +2953,23 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
 		.rpc_resp       = &res,
 		.rpc_cred	= state->owner->so_cred,
 	};
-	struct nfs_lowner nlo;
 	struct nfs4_lock_state *lsp;
 	int status;
 
 	down_read(&clp->cl_sem);
-	nlo.clientid = clp->cl_clientid;
+	arg.lock_owner.clientid = clp->cl_clientid;
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
 		goto out;
 	lsp = request->fl_u.nfs4_fl.owner;
-	nlo.id = lsp->ls_id; 
-	arg.u.lockt = &nlo;
+	arg.lock_owner.id = lsp->ls_id; 
 	status = rpc_call_sync(server->client, &msg, 0);
-	if (!status) {
-		request->fl_type = F_UNLCK;
-	} else if (status == -NFS4ERR_DENIED) {
-		int64_t len, start, end;
-		start = res.u.denied.offset;
-		len = res.u.denied.length;
-		end = start + len - 1;
-		if (end < 0 || len == 0)
-			request->fl_end = OFFSET_MAX;
-		else
-			request->fl_end = (loff_t)end;
-		request->fl_start = (loff_t)start;
-		request->fl_type = F_WRLCK;
-		if (res.u.denied.type & 1)
-			request->fl_type = F_RDLCK;
-		request->fl_pid = 0;
-		status = 0;
+	switch (status) {
+		case 0:
+			request->fl_type = F_UNLCK;
+			break;
+		case -NFS4ERR_DENIED:
+			status = 0;
 	}
 out:
 	up_read(&clp->cl_sem);
@@ -3048,17 +3009,42 @@ static int do_vfs_lock(struct file *file, struct file_lock *fl)
 }
 
 struct nfs4_unlockdata {
-	struct nfs_lockargs arg;
-	struct nfs_locku_opargs luargs;
-	struct nfs_lockres res;
+	struct nfs_locku_args arg;
+	struct nfs_locku_res res;
 	struct nfs4_lock_state *lsp;
 	struct nfs_open_context *ctx;
+	struct file_lock fl;
+	const struct nfs_server *server;
 };
 
+static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
+		struct nfs_open_context *ctx,
+		struct nfs4_lock_state *lsp,
+		struct nfs_seqid *seqid)
+{
+	struct nfs4_unlockdata *p;
+	struct inode *inode = lsp->ls_state->inode;
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p == NULL)
+		return NULL;
+	p->arg.fh = NFS_FH(inode);
+	p->arg.fl = &p->fl;
+	p->arg.seqid = seqid;
+	p->arg.stateid = &lsp->ls_stateid;
+	p->lsp = lsp;
+	atomic_inc(&lsp->ls_count);
+	/* Ensure we don't close file until we're done freeing locks! */
+	p->ctx = get_nfs_open_context(ctx);
+	memcpy(&p->fl, fl, sizeof(p->fl));
+	p->server = NFS_SERVER(inode);
+	return p;
+}
+
 static void nfs4_locku_release_calldata(void *data)
 {
 	struct nfs4_unlockdata *calldata = data;
-	nfs_free_seqid(calldata->luargs.seqid);
+	nfs_free_seqid(calldata->arg.seqid);
 	nfs4_put_lock_state(calldata->lsp);
 	put_nfs_open_context(calldata->ctx);
 	kfree(calldata);
@@ -3070,19 +3056,19 @@ static void nfs4_locku_done(struct rpc_task *task, void *data)
 
 	if (RPC_ASSASSINATED(task))
 		return;
-	nfs_increment_lock_seqid(task->tk_status, calldata->luargs.seqid);
+	nfs_increment_lock_seqid(task->tk_status, calldata->arg.seqid);
 	switch (task->tk_status) {
 		case 0:
 			memcpy(calldata->lsp->ls_stateid.data,
-					calldata->res.u.stateid.data,
+					calldata->res.stateid.data,
 					sizeof(calldata->lsp->ls_stateid.data));
 			break;
 		case -NFS4ERR_STALE_STATEID:
 		case -NFS4ERR_EXPIRED:
-			nfs4_schedule_state_recovery(calldata->res.server->nfs4_state);
+			nfs4_schedule_state_recovery(calldata->server->nfs4_state);
 			break;
 		default:
-			if (nfs4_async_handle_error(task, calldata->res.server) == -EAGAIN) {
+			if (nfs4_async_handle_error(task, calldata->server) == -EAGAIN) {
 				rpc_restart_call(task);
 			}
 	}
@@ -3097,10 +3083,8 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
 		.rpc_resp       = &calldata->res,
 		.rpc_cred	= calldata->lsp->ls_state->owner->so_cred,
 	};
-	int status;
 
-	status = nfs_wait_on_sequence(calldata->luargs.seqid, task);
-	if (status != 0)
+	if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
 		return;
 	if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) {
 		/* Note: exit _without_ running nfs4_locku_done */
@@ -3121,43 +3105,32 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 	struct nfs4_unlockdata *calldata;
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
+	struct nfs_seqid *seqid;
 	struct nfs4_lock_state *lsp;
 	struct rpc_task *task;
 	int status = 0;
 
 	/* Is this a delegated lock? */
 	if (test_bit(NFS_DELEGATED_STATE, &state->flags))
-		goto out;
+		goto out_unlock;
+	/* Is this open_owner holding any locks on the server? */
+	if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
+		goto out_unlock;
 
 	status = nfs4_set_lock_state(state, request);
 	if (status != 0)
-		goto out;
+		goto out_unlock;
 	lsp = request->fl_u.nfs4_fl.owner;
-	/* We might have lost the locks! */
-	if ((lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0)
-		goto out;
 	status = -ENOMEM;
-	calldata = kmalloc(sizeof(*calldata), GFP_KERNEL);
+	seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (seqid == NULL)
+		goto out_unlock;
+	calldata = nfs4_alloc_unlockdata(request, request->fl_file->private_data,
+			lsp, seqid);
 	if (calldata == NULL)
-		goto out;
-	calldata->luargs.seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	if (calldata->luargs.seqid == NULL) {
-		kfree(calldata);
-		goto out;
-	}
-	calldata->luargs.stateid = &lsp->ls_stateid;
-	calldata->arg.fh = NFS_FH(inode);
-	calldata->arg.type = nfs4_lck_type(cmd, request);
-	calldata->arg.offset = request->fl_start;
-	calldata->arg.length = nfs4_lck_length(request);
-	calldata->arg.u.locku = &calldata->luargs;
-	calldata->res.server = server;
-	calldata->lsp = lsp;
-	atomic_inc(&lsp->ls_count);
-
-	/* Ensure we don't close file until we're done freeing locks! */
-	calldata->ctx = get_nfs_open_context((struct nfs_open_context*)request->fl_file->private_data);
-
+		goto out_free_seqid;
+	/* Unlock _before_ we do the RPC call */
+	do_vfs_lock(request->fl_file, request);
 	task = rpc_run_task(server->client, RPC_TASK_ASYNC, &nfs4_locku_ops, calldata);
 	if (!IS_ERR(task)) {
 		status = nfs4_wait_for_completion_rpc_task(task);
@@ -3166,7 +3139,10 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
 		status = PTR_ERR(task);
 		nfs4_locku_release_calldata(calldata);
 	}
-out:
+	return status;
+out_free_seqid:
+	nfs_free_seqid(seqid);
+out_unlock:
 	do_vfs_lock(request->fl_file, request);
 	return status;
 }
@@ -3176,27 +3152,19 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	struct inode *inode = state->inode;
 	struct nfs_server *server = NFS_SERVER(inode);
 	struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
-	struct nfs_lock_opargs largs = {
+	struct nfs_lock_args arg = {
+		.fh = NFS_FH(inode),
+		.fl = request,
 		.lock_stateid = &lsp->ls_stateid,
 		.open_stateid = &state->stateid,
 		.lock_owner = {
 			.clientid = server->nfs4_state->cl_clientid,
 			.id = lsp->ls_id,
 		},
+		.block = (IS_SETLKW(cmd)) ? 1 : 0,
 		.reclaim = reclaim,
 	};
-	struct nfs_lockargs arg = {
-		.fh = NFS_FH(inode),
-		.type = nfs4_lck_type(cmd, request),
-		.offset = request->fl_start,
-		.length = nfs4_lck_length(request),
-		.u = {
-			.lock = &largs,
-		},
-	};
-	struct nfs_lockres res = {
-		.server = server,
-	};
+	struct nfs_lock_res res;
 	struct rpc_message msg = {
 		.rpc_proc	= &nfs4_procedures[NFSPROC4_CLNT_LOCK],
 		.rpc_argp       = &arg,
@@ -3205,37 +3173,37 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *r
 	};
 	int status = -ENOMEM;
 
-	largs.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
-	if (largs.lock_seqid == NULL)
+	arg.lock_seqid = nfs_alloc_seqid(&lsp->ls_seqid);
+	if (arg.lock_seqid == NULL)
 		return -ENOMEM;
 	if (!(lsp->ls_seqid.flags & NFS_SEQID_CONFIRMED)) {
 		struct nfs4_state_owner *owner = state->owner;
 
-		largs.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
-		if (largs.open_seqid == NULL)
+		arg.open_seqid = nfs_alloc_seqid(&owner->so_seqid);
+		if (arg.open_seqid == NULL)
 			goto out;
-		largs.new_lock_owner = 1;
+		arg.new_lock_owner = 1;
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
 		/* increment open seqid on success, and seqid mutating errors */
-		if (largs.new_lock_owner != 0) {
-			nfs_increment_open_seqid(status, largs.open_seqid);
+		if (arg.new_lock_owner != 0) {
+			nfs_increment_open_seqid(status, arg.open_seqid);
 			if (status == 0)
 				nfs_confirm_seqid(&lsp->ls_seqid, 0);
 		}
-		nfs_free_seqid(largs.open_seqid);
+		nfs_free_seqid(arg.open_seqid);
 	} else
 		status = rpc_call_sync(server->client, &msg, RPC_TASK_NOINTR);
 	/* increment lock seqid on success, and seqid mutating errors*/
-	nfs_increment_lock_seqid(status, largs.lock_seqid);
+	nfs_increment_lock_seqid(status, arg.lock_seqid);
 	/* save the returned stateid. */
 	if (status == 0) {
-		memcpy(lsp->ls_stateid.data, res.u.stateid.data,
+		memcpy(lsp->ls_stateid.data, res.stateid.data,
 				sizeof(lsp->ls_stateid.data));
 		lsp->ls_flags |= NFS_LOCK_INITIALIZED;
 	} else if (status == -NFS4ERR_DENIED)
 		status = -EAGAIN;
 out:
-	nfs_free_seqid(largs.lock_seqid);
+	nfs_free_seqid(arg.lock_seqid);
 	return status;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 3100172822c9..a7b5de899c6d 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -742,69 +742,80 @@ static int encode_link(struct xdr_stream *xdr, const struct qstr *name)
 	return 0;
 }
 
+static inline int nfs4_lock_type(struct file_lock *fl, int block)
+{
+	if ((fl->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) == F_RDLCK)
+		return block ? NFS4_READW_LT : NFS4_READ_LT;
+	return block ? NFS4_WRITEW_LT : NFS4_WRITE_LT;
+}
+
+static inline uint64_t nfs4_lock_length(struct file_lock *fl)
+{
+	if (fl->fl_end == OFFSET_MAX)
+		return ~(uint64_t)0;
+	return fl->fl_end - fl->fl_start + 1;
+}
+
 /*
  * opcode,type,reclaim,offset,length,new_lock_owner = 32
  * open_seqid,open_stateid,lock_seqid,lock_owner.clientid, lock_owner.id = 40
  */
-static int encode_lock(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_lock(struct xdr_stream *xdr, const struct nfs_lock_args *args)
 {
 	uint32_t *p;
-	struct nfs_lock_opargs *opargs = arg->u.lock;
 
 	RESERVE_SPACE(32);
 	WRITE32(OP_LOCK);
-	WRITE32(arg->type); 
-	WRITE32(opargs->reclaim);
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
-	WRITE32(opargs->new_lock_owner);
-	if (opargs->new_lock_owner){
+	WRITE32(nfs4_lock_type(args->fl, args->block));
+	WRITE32(args->reclaim);
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
+	WRITE32(args->new_lock_owner);
+	if (args->new_lock_owner){
 		RESERVE_SPACE(40);
-		WRITE32(opargs->open_seqid->sequence->counter);
-		WRITEMEM(opargs->open_stateid->data, sizeof(opargs->open_stateid->data));
-		WRITE32(opargs->lock_seqid->sequence->counter);
-		WRITE64(opargs->lock_owner.clientid);
+		WRITE32(args->open_seqid->sequence->counter);
+		WRITEMEM(args->open_stateid->data, sizeof(args->open_stateid->data));
+		WRITE32(args->lock_seqid->sequence->counter);
+		WRITE64(args->lock_owner.clientid);
 		WRITE32(4);
-		WRITE32(opargs->lock_owner.id);
+		WRITE32(args->lock_owner.id);
 	}
 	else {
 		RESERVE_SPACE(20);
-		WRITEMEM(opargs->lock_stateid->data, sizeof(opargs->lock_stateid->data));
-		WRITE32(opargs->lock_seqid->sequence->counter);
+		WRITEMEM(args->lock_stateid->data, sizeof(args->lock_stateid->data));
+		WRITE32(args->lock_seqid->sequence->counter);
 	}
 
 	return 0;
 }
 
-static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_lockt(struct xdr_stream *xdr, const struct nfs_lockt_args *args)
 {
 	uint32_t *p;
-	struct nfs_lowner *opargs = arg->u.lockt;
 
 	RESERVE_SPACE(40);
 	WRITE32(OP_LOCKT);
-	WRITE32(arg->type);
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
-	WRITE64(opargs->clientid);
+	WRITE32(nfs4_lock_type(args->fl, 0));
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
+	WRITE64(args->lock_owner.clientid);
 	WRITE32(4);
-	WRITE32(opargs->id);
+	WRITE32(args->lock_owner.id);
 
 	return 0;
 }
 
-static int encode_locku(struct xdr_stream *xdr, const struct nfs_lockargs *arg)
+static int encode_locku(struct xdr_stream *xdr, const struct nfs_locku_args *args)
 {
 	uint32_t *p;
-	struct nfs_locku_opargs *opargs = arg->u.locku;
 
 	RESERVE_SPACE(44);
 	WRITE32(OP_LOCKU);
-	WRITE32(arg->type);
-	WRITE32(opargs->seqid->sequence->counter);
-	WRITEMEM(opargs->stateid->data, sizeof(opargs->stateid->data));
-	WRITE64(arg->offset);
-	WRITE64(arg->length);
+	WRITE32(nfs4_lock_type(args->fl, 0));
+	WRITE32(args->seqid->sequence->counter);
+	WRITEMEM(args->stateid->data, sizeof(args->stateid->data));
+	WRITE64(args->fl->fl_start);
+	WRITE64(nfs4_lock_length(args->fl));
 
 	return 0;
 }
@@ -1596,21 +1607,20 @@ out:
 /*
  * Encode a LOCK request
  */
-static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lock_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
 		.nops   = 2,
 	};
-	struct nfs_lock_opargs *opargs = args->u.lock;
 	int status;
 
-	status = nfs_wait_on_sequence(opargs->lock_seqid, req->rq_task);
+	status = nfs_wait_on_sequence(args->lock_seqid, req->rq_task);
 	if (status != 0)
 		goto out;
 	/* Do we need to do an open_to_lock_owner? */
-	if (opargs->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
-		opargs->new_lock_owner = 0;
+	if (args->lock_seqid->sequence->flags & NFS_SEQID_CONFIRMED)
+		args->new_lock_owner = 0;
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
 	status = encode_putfh(&xdr, args->fh);
@@ -1624,7 +1634,7 @@ out:
 /*
  * Encode a LOCKT request
  */
-static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockt_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -1645,7 +1655,7 @@ out:
 /*
  * Encode a LOCKU request
  */
-static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
+static int nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_locku_args *args)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2949,55 +2959,64 @@ static int decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
 /*
  * We create the owner, so we know a proper owner.id length is 4.
  */
-static int decode_lock_denied (struct xdr_stream *xdr, struct nfs_lock_denied *denied)
+static int decode_lock_denied (struct xdr_stream *xdr, struct file_lock *fl)
 {
+	uint64_t offset, length, clientid;
 	uint32_t *p;
-	uint32_t namelen;
+	uint32_t namelen, type;
 
 	READ_BUF(32);
-	READ64(denied->offset);
-	READ64(denied->length);
-	READ32(denied->type);
-	READ64(denied->owner.clientid);
+	READ64(offset);
+	READ64(length);
+	READ32(type);
+	if (fl != NULL) {
+		fl->fl_start = (loff_t)offset;
+		fl->fl_end = fl->fl_start + (loff_t)length - 1;
+		if (length == ~(uint64_t)0)
+			fl->fl_end = OFFSET_MAX;
+		fl->fl_type = F_WRLCK;
+		if (type & 1)
+			fl->fl_type = F_RDLCK;
+		fl->fl_pid = 0;
+	}
+	READ64(clientid);
 	READ32(namelen);
 	READ_BUF(namelen);
-	if (namelen == 4)
-		READ32(denied->owner.id);
 	return -NFS4ERR_DENIED;
 }
 
-static int decode_lock(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_lock(struct xdr_stream *xdr, struct nfs_lock_res *res)
 {
 	uint32_t *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCK);
 	if (status == 0) {
-		READ_BUF(sizeof(res->u.stateid.data));
-		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
 	} else if (status == -NFS4ERR_DENIED)
-		return decode_lock_denied(xdr, &res->u.denied);
+		return decode_lock_denied(xdr, NULL);
 	return status;
 }
 
-static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_lockt(struct xdr_stream *xdr, struct nfs_lockt_res *res)
 {
 	int status;
 	status = decode_op_hdr(xdr, OP_LOCKT);
 	if (status == -NFS4ERR_DENIED)
-		return decode_lock_denied(xdr, &res->u.denied);
+		return decode_lock_denied(xdr, res->denied);
 	return status;
 }
 
-static int decode_locku(struct xdr_stream *xdr, struct nfs_lockres *res)
+static int decode_locku(struct xdr_stream *xdr, struct nfs_locku_res *res)
 {
 	uint32_t *p;
 	int status;
 
 	status = decode_op_hdr(xdr, OP_LOCKU);
 	if (status == 0) {
-		READ_BUF(sizeof(res->u.stateid.data));
-		COPYMEM(res->u.stateid.data, sizeof(res->u.stateid.data));
+		READ_BUF(sizeof(res->stateid.data));
+		COPYMEM(res->stateid.data, sizeof(res->stateid.data));
 	}
 	return status;
 }
@@ -3861,7 +3880,7 @@ out:
 /*
  * Decode LOCK response
  */
-static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_lock(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lock_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3882,7 +3901,7 @@ out:
 /*
  * Decode LOCKT response
  */
-static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_lockt(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockt_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -3903,7 +3922,7 @@ out:
 /*
  * Decode LOCKU response
  */
-static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_lockres *res)
+static int nfs4_xdr_dec_locku(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_locku_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 518cfa5cd024..b8b0eed98ec9 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -165,50 +165,46 @@ struct nfs_closeres {
  *  * Arguments to the lock,lockt, and locku call.
  *   */
 struct nfs_lowner {
-	__u64           clientid;
-	u32                     id;
+	__u64			clientid;
+	u32			id;
 };
 
-struct nfs_lock_opargs {
+struct nfs_lock_args {
+	struct nfs_fh *		fh;
+	struct file_lock *	fl;
 	struct nfs_seqid *	lock_seqid;
 	nfs4_stateid *		lock_stateid;
 	struct nfs_seqid *	open_seqid;
 	nfs4_stateid *		open_stateid;
-	struct nfs_lowner       lock_owner;
-	__u32                   reclaim;
-	__u32                   new_lock_owner;
+	struct nfs_lowner	lock_owner;
+	unsigned char		block : 1;
+	unsigned char		reclaim : 1;
+	unsigned char		new_lock_owner : 1;
+};
+
+struct nfs_lock_res {
+	nfs4_stateid			stateid;
 };
 
-struct nfs_locku_opargs {
+struct nfs_locku_args {
+	struct nfs_fh *		fh;
+	struct file_lock *	fl;
 	struct nfs_seqid *	seqid;
 	nfs4_stateid *		stateid;
 };
 
-struct nfs_lockargs {
-	struct nfs_fh *         fh;
-	__u32                   type;
-	__u64                   offset; 
-	__u64                   length; 
-	union {
-		struct nfs_lock_opargs  *lock;    /* LOCK  */
-		struct nfs_lowner       *lockt;  /* LOCKT */
-		struct nfs_locku_opargs *locku;  /* LOCKU */
-	} u;
+struct nfs_locku_res {
+	nfs4_stateid			stateid;
 };
 
-struct nfs_lock_denied {
-	__u64                   offset;
-	__u64                   length;
-	__u32                   type;
-	struct nfs_lowner   	owner;
+struct nfs_lockt_args {
+	struct nfs_fh *		fh;
+	struct file_lock *	fl;
+	struct nfs_lowner	lock_owner;
 };
 
-struct nfs_lockres {
-	union {
-		nfs4_stateid            stateid;/* LOCK success, LOCKU */
-		struct nfs_lock_denied  denied; /* LOCK failed, LOCKT success */
-	} u;
-	const struct nfs_server *	server;
+struct nfs_lockt_res {
+	struct file_lock *	denied; /* LOCK, LOCKT failed */
 };
 
 struct nfs4_delegreturnargs {
-- 
cgit v1.2.3


From a911fd9a6046200e439b4af172e8379c0942eec3 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Wed, 30 Nov 2005 18:08:59 -0500
Subject: NFS: simplify inlined bit ops in nfs_page.h

 Minor cleanup:  inlined bit ops in nfs_page.h can be simpler.

 Test plan:
 Write-intensive workload against a server that requires COMMITs.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/nfs_page.h | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index da2e077b65e2..66e2ed658527 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -79,9 +79,7 @@ extern  void nfs_clear_page_writeback(struct nfs_page *req);
 static inline int
 nfs_lock_request_dontget(struct nfs_page *req)
 {
-	if (test_and_set_bit(PG_BUSY, &req->wb_flags))
-		return 0;
-	return 1;
+	return !test_and_set_bit(PG_BUSY, &req->wb_flags);
 }
 
 /*
@@ -125,9 +123,7 @@ nfs_list_remove_request(struct nfs_page *req)
 static inline int
 nfs_defer_commit(struct nfs_page *req)
 {
-	if (test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags))
-		return 0;
-	return 1;
+	return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags);
 }
 
 static inline void
@@ -141,9 +137,7 @@ nfs_clear_commit(struct nfs_page *req)
 static inline int
 nfs_defer_reschedule(struct nfs_page *req)
 {
-	if (test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags))
-		return 0;
-	return 1;
+	return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags);
 }
 
 static inline void
-- 
cgit v1.2.3


From 40859d7ee64ed6bfad8a4e93f9bb5c1074afadff Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Wed, 30 Nov 2005 18:09:02 -0500
Subject: NFS: support large reads and writes on the wire

 Most NFS server implementations allow up to 64KB reads and writes on the
 wire.  The Solaris NFS server allows up to a megabyte, for instance.

 Now the Linux NFS client supports transfer sizes up to 1MB, too.  This will
 help reduce protocol and context switch overhead on read/write intensive NFS
 workloads, and support larger atomic read and write operations on servers
 that support them.

 Test-plan:
 Connectathon and iozone on mount point with wsize=rsize>32768 over TCP.
 Tests with NFS over UDP to verify the maximum RPC payload size cap.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/direct.c            |  5 +++--
 fs/nfs/inode.c             | 25 ++++++++++---------------
 fs/nfs/nfsroot.c           |  4 ++--
 fs/nfs/read.c              |  6 +++---
 fs/nfs/write.c             | 29 ++++++++++++++++++++++-------
 include/linux/nfs_fs.h     | 41 +++++++++++++++++++++++++++++++++++------
 include/linux/nfs_xdr.h    | 29 ++++++++++++++++-------------
 include/linux/sunrpc/xdr.h |  5 -----
 8 files changed, 91 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index f69d95aa78b2..fd7ac5e841c1 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -154,6 +154,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 	struct list_head *list;
 	struct nfs_direct_req *dreq;
 	unsigned int reads = 0;
+	unsigned int rpages = (rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	dreq = kmem_cache_alloc(nfs_direct_cachep, SLAB_KERNEL);
 	if (!dreq)
@@ -167,7 +168,7 @@ static struct nfs_direct_req *nfs_direct_read_alloc(size_t nbytes, unsigned int
 
 	list = &dreq->list;
 	for(;;) {
-		struct nfs_read_data *data = nfs_readdata_alloc();
+		struct nfs_read_data *data = nfs_readdata_alloc(rpages);
 
 		if (unlikely(!data)) {
 			while (!list_empty(list)) {
@@ -431,7 +432,7 @@ static ssize_t nfs_direct_write_seg(struct inode *inode,
 	struct nfs_writeverf first_verf;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc();
+	wdata = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!wdata)
 		return -ENOMEM;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4e6558df54b8..acde2c5725bf 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -221,10 +221,10 @@ nfs_calc_block_size(u64 tsize)
 static inline unsigned long
 nfs_block_size(unsigned long bsize, unsigned char *nrbitsp)
 {
-	if (bsize < 1024)
-		bsize = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	else if (bsize >= NFS_MAX_FILE_IO_BUFFER_SIZE)
-		bsize = NFS_MAX_FILE_IO_BUFFER_SIZE;
+	if (bsize < NFS_MIN_FILE_IO_SIZE)
+		bsize = NFS_DEF_FILE_IO_SIZE;
+	else if (bsize >= NFS_MAX_FILE_IO_SIZE)
+		bsize = NFS_MAX_FILE_IO_SIZE;
 
 	return nfs_block_bits(bsize, nrbitsp);
 }
@@ -307,20 +307,15 @@ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
 	max_rpc_payload = nfs_block_size(rpc_max_payload(server->client), NULL);
 	if (server->rsize > max_rpc_payload)
 		server->rsize = max_rpc_payload;
-	if (server->wsize > max_rpc_payload)
-		server->wsize = max_rpc_payload;
-
+	if (server->rsize > NFS_MAX_FILE_IO_SIZE)
+		server->rsize = NFS_MAX_FILE_IO_SIZE;
 	server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-	if (server->rpages > NFS_READ_MAXIOV) {
-		server->rpages = NFS_READ_MAXIOV;
-		server->rsize = server->rpages << PAGE_CACHE_SHIFT;
-	}
 
+	if (server->wsize > max_rpc_payload)
+		server->wsize = max_rpc_payload;
+	if (server->wsize > NFS_MAX_FILE_IO_SIZE)
+		server->wsize = NFS_MAX_FILE_IO_SIZE;
 	server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
-        if (server->wpages > NFS_WRITE_MAXIOV) {
-		server->wpages = NFS_WRITE_MAXIOV;
-                server->wsize = server->wpages << PAGE_CACHE_SHIFT;
-	}
 
 	if (sb->s_blocksize == 0)
 		sb->s_blocksize = nfs_block_bits(server->wsize,
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 1b272a135a31..985cc53b8dd5 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -296,8 +296,8 @@ static int __init root_nfs_name(char *name)
 	nfs_port          = -1;
 	nfs_data.version  = NFS_MOUNT_VERSION;
 	nfs_data.flags    = NFS_MOUNT_NONLM;	/* No lockd in nfs root yet */
-	nfs_data.rsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
-	nfs_data.wsize    = NFS_DEF_FILE_IO_BUFFER_SIZE;
+	nfs_data.rsize    = NFS_DEF_FILE_IO_SIZE;
+	nfs_data.wsize    = NFS_DEF_FILE_IO_SIZE;
 	nfs_data.acregmin = 3;
 	nfs_data.acregmax = 60;
 	nfs_data.acdirmin = 30;
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 21486242c3d3..05eb43fadf8e 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -83,7 +83,7 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result;
 	struct nfs_read_data *rdata;
 
-	rdata = nfs_readdata_alloc();
+	rdata = nfs_readdata_alloc(1);
 	if (!rdata)
 		return -ENOMEM;
 
@@ -283,7 +283,7 @@ static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
 
 	nbytes = req->wb_bytes;
 	for(;;) {
-		data = nfs_readdata_alloc();
+		data = nfs_readdata_alloc(1);
 		if (!data)
 			goto out_bad;
 		INIT_LIST_HEAD(&data->pages);
@@ -339,7 +339,7 @@ static int nfs_pagein_one(struct list_head *head, struct inode *inode)
 	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
 		return nfs_pagein_multi(head, inode);
 
-	data = nfs_readdata_alloc();
+	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
 	if (!data)
 		goto out_bad;
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 80bc4ea1b824..1ce0c200df16 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -89,18 +89,33 @@ static mempool_t *nfs_commit_mempool;
 
 static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
 
-static inline struct nfs_write_data *nfs_commit_alloc(void)
+static inline struct nfs_write_data *nfs_commit_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, SLAB_NOFS);
+
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_commit_mempool);
+				p = NULL;
+			}
+		}
 	}
 	return p;
 }
 
 static inline void nfs_commit_free(struct nfs_write_data *p)
 {
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
 	mempool_free(p, nfs_commit_mempool);
 }
 
@@ -167,7 +182,7 @@ static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
 	int		result, written = 0;
 	struct nfs_write_data *wdata;
 
-	wdata = nfs_writedata_alloc();
+	wdata = nfs_writedata_alloc(1);
 	if (!wdata)
 		return -ENOMEM;
 
@@ -909,7 +924,7 @@ static int nfs_flush_multi(struct list_head *head, struct inode *inode, int how)
 
 	nbytes = req->wb_bytes;
 	for (;;) {
-		data = nfs_writedata_alloc();
+		data = nfs_writedata_alloc(1);
 		if (!data)
 			goto out_bad;
 		list_add(&data->pages, &list);
@@ -973,7 +988,7 @@ static int nfs_flush_one(struct list_head *head, struct inode *inode, int how)
 	if (NFS_SERVER(inode)->wsize < PAGE_CACHE_SIZE)
 		return nfs_flush_multi(head, inode, how);
 
-	data = nfs_writedata_alloc();
+	data = nfs_writedata_alloc(NFS_SERVER(inode)->wpages);
 	if (!data)
 		goto out_bad;
 
@@ -1241,12 +1256,12 @@ static void nfs_commit_rpcsetup(struct list_head *head,
  * Commit dirty pages
  */
 static int
-nfs_commit_list(struct list_head *head, int how)
+nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 {
 	struct nfs_write_data	*data;
 	struct nfs_page         *req;
 
-	data = nfs_commit_alloc();
+	data = nfs_commit_alloc(NFS_SERVER(inode)->wpages);
 
 	if (!data)
 		goto out_bad;
@@ -1351,7 +1366,7 @@ int nfs_commit_inode(struct inode *inode, int how)
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&nfsi->req_lock);
 	if (res) {
-		error = nfs_commit_list(&head, how);
+		error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
 	}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4dff705d2ff2..d38010ba6477 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -38,9 +38,6 @@
 # define NFS_DEBUG
 #endif
 
-#define NFS_MAX_FILE_IO_BUFFER_SIZE	32768
-#define NFS_DEF_FILE_IO_BUFFER_SIZE	4096
-
 /* Default timeout values */
 #define NFS_MAX_UDP_TIMEOUT	(60*HZ)
 #define NFS_MAX_TCP_TIMEOUT	(600*HZ)
@@ -462,18 +459,33 @@ static inline int nfs_wb_page(struct inode *inode, struct page* page)
  */
 extern mempool_t *nfs_wdata_mempool;
 
-static inline struct nfs_write_data *nfs_writedata_alloc(void)
+static inline struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_wdata_mempool, SLAB_NOFS);
+
 	if (p) {
 		memset(p, 0, sizeof(*p));
 		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_wdata_mempool);
+				p = NULL;
+			}
+		}
 	}
 	return p;
 }
 
 static inline void nfs_writedata_free(struct nfs_write_data *p)
 {
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
 	mempool_free(p, nfs_wdata_mempool);
 }
 
@@ -492,16 +504,33 @@ extern void  nfs_readdata_release(void *data);
  */
 extern mempool_t *nfs_rdata_mempool;
 
-static inline struct nfs_read_data *nfs_readdata_alloc(void)
+static inline struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
 {
 	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, SLAB_NOFS);
-	if (p)
+
+	if (p) {
 		memset(p, 0, sizeof(*p));
+		INIT_LIST_HEAD(&p->pages);
+		if (pagecount < NFS_PAGEVEC_SIZE)
+			p->pagevec = &p->page_array[0];
+		else {
+			size_t size = ++pagecount * sizeof(struct page *);
+			p->pagevec = kmalloc(size, GFP_NOFS);
+			if (p->pagevec) {
+				memset(p->pagevec, 0, size);
+			} else {
+				mempool_free(p, nfs_rdata_mempool);
+				p = NULL;
+			}
+		}
+	}
 	return p;
 }
 
 static inline void nfs_readdata_free(struct nfs_read_data *p)
 {
+	if (p && (p->pagevec != &p->page_array[0]))
+		kfree(p->pagevec);
 	mempool_free(p, nfs_rdata_mempool);
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index b8b0eed98ec9..9f422fd87673 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -4,6 +4,16 @@
 #include <linux/sunrpc/xprt.h>
 #include <linux/nfsacl.h>
 
+/*
+ * To change the maximum rsize and wsize supported by the NFS client, adjust
+ * NFS_MAX_FILE_IO_SIZE.  64KB is a typical maximum, but some servers can
+ * support a megabyte or more.  The default is left at 4096 bytes, which is
+ * reasonable for NFS over UDP.
+ */
+#define NFS_MAX_FILE_IO_SIZE	(1048576U)
+#define NFS_DEF_FILE_IO_SIZE	(4096U)
+#define NFS_MIN_FILE_IO_SIZE	(1024U)
+
 struct nfs4_fsid {
 	__u64 major;
 	__u64 minor;
@@ -215,12 +225,6 @@ struct nfs4_delegreturnargs {
 /*
  * Arguments to the read call.
  */
-
-#define NFS_READ_MAXIOV		(9U)
-#if (NFS_READ_MAXIOV > (MAX_IOVEC -2))
-#error "NFS_READ_MAXIOV is too large"
-#endif
-
 struct nfs_readargs {
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
@@ -239,11 +243,6 @@ struct nfs_readres {
 /*
  * Arguments to the write call.
  */
-#define NFS_WRITE_MAXIOV	(9U)
-#if (NFS_WRITE_MAXIOV > (MAX_IOVEC -2))
-#error "NFS_WRITE_MAXIOV is too large"
-#endif
-
 struct nfs_writeargs {
 	struct nfs_fh *		fh;
 	struct nfs_open_context *context;
@@ -674,6 +673,8 @@ struct nfs4_server_caps_res {
 
 struct nfs_page;
 
+#define NFS_PAGEVEC_SIZE	(8U)
+
 struct nfs_read_data {
 	int			flags;
 	struct rpc_task		task;
@@ -682,13 +683,14 @@ struct nfs_read_data {
 	struct nfs_fattr	fattr;	/* fattr storage */
 	struct list_head	pages;	/* Coalesced read requests */
 	struct nfs_page		*req;	/* multi ops per nfs_page */
-	struct page		*pagevec[NFS_READ_MAXIOV];
+	struct page		**pagevec;
 	struct nfs_readargs args;
 	struct nfs_readres  res;
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
 	void (*complete) (struct nfs_read_data *, int);
+	struct page		*page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
 struct nfs_write_data {
@@ -700,13 +702,14 @@ struct nfs_write_data {
 	struct nfs_writeverf	verf;
 	struct list_head	pages;		/* Coalesced requests we wish to flush */
 	struct nfs_page		*req;		/* multi ops per nfs_page */
-	struct page		*pagevec[NFS_WRITE_MAXIOV];
+	struct page		**pagevec;
 	struct nfs_writeargs	args;		/* argument struct */
 	struct nfs_writeres	res;		/* result struct */
 #ifdef CONFIG_NFS_V4
 	unsigned long		timestamp;	/* For lease renewal */
 #endif
 	void (*complete) (struct nfs_write_data *, int);
+	struct page		*page_array[NFS_PAGEVEC_SIZE + 1];
 };
 
 struct nfs_access_entry;
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5da968729cf8..5676794ee34f 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -134,11 +134,6 @@ xdr_adjust_iovec(struct kvec *iov, u32 *p)
 	return iov->iov_len = ((u8 *) p - (u8 *) iov->iov_base);
 }
 
-/*
- * Maximum number of iov's we use.
- */
-#define MAX_IOVEC	(12)
-
 /*
  * XDR buffer helper functions
  */
-- 
cgit v1.2.3


From 70b9ecbdb9c5fdc731f8780bffd45d9519020c4a Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:34 +0100
Subject: NFS: Make stat() return updated mtimes after a write()

 The SuS states that a call to write() will cause mtime to be updated on
 the file. In order to satisfy that requirement, we need to flush out
 any cached writes in nfs_getattr().
 Speed things up slightly by not committing the writes.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         |  2 ++
 fs/nfs/write.c         | 23 ++++++++++++-----------
 include/linux/nfs_fs.h |  1 +
 3 files changed, 15 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index acde2c5725bf..2c7f8aac1dec 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -952,6 +952,8 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
 	int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME;
 	int err;
 
+	/* Flush out writes to the server in order to update c/mtime */
+	nfs_sync_inode(inode, 0, 0, FLUSH_WAIT|FLUSH_NOCOMMIT);
 	if (__IS_FLG(inode, MS_NOATIME))
 		need_atime = 0;
 	else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 1ce0c200df16..9449b6835509 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1377,22 +1377,23 @@ int nfs_commit_inode(struct inode *inode, int how)
 int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
 		  unsigned int npages, int how)
 {
-	int	error,
-		wait;
+	int nocommit = how & FLUSH_NOCOMMIT;
+	int wait = how & FLUSH_WAIT;
+	int error;
 
-	wait = how & FLUSH_WAIT;
-	how &= ~FLUSH_WAIT;
+	how &= ~(FLUSH_WAIT|FLUSH_NOCOMMIT);
 
 	do {
-		error = 0;
-		if (wait)
+		if (wait) {
 			error = nfs_wait_on_requests(inode, idx_start, npages);
-		if (error == 0)
-			error = nfs_flush_inode(inode, idx_start, npages, how);
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (error == 0)
+			if (error != 0)
+				continue;
+		}
+		error = nfs_flush_inode(inode, idx_start, npages, how);
+		if (error != 0)
+			continue;
+		if (!nocommit)
 			error = nfs_commit_inode(inode, how);
-#endif
 	} while (error > 0);
 	return error;
 }
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index d38010ba6477..408d82d3d97c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -62,6 +62,7 @@
 #define FLUSH_STABLE		4	/* commit to stable storage */
 #define FLUSH_LOWPRI		8	/* low priority background flush */
 #define FLUSH_HIGHPRI		16	/* high priority memory reclaim flush */
+#define FLUSH_NOCOMMIT		32	/* Don't send the NFSv3/v4 COMMIT */
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From fa178f29c0f8a0dce748181a5351f4a92fd4f455 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:38 +0100
Subject: NFSv4: Ensure DELEGRETURN returns attributes

 Upon return of a write delegation, the server will almost always bump the
 change attribute. Ensure that we pick up that change so that we don't
 invalidate our data cache unnecessarily.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/delegation.c     |  2 --
 fs/nfs/nfs4proc.c       | 17 +++++++++++++----
 fs/nfs/nfs4xdr.c        | 33 ++++++++++++++++++++++-----------
 include/linux/nfs_xdr.h |  6 ++++++
 4 files changed, 41 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index d2ee09b38cee..66cc720e3927 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -159,8 +159,6 @@ static int nfs_do_return_delegation(struct inode *inode, struct nfs_delegation *
 {
 	int res = 0;
 
-	__nfs_revalidate_inode(NFS_SERVER(inode), inode);
-
 	res = nfs4_proc_delegreturn(inode, delegation->cred, &delegation->stateid);
 	nfs_free_delegation(delegation);
 	return res;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index b3349154994b..984ca3454d04 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2920,11 +2920,12 @@ nfs4_proc_setclientid_confirm(struct nfs4_client *clp, struct rpc_cred *cred)
 
 struct nfs4_delegreturndata {
 	struct nfs4_delegreturnargs args;
+	struct nfs4_delegreturnres res;
 	struct nfs_fh fh;
 	nfs4_stateid stateid;
 	struct rpc_cred *cred;
 	unsigned long timestamp;
-	const struct nfs_server *server;
+	struct nfs_fattr fattr;
 	int rpc_status;
 };
 
@@ -2934,8 +2935,10 @@ static void nfs4_delegreturn_prepare(struct rpc_task *task, void *calldata)
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_DELEGRETURN],
 		.rpc_argp = &data->args,
+		.rpc_resp = &data->res,
 		.rpc_cred = data->cred,
 	};
+	nfs_fattr_init(data->res.fattr);
 	rpc_call_setup(task, &msg, 0);
 }
 
@@ -2944,7 +2947,7 @@ static void nfs4_delegreturn_done(struct rpc_task *task, void *calldata)
 	struct nfs4_delegreturndata *data = calldata;
 	data->rpc_status = task->tk_status;
 	if (data->rpc_status == 0)
-		renew_lease(data->server, data->timestamp);
+		renew_lease(data->res.server, data->timestamp);
 }
 
 static void nfs4_delegreturn_release(void *calldata)
@@ -2964,6 +2967,7 @@ const static struct rpc_call_ops nfs4_delegreturn_ops = {
 static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, const nfs4_stateid *stateid)
 {
 	struct nfs4_delegreturndata *data;
+	struct nfs_server *server = NFS_SERVER(inode);
 	struct rpc_task *task;
 	int status;
 
@@ -2972,11 +2976,13 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 		return -ENOMEM;
 	data->args.fhandle = &data->fh;
 	data->args.stateid = &data->stateid;
+	data->args.bitmask = server->attr_bitmask;
 	nfs_copy_fh(&data->fh, NFS_FH(inode));
 	memcpy(&data->stateid, stateid, sizeof(data->stateid));
+	data->res.fattr = &data->fattr;
+	data->res.server = server;
 	data->cred = get_rpccred(cred);
 	data->timestamp = jiffies;
-	data->server = NFS_SERVER(inode);
 	data->rpc_status = 0;
 
 	task = rpc_run_task(NFS_CLIENT(inode), RPC_TASK_ASYNC, &nfs4_delegreturn_ops, data);
@@ -2985,8 +2991,11 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
 		return PTR_ERR(task);
 	}
 	status = nfs4_wait_for_completion_rpc_task(task);
-	if (status == 0)
+	if (status == 0) {
 		status = data->rpc_status;
+		if (status == 0)
+			nfs_post_op_update_inode(inode, &data->fattr);
+	}
 	rpc_release_task(task);
 	return status;
 }
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 5d6bda43dfaa..12be1d682164 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -392,9 +392,11 @@ static int nfs_stat_to_errno(int);
 				decode_getattr_maxsz)
 #define NFS4_enc_delegreturn_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
-				encode_delegreturn_maxsz)
+				encode_delegreturn_maxsz + \
+				encode_getattr_maxsz)
 #define NFS4_dec_delegreturn_sz (compound_decode_hdr_maxsz + \
-				decode_delegreturn_maxsz)
+				decode_delegreturn_maxsz + \
+				decode_getattr_maxsz)
 #define NFS4_enc_getacl_sz	(compound_encode_hdr_maxsz + \
 				encode_putfh_maxsz + \
 				encode_getattr_maxsz)
@@ -1983,14 +1985,20 @@ static int nfs4_xdr_enc_delegreturn(struct rpc_rqst *req, uint32_t *p, const str
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
-		.nops = 2,
+		.nops = 3,
 	};
 	int status;
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, &hdr);
-	if ((status = encode_putfh(&xdr, args->fhandle)) == 0)
-		status = encode_delegreturn(&xdr, args->stateid);
+	status = encode_putfh(&xdr, args->fhandle);
+	if (status != 0)
+		goto out;
+	status = encode_delegreturn(&xdr, args->stateid);
+	if (status != 0)
+		goto out;
+	status = encode_getfattr(&xdr, args->bitmask);
+out:
 	return status;
 }
 
@@ -4184,7 +4192,7 @@ static int nfs4_xdr_dec_setclientid_confirm(struct rpc_rqst *req, uint32_t *p, s
 /*
  * DELEGRETURN request
  */
-static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *dummy)
+static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_delegreturnres *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -4192,11 +4200,14 @@ static int nfs4_xdr_dec_delegreturn(struct rpc_rqst *rqstp, uint32_t *p, void *d
 
 	xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
-	if (status == 0) {
-		status = decode_putfh(&xdr);
-		if (status == 0)
-			status = decode_delegreturn(&xdr);
-	}
+	if (status != 0)
+		goto out;
+	status = decode_putfh(&xdr);
+	if (status != 0)
+		goto out;
+	status = decode_delegreturn(&xdr);
+	decode_getfattr(&xdr, res->fattr, res->server);
+out:
 	return status;
 }
 
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 9f422fd87673..6d6f69ec5675 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -220,6 +220,12 @@ struct nfs_lockt_res {
 struct nfs4_delegreturnargs {
 	const struct nfs_fh *fhandle;
 	const nfs4_stateid *stateid;
+	const u32 * bitmask;
+};
+
+struct nfs4_delegreturnres {
+	struct nfs_fattr * fattr;
+	const struct nfs_server *server;
 };
 
 /*
-- 
cgit v1.2.3


From a72b44222d222749d54b3e370d825094352e389f Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:41 +0100
Subject: NFSv4: Allow user to set the port used by the NFSv4 callback channel

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 Documentation/kernel-parameters.txt |  4 ++
 fs/nfs/Makefile                     |  1 +
 fs/nfs/callback.c                   |  3 +-
 fs/nfs/callback.h                   |  1 +
 fs/nfs/inode.c                      | 37 ++++++++++++++++++-
 fs/nfs/sysctl.c                     | 74 +++++++++++++++++++++++++++++++++++++
 include/linux/nfs_fs.h              | 11 ++++++
 7 files changed, 128 insertions(+), 3 deletions(-)
 create mode 100644 fs/nfs/sysctl.c

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 61a56b100c62..309c9cec6e7c 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -910,6 +910,10 @@ running once the system is up.
 	nfsroot=	[NFS] nfs root filesystem for disk-less boxes.
 			See Documentation/nfsroot.txt.
 
+	nfs.callback_tcpport=
+			[NFS] set the TCP port on which the NFSv4 callback
+			channel should listen.
+
 	nmi_watchdog=	[KNL,BUGS=IA-32] Debugging features for SMP kernels
 
 	no387		[BUGS=IA-32] Tells the kernel to use the 387 maths
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 8b3bb715d177..ec61fd56a1a9 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -13,4 +13,5 @@ nfs-$(CONFIG_NFS_V4)	+= nfs4proc.o nfs4xdr.o nfs4state.o nfs4renewd.o \
 			   delegation.o idmap.o \
 			   callback.o callback_xdr.o callback_proc.o
 nfs-$(CONFIG_NFS_DIRECTIO) += direct.o
+nfs-$(CONFIG_SYSCTL) += sysctl.o
 nfs-objs		:= $(nfs-y)
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 30cae3602867..fcd97406a778 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -34,6 +34,7 @@ static struct nfs_callback_data nfs_callback_info;
 static DECLARE_MUTEX(nfs_callback_sema);
 static struct svc_program nfs4_callback_program;
 
+unsigned int nfs_callback_set_tcpport;
 unsigned short nfs_callback_tcpport;
 
 /*
@@ -98,7 +99,7 @@ int nfs_callback_up(void)
 	if (!serv)
 		goto out_err;
 	/* FIXME: We don't want to register this socket with the portmapper */
-	ret = svc_makesock(serv, IPPROTO_TCP, 0);
+	ret = svc_makesock(serv, IPPROTO_TCP, nfs_callback_set_tcpport);
 	if (ret < 0)
 		goto out_destroy;
 	if (!list_empty(&serv->sv_permsocks)) {
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index a0db2d4f9415..b252e7fe53a5 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -65,6 +65,7 @@ extern unsigned nfs4_callback_recall(struct cb_recallargs *args, void *dummy);
 extern int nfs_callback_up(void);
 extern int nfs_callback_down(void);
 
+extern unsigned int nfs_callback_set_tcpport;
 extern unsigned short nfs_callback_tcpport;
 
 #endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 7270b1d73d30..648cb1aef3b1 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -40,6 +40,7 @@
 #include <asm/uaccess.h>
 
 #include "nfs4_fs.h"
+#include "callback.h"
 #include "delegation.h"
 
 #define NFSDBG_FACILITY		NFSDBG_VFS
@@ -2036,6 +2037,21 @@ static struct file_system_type nfs4_fs_type = {
 	.fs_flags	= FS_ODD_RENAME|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
 };
 
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+static int param_set_port(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	if (endp == val || *endp || num < nfs_set_port_min || num > nfs_set_port_max)
+		return -EINVAL;
+	*((int *)kp->arg) = num;
+	return 0;
+}
+
+module_param_call(callback_tcpport, param_set_port, param_get_int,
+		 &nfs_callback_set_tcpport, 0644);
+
 #define nfs4_init_once(nfsi) \
 	do { \
 		INIT_LIST_HEAD(&(nfsi)->open_states); \
@@ -2043,8 +2059,25 @@ static struct file_system_type nfs4_fs_type = {
 		nfsi->delegation_state = 0; \
 		init_rwsem(&nfsi->rwsem); \
 	} while(0)
-#define register_nfs4fs() register_filesystem(&nfs4_fs_type)
-#define unregister_nfs4fs() unregister_filesystem(&nfs4_fs_type)
+
+static inline int register_nfs4fs(void)
+{
+	int ret;
+
+	ret = nfs_register_sysctl();
+	if (ret != 0)
+		return ret;
+	ret = register_filesystem(&nfs4_fs_type);
+	if (ret != 0)
+		nfs_unregister_sysctl();
+	return ret;
+}
+
+static inline void unregister_nfs4fs(void)
+{
+	unregister_filesystem(&nfs4_fs_type);
+	nfs_unregister_sysctl();
+}
 #else
 #define nfs4_init_once(nfsi) \
 	do { } while (0)
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
new file mode 100644
index 000000000000..fdc64b59a4ee
--- /dev/null
+++ b/fs/nfs/sysctl.c
@@ -0,0 +1,74 @@
+/*
+ * linux/fs/nfs/sysctl.c
+ *
+ * Sysctl interface to NFS parameters
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/linkage.h>
+#include <linux/ctype.h>
+#include <linux/fs.h>
+#include <linux/sysctl.h>
+#include <linux/module.h>
+#include <linux/nfs4.h>
+
+#include "callback.h"
+
+static const int nfs_set_port_min = 0;
+static const int nfs_set_port_max = 65535;
+static struct ctl_table_header *nfs_callback_sysctl_table;
+/*
+ * Something that isn't CTL_ANY, CTL_NONE or a value that may clash.
+ * Use the same values as fs/lockd/svc.c
+ */
+#define CTL_UNNUMBERED -2
+
+static ctl_table nfs_cb_sysctls[] = {
+#ifdef CONFIG_NFS_V4
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "nfs_callback_tcpport",
+		.data = &nfs_callback_set_tcpport,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_minmax,
+		.extra1 = (int *)&nfs_set_port_min,
+		.extra2 = (int *)&nfs_set_port_max,
+	},
+#endif
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_cb_sysctl_dir[] = {
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "nfs",
+		.mode = 0555,
+		.child = nfs_cb_sysctls,
+	},
+	{ .ctl_name = 0 }
+};
+
+static ctl_table nfs_cb_sysctl_root[] = {
+	{
+		.ctl_name = CTL_FS,
+		.procname = "fs",
+		.mode = 0555,
+		.child = nfs_cb_sysctl_dir,
+	},
+	{ .ctl_name = 0 }
+};
+
+int nfs_register_sysctl(void)
+{
+	nfs_callback_sysctl_table = register_sysctl_table(nfs_cb_sysctl_root, 0);
+	if (nfs_callback_sysctl_table == NULL)
+		return -ENOMEM;
+	return 0;
+}
+
+void nfs_unregister_sysctl(void)
+{
+	unregister_sysctl_table(nfs_callback_sysctl_table);
+	nfs_callback_sysctl_table = NULL;
+}
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 408d82d3d97c..547d649b274e 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -391,6 +391,17 @@ extern int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fh, struct nfs_
  */
 extern struct inode_operations nfs_symlink_inode_operations;
 
+/*
+ * linux/fs/nfs/sysctl.c
+ */
+#ifdef CONFIG_SYSCTL
+extern int nfs_register_sysctl(void);
+extern void nfs_unregister_sysctl(void);
+#else
+#define nfs_register_sysctl() do { } while(0)
+#define nfs_unregister_sysctl() do { } while(0)
+#endif
+
 /*
  * linux/fs/nfs/unlink.c
  */
-- 
cgit v1.2.3


From fb459f45f7c7689714023d41b3dca999bb90a5d3 Mon Sep 17 00:00:00 2001
From: Adrian Bunk <bunk@stusta.de>
Date: Tue, 3 Jan 2006 09:55:41 +0100
Subject: SUNRPC: net/sunrpc/xdr.c: remove xdr_decode_string()

 This patch removes ths unused function xdr_decode_string().

Signed-off-by: Adrian Bunk <bunk@stusta.de>
Acked-by: Neil Brown <neilb@suse.de>
Acked-by: Charles Lever <Charles.Lever@netapp.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h |  1 -
 net/sunrpc/xdr.c           | 21 ---------------------
 2 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 5676794ee34f..84c35d42d250 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -91,7 +91,6 @@ struct xdr_buf {
 u32 *	xdr_encode_opaque_fixed(u32 *p, const void *ptr, unsigned int len);
 u32 *	xdr_encode_opaque(u32 *p, const void *ptr, unsigned int len);
 u32 *	xdr_encode_string(u32 *p, const char *s);
-u32 *	xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen);
 u32 *	xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen);
 u32 *	xdr_encode_netobj(u32 *p, const struct xdr_netobj *);
 u32 *	xdr_decode_netobj(u32 *p, struct xdr_netobj *);
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index aaf08cdd19f0..ca4bfa57e116 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -92,27 +92,6 @@ xdr_encode_string(u32 *p, const char *string)
 	return xdr_encode_array(p, string, strlen(string));
 }
 
-u32 *
-xdr_decode_string(u32 *p, char **sp, int *lenp, int maxlen)
-{
-	unsigned int	len;
-	char		*string;
-
-	if ((len = ntohl(*p++)) > maxlen)
-		return NULL;
-	if (lenp)
-		*lenp = len;
-	if ((len % 4) != 0) {
-		string = (char *) p;
-	} else {
-		string = (char *) (p - 1);
-		memmove(string, p, len);
-	}
-	string[len] = '\0';
-	*sp = string;
-	return p + XDR_QUADLEN(len);
-}
-
 u32 *
 xdr_decode_string_inplace(u32 *p, char **sp, int *lenp, int maxlen)
 {
-- 
cgit v1.2.3


From 64a318ee2af9000df482d7a125c3b3e1f1007404 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 3 Jan 2006 09:55:46 +0100
Subject: NLM: Further cancel fixes

 If the server receives an NLM cancel call and finds no waiting lock to
 cancel, then chances are the lock has already been applied, and the client
 just hadn't yet processed the NLM granted callback before it sent the
 cancel.

 The Open Group text, for example, perimts a server to return either success
 (LCK_GRANTED) or failure (LCK_DENIED) in this case.  But returning an error
 seems more helpful; the client may be able to use it to recognize that a
 race has occurred and to recover from the race.

 So, modify the relevant functions to return an error in this case.

 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/svclock.c | 15 ++++++++++-----
 fs/locks.c         |  7 ++++++-
 include/linux/fs.h |  2 +-
 3 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c
index b56d439bad82..9cfced65d4a2 100644
--- a/fs/lockd/svclock.c
+++ b/fs/lockd/svclock.c
@@ -227,25 +227,27 @@ failed:
  * It is the caller's responsibility to check whether the file
  * can be closed hereafter.
  */
-static void
+static int
 nlmsvc_delete_block(struct nlm_block *block, int unlock)
 {
 	struct file_lock	*fl = &block->b_call.a_args.lock.fl;
 	struct nlm_file		*file = block->b_file;
 	struct nlm_block	**bp;
+	int status = 0;
 
 	dprintk("lockd: deleting block %p...\n", block);
 
 	/* Remove block from list */
 	nlmsvc_remove_block(block);
-	posix_unblock_lock(file->f_file, fl);
+	if (unlock)
+		status = posix_unblock_lock(file->f_file, fl);
 
 	/* If the block is in the middle of a GRANT callback,
 	 * don't kill it yet. */
 	if (block->b_incall) {
 		nlmsvc_insert_block(block, NLM_NEVER);
 		block->b_done = 1;
-		return;
+		return status;
 	}
 
 	/* Remove block from file's list of blocks */
@@ -260,6 +262,7 @@ nlmsvc_delete_block(struct nlm_block *block, int unlock)
 		nlm_release_host(block->b_host);
 	nlmclnt_freegrantargs(&block->b_call);
 	kfree(block);
+	return status;
 }
 
 /*
@@ -270,6 +273,7 @@ int
 nlmsvc_traverse_blocks(struct nlm_host *host, struct nlm_file *file, int action)
 {
 	struct nlm_block	*block, *next;
+	/* XXX: Will everything get cleaned up if we don't unlock here? */
 
 	down(&file->f_sema);
 	for (block = file->f_blocks; block; block = next) {
@@ -439,6 +443,7 @@ u32
 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 {
 	struct nlm_block	*block;
+	int status = 0;
 
 	dprintk("lockd: nlmsvc_cancel(%s/%ld, pi=%d, %Ld-%Ld)\n",
 				file->f_file->f_dentry->d_inode->i_sb->s_id,
@@ -449,9 +454,9 @@ nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock)
 
 	down(&file->f_sema);
 	if ((block = nlmsvc_lookup_block(file, lock, 1)) != NULL)
-		nlmsvc_delete_block(block, 1);
+		status = nlmsvc_delete_block(block, 1);
 	up(&file->f_sema);
-	return nlm_granted;
+	return status ? nlm_lck_denied : nlm_granted;
 }
 
 /*
diff --git a/fs/locks.c b/fs/locks.c
index 75650d52fe60..fb32d6218e21 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1958,13 +1958,18 @@ EXPORT_SYMBOL(posix_block_lock);
  *
  *	lockd needs to block waiting for locks.
  */
-void
+int
 posix_unblock_lock(struct file *filp, struct file_lock *waiter)
 {
+	int status = 0;
+
 	lock_kernel();
 	if (waiter->fl_next)
 		__locks_delete_block(waiter);
+	else
+		status = -ENOENT;
 	unlock_kernel();
+	return status;
 }
 
 EXPORT_SYMBOL(posix_unblock_lock);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 115e72be25d0..2c9c48d65630 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -760,7 +760,7 @@ extern struct file_lock *posix_test_lock(struct file *, struct file_lock *);
 extern int posix_lock_file(struct file *, struct file_lock *);
 extern int posix_lock_file_wait(struct file *, struct file_lock *);
 extern void posix_block_lock(struct file_lock *, struct file_lock *);
-extern void posix_unblock_lock(struct file *, struct file_lock *);
+extern int posix_unblock_lock(struct file *, struct file_lock *);
 extern int posix_locks_deadlock(struct file_lock *, struct file_lock *);
 extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl);
 extern int __break_lease(struct inode *inode, unsigned int flags);
-- 
cgit v1.2.3


From 02107148349f31eee7c0fb06fd7a880df73dbd20 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Tue, 3 Jan 2006 09:55:49 +0100
Subject: SUNRPC: switchable buffer allocation

 Add RPC client transport switch support for replacing buffer management
 on a per-transport basis.

 In the current IPv4 socket transport implementation, RPC buffers are
 allocated as needed for each RPC message that is sent.  Some transport
 implementations may choose to use pre-allocated buffers for encoding,
 sending, receiving, and unmarshalling RPC messages, however.  For
 transports capable of direct data placement, the buffers can be carved
 out of a pre-registered area of memory rather than from a slab cache.

 Test-plan:
 Millions of fsx operations.  Performance characterization with "sio" and
 "iozone".  Use oprofile and other tools to look for significant regression
 in CPU utilization.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  3 +--
 include/linux/sunrpc/xprt.h  | 10 ++++-----
 net/sunrpc/clnt.c            | 14 +++++++------
 net/sunrpc/sched.c           | 50 ++++++++++++++++++++++++--------------------
 net/sunrpc/xprt.c            |  3 +++
 net/sunrpc/xprtsock.c        |  5 +++++
 6 files changed, 49 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 94b0afa4ab05..8b25629accd8 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -52,8 +52,6 @@ struct rpc_task {
 	 * RPC call state
 	 */
 	struct rpc_message	tk_msg;		/* RPC call info */
-	__u32 *			tk_buffer;	/* XDR buffer */
-	size_t			tk_bufsize;
 	__u8			tk_garb_retry;
 	__u8			tk_cred_retry;
 
@@ -268,6 +266,7 @@ struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
 void		rpc_wake_up_status(struct rpc_wait_queue *, int);
 void		rpc_delay(struct rpc_task *, unsigned long);
 void *		rpc_malloc(struct rpc_task *, size_t);
+void		rpc_free(struct rpc_task *);
 int		rpciod_up(void);
 void		rpciod_down(void);
 void		rpciod_wake_up(void);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3b8b6e823c70..7885b9621ce3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -79,21 +79,19 @@ struct rpc_rqst {
 	void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
 	struct list_head	rq_list;
 
+	__u32 *			rq_buffer;	/* XDR encode buffer */
+	size_t			rq_bufsize;
+
 	struct xdr_buf		rq_private_buf;		/* The receive buffer
 							 * used in the softirq.
 							 */
 	unsigned long		rq_majortimeo;	/* major timeout alarm */
 	unsigned long		rq_timeout;	/* Current timeout value */
 	unsigned int		rq_retries;	/* # of retries */
-	/*
-	 * For authentication (e.g. auth_des)
-	 */
-	u32			rq_creddata[2];
 	
 	/*
 	 * Partial send handling
 	 */
-	
 	u32			rq_bytes_sent;	/* Bytes we have sent */
 
 	unsigned long		rq_xtime;	/* when transmitted */
@@ -107,6 +105,8 @@ struct rpc_xprt_ops {
 	int		(*reserve_xprt)(struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
 	void		(*connect)(struct rpc_task *task);
+	void *		(*buf_alloc)(struct rpc_task *task, size_t size);
+	void		(*buf_free)(struct rpc_task *task);
 	int		(*send_request)(struct rpc_task *task);
 	void		(*set_retrans_timeout)(struct rpc_task *task);
 	void		(*timer)(struct rpc_task *task);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b23c0d328c9c..25cba94c5683 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -644,24 +644,26 @@ call_reserveresult(struct rpc_task *task)
 
 /*
  * 2.	Allocate the buffer. For details, see sched.c:rpc_malloc.
- *	(Note: buffer memory is freed in rpc_task_release).
+ *	(Note: buffer memory is freed in xprt_release).
  */
 static void
 call_allocate(struct rpc_task *task)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
+	struct rpc_xprt *xprt = task->tk_xprt;
 	unsigned int	bufsiz;
 
 	dprintk("RPC: %4d call_allocate (status %d)\n", 
 				task->tk_pid, task->tk_status);
 	task->tk_action = call_bind;
-	if (task->tk_buffer)
+	if (req->rq_buffer)
 		return;
 
 	/* FIXME: compute buffer requirements more exactly using
 	 * auth->au_wslack */
 	bufsiz = task->tk_msg.rpc_proc->p_bufsiz + RPC_SLACK_SPACE;
 
-	if (rpc_malloc(task, bufsiz << 1) != NULL)
+	if (xprt->ops->buf_alloc(task, bufsiz << 1) != NULL)
 		return;
 	printk(KERN_INFO "RPC: buffer allocation failed for task %p\n", task); 
 
@@ -704,14 +706,14 @@ call_encode(struct rpc_task *task)
 				task->tk_pid, task->tk_status);
 
 	/* Default buffer setup */
-	bufsiz = task->tk_bufsize >> 1;
-	sndbuf->head[0].iov_base = (void *)task->tk_buffer;
+	bufsiz = req->rq_bufsize >> 1;
+	sndbuf->head[0].iov_base = (void *)req->rq_buffer;
 	sndbuf->head[0].iov_len  = bufsiz;
 	sndbuf->tail[0].iov_len  = 0;
 	sndbuf->page_len	 = 0;
 	sndbuf->len		 = 0;
 	sndbuf->buflen		 = bufsiz;
-	rcvbuf->head[0].iov_base = (void *)((char *)task->tk_buffer + bufsiz);
+	rcvbuf->head[0].iov_base = (void *)((char *)req->rq_buffer + bufsiz);
 	rcvbuf->head[0].iov_len  = bufsiz;
 	rcvbuf->tail[0].iov_len  = 0;
 	rcvbuf->page_len	 = 0;
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 48510e3ffa02..7415406aa1ae 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -41,8 +41,6 @@ static mempool_t	*rpc_buffer_mempool __read_mostly;
 
 static void			__rpc_default_timer(struct rpc_task *task);
 static void			rpciod_killall(void);
-static void			rpc_free(struct rpc_task *task);
-
 static void			rpc_async_schedule(void *);
 
 /*
@@ -599,7 +597,6 @@ void rpc_exit_task(struct rpc_task *task)
 			WARN_ON(RPC_ASSASSINATED(task));
 			/* Always release the RPC slot and buffer memory */
 			xprt_release(task);
-			rpc_free(task);
 		}
 	}
 }
@@ -724,17 +721,19 @@ static void rpc_async_schedule(void *arg)
 	__rpc_execute((struct rpc_task *)arg);
 }
 
-/*
- * Allocate memory for RPC purposes.
+/**
+ * rpc_malloc - allocate an RPC buffer
+ * @task: RPC task that will use this buffer
+ * @size: requested byte size
  *
  * We try to ensure that some NFS reads and writes can always proceed
  * by using a mempool when allocating 'small' buffers.
  * In order to avoid memory starvation triggering more writebacks of
  * NFS requests, we use GFP_NOFS rather than GFP_KERNEL.
  */
-void *
-rpc_malloc(struct rpc_task *task, size_t size)
+void * rpc_malloc(struct rpc_task *task, size_t size)
 {
+	struct rpc_rqst *req = task->tk_rqstp;
 	gfp_t	gfp;
 
 	if (task->tk_flags & RPC_TASK_SWAPPER)
@@ -743,27 +742,33 @@ rpc_malloc(struct rpc_task *task, size_t size)
 		gfp = GFP_NOFS;
 
 	if (size > RPC_BUFFER_MAXSIZE) {
-		task->tk_buffer =  kmalloc(size, gfp);
-		if (task->tk_buffer)
-			task->tk_bufsize = size;
+		req->rq_buffer = kmalloc(size, gfp);
+		if (req->rq_buffer)
+			req->rq_bufsize = size;
 	} else {
-		task->tk_buffer =  mempool_alloc(rpc_buffer_mempool, gfp);
-		if (task->tk_buffer)
-			task->tk_bufsize = RPC_BUFFER_MAXSIZE;
+		req->rq_buffer = mempool_alloc(rpc_buffer_mempool, gfp);
+		if (req->rq_buffer)
+			req->rq_bufsize = RPC_BUFFER_MAXSIZE;
 	}
-	return task->tk_buffer;
+	return req->rq_buffer;
 }
 
-static void
-rpc_free(struct rpc_task *task)
+/**
+ * rpc_free - free buffer allocated via rpc_malloc
+ * @task: RPC task with a buffer to be freed
+ *
+ */
+void rpc_free(struct rpc_task *task)
 {
-	if (task->tk_buffer) {
-		if (task->tk_bufsize == RPC_BUFFER_MAXSIZE)
-			mempool_free(task->tk_buffer, rpc_buffer_mempool);
+	struct rpc_rqst *req = task->tk_rqstp;
+
+	if (req->rq_buffer) {
+		if (req->rq_bufsize == RPC_BUFFER_MAXSIZE)
+			mempool_free(req->rq_buffer, rpc_buffer_mempool);
 		else
-			kfree(task->tk_buffer);
-		task->tk_buffer = NULL;
-		task->tk_bufsize = 0;
+			kfree(req->rq_buffer);
+		req->rq_buffer = NULL;
+		req->rq_bufsize = 0;
 	}
 }
 
@@ -887,7 +892,6 @@ void rpc_release_task(struct rpc_task *task)
 		xprt_release(task);
 	if (task->tk_msg.rpc_cred)
 		rpcauth_unbindcred(task);
-	rpc_free(task);
 	if (task->tk_client) {
 		rpc_release_client(task->tk_client);
 		task->tk_client = NULL;
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6dda3860351f..069a6cbd49ea 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -838,6 +838,8 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
 	req->rq_timeout = xprt->timeout.to_initval;
 	req->rq_task	= task;
 	req->rq_xprt    = xprt;
+	req->rq_buffer  = NULL;
+	req->rq_bufsize = 0;
 	req->rq_xid     = xprt_alloc_xid(xprt);
 	req->rq_release_snd_buf = NULL;
 	dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
@@ -867,6 +869,7 @@ void xprt_release(struct rpc_task *task)
 		mod_timer(&xprt->timer,
 				xprt->last_used + xprt->idle_timeout);
 	spin_unlock_bh(&xprt->transport_lock);
+	xprt->ops->buf_free(task);
 	task->tk_rqstp = NULL;
 	if (req->rq_release_snd_buf)
 		req->rq_release_snd_buf(req);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 77e8800d4127..51f07c9a751b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -28,6 +28,7 @@
 #include <linux/udp.h>
 #include <linux/tcp.h>
 #include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/sched.h>
 #include <linux/file.h>
 
 #include <net/sock.h>
@@ -1161,6 +1162,8 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
 	.connect		= xs_connect,
+	.buf_alloc		= rpc_malloc,
+	.buf_free		= rpc_free,
 	.send_request		= xs_udp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_rtt,
 	.timer			= xs_udp_timer,
@@ -1173,6 +1176,8 @@ static struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
 	.connect		= xs_connect,
+	.buf_alloc		= rpc_malloc,
+	.buf_free		= rpc_free,
 	.send_request		= xs_tcp_send_request,
 	.set_retrans_timeout	= xprt_set_retrans_timeout_def,
 	.close			= xs_close,
-- 
cgit v1.2.3


From 35f5a422ce1af836007f811b613c440d0e348e06 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Tue, 3 Jan 2006 09:55:50 +0100
Subject: SUNRPC: new interface to force an RPC rebind

 We'd like to hide fields in rpc_xprt and rpc_clnt from upper layer protocols.
 Start by creating an API to force RPC rebind, replacing logic that simply
 sets cl_port to zero.

 Test-plan:
 Destructive testing (unplugging the network temporarily).  Connectathon
 with UDP and TCP.  NFSv2/3 and NFSv4 mounting should be carefully checked.
 Probably need to rig a server where certain services aren't running, or
 that returns an error for some typical operation.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/host.c             |  4 ++--
 include/linux/sunrpc/clnt.h |  1 +
 net/sunrpc/clnt.c           | 21 +++++++++++++++------
 3 files changed, 18 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index c4c8601096e0..82f7a0b1d8ae 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -177,7 +177,7 @@ nlm_bind_host(struct nlm_host *host)
 	if ((clnt = host->h_rpcclnt) != NULL) {
 		xprt = clnt->cl_xprt;
 		if (time_after_eq(jiffies, host->h_nextrebind)) {
-			clnt->cl_port = 0;
+			rpc_force_rebind(clnt);
 			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 			dprintk("lockd: next rebind in %ld jiffies\n",
 					host->h_nextrebind - jiffies);
@@ -217,7 +217,7 @@ nlm_rebind_host(struct nlm_host *host)
 {
 	dprintk("lockd: rebind host %s\n", host->h_name);
 	if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
-		host->h_rpcclnt->cl_port = 0;
+		rpc_force_rebind(host->h_rpcclnt);
 		host->h_nextrebind = jiffies + NLM_HOST_REBIND;
 	}
 }
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index b0ab959eca65..3d605765f84b 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -135,6 +135,7 @@ void		rpc_clnt_sigmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_clnt_sigunmask(struct rpc_clnt *clnt, sigset_t *oldset);
 void		rpc_setbufsize(struct rpc_clnt *, unsigned int, unsigned int);
 size_t		rpc_max_payload(struct rpc_clnt *);
+void		rpc_force_rebind(struct rpc_clnt *);
 int		rpc_ping(struct rpc_clnt *clnt, int flags);
 
 static __inline__
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 25cba94c5683..2789d3083fe7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -538,6 +538,18 @@ size_t rpc_max_payload(struct rpc_clnt *clnt)
 }
 EXPORT_SYMBOL(rpc_max_payload);
 
+/**
+ * rpc_force_rebind - force transport to check that remote port is unchanged
+ * @clnt: client to rebind
+ *
+ */
+void rpc_force_rebind(struct rpc_clnt *clnt)
+{
+	if (clnt->cl_autobind)
+		clnt->cl_port = 0;
+}
+EXPORT_SYMBOL(rpc_force_rebind);
+
 /*
  * Restart an (async) RPC call. Usually called from within the
  * exit handler.
@@ -853,8 +865,7 @@ call_connect_status(struct rpc_task *task)
 	}
 
 	/* Something failed: remote service port may have changed */
-	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+	rpc_force_rebind(clnt);
 
 	switch (status) {
 	case -ENOTCONN:
@@ -935,8 +946,7 @@ call_status(struct rpc_task *task)
 		break;
 	case -ECONNREFUSED:
 	case -ENOTCONN:
-		if (clnt->cl_autobind)
-			clnt->cl_port = 0;
+		rpc_force_rebind(clnt);
 		task->tk_action = call_bind;
 		break;
 	case -EAGAIN:
@@ -995,8 +1005,7 @@ call_timeout(struct rpc_task *task)
 		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_protname, clnt->cl_server);
 	}
-	if (clnt->cl_autobind)
-		clnt->cl_port = 0;
+	rpc_force_rebind(clnt);
 
 retry:
 	clnt->cl_stats->rpcretrans++;
-- 
cgit v1.2.3


From 922004120b10dcb0ce04b55014168e8a7a8c1a0e Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Tue, 3 Jan 2006 09:55:51 +0100
Subject: SUNRPC: transport switch API for setting port number

 At some point, transport endpoint addresses will no longer be IPv4.  To hide
 the structure of the rpc_xprt's address field from ULPs and port mappers,
 add an API for setting the port number during an RPC bind operation.

 Test-plan:
 Destructive testing (unplugging the network temporarily).  Connectathon
 with UDP and TCP.  NFSv2/3 and NFSv4 mounting should be carefully checked.
 Probably need to rig a server where certain services aren't running, or
 that returns an error for some typical operation.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/pmap_clnt.c      |  8 +++++---
 net/sunrpc/xprtsock.c       | 14 ++++++++++++++
 3 files changed, 20 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 7885b9621ce3..dd860128ceda 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -104,6 +104,7 @@ struct rpc_xprt_ops {
 	void		(*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
 	int		(*reserve_xprt)(struct rpc_task *task);
 	void		(*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
+	void		(*set_port)(struct rpc_xprt *xprt, unsigned short port);
 	void		(*connect)(struct rpc_task *task);
 	void *		(*buf_alloc)(struct rpc_task *task, size_t size);
 	void		(*buf_free)(struct rpc_task *task);
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index cad4568fbbe2..0935adb91b3c 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -131,10 +131,13 @@ static void
 pmap_getport_done(struct rpc_task *task)
 {
 	struct rpc_clnt	*clnt = task->tk_client;
+	struct rpc_xprt *xprt = task->tk_xprt;
 	struct rpc_portmap *map = clnt->cl_pmap;
 
 	dprintk("RPC: %4d pmap_getport_done(status %d, port %d)\n",
 			task->tk_pid, task->tk_status, clnt->cl_port);
+
+	xprt->ops->set_port(xprt, 0);
 	if (task->tk_status < 0) {
 		/* Make the calling task exit with an error */
 		task->tk_action = rpc_exit_task;
@@ -142,9 +145,8 @@ pmap_getport_done(struct rpc_task *task)
 		/* Program not registered */
 		rpc_exit(task, -EACCES);
 	} else {
-		/* byte-swap port number first */
+		xprt->ops->set_port(xprt, clnt->cl_port);
 		clnt->cl_port = htons(clnt->cl_port);
-		clnt->cl_xprt->addr.sin_port = clnt->cl_port;
 	}
 	spin_lock(&pmap_lock);
 	map->pm_binding = 0;
@@ -205,7 +207,7 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 	xprt = xprt_create_proto(proto, srvaddr, NULL);
 	if (IS_ERR(xprt))
 		return (struct rpc_clnt *)xprt;
-	xprt->addr.sin_port = htons(RPC_PMAP_PORT);
+	xprt->ops->set_port(xprt, RPC_PMAP_PORT);
 	if (!privileged)
 		xprt->resvport = 0;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 51f07c9a751b..3e8893001479 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -921,6 +921,18 @@ static void xs_udp_timer(struct rpc_task *task)
 	xprt_adjust_cwnd(task, -ETIMEDOUT);
 }
 
+/**
+ * xs_set_port - reset the port number in the remote endpoint address
+ * @xprt: generic transport
+ * @port: new port number
+ *
+ */
+static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
+{
+	dprintk("RPC:      setting port for xprt %p to %u\n", xprt, port);
+	xprt->addr.sin_port = htons(port);
+}
+
 static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
 {
 	struct sockaddr_in myaddr = {
@@ -1161,6 +1173,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 	.set_buffer_size	= xs_udp_set_buffer_size,
 	.reserve_xprt		= xprt_reserve_xprt_cong,
 	.release_xprt		= xprt_release_xprt_cong,
+	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
@@ -1175,6 +1188,7 @@ static struct rpc_xprt_ops xs_udp_ops = {
 static struct rpc_xprt_ops xs_tcp_ops = {
 	.reserve_xprt		= xprt_reserve_xprt,
 	.release_xprt		= xprt_release_xprt,
+	.set_port		= xs_set_port,
 	.connect		= xs_connect,
 	.buf_alloc		= rpc_malloc,
 	.buf_free		= rpc_free,
-- 
cgit v1.2.3


From f518e35aec984036903c1003e867f833747a9d79 Mon Sep 17 00:00:00 2001
From: Chuck Lever <cel@netapp.com>
Date: Tue, 3 Jan 2006 09:55:52 +0100
Subject: SUNRPC: get rid of cl_chatty

 Clean up: Every ULP that uses the in-kernel RPC client, except the NLM
 client, sets cl_chatty.  There's no reason why NLM shouldn't set it, so
 just get rid of cl_chatty and always be verbose.

 Test-plan:
 Compile with CONFIG_NFS enabled.

 Signed-off-by: Chuck Lever <cel@netapp.com>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/lockd/clntproc.c         |  3 +--
 fs/lockd/mon.c              |  1 -
 fs/nfs/inode.c              |  2 --
 fs/nfs/mount_clnt.c         |  1 -
 fs/nfsd/nfs4callback.c      |  1 -
 include/linux/sunrpc/clnt.h |  1 -
 net/sunrpc/clnt.c           | 10 ++++------
 net/sunrpc/pmap_clnt.c      |  1 -
 8 files changed, 5 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 816333cd377b..145524039577 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -222,8 +222,7 @@ nlmclnt_proc(struct inode *inode, int cmd, struct file_lock *fl)
 			goto done;
 		}
 		clnt->cl_softrtry = nfssrv->client->cl_softrtry;
-		clnt->cl_intr     = nfssrv->client->cl_intr;
-		clnt->cl_chatty   = nfssrv->client->cl_chatty;
+		clnt->cl_intr = nfssrv->client->cl_intr;
 	}
 
 	/* Keep the old signal mask */
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 2d144abe84ad..0edc03e67966 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -123,7 +123,6 @@ nsm_create(void)
 	if (IS_ERR(clnt))
 		goto out_err;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty   = 1;
 	clnt->cl_oneshot  = 1;
 	return clnt;
 
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 648cb1aef3b1..4625479a6b62 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -413,7 +413,6 @@ nfs_create_client(struct nfs_server *server, const struct nfs_mount_data *data)
 
 	clnt->cl_intr     = 1;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty   = 1;
 
 	return clnt;
 
@@ -1838,7 +1837,6 @@ static int nfs4_fill_super(struct super_block *sb, struct nfs4_mount_data *data,
 		}
 		clnt->cl_intr     = 1;
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clp->cl_rpcclient = clnt;
 		memcpy(clp->cl_ipaddr, server->ip_addr, sizeof(clp->cl_ipaddr));
 		nfs_idmap_new(clp);
diff --git a/fs/nfs/mount_clnt.c b/fs/nfs/mount_clnt.c
index 0e82617f2de0..db99b8f678f8 100644
--- a/fs/nfs/mount_clnt.c
+++ b/fs/nfs/mount_clnt.c
@@ -82,7 +82,6 @@ mnt_create(char *hostname, struct sockaddr_in *srvaddr, int version,
 				RPC_AUTH_UNIX);
 	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 		clnt->cl_intr = 1;
 	}
diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c
index cf92008f219a..d828662d737d 100644
--- a/fs/nfsd/nfs4callback.c
+++ b/fs/nfsd/nfs4callback.c
@@ -431,7 +431,6 @@ nfsd4_probe_callback(struct nfs4_client *clp)
 	}
 	clnt->cl_intr = 0;
 	clnt->cl_softrtry = 1;
-	clnt->cl_chatty = 1;
 
 	/* Kick rpciod, put the call on the wire. */
 
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 3d605765f84b..f147e6b84332 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -49,7 +49,6 @@ struct rpc_clnt {
 
 	unsigned int		cl_softrtry : 1,/* soft timeouts */
 				cl_intr     : 1,/* interruptible */
-				cl_chatty   : 1,/* be verbose */
 				cl_autobind : 1,/* use getport() */
 				cl_oneshot  : 1,/* dispose after use */
 				cl_dead     : 1;/* abandoned */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2789d3083fe7..5530ac8c6df9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -957,8 +957,7 @@ call_status(struct rpc_task *task)
 		rpc_exit(task, status);
 		break;
 	default:
-		if (clnt->cl_chatty)
-			printk("%s: RPC call returned error %d\n",
+		printk("%s: RPC call returned error %d\n",
 			       clnt->cl_protname, -status);
 		rpc_exit(task, status);
 		break;
@@ -993,14 +992,13 @@ call_timeout(struct rpc_task *task)
 
 	dprintk("RPC: %4d call_timeout (major)\n", task->tk_pid);
 	if (RPC_IS_SOFT(task)) {
-		if (clnt->cl_chatty)
-			printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
+		printk(KERN_NOTICE "%s: server %s not responding, timed out\n",
 				clnt->cl_protname, clnt->cl_server);
 		rpc_exit(task, -EIO);
 		return;
 	}
 
-	if (clnt->cl_chatty && !(task->tk_flags & RPC_CALL_MAJORSEEN)) {
+	if (!(task->tk_flags & RPC_CALL_MAJORSEEN)) {
 		task->tk_flags |= RPC_CALL_MAJORSEEN;
 		printk(KERN_NOTICE "%s: server %s not responding, still trying\n",
 			clnt->cl_protname, clnt->cl_server);
@@ -1027,7 +1025,7 @@ call_decode(struct rpc_task *task)
 	dprintk("RPC: %4d call_decode (status %d)\n", 
 				task->tk_pid, task->tk_status);
 
-	if (clnt->cl_chatty && (task->tk_flags & RPC_CALL_MAJORSEEN)) {
+	if (task->tk_flags & RPC_CALL_MAJORSEEN) {
 		printk(KERN_NOTICE "%s: server %s OK\n",
 			clnt->cl_protname, clnt->cl_server);
 		task->tk_flags &= ~RPC_CALL_MAJORSEEN;
diff --git a/net/sunrpc/pmap_clnt.c b/net/sunrpc/pmap_clnt.c
index 0935adb91b3c..8139ce68e915 100644
--- a/net/sunrpc/pmap_clnt.c
+++ b/net/sunrpc/pmap_clnt.c
@@ -217,7 +217,6 @@ pmap_create(char *hostname, struct sockaddr_in *srvaddr, int proto, int privileg
 				RPC_AUTH_UNIX);
 	if (!IS_ERR(clnt)) {
 		clnt->cl_softrtry = 1;
-		clnt->cl_chatty   = 1;
 		clnt->cl_oneshot  = 1;
 	}
 	return clnt;
-- 
cgit v1.2.3


From 632e3bdc5006334cea894d078660b691685e1075 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:55 +0100
Subject: SUNRPC: Ensure client closes the socket when server initiates a close

 If the server decides to close the RPC socket, we currently don't actually
 respond until either another RPC call is scheduled, or until xprt_autoclose()
 gets called by the socket expiry timer (which may be up to 5 minutes
 later).

 This patch ensures that xprt_autoclose() is called much sooner if the
 server closes the socket.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 +
 net/sunrpc/xprt.c           | 33 ++++++++++++++++-----------------
 net/sunrpc/xprtsock.c       | 12 ++++++++++--
 3 files changed, 27 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index dd860128ceda..6ef99b14ff09 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -254,6 +254,7 @@ int			xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
 #define XPRT_LOCKED		(0)
 #define XPRT_CONNECTED		(1)
 #define XPRT_CONNECTING		(2)
+#define XPRT_CLOSE_WAIT		(3)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 069a6cbd49ea..8bc0d5acf0da 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -119,6 +119,17 @@ out_sleep:
 	return 0;
 }
 
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+	xprt->snd_task = NULL;
+	if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) {
+		smp_mb__before_clear_bit();
+		clear_bit(XPRT_LOCKED, &xprt->state);
+		smp_mb__after_clear_bit();
+	} else
+		schedule_work(&xprt->task_cleanup);
+}
+
 /*
  * xprt_reserve_xprt_cong - serialize write access to transports
  * @task: task that is requesting access to the transport
@@ -145,9 +156,7 @@ int xprt_reserve_xprt_cong(struct rpc_task *task)
 		}
 		return 1;
 	}
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 out_sleep:
 	dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt);
 	task->tk_timeout = 0;
@@ -193,9 +202,7 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
 	return;
 
 out_unlock:
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 }
 
 static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
@@ -222,9 +229,7 @@ static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
 		return;
 	}
 out_unlock:
-	smp_mb__before_clear_bit();
-	clear_bit(XPRT_LOCKED, &xprt->state);
-	smp_mb__after_clear_bit();
+	xprt_clear_locked(xprt);
 }
 
 /**
@@ -237,10 +242,7 @@ out_unlock:
 void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt->snd_task = NULL;
-		smp_mb__before_clear_bit();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_clear_bit();
+		xprt_clear_locked(xprt);
 		__xprt_lock_write_next(xprt);
 	}
 }
@@ -256,10 +258,7 @@ void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
 void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
 {
 	if (xprt->snd_task == task) {
-		xprt->snd_task = NULL;
-		smp_mb__before_clear_bit();
-		clear_bit(XPRT_LOCKED, &xprt->state);
-		smp_mb__after_clear_bit();
+		xprt_clear_locked(xprt);
 		__xprt_lock_write_next_cong(xprt);
 	}
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 3e8893001479..c458f8d1d6d1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -425,7 +425,7 @@ static void xs_close(struct rpc_xprt *xprt)
 	struct sock *sk = xprt->inet;
 
 	if (!sk)
-		return;
+		goto clear_close_wait;
 
 	dprintk("RPC:      xs_close xprt %p\n", xprt);
 
@@ -442,6 +442,10 @@ static void xs_close(struct rpc_xprt *xprt)
 	sk->sk_no_check = 0;
 
 	sock_release(sock);
+clear_close_wait:
+	smp_mb__before_clear_bit();
+	clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
+	smp_mb__after_clear_bit();
 }
 
 /**
@@ -801,9 +805,13 @@ static void xs_tcp_state_change(struct sock *sk)
 	case TCP_SYN_SENT:
 	case TCP_SYN_RECV:
 		break;
+	case TCP_CLOSE_WAIT:
+		/* Try to schedule an autoclose RPC calls */
+		set_bit(XPRT_CLOSE_WAIT, &xprt->state);
+		if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
+			schedule_work(&xprt->task_cleanup);
 	default:
 		xprt_disconnect(xprt);
-		break;
 	}
  out:
 	read_unlock(&sk->sk_callback_lock);
-- 
cgit v1.2.3


From 58df095b732529ade8f4051b41d7c29731afecd6 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Tue, 3 Jan 2006 09:55:57 +0100
Subject: NFSv4: Allow entries in the idmap cache to expire

 If someone changes the uid/gid mapping in userland, then we do eventually
 want those changes to be propagated to the kernel. Currently the kernel
 assumes that it may cache entries forever.

 Add an expiration time + garbage collector for idmap entries.

 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 Documentation/kernel-parameters.txt |  4 ++++
 fs/nfs/idmap.c                      |  9 +++++++++
 fs/nfs/inode.c                      | 14 ++++++++++++++
 fs/nfs/sysctl.c                     | 10 ++++++++++
 include/linux/nfs_idmap.h           |  2 ++
 5 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 309c9cec6e7c..a482fde09bbb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -914,6 +914,10 @@ running once the system is up.
 			[NFS] set the TCP port on which the NFSv4 callback
 			channel should listen.
 
+	nfs.idmap_cache_timeout=
+			[NFS] set the maximum lifetime for idmapper cache
+			entries.
+
 	nmi_watchdog=	[KNL,BUGS=IA-32] Debugging features for SMP kernels
 
 	no387		[BUGS=IA-32] Tells the kernel to use the 387 maths
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index ffb8df91dc34..821edd30333b 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -54,7 +54,11 @@
 
 #define IDMAP_HASH_SZ          128
 
+/* Default cache timeout is 10 minutes */
+unsigned int nfs_idmap_cache_timeout = 600 * HZ;
+
 struct idmap_hashent {
+	unsigned long ih_expires;
 	__u32 ih_id;
 	int ih_namelen;
 	char ih_name[IDMAP_NAMESZ];
@@ -149,6 +153,8 @@ idmap_lookup_name(struct idmap_hashtable *h, const char *name, size_t len)
 
 	if (he->ih_namelen != len || memcmp(he->ih_name, name, len) != 0)
 		return NULL;
+	if (time_after(jiffies, he->ih_expires))
+		return NULL;
 	return he;
 }
 
@@ -164,6 +170,8 @@ idmap_lookup_id(struct idmap_hashtable *h, __u32 id)
 	struct idmap_hashent *he = idmap_id_hash(h, id);
 	if (he->ih_id != id || he->ih_namelen == 0)
 		return NULL;
+	if (time_after(jiffies, he->ih_expires))
+		return NULL;
 	return he;
 }
 
@@ -192,6 +200,7 @@ idmap_update_entry(struct idmap_hashent *he, const char *name,
 	memcpy(he->ih_name, name, namelen);
 	he->ih_name[namelen] = '\0';
 	he->ih_namelen = namelen;
+	he->ih_expires = jiffies + nfs_idmap_cache_timeout;
 }
 
 /*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 4625479a6b62..e7bd0d92600f 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -2050,6 +2050,20 @@ static int param_set_port(const char *val, struct kernel_param *kp)
 module_param_call(callback_tcpport, param_set_port, param_get_int,
 		 &nfs_callback_set_tcpport, 0644);
 
+static int param_set_idmap_timeout(const char *val, struct kernel_param *kp)
+{
+	char *endp;
+	int num = simple_strtol(val, &endp, 0);
+	int jif = num * HZ;
+	if (endp == val || *endp || num < 0 || jif < num)
+		return -EINVAL;
+	*((int *)kp->arg) = jif;
+	return 0;
+}
+
+module_param_call(idmap_cache_timeout, param_set_idmap_timeout, param_get_int,
+		 &nfs_idmap_cache_timeout, 0644);
+
 #define nfs4_init_once(nfsi) \
 	do { \
 		INIT_LIST_HEAD(&(nfsi)->open_states); \
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index fdc64b59a4ee..4c486eb867ca 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -11,6 +11,7 @@
 #include <linux/sysctl.h>
 #include <linux/module.h>
 #include <linux/nfs4.h>
+#include <linux/nfs_idmap.h>
 
 #include "callback.h"
 
@@ -35,6 +36,15 @@ static ctl_table nfs_cb_sysctls[] = {
 		.extra1 = (int *)&nfs_set_port_min,
 		.extra2 = (int *)&nfs_set_port_max,
 	},
+	{
+		.ctl_name = CTL_UNNUMBERED,
+		.procname = "idmap_cache_timeout",
+		.data = &nfs_idmap_cache_timeout,
+		.maxlen = sizeof(int),
+		.mode = 0644,
+		.proc_handler = &proc_dointvec_jiffies,
+		.strategy = &sysctl_jiffies,
+	},
 #endif
 	{ .ctl_name = 0 }
 };
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h
index a0f1f25e0ead..102e56094296 100644
--- a/include/linux/nfs_idmap.h
+++ b/include/linux/nfs_idmap.h
@@ -71,6 +71,8 @@ int nfs_map_name_to_uid(struct nfs4_client *, const char *, size_t, __u32 *);
 int nfs_map_group_to_gid(struct nfs4_client *, const char *, size_t, __u32 *);
 int nfs_map_uid_to_name(struct nfs4_client *, __u32, char *);
 int nfs_map_gid_to_group(struct nfs4_client *, __u32, char *);
+
+extern unsigned int nfs_idmap_cache_timeout;
 #endif /* __KERNEL__ */
 
 #endif /* NFS_IDMAP_H */
-- 
cgit v1.2.3


From 9eed129bbde80cbd7ffeacaa1555ba1e0c9a0997 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@fieldses.org>
Date: Tue, 3 Jan 2006 09:56:00 +0100
Subject: SUNRPC: Update the spkm3 code to use the make_checksum interface

 Also update the tokenlen calculations to accomodate g_token_size().

 Signed-off-by: Andy Adamson <andros@citi.umich.edu>
 Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
 Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_spkm3.h       |  2 +-
 net/sunrpc/auth_gss/gss_spkm3_seal.c   | 11 +++++------
 net/sunrpc/auth_gss/gss_spkm3_token.c  |  3 ++-
 net/sunrpc/auth_gss/gss_spkm3_unseal.c |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_spkm3.h b/include/linux/sunrpc/gss_spkm3.h
index 0beb2cf00a84..336e218c2782 100644
--- a/include/linux/sunrpc/gss_spkm3.h
+++ b/include/linux/sunrpc/gss_spkm3.h
@@ -48,7 +48,7 @@ u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struc
 #define CKSUMTYPE_RSA_MD5            0x0007
 
 s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
-                   struct xdr_netobj *cksum);
+                   int body_offset, struct xdr_netobj *cksum);
 void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits);
 int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, 
                    int explen);
diff --git a/net/sunrpc/auth_gss/gss_spkm3_seal.c b/net/sunrpc/auth_gss/gss_spkm3_seal.c
index d1e12b25d6e2..86fbf7c3e39c 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_seal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_seal.c
@@ -59,7 +59,7 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 	char			tokhdrbuf[25];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = NULL};
 	struct xdr_netobj	mic_hdr = {.len = 0, .data = tokhdrbuf};
-	int			tmsglen, tokenlen = 0;
+	int			tokenlen = 0;
 	unsigned char		*ptr;
 	s32			now;
 	int			ctxelen = 0, ctxzbit = 0;
@@ -92,24 +92,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 	}
 
 	if (toktype == SPKM_MIC_TOK) {
-		tmsglen = 0;
 		/* Calculate checksum over the mic-header */
 		asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
 		spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
 		                         ctxelen, ctxzbit);
 
 		if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 
-		                             text, &md5cksum))
+		                             text, 0, &md5cksum))
 			goto out_err;
 
 		asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
-		tokenlen = 10 + ctxelen + 1 + 2 + md5elen + 1;
+		tokenlen = 10 + ctxelen + 1 + md5elen + 1;
 
 		/* Create token header using generic routines */
-		token->len = g_token_size(&ctx->mech_used, tokenlen + tmsglen);
+		token->len = g_token_size(&ctx->mech_used, tokenlen);
 
 		ptr = token->data;
-		g_make_token_header(&ctx->mech_used, tokenlen + tmsglen, &ptr);
+		g_make_token_header(&ctx->mech_used, tokenlen, &ptr);
 
 		spkm3_make_mic_token(&ptr, tokenlen, &mic_hdr, &md5cksum, md5elen, md5zbit);
 	} else if (toktype == SPKM_WRAP_TOK) { /* Not Supported */
diff --git a/net/sunrpc/auth_gss/gss_spkm3_token.c b/net/sunrpc/auth_gss/gss_spkm3_token.c
index 1f824578d773..af0d7ce74686 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_token.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_token.c
@@ -182,6 +182,7 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
  * *tokp points to the beginning of the SPKM_MIC token  described 
  * in rfc 2025, section 3.2.1: 
  *
+ * toklen is the inner token length
  */
 void
 spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hdr, struct xdr_netobj *md5cksum, int md5elen, int md5zbit)
@@ -189,7 +190,7 @@ spkm3_make_mic_token(unsigned char **tokp, int toklen, struct xdr_netobj *mic_hd
 	unsigned char *ict = *tokp;
 
 	*(u8 *)ict++ = 0xa4;
-	*(u8 *)ict++ = toklen - 2; 
+	*(u8 *)ict++ = toklen;
 	memcpy(ict, mic_hdr->data, mic_hdr->len);
 	ict += mic_hdr->len;
 
diff --git a/net/sunrpc/auth_gss/gss_spkm3_unseal.c b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
index 241d5b30dfcb..96851b0ba1ba 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_unseal.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_unseal.c
@@ -95,7 +95,7 @@ spkm3_read_token(struct spkm3_ctx *ctx,
 		ret = GSS_S_DEFECTIVE_TOKEN;
 		code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 
 					mic_hdrlen + 2, 
-		                        message_buffer, &md5cksum);
+		                        message_buffer, 0, &md5cksum);
 
 		if (code)
 			goto out;
-- 
cgit v1.2.3


From 367cb704212cd0c9273ba2b1e62523139210563b Mon Sep 17 00:00:00 2001
From: Sam Ravnborg <sam@mars.ravnborg.org>
Date: Fri, 6 Jan 2006 21:17:50 +0100
Subject: kbuild: un-stringnify KBUILD_MODNAME

Now when kbuild passes KBUILD_MODNAME with "" do not __stringify it when
used. Remove __stringnify for all users.
This also fixes the output of:

$ ls -l /sys/module/
drwxr-xr-x 4 root root 0 2006-01-05 14:24 pcmcia
drwxr-xr-x 4 root root 0 2006-01-05 14:24 pcmcia_core
drwxr-xr-x 3 root root 0 2006-01-05 14:24 "processor"
drwxr-xr-x 3 root root 0 2006-01-05 14:24 "psmouse"

The quoting of the module names will be gone again.
Thanks to GregKH + Kay Sievers for reproting this.

Signed-off-by: Sam Ravnborg <sam@ravnborg.org>
---
 drivers/media/dvb/cinergyT2/cinergyT2.c | 2 +-
 drivers/media/dvb/ttpci/budget.h        | 2 +-
 drivers/media/video/tda9840.c           | 2 +-
 drivers/media/video/tea6415c.c          | 2 +-
 drivers/media/video/tea6420.c           | 2 +-
 include/linux/moduleparam.h             | 2 +-
 include/media/saa7146.h                 | 6 +++---
 net/ipv4/netfilter/ip_nat_ftp.c         | 2 +-
 net/ipv4/netfilter/ip_nat_irc.c         | 2 +-
 security/capability.c                   | 6 ++----
 10 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/cinergyT2/cinergyT2.c b/drivers/media/dvb/cinergyT2/cinergyT2.c
index b996fb59b7e4..1d69bf031fb9 100644
--- a/drivers/media/dvb/cinergyT2/cinergyT2.c
+++ b/drivers/media/dvb/cinergyT2/cinergyT2.c
@@ -60,7 +60,7 @@ MODULE_PARM_DESC(debug, "Turn on/off debugging (default:off).");
 #define dprintk(level, args...)						\
 do {									\
 	if ((debug & level)) {						\
-		printk("%s: %s(): ", __stringify(KBUILD_MODNAME),	\
+		printk("%s: %s(): ", KBUILD_MODNAME,			\
 		       __FUNCTION__);					\
 		printk(args); }						\
 } while (0)
diff --git a/drivers/media/dvb/ttpci/budget.h b/drivers/media/dvb/ttpci/budget.h
index fdaa3318ad3a..c8d48cfba277 100644
--- a/drivers/media/dvb/ttpci/budget.h
+++ b/drivers/media/dvb/ttpci/budget.h
@@ -19,7 +19,7 @@ extern int budget_debug;
 #endif
 
 #define dprintk(level,args...) \
-	    do { if ((budget_debug & level)) { printk("%s: %s(): ",__stringify(KBUILD_MODNAME), __FUNCTION__); printk(args); } } while (0)
+	    do { if ((budget_debug & level)) { printk("%s: %s(): ", KBUILD_MODNAME, __FUNCTION__); printk(args); } } while (0)
 
 struct budget_info {
 	char *name;
diff --git a/drivers/media/video/tda9840.c b/drivers/media/video/tda9840.c
index 1794686612c6..0cb5c7e9a884 100644
--- a/drivers/media/video/tda9840.c
+++ b/drivers/media/video/tda9840.c
@@ -34,7 +34,7 @@ static int debug = 0;		/* insmod parameter */
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off).");
 #define dprintk(args...) \
-            do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0)
+            do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0)
 
 #define	SWITCH		0x00
 #define	LEVEL_ADJUST	0x02
diff --git a/drivers/media/video/tea6415c.c b/drivers/media/video/tea6415c.c
index ee3688348b66..09149dad1b84 100644
--- a/drivers/media/video/tea6415c.c
+++ b/drivers/media/video/tea6415c.c
@@ -36,7 +36,7 @@ static int debug = 0;		/* insmod parameter */
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off).");
 #define dprintk(args...) \
-            do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0)
+            do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0)
 
 #define TEA6415C_NUM_INPUTS	8
 #define TEA6415C_NUM_OUTPUTS	6
diff --git a/drivers/media/video/tea6420.c b/drivers/media/video/tea6420.c
index 17975c19da5e..e908f917c8d2 100644
--- a/drivers/media/video/tea6420.c
+++ b/drivers/media/video/tea6420.c
@@ -36,7 +36,7 @@ static int debug = 0;		/* insmod parameter */
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "Turn on/off device debugging (default:off).");
 #define dprintk(args...) \
-            do { if (debug) { printk("%s: %s()[%d]: ",__stringify(KBUILD_MODNAME), __FUNCTION__, __LINE__); printk(args); } } while (0)
+            do { if (debug) { printk("%s: %s()[%d]: ", KBUILD_MODNAME, __FUNCTION__, __LINE__); printk(args); } } while (0)
 
 /* addresses to scan, found only at 0x4c and/or 0x4d (7-Bit) */
 static unsigned short normal_i2c[] = { I2C_TEA6420_1, I2C_TEA6420_2, I2C_CLIENT_END };
diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h
index 368ec8e45bd0..b5c98c43779e 100644
--- a/include/linux/moduleparam.h
+++ b/include/linux/moduleparam.h
@@ -10,7 +10,7 @@
 #ifdef MODULE
 #define MODULE_PARAM_PREFIX /* empty */
 #else
-#define MODULE_PARAM_PREFIX __stringify(KBUILD_MODNAME) "."
+#define MODULE_PARAM_PREFIX KBUILD_MODNAME "."
 #endif
 
 #ifdef MODULE
diff --git a/include/media/saa7146.h b/include/media/saa7146.h
index e5be2b9b846b..2bc634fcb7bb 100644
--- a/include/media/saa7146.h
+++ b/include/media/saa7146.h
@@ -21,14 +21,14 @@
 
 extern unsigned int saa7146_debug;
 
-//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),__stringify(KBUILD_MODNAME),__FUNCTION__)
+//#define DEBUG_PROLOG printk("(0x%08x)(0x%08x) %s: %s(): ",(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,RPS_ADDR0))),(dev==0?-1:(dev->mem==0?-1:saa7146_read(dev,IER))),KBUILD_MODNAME,__FUNCTION__)
 
 #ifndef DEBUG_VARIABLE
 	#define DEBUG_VARIABLE saa7146_debug
 #endif
 
-#define DEBUG_PROLOG printk("%s: %s(): ",__stringify(KBUILD_MODNAME),__FUNCTION__)
-#define INFO(x) { printk("%s: ",__stringify(KBUILD_MODNAME)); printk x; }
+#define DEBUG_PROLOG printk("%s: %s(): ",KBUILD_MODNAME,__FUNCTION__)
+#define INFO(x) { printk("%s: ",KBUILD_MODNAME); printk x; }
 
 #define ERR(x) { DEBUG_PROLOG; printk x; }
 
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
index d83757a70d9f..b8daab3c64af 100644
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ b/net/ipv4/netfilter/ip_nat_ftp.c
@@ -171,7 +171,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
index de31942babe3..461c833eaca1 100644
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ b/net/ipv4/netfilter/ip_nat_irc.c
@@ -113,7 +113,7 @@ static int __init init(void)
 /* Prior to 2.6.11, we had a ports param.  No longer, but don't break users. */
 static int warn_set(const char *val, struct kernel_param *kp)
 {
-	printk(KERN_INFO __stringify(KBUILD_MODNAME)
+	printk(KERN_INFO KBUILD_MODNAME
 	       ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
 	return 0;
 }
diff --git a/security/capability.c b/security/capability.c
index ec18d6075625..f9b35cc0b248 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -49,8 +49,6 @@ static struct security_operations capability_ops = {
 	.vm_enough_memory =             cap_vm_enough_memory,
 };
 
-#define MY_NAME __stringify(KBUILD_MODNAME)
-
 /* flag to keep track of how we were registered */
 static int secondary;
 
@@ -67,7 +65,7 @@ static int __init capability_init (void)
 	/* register ourselves with the security framework */
 	if (register_security (&capability_ops)) {
 		/* try registering with primary module */
-		if (mod_reg_security (MY_NAME, &capability_ops)) {
+		if (mod_reg_security (KBUILD_MODNAME, &capability_ops)) {
 			printk (KERN_INFO "Failure registering capabilities "
 				"with primary security module.\n");
 			return -EINVAL;
@@ -85,7 +83,7 @@ static void __exit capability_exit (void)
 		return;
 	/* remove ourselves from the security framework */
 	if (secondary) {
-		if (mod_unreg_security (MY_NAME, &capability_ops))
+		if (mod_unreg_security (KBUILD_MODNAME, &capability_ops))
 			printk (KERN_INFO "Failure unregistering capabilities "
 				"with primary module.\n");
 		return;
-- 
cgit v1.2.3


From 4bad4dc919573dbe9a5b41dd9edff279e99822d7 Mon Sep 17 00:00:00 2001
From: Kris Katterjohn <kjak@ispwest.com>
Date: Fri, 6 Jan 2006 13:08:20 -0800
Subject: [NET]: Change sk_run_filter()'s return type in net/core/filter.c

It should return an unsigned value, and fix sk_filter() as well.

Signed-off-by: Kris Katterjohn <kjak@ispwest.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 2 +-
 include/net/sock.h     | 4 ++--
 net/core/filter.c      | 6 +++---
 3 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 3ba843c46382..c6cb8f095088 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -143,7 +143,7 @@ static inline unsigned int sk_filter_len(struct sk_filter *fp)
 struct sk_buff;
 struct sock;
 
-extern int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen);
+extern unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen);
 extern int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 extern int sk_chk_filter(struct sock_filter *filter, int flen);
 #endif /* __KERNEL__ */
diff --git a/include/net/sock.h b/include/net/sock.h
index 6961700ff3a0..1806e5b61419 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,8 +856,8 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock)
 		
 		filter = sk->sk_filter;
 		if (filter) {
-			int pkt_len = sk_run_filter(skb, filter->insns,
-						    filter->len);
+			unsigned int pkt_len = sk_run_filter(skb, filter->insns,
+							     filter->len);
 			if (!pkt_len)
 				err = -EPERM;
 			else
diff --git a/net/core/filter.c b/net/core/filter.c
index 8964d3445588..9eb9d0017a01 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -75,7 +75,7 @@ static inline void *load_pointer(struct sk_buff *skb, int k,
  * len is the number of filter blocks in the array.
  */
  
-int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
+unsigned int sk_run_filter(struct sk_buff *skb, struct sock_filter *filter, int flen)
 {
 	struct sock_filter *fentry;	/* We walk down these */
 	void *ptr;
@@ -241,9 +241,9 @@ load_b:
 			A = X;
 			continue;
 		case BPF_RET|BPF_K:
-			return ((unsigned int)fentry->k);
+			return fentry->k;
 		case BPF_RET|BPF_A:
-			return ((unsigned int)A);
+			return A;
 		case BPF_ST:
 			mem[fentry->k] = A;
 			continue;
-- 
cgit v1.2.3


From 76ab608d86cf1ef5c5c46819b5733eb9f9f964f8 Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 6 Jan 2006 13:24:29 -0800
Subject: [NET]: Endian-annotate struct iphdr

And fix trivial warnings that emerged.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ip.h         | 10 +++++-----
 net/ipv4/ip_fragment.c     |  2 +-
 net/ipv4/ip_output.c       |  4 ++--
 net/ipv4/ipvs/ip_vs_xmit.c |  2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ip.h b/include/linux/ip.h
index 9e2eb9a602eb..4b55cf1df732 100644
--- a/include/linux/ip.h
+++ b/include/linux/ip.h
@@ -90,14 +90,14 @@ struct iphdr {
 #error	"Please fix <asm/byteorder.h>"
 #endif
 	__u8	tos;
-	__u16	tot_len;
-	__u16	id;
-	__u16	frag_off;
+	__be16	tot_len;
+	__be16	id;
+	__be16	frag_off;
 	__u8	ttl;
 	__u8	protocol;
 	__u16	check;
-	__u32	saddr;
-	__u32	daddr;
+	__be32	saddr;
+	__be32	daddr;
 	/*The options start here. */
 };
 
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index ce2b70ce4018..2a8adda15e11 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -383,7 +383,7 @@ out_nomem:
  */
 static inline struct ipq *ip_find(struct iphdr *iph, u32 user)
 {
-	__u16 id = iph->id;
+	__be16 id = iph->id;
 	__u32 saddr = iph->saddr;
 	__u32 daddr = iph->daddr;
 	__u8 protocol = iph->protocol;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 71da31818cfc..8b1c9bd0091e 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -418,7 +418,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
 	struct sk_buff *skb2;
 	unsigned int mtu, hlen, left, len, ll_rs;
 	int offset;
-	int not_last_frag;
+	__be16 not_last_frag;
 	struct rtable *rt = (struct rtable*)skb->dst;
 	int err = 0;
 
@@ -1180,7 +1180,7 @@ int ip_push_pending_frames(struct sock *sk)
 	struct ip_options *opt = NULL;
 	struct rtable *rt = inet->cork.rt;
 	struct iphdr *iph;
-	int df = 0;
+	__be16 df = 0;
 	__u8 ttl;
 	int err = 0;
 
diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c
index 3b87482049cf..52c12e9edbbc 100644
--- a/net/ipv4/ipvs/ip_vs_xmit.c
+++ b/net/ipv4/ipvs/ip_vs_xmit.c
@@ -322,7 +322,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
 	struct net_device *tdev;		/* Device to other host */
 	struct iphdr  *old_iph = skb->nh.iph;
 	u8     tos = old_iph->tos;
-	u16    df = old_iph->frag_off;
+	__be16 df = old_iph->frag_off;
 	struct iphdr  *iph;			/* Our new IP header */
 	int    max_headroom;			/* The extra header space needed */
 	int    mtu;
-- 
cgit v1.2.3


From a2167dc62e9142b9a4bfb20f7e001c0f0a26fd8c Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Fri, 6 Jan 2006 13:24:54 -0800
Subject: [NET]: Endian-annotate in_aton()

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet.h | 2 +-
 net/core/utils.c     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/inet.h b/include/linux/inet.h
index 3b5e9fdff872..6c5587af118d 100644
--- a/include/linux/inet.h
+++ b/include/linux/inet.h
@@ -45,6 +45,6 @@
 #ifdef __KERNEL__
 #include <linux/types.h>
 
-extern __u32 in_aton(const char *str);
+extern __be32 in_aton(const char *str);
 #endif
 #endif	/* _LINUX_INET_H */
diff --git a/net/core/utils.c b/net/core/utils.c
index 587eb7787deb..ac1d1fcf8673 100644
--- a/net/core/utils.c
+++ b/net/core/utils.c
@@ -162,7 +162,7 @@ EXPORT_SYMBOL(net_srandom);
  * is otherwise not dependent on the TCP/IP stack.
  */
 
-__u32 in_aton(const char *str)
+__be32 in_aton(const char *str)
 {
 	unsigned long l;
 	unsigned int val;
-- 
cgit v1.2.3