From 392ee1e6dd901db6c4504617476f6442ed91f72d Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 8 Mar 2007 13:04:57 -0700
Subject: [PATCH] msi: Safer state caching.

There are two ways pci_save_state and pci_restore_state are used.  As
helper functions during suspend/resume, and as helper functions around
a hardware reset event.  When used as helper functions around a hardware
reset event there is no reason to believe the calls will be paired, nor
is there a good reason to believe that if we restore the msi state from
before the reset that it will match the current msi state.  Since arch
code may change the msi message without going through the driver, drivers
currently do not have enough information to even know when to call
pci_save_state to ensure they will have msi state in sync with the other
kernel irq reception data structures.

It turns out the solution is straight forward, cache the state in the
existing msi data structures (not the magic pci saved things) and
have the msi code update the cached state each time we write to the hardware.
This means we never need to read the hardware to figure out what the hardware
state should be.

By modifying the caching in this manner we get to remove our save_state
routines and only need to provide restore_state routines.

The only fields that were at all tricky to regenerate were the msi and msi-x
control registers and the way we regenerate them currently is a bit dependent
upon assumptions on how we use the allow msi registers to be configured and used
making the code a little bit brittle.  If we ever change what cases we allow
or how we configure the msi bits we can address the fragility then.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/msi.h      | 8 +++-----
 include/linux/pci_regs.h | 1 +
 2 files changed, 4 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/msi.h b/include/linux/msi.h
index 74c8a2ecc9dd..e38fe6822cb4 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -17,7 +17,7 @@ struct msi_desc {
 	struct {
 		__u8	type	: 5; 	/* {0: unused, 5h:MSI, 11h:MSI-X} */
 		__u8	maskbit	: 1; 	/* mask-pending bit supported ?   */
-		__u8	unused	: 1;
+		__u8	masked	: 1;
 		__u8	is_64	: 1;	/* Address size: 0=32bit 1=64bit  */
 		__u8	pos;	 	/* Location of the msi capability */
 		__u16	entry_nr;    	/* specific enabled entry 	  */
@@ -32,10 +32,8 @@ struct msi_desc {
 	void __iomem *mask_base;
 	struct pci_dev *dev;
 
-#ifdef CONFIG_PM
-	/* PM save area for MSIX address/data */
-	struct msi_msg msg_save;
-#endif
+	/* Last set MSI message */
+	struct msi_msg msg;
 };
 
 /*
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index f09cce2357ff..495d368390e0 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -296,6 +296,7 @@
 #define PCI_MSIX_FLAGS		2
 #define  PCI_MSIX_FLAGS_QSIZE	0x7FF
 #define  PCI_MSIX_FLAGS_ENABLE	(1 << 15)
+#define  PCI_MSIX_FLAGS_MASKALL	(1 << 14)
 #define PCI_MSIX_FLAGS_BIRMASK	(7 << 0)
 #define PCI_MSIX_FLAGS_BITMASK	(1 << 0)
 
-- 
cgit v1.2.3


From 9f35575dfc172f0a93fb464761883c8f49599b7a Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 8 Mar 2007 13:06:13 -0700
Subject: [PATCH] pci: Repair pci_save/restore_state so we can restore one save
 many times.

Because we do not reserve space for the pci-x and pci-e state in struct
pci dev we need to dynamically allocate it.  However because we need
to support restore being called multiple times after a single save
it is never safe to free the buffers we have allocated to hold the
state.

So this patch modifies the save routines to first check to see
if we have already allocated a state buffer before allocating
a new one.  Then the restore routines are modified to not free
the state after restoring it.  Simple and it fixes some subtle
error path handling bugs, that are hard to test for.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
Acked-by: Auke Kok <auke-jan.h.kok@intel.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/pci/pci.c   | 12 ++++++------
 include/linux/pci.h |  5 -----
 2 files changed, 6 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 6048c0c637a0..d3eab057b2d3 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -551,7 +551,9 @@ static int pci_save_pcie_state(struct pci_dev *dev)
 	if (pos <= 0)
 		return 0;
 
-	save_state = kzalloc(sizeof(*save_state) + sizeof(u16) * 4, GFP_KERNEL);
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state)
+		save_state = kzalloc(sizeof(*save_state) + sizeof(u16) * 4, GFP_KERNEL);
 	if (!save_state) {
 		dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n");
 		return -ENOMEM;
@@ -582,8 +584,6 @@ static void pci_restore_pcie_state(struct pci_dev *dev)
 	pci_write_config_word(dev, pos + PCI_EXP_LNKCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_SLTCTL, cap[i++]);
 	pci_write_config_word(dev, pos + PCI_EXP_RTCTL, cap[i++]);
-	pci_remove_saved_cap(save_state);
-	kfree(save_state);
 }
 
 
@@ -597,7 +597,9 @@ static int pci_save_pcix_state(struct pci_dev *dev)
 	if (pos <= 0)
 		return 0;
 
-	save_state = kzalloc(sizeof(*save_state) + sizeof(u16), GFP_KERNEL);
+	save_state = pci_find_saved_cap(dev, PCI_CAP_ID_EXP);
+	if (!save_state)
+		save_state = kzalloc(sizeof(*save_state) + sizeof(u16), GFP_KERNEL);
 	if (!save_state) {
 		dev_err(&dev->dev, "Out of memory in pci_save_pcie_state\n");
 		return -ENOMEM;
@@ -622,8 +624,6 @@ static void pci_restore_pcix_state(struct pci_dev *dev)
 	cap = (u16 *)&save_state->data[0];
 
 	pci_write_config_word(dev, pos + PCI_X_CMD, cap[i++]);
-	pci_remove_saved_cap(save_state);
-	kfree(save_state);
 }
 
 
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 78417e421b4c..481ea0663f19 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -209,11 +209,6 @@ static inline void pci_add_saved_cap(struct pci_dev *pci_dev,
 	hlist_add_head(&new_cap->next, &pci_dev->saved_cap_space);
 }
 
-static inline void pci_remove_saved_cap(struct pci_cap_saved_state *cap)
-{
-	hlist_del(&cap->next);
-}
-
 /*
  *  For PCI devices, the region numbers are assigned this way:
  *
-- 
cgit v1.2.3


From 04ff97086b1a3237bbd1fe6390fa80fe75207e23 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@ftp.linux.org.uk>
Date: Mon, 12 Mar 2007 16:17:58 +0000
Subject: [PATCH] sanitize security_getprocattr() API

have it return the buffer it had allocated

Acked-by: Stephen Smalley <sds@tycho.nsa.gov>
Acked-by: James Morris <jmorris@namei.org>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/proc/base.c           | 21 ++++++---------------
 include/linux/security.h |  8 ++++----
 security/dummy.c         |  2 +-
 security/selinux/hooks.c |  8 ++++++--
 4 files changed, 17 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 01f7769da8e6..989af5e55d1b 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1558,29 +1558,20 @@ static ssize_t proc_pid_attr_read(struct file * file, char __user * buf,
 				  size_t count, loff_t *ppos)
 {
 	struct inode * inode = file->f_path.dentry->d_inode;
-	unsigned long page;
+	char *p = NULL;
 	ssize_t length;
 	struct task_struct *task = get_proc_task(inode);
 
-	length = -ESRCH;
 	if (!task)
-		goto out_no_task;
-
-	if (count > PAGE_SIZE)
-		count = PAGE_SIZE;
-	length = -ENOMEM;
-	if (!(page = __get_free_page(GFP_KERNEL)))
-		goto out;
+		return -ESRCH;
 
 	length = security_getprocattr(task,
 				      (char*)file->f_path.dentry->d_name.name,
-				      (void*)page, count);
-	if (length >= 0)
-		length = simple_read_from_buffer(buf, count, ppos, (char *)page, length);
-	free_page(page);
-out:
+				      &p);
 	put_task_struct(task);
-out_no_task:
+	if (length > 0)
+		length = simple_read_from_buffer(buf, count, ppos, p, length);
+	kfree(p);
 	return length;
 }
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 7f88d97575fd..47e82c120f9a 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1324,7 +1324,7 @@ struct security_operations {
 
 	void (*d_instantiate) (struct dentry *dentry, struct inode *inode);
 
- 	int (*getprocattr)(struct task_struct *p, char *name, void *value, size_t size);
+ 	int (*getprocattr)(struct task_struct *p, char *name, char **value);
  	int (*setprocattr)(struct task_struct *p, char *name, void *value, size_t size);
 	int (*secid_to_secctx)(u32 secid, char **secdata, u32 *seclen);
 	void (*release_secctx)(char *secdata, u32 seclen);
@@ -2092,9 +2092,9 @@ static inline void security_d_instantiate (struct dentry *dentry, struct inode *
 	security_ops->d_instantiate (dentry, inode);
 }
 
-static inline int security_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
 {
-	return security_ops->getprocattr(p, name, value, size);
+	return security_ops->getprocattr(p, name, value);
 }
 
 static inline int security_setprocattr(struct task_struct *p, char *name, void *value, size_t size)
@@ -2749,7 +2749,7 @@ static inline int security_sem_semop (struct sem_array * sma,
 static inline void security_d_instantiate (struct dentry *dentry, struct inode *inode)
 { }
 
-static inline int security_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static inline int security_getprocattr(struct task_struct *p, char *name, char **value)
 {
 	return -EINVAL;
 }
diff --git a/security/dummy.c b/security/dummy.c
index 558795b237d6..8ffd76405b5b 100644
--- a/security/dummy.c
+++ b/security/dummy.c
@@ -907,7 +907,7 @@ static void dummy_d_instantiate (struct dentry *dentry, struct inode *inode)
 	return;
 }
 
-static int dummy_getprocattr(struct task_struct *p, char *name, void *value, size_t size)
+static int dummy_getprocattr(struct task_struct *p, char *name, char **value)
 {
 	return -EINVAL;
 }
diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c
index 19a385e9968e..d41e24d6ae41 100644
--- a/security/selinux/hooks.c
+++ b/security/selinux/hooks.c
@@ -4468,11 +4468,12 @@ static void selinux_d_instantiate (struct dentry *dentry, struct inode *inode)
 }
 
 static int selinux_getprocattr(struct task_struct *p,
-			       char *name, void *value, size_t size)
+			       char *name, char **value)
 {
 	struct task_security_struct *tsec;
 	u32 sid;
 	int error;
+	unsigned len;
 
 	if (current != p) {
 		error = task_has_perm(current, p, PROCESS__GETATTR);
@@ -4500,7 +4501,10 @@ static int selinux_getprocattr(struct task_struct *p,
 	if (!sid)
 		return 0;
 
-	return selinux_getsecurity(sid, value, size);
+	error = security_sid_to_context(sid, value, &len);
+	if (error)
+		return error;
+	return len;
 }
 
 static int selinux_setprocattr(struct task_struct *p,
-- 
cgit v1.2.3


From d9a9cdfb078d755e648d53ec25b7370f84ee5729 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 15 Mar 2007 15:50:34 -0400
Subject: [PATCH] sysfs and driver core: add callback helper, used by SCSI and
 S390

This patch (as868) adds a helper routine for device drivers that need
to set up a callback to perform some action in a different process's
context.  This is intended for use by attribute methods that want to
unregister themselves or their parent device.  Attribute method calls
are mutually exclusive with unregistration, so such actions cannot be
taken directly.

Two attribute methods are converted to use the new helper routine: one
for SCSI device deletion and one for System/390 ccwgroup devices.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: Cornelia Huck <cornelia.huck@de.ibm.com>
Cc: Oliver Neukum <oneukum@suse.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/base/core.c         | 29 ++++++++++++++++++++++++
 drivers/s390/cio/ccwgroup.c | 18 ++++++++++++---
 drivers/scsi/scsi_sysfs.c   | 14 +++++++++++-
 fs/sysfs/file.c             | 54 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/device.h      |  2 ++
 include/linux/sysfs.h       |  9 ++++++++
 6 files changed, 122 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index f191afe62b4d..ad0f4a2f25c4 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -407,6 +407,35 @@ void device_remove_bin_file(struct device *dev, struct bin_attribute *attr)
 }
 EXPORT_SYMBOL_GPL(device_remove_bin_file);
 
+/**
+ * device_schedule_callback - helper to schedule a callback for a device
+ * @dev: device.
+ * @func: callback function to invoke later.
+ *
+ * Attribute methods must not unregister themselves or their parent device
+ * (which would amount to the same thing).  Attempts to do so will deadlock,
+ * since unregistration is mutually exclusive with driver callbacks.
+ *
+ * Instead methods can call this routine, which will attempt to allocate
+ * and schedule a workqueue request to call back @func with @dev as its
+ * argument in the workqueue's process context.  @dev will be pinned until
+ * @func returns.
+ *
+ * Returns 0 if the request was submitted, -ENOMEM if storage could not
+ * be allocated.
+ *
+ * NOTE: This routine won't work if CONFIG_SYSFS isn't set!  It uses an
+ * underlying sysfs routine (since it is intended for use by attribute
+ * methods), and if sysfs isn't available you'll get nothing but -ENOSYS.
+ */
+int device_schedule_callback(struct device *dev,
+		void (*func)(struct device *))
+{
+	return sysfs_schedule_callback(&dev->kobj,
+			(void (*)(void *)) func, dev);
+}
+EXPORT_SYMBOL_GPL(device_schedule_callback);
+
 static void klist_children_get(struct klist_node *n)
 {
 	struct device *dev = container_of(n, struct device, knode_parent);
diff --git a/drivers/s390/cio/ccwgroup.c b/drivers/s390/cio/ccwgroup.c
index d48e3ca4752c..5aeb68e732b0 100644
--- a/drivers/s390/cio/ccwgroup.c
+++ b/drivers/s390/cio/ccwgroup.c
@@ -71,19 +71,31 @@ __ccwgroup_remove_symlinks(struct ccwgroup_device *gdev)
  * Provide an 'ungroup' attribute so the user can remove group devices no
  * longer needed or accidentially created. Saves memory :)
  */
+static void ccwgroup_ungroup_callback(struct device *dev)
+{
+	struct ccwgroup_device *gdev = to_ccwgroupdev(dev);
+
+	__ccwgroup_remove_symlinks(gdev);
+	device_unregister(dev);
+}
+
 static ssize_t
 ccwgroup_ungroup_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct ccwgroup_device *gdev;
+	int rc;
 
 	gdev = to_ccwgroupdev(dev);
 
 	if (gdev->state != CCWGROUP_OFFLINE)
 		return -EINVAL;
 
-	__ccwgroup_remove_symlinks(gdev);
-	device_unregister(dev);
-
+	/* Note that we cannot unregister the device from one of its
+	 * attribute methods, so we have to use this roundabout approach.
+	 */
+	rc = device_schedule_callback(dev, ccwgroup_ungroup_callback);
+	if (rc)
+		count = rc;
 	return count;
 }
 
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index c275dcac3f18..939de0de18bc 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -452,10 +452,22 @@ store_rescan_field (struct device *dev, struct device_attribute *attr, const cha
 }
 static DEVICE_ATTR(rescan, S_IWUSR, NULL, store_rescan_field);
 
+static void sdev_store_delete_callback(struct device *dev)
+{
+	scsi_remove_device(to_scsi_device(dev));
+}
+
 static ssize_t sdev_store_delete(struct device *dev, struct device_attribute *attr, const char *buf,
 				 size_t count)
 {
-	scsi_remove_device(to_scsi_device(dev));
+	int rc;
+
+	/* An attribute cannot be unregistered by one of its own methods,
+	 * so we have to use this roundabout approach.
+	 */
+	rc = device_schedule_callback(dev, sdev_store_delete_callback);
+	if (rc)
+		count = rc;
 	return count;
 };
 static DEVICE_ATTR(delete, S_IWUSR, NULL, sdev_store_delete);
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index 8d4d839a9d88..1bafdf6e171c 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -629,6 +629,60 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 }
 EXPORT_SYMBOL_GPL(sysfs_remove_file_from_group);
 
+struct sysfs_schedule_callback_struct {
+	struct kobject 		*kobj;
+	void			(*func)(void *);
+	void			*data;
+	struct work_struct	work;
+};
+
+static void sysfs_schedule_callback_work(struct work_struct *work)
+{
+	struct sysfs_schedule_callback_struct *ss = container_of(work,
+			struct sysfs_schedule_callback_struct, work);
+
+	(ss->func)(ss->data);
+	kobject_put(ss->kobj);
+	kfree(ss);
+}
+
+/**
+ * sysfs_schedule_callback - helper to schedule a callback for a kobject
+ * @kobj: object we're acting for.
+ * @func: callback function to invoke later.
+ * @data: argument to pass to @func.
+ *
+ * sysfs attribute methods must not unregister themselves or their parent
+ * kobject (which would amount to the same thing).  Attempts to do so will
+ * deadlock, since unregistration is mutually exclusive with driver
+ * callbacks.
+ *
+ * Instead methods can call this routine, which will attempt to allocate
+ * and schedule a workqueue request to call back @func with @data as its
+ * argument in the workqueue's process context.  @kobj will be pinned
+ * until @func returns.
+ *
+ * Returns 0 if the request was submitted, -ENOMEM if storage could not
+ * be allocated.
+ */
+int sysfs_schedule_callback(struct kobject *kobj, void (*func)(void *),
+		void *data)
+{
+	struct sysfs_schedule_callback_struct *ss;
+
+	ss = kmalloc(sizeof(*ss), GFP_KERNEL);
+	if (!ss)
+		return -ENOMEM;
+	kobject_get(kobj);
+	ss->kobj = kobj;
+	ss->func = func;
+	ss->data = data;
+	INIT_WORK(&ss->work, sysfs_schedule_callback_work);
+	schedule_work(&ss->work);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(sysfs_schedule_callback);
+
 
 EXPORT_SYMBOL_GPL(sysfs_create_file);
 EXPORT_SYMBOL_GPL(sysfs_remove_file);
diff --git a/include/linux/device.h b/include/linux/device.h
index 39a3199a826d..caad9bba9652 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -353,6 +353,8 @@ extern int __must_check device_create_bin_file(struct device *dev,
 					       struct bin_attribute *attr);
 extern void device_remove_bin_file(struct device *dev,
 				   struct bin_attribute *attr);
+extern int device_schedule_callback(struct device *dev,
+		void (*func)(struct device *));
 
 /* device resource management */
 typedef void (*dr_release_t)(struct device *dev, void *res);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 523405e1e1f6..0544edda7168 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -78,6 +78,9 @@ struct sysfs_ops {
 
 #ifdef CONFIG_SYSFS
 
+extern int sysfs_schedule_callback(struct kobject *kobj,
+		void (*func)(void *), void *data);
+
 extern int __must_check
 sysfs_create_dir(struct kobject *, struct dentry *);
 
@@ -132,6 +135,12 @@ extern int __must_check sysfs_init(void);
 
 #else /* CONFIG_SYSFS */
 
+static inline int sysfs_schedule_callback(struct kobject *kobj,
+		void (*func)(void *), void *data)
+{
+	return -ENOSYS;
+}
+
 static inline int sysfs_create_dir(struct kobject * k, struct dentry *shadow)
 {
 	return 0;
-- 
cgit v1.2.3


From 2189850f42beff23af32d847bd043cd1d1811a80 Mon Sep 17 00:00:00 2001
From: Evgeniy Dushistov <dushistov@mail.ru>
Date: Fri, 16 Mar 2007 13:38:07 -0800
Subject: [PATCH] ufs2: more correct work with time

This patch corrects work with time in UFS2 case.

1) According to UFS2 disk layout modification/access and so on "time"
   should be hold in two variables one 64bit for seconds and another 32bit for
   nanoseconds,

   at now for some unknown reason we suppose that "inode time" holds in
   three variables 32bit for seconds, 32bit for milliseconds and 32bit for
   nanoseconds.

2) We set amount of nanoseconds in "VFS inode" to 0 during read, instead of
   getting values from "on disk inode"(this should close
   http://bugzilla.kernel.org/show_bug.cgi?id=7991).

Signed-off-by: Evgeniy Dushistov <dushistov@mail.ru>
Cc: Bjoern Jacke <bjoern@j3e.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/ufs/ialloc.c        |  5 ++---
 fs/ufs/inode.c         | 24 ++++++++++++------------
 include/linux/ufs_fs.h |  8 ++++----
 3 files changed, 18 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ufs/ialloc.c b/fs/ufs/ialloc.c
index b868878009b6..c28a8b6f2feb 100644
--- a/fs/ufs/ialloc.c
+++ b/fs/ufs/ialloc.c
@@ -343,9 +343,8 @@ cg_found:
 		lock_buffer(bh);
 		ufs2_inode = (struct ufs2_inode *)bh->b_data;
 		ufs2_inode += ufs_inotofsbo(inode->i_ino);
-		ufs2_inode->ui_birthtime.tv_sec =
-			cpu_to_fs32(sb, CURRENT_TIME_SEC.tv_sec);
-		ufs2_inode->ui_birthtime.tv_usec = 0;
+		ufs2_inode->ui_birthtime = cpu_to_fs64(sb, CURRENT_TIME.tv_sec);
+		ufs2_inode->ui_birthnsec = cpu_to_fs32(sb, CURRENT_TIME.tv_nsec);
 		mark_buffer_dirty(bh);
 		unlock_buffer(bh);
 		if (sb->s_flags & MS_SYNCHRONOUS)
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index fb34ad03e224..366618dd698d 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -668,12 +668,12 @@ static void ufs2_read_inode(struct inode *inode, struct ufs2_inode *ufs2_inode)
 	inode->i_gid = fs32_to_cpu(sb, ufs2_inode->ui_gid);
 
 	inode->i_size = fs64_to_cpu(sb, ufs2_inode->ui_size);
-	inode->i_atime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_atime.tv_sec);
-	inode->i_ctime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_ctime.tv_sec);
-	inode->i_mtime.tv_sec = fs32_to_cpu(sb, ufs2_inode->ui_mtime.tv_sec);
-	inode->i_mtime.tv_nsec = 0;
-	inode->i_atime.tv_nsec = 0;
-	inode->i_ctime.tv_nsec = 0;
+	inode->i_atime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_atime);
+	inode->i_ctime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_ctime);
+	inode->i_mtime.tv_sec = fs64_to_cpu(sb, ufs2_inode->ui_mtime);
+	inode->i_atime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_atimensec);
+	inode->i_ctime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_ctimensec);
+	inode->i_mtime.tv_nsec = fs32_to_cpu(sb, ufs2_inode->ui_mtimensec);
 	inode->i_blocks = fs64_to_cpu(sb, ufs2_inode->ui_blocks);
 	inode->i_generation = fs32_to_cpu(sb, ufs2_inode->ui_gen);
 	ufsi->i_flags = fs32_to_cpu(sb, ufs2_inode->ui_flags);
@@ -803,12 +803,12 @@ static void ufs2_update_inode(struct inode *inode, struct ufs2_inode *ufs_inode)
 	ufs_inode->ui_gid = cpu_to_fs32(sb, inode->i_gid);
 
 	ufs_inode->ui_size = cpu_to_fs64(sb, inode->i_size);
-	ufs_inode->ui_atime.tv_sec = cpu_to_fs32(sb, inode->i_atime.tv_sec);
-	ufs_inode->ui_atime.tv_usec = 0;
-	ufs_inode->ui_ctime.tv_sec = cpu_to_fs32(sb, inode->i_ctime.tv_sec);
-	ufs_inode->ui_ctime.tv_usec = 0;
-	ufs_inode->ui_mtime.tv_sec = cpu_to_fs32(sb, inode->i_mtime.tv_sec);
-	ufs_inode->ui_mtime.tv_usec = 0;
+	ufs_inode->ui_atime = cpu_to_fs64(sb, inode->i_atime.tv_sec);
+	ufs_inode->ui_atimensec = cpu_to_fs32(sb, inode->i_atime.tv_nsec);
+	ufs_inode->ui_ctime = cpu_to_fs64(sb, inode->i_ctime.tv_sec);
+	ufs_inode->ui_ctimensec = cpu_to_fs32(sb, inode->i_ctime.tv_nsec);
+	ufs_inode->ui_mtime = cpu_to_fs64(sb, inode->i_mtime.tv_sec);
+	ufs_inode->ui_mtimensec = cpu_to_fs32(sb, inode->i_mtime.tv_nsec);
 
 	ufs_inode->ui_blocks = cpu_to_fs64(sb, inode->i_blocks);
 	ufs_inode->ui_flags = cpu_to_fs32(sb, ufsi->i_flags);
diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h
index dc2e9fe69418..daeba22b7656 100644
--- a/include/linux/ufs_fs.h
+++ b/include/linux/ufs_fs.h
@@ -649,10 +649,10 @@ struct ufs2_inode {
 	__fs32     ui_blksize;     /*  12: Inode blocksize. */
 	__fs64     ui_size;        /*  16: File byte count. */
 	__fs64     ui_blocks;      /*  24: Bytes actually held. */
-	struct ufs_timeval   ui_atime;       /*  32: Last access time. */
-	struct ufs_timeval   ui_mtime;       /*  40: Last modified time. */
-	struct ufs_timeval   ui_ctime;       /*  48: Last inode change time. */
-	struct ufs_timeval   ui_birthtime;   /*  56: Inode creation time. */
+	__fs64   ui_atime;       /*  32: Last access time. */
+	__fs64   ui_mtime;       /*  40: Last modified time. */
+	__fs64   ui_ctime;       /*  48: Last inode change time. */
+	__fs64   ui_birthtime;   /*  56: Inode creation time. */
 	__fs32     ui_mtimensec;   /*  64: Last modified time. */
 	__fs32     ui_atimensec;   /*  68: Last access time. */
 	__fs32     ui_ctimensec;   /*  72: Last inode change time. */
-- 
cgit v1.2.3


From a836f5856ae46ccb2464ea76031ea05ae967b832 Mon Sep 17 00:00:00 2001
From: Chris Lesiak <chris.lesiak@licor.com>
Date: Fri, 16 Mar 2007 13:38:13 -0800
Subject: [PATCH] spi: destroy workqueue after spi_unregister_master

Fix a bug in the cleanup of an spi_bitbang bus.

The workqueue associated with the bus was destroyed before the call to
spi_unregister_master.  That meant that spi devices on that bus would be
unable to do IO in their remove method.  The shutdown flag should have been
able to prevent a segfault, but was never getting set.  By waiting to
destroy the workqueue until after the master is unregistered, devices are
able to do IO in their remove methods.  An added benefit is that neither
the shutdown flag nor a wait for the queue of messages to empty is needed.

Signed-off-by: Chris Lesiak <chris.lesiak@licor.com>
Signed-off-by: David Brownell <dbrownell@users.sourceforge.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/spi/spi_bitbang.c       | 26 ++------------------------
 include/linux/spi/spi_bitbang.h |  1 -
 2 files changed, 2 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/spi/spi_bitbang.c b/drivers/spi/spi_bitbang.c
index 24a330d82395..88425e1af4d3 100644
--- a/drivers/spi/spi_bitbang.c
+++ b/drivers/spi/spi_bitbang.c
@@ -302,10 +302,6 @@ static void bitbang_work(struct work_struct *work)
 		setup_transfer = NULL;
 
 		list_for_each_entry (t, &m->transfers, transfer_list) {
-			if (bitbang->shutdown) {
-				status = -ESHUTDOWN;
-				break;
-			}
 
 			/* override or restore speed and wordsize */
 			if (t->speed_hz || t->bits_per_word) {
@@ -410,8 +406,6 @@ int spi_bitbang_transfer(struct spi_device *spi, struct spi_message *m)
 	m->status = -EINPROGRESS;
 
 	bitbang = spi_master_get_devdata(spi->master);
-	if (bitbang->shutdown)
-		return -ESHUTDOWN;
 
 	spin_lock_irqsave(&bitbang->lock, flags);
 	if (!spi->max_speed_hz)
@@ -507,28 +501,12 @@ EXPORT_SYMBOL_GPL(spi_bitbang_start);
  */
 int spi_bitbang_stop(struct spi_bitbang *bitbang)
 {
-	unsigned	limit = 500;
-
-	spin_lock_irq(&bitbang->lock);
-	bitbang->shutdown = 0;
-	while (!list_empty(&bitbang->queue) && limit--) {
-		spin_unlock_irq(&bitbang->lock);
+	spi_unregister_master(bitbang->master);
 
-		dev_dbg(bitbang->master->cdev.dev, "wait for queue\n");
-		msleep(10);
-
-		spin_lock_irq(&bitbang->lock);
-	}
-	spin_unlock_irq(&bitbang->lock);
-	if (!list_empty(&bitbang->queue)) {
-		dev_err(bitbang->master->cdev.dev, "queue didn't empty\n");
-		return -EBUSY;
-	}
+	WARN_ON(!list_empty(&bitbang->queue));
 
 	destroy_workqueue(bitbang->workqueue);
 
-	spi_unregister_master(bitbang->master);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(spi_bitbang_stop);
diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h
index 2e8c048b9b80..9dbca629dcfb 100644
--- a/include/linux/spi/spi_bitbang.h
+++ b/include/linux/spi/spi_bitbang.h
@@ -25,7 +25,6 @@ struct spi_bitbang {
 	spinlock_t		lock;
 	struct list_head	queue;
 	u8			busy;
-	u8			shutdown;
 	u8			use_dma;
 
 	struct spi_master	*master;
-- 
cgit v1.2.3


From b257bc051f06607beb3004d9a1c297085e728bec Mon Sep 17 00:00:00 2001
From: Andrew Johnson <ajohnson@intrinsyc.com>
Date: Fri, 16 Mar 2007 13:38:24 -0800
Subject: [PATCH] swsusp: fix suspend when console is in VT_AUTO+KD_GRAPHICS
 mode

When the console is in VT_AUTO+KD_GRAPHICS mode, switching to the
SUSPEND_CONSOLE fails, resulting in vt_waitactive() waiting indefinitely or
until the task is interrupted.  This patch tests if a console switch can
occur in set_console() and returns early if a console switch is not
possible.

[akpm@linux-foundation.org: cleanup]
Signed-off-by: Andrew Johnson <ajohnson@intrinsyc.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/char/vt.c        | 20 +++++++++++++++++++-
 drivers/char/vt_ioctl.c  |  2 +-
 include/linux/kbd_kern.h |  2 +-
 include/linux/vt_kern.h  |  1 +
 kernel/power/console.c   | 10 +++++++++-
 5 files changed, 31 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/vt.c b/drivers/char/vt.c
index c3f8e383933b..0fefe2a28055 100644
--- a/drivers/char/vt.c
+++ b/drivers/char/vt.c
@@ -2185,10 +2185,28 @@ static void console_callback(struct work_struct *ignored)
 	release_console_sem();
 }
 
-void set_console(int nr)
+int set_console(int nr)
 {
+	struct vc_data *vc = vc_cons[fg_console].d;
+
+	if (!vc_cons_allocated(nr) || vt_dont_switch ||
+		(vc->vt_mode.mode == VT_AUTO && vc->vc_mode == KD_GRAPHICS)) {
+
+		/*
+		 * Console switch will fail in console_callback() or
+		 * change_console() so there is no point scheduling
+		 * the callback
+		 *
+		 * Existing set_console() users don't check the return
+		 * value so this shouldn't break anything
+		 */
+		return -EINVAL;
+	}
+
 	want_console = nr;
 	schedule_console_callback();
+
+	return 0;
 }
 
 struct tty_driver *console_driver;
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 3a5d301e783b..1fa2da8f4fbe 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -34,7 +34,7 @@
 #include <linux/kbd_diacr.h>
 #include <linux/selection.h>
 
-static char vt_dont_switch;
+char vt_dont_switch;
 extern struct tty_driver *console_driver;
 
 #define VT_IS_IN_USE(i)	(console_driver->ttys[i] && console_driver->ttys[i]->count)
diff --git a/include/linux/kbd_kern.h b/include/linux/kbd_kern.h
index 06c58c423fe1..506ad20c18f8 100644
--- a/include/linux/kbd_kern.h
+++ b/include/linux/kbd_kern.h
@@ -75,7 +75,7 @@ extern int do_poke_blanked_console;
 
 extern void (*kbd_ledfunc)(unsigned int led);
 
-extern void set_console(int nr);
+extern int set_console(int nr);
 extern void schedule_console_callback(void);
 
 static inline void set_leds(void)
diff --git a/include/linux/vt_kern.h b/include/linux/vt_kern.h
index 37a1a41f5b65..e0db669998f3 100644
--- a/include/linux/vt_kern.h
+++ b/include/linux/vt_kern.h
@@ -83,6 +83,7 @@ void reset_vc(struct vc_data *vc);
 #define CON_BUF_SIZE (CONFIG_BASE_SMALL ? 256 : PAGE_SIZE)
 extern char con_buf[CON_BUF_SIZE];
 extern struct semaphore con_buf_sem;
+extern char vt_dont_switch;
 
 struct vt_spawn_console {
 	spinlock_t lock;
diff --git a/kernel/power/console.c b/kernel/power/console.c
index 623786d44159..89bcf4973ee5 100644
--- a/kernel/power/console.c
+++ b/kernel/power/console.c
@@ -27,7 +27,15 @@ int pm_prepare_console(void)
 		return 1;
 	}
 
-	set_console(SUSPEND_CONSOLE);
+	if (set_console(SUSPEND_CONSOLE)) {
+		/*
+		 * We're unable to switch to the SUSPEND_CONSOLE.
+		 * Let the calling function know so it can decide
+		 * what to do.
+		 */
+		release_console_sem();
+		return 1;
+	}
 	release_console_sem();
 
 	if (vt_waitactive(SUSPEND_CONSOLE)) {
-- 
cgit v1.2.3


From 89a09141df6ac1c3821fbe44ca8384eb37692965 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 16 Mar 2007 13:38:26 -0800
Subject: [PATCH] nfs: fix congestion control

The current NFS client congestion logic is severly broken, it marks the
backing device congested during each nfs_writepages() call but doesn't
mirror this in nfs_writepage() which makes for deadlocks.  Also it
implements its own waitqueue.

Replace this by a more regular congestion implementation that puts a cap on
the number of active writeback pages and uses the bdi congestion waitqueue.

Also always use an interruptible wait since it makes sense to be able to
SIGKILL the process even for mounts without 'intr'.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no>
Cc: Christoph Lameter <clameter@engr.sgi.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/super.c              |   4 +-
 fs/nfs/sysctl.c             |   8 +++
 fs/nfs/write.c              | 116 ++++++++++++++++++++++++++++----------------
 include/linux/backing-dev.h |   1 +
 include/linux/nfs_fs.h      |   1 +
 include/linux/nfs_fs_sb.h   |   1 +
 mm/backing-dev.c            |  16 ++++++
 7 files changed, 103 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index bb516a2cfbaf..f1eae44b9a1a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -151,10 +151,10 @@ int __init register_nfs_fs(void)
 	if (ret < 0)
 		goto error_0;
 
-#ifdef CONFIG_NFS_V4
 	ret = nfs_register_sysctl();
 	if (ret < 0)
 		goto error_1;
+#ifdef CONFIG_NFS_V4
 	ret = register_filesystem(&nfs4_fs_type);
 	if (ret < 0)
 		goto error_2;
@@ -165,9 +165,9 @@ int __init register_nfs_fs(void)
 #ifdef CONFIG_NFS_V4
 error_2:
 	nfs_unregister_sysctl();
+#endif
 error_1:
 	unregister_filesystem(&nfs_fs_type);
-#endif
 error_0:
 	return ret;
 }
diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c
index fcdcafbb3293..b62481dabae9 100644
--- a/fs/nfs/sysctl.c
+++ b/fs/nfs/sysctl.c
@@ -50,6 +50,14 @@ static ctl_table nfs_cb_sysctls[] = {
 		.proc_handler	= &proc_dointvec_jiffies,
 		.strategy	= &sysctl_jiffies,
 	},
+	{
+		.ctl_name	= CTL_UNNUMBERED,
+		.procname	= "nfs_congestion_kb",
+		.data		= &nfs_congestion_kb,
+		.maxlen		= sizeof(nfs_congestion_kb),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
 	{ .ctl_name = 0 }
 };
 
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index febdade91670..2867e6b7096f 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -12,6 +12,7 @@
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/writeback.h>
+#include <linux/swap.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/nfs_fs.h>
@@ -38,7 +39,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context*,
 					    struct page *,
 					    unsigned int, unsigned int);
 static void nfs_mark_request_dirty(struct nfs_page *req);
-static int nfs_wait_on_write_congestion(struct address_space *, int);
 static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
@@ -48,8 +48,6 @@ static struct kmem_cache *nfs_wdata_cachep;
 static mempool_t *nfs_wdata_mempool;
 static mempool_t *nfs_commit_mempool;
 
-static DECLARE_WAIT_QUEUE_HEAD(nfs_write_congestion);
-
 struct nfs_write_data *nfs_commit_alloc(void)
 {
 	struct nfs_write_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOFS);
@@ -210,6 +208,40 @@ static int wb_priority(struct writeback_control *wbc)
 	return 0;
 }
 
+/*
+ * NFS congestion control
+ */
+
+int nfs_congestion_kb;
+
+#define NFS_CONGESTION_ON_THRESH 	(nfs_congestion_kb >> (PAGE_SHIFT-10))
+#define NFS_CONGESTION_OFF_THRESH	\
+	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
+
+static void nfs_set_page_writeback(struct page *page)
+{
+	if (!test_set_page_writeback(page)) {
+		struct inode *inode = page->mapping->host;
+		struct nfs_server *nfss = NFS_SERVER(inode);
+
+		if (atomic_inc_return(&nfss->writeback) >
+				NFS_CONGESTION_ON_THRESH)
+			set_bdi_congested(&nfss->backing_dev_info, WRITE);
+	}
+}
+
+static void nfs_end_page_writeback(struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct nfs_server *nfss = NFS_SERVER(inode);
+
+	end_page_writeback(page);
+	if (atomic_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH) {
+		clear_bdi_congested(&nfss->backing_dev_info, WRITE);
+		congestion_end(WRITE);
+	}
+}
+
 /*
  * Find an associated nfs write request, and prepare to flush it out
  * Returns 1 if there was no write request, or if the request was
@@ -247,7 +279,7 @@ static int nfs_page_mark_flush(struct page *page)
 	spin_unlock(req_lock);
 	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
 		nfs_mark_request_dirty(req);
-		set_page_writeback(page);
+		nfs_set_page_writeback(page);
 	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
@@ -302,13 +334,8 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
 	return err; 
 }
 
-/*
- * Note: causes nfs_update_request() to block on the assumption
- * 	 that the writeback is generated due to memory pressure.
- */
 int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	struct inode *inode = mapping->host;
 	int err;
 
@@ -317,20 +344,12 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 	err = generic_writepages(mapping, wbc);
 	if (err)
 		return err;
-	while (test_and_set_bit(BDI_write_congested, &bdi->state) != 0) {
-		if (wbc->nonblocking)
-			return 0;
-		nfs_wait_on_write_congestion(mapping, 0);
-	}
 	err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
 	if (err < 0)
 		goto out;
 	nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
 	err = 0;
 out:
-	clear_bit(BDI_write_congested, &bdi->state);
-	wake_up_all(&nfs_write_congestion);
-	congestion_end(WRITE);
 	return err;
 }
 
@@ -360,7 +379,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
 }
 
 /*
- * Insert a write request into an inode
+ * Remove a write request from an inode
  */
 static void nfs_inode_remove_request(struct nfs_page *req)
 {
@@ -531,10 +550,10 @@ static inline int nfs_scan_commit(struct inode *inode, struct list_head *dst, un
 }
 #endif
 
-static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
+static int nfs_wait_on_write_congestion(struct address_space *mapping)
 {
+	struct inode *inode = mapping->host;
 	struct backing_dev_info *bdi = mapping->backing_dev_info;
-	DEFINE_WAIT(wait);
 	int ret = 0;
 
 	might_sleep();
@@ -542,31 +561,23 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 	if (!bdi_write_congested(bdi))
 		return 0;
 
-	nfs_inc_stats(mapping->host, NFSIOS_CONGESTIONWAIT);
+	nfs_inc_stats(inode, NFSIOS_CONGESTIONWAIT);
 
-	if (intr) {
-		struct rpc_clnt *clnt = NFS_CLIENT(mapping->host);
+	do {
+		struct rpc_clnt *clnt = NFS_CLIENT(inode);
 		sigset_t oldset;
 
 		rpc_clnt_sigmask(clnt, &oldset);
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_INTERRUPTIBLE);
-		if (bdi_write_congested(bdi)) {
-			if (signalled())
-				ret = -ERESTARTSYS;
-			else
-				schedule();
-		}
+		ret = congestion_wait_interruptible(WRITE, HZ/10);
 		rpc_clnt_sigunmask(clnt, &oldset);
-	} else {
-		prepare_to_wait(&nfs_write_congestion, &wait, TASK_UNINTERRUPTIBLE);
-		if (bdi_write_congested(bdi))
-			schedule();
-	}
-	finish_wait(&nfs_write_congestion, &wait);
+		if (ret == -ERESTARTSYS)
+			break;
+		ret = 0;
+	} while (bdi_write_congested(bdi));
+
 	return ret;
 }
 
-
 /*
  * Try to update any existing write request, or create one if there is none.
  * In order to match, the request's credentials must match those of
@@ -577,14 +588,15 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
 static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 		struct page *page, unsigned int offset, unsigned int bytes)
 {
-	struct inode *inode = page->mapping->host;
+	struct address_space *mapping = page->mapping;
+	struct inode *inode = mapping->host;
 	struct nfs_inode *nfsi = NFS_I(inode);
 	struct nfs_page		*req, *new = NULL;
 	unsigned long		rqend, end;
 
 	end = offset + bytes;
 
-	if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR))
+	if (nfs_wait_on_write_congestion(mapping))
 		return ERR_PTR(-ERESTARTSYS);
 	for (;;) {
 		/* Loop over all inode entries and see if we find
@@ -727,7 +739,7 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	end_page_writeback(req->wb_page);
+	nfs_end_page_writeback(req->wb_page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 	if (!PageError(req->wb_page)) {
@@ -1042,12 +1054,12 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		if (task->tk_status < 0) {
 			nfs_set_pageerror(page);
 			req->wb_context->error = task->tk_status;
-			end_page_writeback(page);
+			nfs_end_page_writeback(page);
 			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
 			goto next;
 		}
-		end_page_writeback(page);
+		nfs_end_page_writeback(page);
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
@@ -1514,6 +1526,26 @@ int __init nfs_init_writepagecache(void)
 	if (nfs_commit_mempool == NULL)
 		return -ENOMEM;
 
+	/*
+	 * NFS congestion size, scale with available memory.
+	 *
+	 *  64MB:    8192k
+	 * 128MB:   11585k
+	 * 256MB:   16384k
+	 * 512MB:   23170k
+	 *   1GB:   32768k
+	 *   2GB:   46340k
+	 *   4GB:   65536k
+	 *   8GB:   92681k
+	 *  16GB:  131072k
+	 *
+	 * This allows larger machines to have larger/more transfers.
+	 * Limit the default to 256M
+	 */
+	nfs_congestion_kb = (16*int_sqrt(totalram_pages)) << (PAGE_SHIFT-10);
+	if (nfs_congestion_kb > 256*1024)
+		nfs_congestion_kb = 256*1024;
+
 	return 0;
 }
 
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h
index 7011d6255593..f2542c24b328 100644
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -93,6 +93,7 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi)
 void clear_bdi_congested(struct backing_dev_info *bdi, int rw);
 void set_bdi_congested(struct backing_dev_info *bdi, int rw);
 long congestion_wait(int rw, long timeout);
+long congestion_wait_interruptible(int rw, long timeout);
 void congestion_end(int rw);
 
 #define bdi_cap_writeback_dirty(bdi) \
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 47aaa2c66738..e9ae0c6e2c62 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -415,6 +415,7 @@ extern void nfs_complete_unlink(struct dentry *);
 /*
  * linux/fs/nfs/write.c
  */
+extern int  nfs_congestion_kb;
 extern int  nfs_writepage(struct page *page, struct writeback_control *wbc);
 extern int  nfs_writepages(struct address_space *, struct writeback_control *);
 extern int  nfs_flush_incompatible(struct file *file, struct page *page);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 95796e6924f1..c95d5e642548 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -82,6 +82,7 @@ struct nfs_server {
 	struct rpc_clnt *	client_acl;	/* ACL RPC client handle */
 	struct nfs_iostats *	io_stats;	/* I/O statistics */
 	struct backing_dev_info	backing_dev_info;
+	atomic_t		writeback;	/* number of writeback pages */
 	int			flags;		/* various flags */
 	unsigned int		caps;		/* server capabilities */
 	unsigned int		rsize;		/* read size */
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index f50a2811f9dc..e5de3781d3fe 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -55,6 +55,22 @@ long congestion_wait(int rw, long timeout)
 }
 EXPORT_SYMBOL(congestion_wait);
 
+long congestion_wait_interruptible(int rw, long timeout)
+{
+	long ret;
+	DEFINE_WAIT(wait);
+	wait_queue_head_t *wqh = &congestion_wqh[rw];
+
+	prepare_to_wait(wqh, &wait, TASK_INTERRUPTIBLE);
+	if (signal_pending(current))
+		ret = -ERESTARTSYS;
+	else
+		ret = io_schedule_timeout(timeout);
+	finish_wait(wqh, &wait);
+	return ret;
+}
+EXPORT_SYMBOL(congestion_wait_interruptible);
+
 /**
  * congestion_end - wake up sleepers on a congested backing_dev_info
  * @rw: READ or WRITE
-- 
cgit v1.2.3


From 5379058b718ac6354ba99cc74d10c28d632dc28a Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 16 Mar 2007 14:15:57 -0800
Subject: [PATCH] fix MTIME_SEC_MAX on 32-bit

The maximum seconds value we can handle on 32bit is LONG_MAX.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ktime.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index c68c7ac6b232..248305bb9a18 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -57,7 +57,11 @@ typedef union {
 } ktime_t;
 
 #define KTIME_MAX			((s64)~((u64)1 << 63))
-#define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
+#if (BITS_PER_LONG == 64)
+# define KTIME_SEC_MAX			(KTIME_MAX / NSEC_PER_SEC)
+#else
+# define KTIME_SEC_MAX			LONG_MAX
+#endif
 
 /*
  * ktime_t definitions when using the 64-bit scalar representation:
-- 
cgit v1.2.3


From 5851fadce8824d5d4b8fd02c22ae098401f6489e Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sun, 18 Mar 2007 12:58:08 +0000
Subject: [PATCH] Fix build error due to not including <linux/errno.h>

Since d9a9cdfb078d755e648d53ec25b7370f84ee5729 <linux/sysfs.h> is using
ENOSYS without including <linux/errno.h> if CONFIG_SYSFS is disabled.

Fixed by including <linux/errno.h>.

Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sysfs.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 0544edda7168..fea9a6b3fb7b 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -11,6 +11,7 @@
 #define _SYSFS_H_
 
 #include <linux/compiler.h>
+#include <linux/errno.h>
 #include <linux/list.h>
 #include <asm/atomic.h>
 
-- 
cgit v1.2.3


From 8da38d7bac802ed2a09a79aaae9961c806a1847c Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 22 Mar 2007 00:11:22 -0800
Subject: [PATCH] FRV: fix unannotated variable declarations

Fix unannotated variable declarations.  Variables that have allocation
section annotations (such as __meminitdata) on their definitions must also
have them on their declarations as not doing so may affect the addressing
mode used by the compiler and may result in a linker error.

Signed-off-by: David Howells <dhowells@redhat.com>
Acked-by: Yasunori Goto <y-goto@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bootmem.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 2275f2748708..81c07cd18643 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -108,7 +108,7 @@ static inline void *alloc_remap(int nid, unsigned long size)
 #endif /* CONFIG_HAVE_ARCH_ALLOC_REMAP */
 
 extern unsigned long __meminitdata nr_kernel_pages;
-extern unsigned long nr_all_pages;
+extern unsigned long __meminitdata nr_all_pages;
 
 extern void *alloc_large_system_hash(const char *tablename,
 				     unsigned long bucketsize,
-- 
cgit v1.2.3


From e3a55fd18deab758a2970e0dfcd60a677a920426 Mon Sep 17 00:00:00 2001
From: Jarek Poplawski <jarkao2@o2.pl>
Date: Thu, 22 Mar 2007 00:11:26 -0800
Subject: [PATCH] lockdep: lockdep_depth vs. debug_locks

lockdep found a bug during a run of workqueue function - this could be also
caused by a bug from other code running simultaneously.

lockdep really shouldn't be used when debug_locks == 0!

Reported-by: Folkert van Heusden <folkert@vanheusden.com>
Inspired-by: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Jarek Poplawski <jarkao2@o2.pl>
Cc: Ingo Molnar <mingo@elte.hu>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/lockdep.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 06fe93a3e916..14c937d345cb 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -245,7 +245,7 @@ extern void lock_release(struct lockdep_map *lock, int nested,
 
 # define INIT_LOCKDEP				.lockdep_recursion = 0,
 
-#define lockdep_depth(tsk)	((tsk)->lockdep_depth)
+#define lockdep_depth(tsk)	(debug_locks ? (tsk)->lockdep_depth : 0)
 
 #else /* !LOCKDEP */
 
-- 
cgit v1.2.3


From 513daadd152ddbf32cb6d0447ddba3427ce5b8e8 Mon Sep 17 00:00:00 2001
From: Suleiman Souhlal <suleiman@google.com>
Date: Mon, 26 Mar 2007 23:03:20 +0200
Subject: ide: use correct IDE error recovery
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

IDE error recovery is using IDLE IMMEDIATE if the drive is busy or has DRQ set.
This violates the ATA spec (can only send IDLEÂ IMMEDIATE when drive is not
busy) and really hoses up some drives (modern drives will not be able to
recover using this error handling).  The correct thing to do is issue a SRST
followed by a SET FEATURES command.  This is what Western Digital recommends
for error recovery and what Western Digital says Windows does. Â ItÂ also does
not violate the ATA spec as far as I can tell.

Bart:
* port the patch over the current tree
* undo the recalibration code removal
* send SET FEATURES command after checking for good drive status
* don't check whether the current request is of REQ_TYPE_ATA_{CMD,TASK}
  type because we need to send SET FEATURES before handling any requests
* some pre-ATA4 drives require INITIALIZE DEVICE PARAMETERS command before
  other commands (except IDENTIFY) so send SET FEATURES only if there are
  no pending drive->special requests
* update comments and patch description
* any bugs introduced by this patch are mine and not Suleiman's :-)

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Acked-by: Alan Cox <alan@redhat.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c   | 32 +++++++++++++++++++++-----------
 drivers/ide/ide-iops.c |  3 +++
 include/linux/ide.h    |  1 +
 3 files changed, 25 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index c193553f6fe7..0e0280076fcd 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -519,21 +519,24 @@ static ide_startstop_t ide_ata_error(ide_drive_t *drive, struct request *rq, u8
 	if ((stat & DRQ_STAT) && rq_data_dir(rq) == READ && hwif->err_stops_fifo == 0)
 		try_to_flush_leftover_data(drive);
 
+	if (rq->errors >= ERROR_MAX || blk_noretry_request(rq)) {
+		ide_kill_rq(drive, rq);
+		return ide_stopped;
+	}
+
 	if (hwif->INB(IDE_STATUS_REG) & (BUSY_STAT|DRQ_STAT))
-		/* force an abort */
-		hwif->OUTB(WIN_IDLEIMMEDIATE, IDE_COMMAND_REG);
+		rq->errors |= ERROR_RESET;
 
-	if (rq->errors >= ERROR_MAX || blk_noretry_request(rq))
-		ide_kill_rq(drive, rq);
-	else {
-		if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
-			++rq->errors;
-			return ide_do_reset(drive);
-		}
-		if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
-			drive->special.b.recalibrate = 1;
+	if ((rq->errors & ERROR_RESET) == ERROR_RESET) {
 		++rq->errors;
+		return ide_do_reset(drive);
 	}
+
+	if ((rq->errors & ERROR_RECAL) == ERROR_RECAL)
+		drive->special.b.recalibrate = 1;
+
+	++rq->errors;
+
 	return ide_stopped;
 }
 
@@ -1025,6 +1028,13 @@ static ide_startstop_t start_request (ide_drive_t *drive, struct request *rq)
 	if (!drive->special.all) {
 		ide_driver_t *drv;
 
+		/*
+		 * We reset the drive so we need to issue a SETFEATURES.
+		 * Do it _after_ do_special() restored device parameters.
+		 */
+		if (drive->current_speed == 0xff)
+			ide_config_drive_speed(drive, drive->desired_speed);
+
 		if (rq->cmd_type == REQ_TYPE_ATA_CMD ||
 		    rq->cmd_type == REQ_TYPE_ATA_TASK ||
 		    rq->cmd_type == REQ_TYPE_ATA_TASKFILE)
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 5523c52fee7a..1ee53a551c3a 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -1094,6 +1094,9 @@ static void pre_reset(ide_drive_t *drive)
 	if (HWIF(drive)->pre_reset != NULL)
 		HWIF(drive)->pre_reset(drive);
 
+	if (drive->current_speed != 0xff)
+		drive->desired_speed = drive->current_speed;
+	drive->current_speed = 0xff;
 }
 
 /*
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 34f2676b3c62..58564a199862 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -615,6 +615,7 @@ typedef struct ide_drive_s {
         u8	init_speed;	/* transfer rate set at boot */
         u8	pio_speed;      /* unused by core, used by some drivers for fallback from DMA */
         u8	current_speed;	/* current transfer rate set */
+	u8	desired_speed;	/* desired transfer rate set */
         u8	dn;		/* now wide spread use */
         u8	wcache;		/* status of write cache */
 	u8	acoustic;	/* acoustic management */
-- 
cgit v1.2.3


From 04a395233089ed160ef87a6c2155e5dedc6f7d15 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@swtch.com>
Date: Mon, 26 Mar 2007 11:23:56 -0400
Subject: [PATCH] Add const to pointer qualifiers for __chk_user_ptr and
 __chk_io_ptr.

Change prototypes for __chk_user_ptr and __chk_io_ptr to take const
void* instead of void*, so that code can pass "const void *" to them.

(Right now sparse does not warn about passing const void* to void*
functions, but that is a separate bug that I believe Josh is working on,
and once sparse does check this, the changed prototypes will be
necessary.)

Signed-off-by: Russ Cox <rsc@swtch.com>
Signed-off-by: Josh Triplett <josh@freedesktop.org>
Acked-by: Christopher Li <sparse@chrisli.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/compiler.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/compiler.h b/include/linux/compiler.h
index aca66984aafd..3b6949b41745 100644
--- a/include/linux/compiler.h
+++ b/include/linux/compiler.h
@@ -15,8 +15,8 @@
 # define __acquire(x)	__context__(x,1)
 # define __release(x)	__context__(x,-1)
 # define __cond_lock(x,c)	((c) ? ({ __acquire(x); 1; }) : 0)
-extern void __chk_user_ptr(void __user *);
-extern void __chk_io_ptr(void __iomem *);
+extern void __chk_user_ptr(const void __user *);
+extern void __chk_io_ptr(const void __iomem *);
 #else
 # define __user
 # define __kernel
-- 
cgit v1.2.3


From 40bee44eaef91b6030037c8bb47f909181fb1edc Mon Sep 17 00:00:00 2001
From: Mark Fasheh <mark.fasheh@oracle.com>
Date: Wed, 21 Mar 2007 13:11:02 +0100
Subject: Export __splice_from_pipe()

Ocfs2 wants to implement it's own splice write actor so that it can better
manage cluster / page locks. This lets us re-use the rest of splice write
while only providing our own code where it's actually important.

Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
---
 fs/splice.c               | 7 ++++---
 include/linux/pipe_fs_i.h | 4 ++++
 2 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/splice.c b/fs/splice.c
index ae50208e3e6c..07f6556add0a 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -651,9 +651,9 @@ out_ret:
  * key here is the 'actor' worker passed in that actually moves the data
  * to the wanted destination. See pipe_to_file/pipe_to_sendpage above.
  */
-static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
-				  struct file *out, loff_t *ppos, size_t len,
-				  unsigned int flags, splice_actor *actor)
+ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
+			   struct file *out, loff_t *ppos, size_t len,
+			   unsigned int flags, splice_actor *actor)
 {
 	int ret, do_wakeup, err;
 	struct splice_desc sd;
@@ -747,6 +747,7 @@ static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe,
 
 	return ret;
 }
+EXPORT_SYMBOL(__splice_from_pipe);
 
 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
 			 loff_t *ppos, size_t len, unsigned int flags,
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 2e19478e9e84..8bcbc54e1b48 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -99,4 +99,8 @@ extern ssize_t splice_from_pipe(struct pipe_inode_info *, struct file *,
 				loff_t *, size_t, unsigned int,
 				splice_actor *);
 
+extern ssize_t __splice_from_pipe(struct pipe_inode_info *, struct file *,
+				  loff_t *, size_t, unsigned int,
+				  splice_actor *);
+
 #endif
-- 
cgit v1.2.3


From d75e26a8298f84bca66374e98fa69049f26083ba Mon Sep 17 00:00:00 2001
From: Jeff Dike <jdike@addtoit.com>
Date: Mon, 26 Mar 2007 21:32:20 -0800
Subject: [PATCH] uml: fix epoll

UML/x86_64 needs the same packing of struct epoll_event as x86_64.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/eventpoll.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/eventpoll.h b/include/linux/eventpoll.h
index 84cfa8bbdc36..d2a96cbf4f0e 100644
--- a/include/linux/eventpoll.h
+++ b/include/linux/eventpoll.h
@@ -31,12 +31,19 @@
 /* 
  * On x86-64 make the 64bit structure have the same alignment as the
  * 32bit structure. This makes 32bit emulation easier.
+ *
+ * UML/x86_64 needs the same packing as x86_64 - UML + UML_X86 +
+ * 64_BIT adds up to UML/x86_64.
  */
 #ifdef __x86_64__
 #define EPOLL_PACKED __attribute__((packed))
 #else
+#if defined(CONFIG_UML) && defined(CONFIG_UML_X86) && defined(CONFIG_64BIT)
+#define EPOLL_PACKED __attribute__((packed))
+#else
 #define EPOLL_PACKED
 #endif
+#endif
 
 struct epoll_event {
 	__u32 events;
-- 
cgit v1.2.3


From 78d832f62643ac6209beccbfb29228314423935e Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 26 Mar 2007 21:32:22 -0800
Subject: [PATCH] utsns: fix !CONFIG_UTS_NS behavior

When CONFIG_UTS_NS=n, clone(CLONE_NEWUTS) quietly refuses.  So correctly does
not unshare a new uts namespace, but also does not return -EINVAL.

Fix this to return -EINVAL so the caller knows his request was denied.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/utsname.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index a4555fe3754c..e10267d402c5 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -70,6 +70,8 @@ static inline int unshare_utsname(unsigned long unshare_flags,
 
 static inline int copy_utsname(int flags, struct task_struct *tsk)
 {
+	if (flags & CLONE_NEWUTS)
+		return -EINVAL;
 	return 0;
 }
 static inline void put_uts_ns(struct uts_namespace *ns)
-- 
cgit v1.2.3


From a28d193cbf01375974683c13e99a52ef489e5eb0 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 26 Mar 2007 21:32:31 -0800
Subject: [PATCH] ipcns: fix !CONFIG_IPC_NS behavior

When CONFIG_IPC_NS=n, clone(CLONE_NEWIPC) claims success, but did not actually
clone a new IPC namespace.

Fix this to return -EINVAL so the caller knows his request was denied.

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/ipc.h | 9 +++------
 ipc/util.c          | 7 +++++++
 2 files changed, 10 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipc.h b/include/linux/ipc.h
index 636094c29b16..6da6772c19ff 100644
--- a/include/linux/ipc.h
+++ b/include/linux/ipc.h
@@ -92,19 +92,16 @@ extern struct ipc_namespace init_ipc_ns;
 
 #ifdef CONFIG_SYSVIPC
 #define INIT_IPC_NS(ns)		.ns		= &init_ipc_ns,
+extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
 #else
 #define INIT_IPC_NS(ns)
+static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{ return 0; }
 #endif
 
 #ifdef CONFIG_IPC_NS
 extern void free_ipc_ns(struct kref *kref);
-extern int copy_ipcs(unsigned long flags, struct task_struct *tsk);
 extern int unshare_ipcs(unsigned long flags, struct ipc_namespace **ns);
-#else
-static inline int copy_ipcs(unsigned long flags, struct task_struct *tsk)
-{
-	return 0;
-}
 #endif
 
 static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
diff --git a/ipc/util.c b/ipc/util.c
index 08a647965b9e..0b652387d169 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -144,6 +144,13 @@ void free_ipc_ns(struct kref *kref)
 	shm_exit_ns(ns);
 	kfree(ns);
 }
+#else
+int copy_ipcs(unsigned long flags, struct task_struct *tsk)
+{
+	if (flags & CLONE_NEWIPC)
+		return -EINVAL;
+	return 0;
+}
 #endif
 
 /**
-- 
cgit v1.2.3


From c2805fbb8630abb95d94ce7adc3f97976f7e0367 Mon Sep 17 00:00:00 2001
From: Jean Tourrilhes <jt@hpl.hp.com>
Date: Fri, 23 Mar 2007 00:31:16 +0000
Subject: [PATCH] WE-22 : prevent information leak on 64 bit

 	Johannes Berg discovered that kernel space was leaking to
userspace on 64 bit platform. He made a first patch to fix that. This
is an improved version of his patch.

Signed-off-by: Jean Tourrilhes <jt@hpl.hp.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h | 21 +++++++++++--
 include/net/iw_handler.h | 30 ++++++++++++------
 net/core/rtnetlink.c     |  3 +-
 net/core/wireless.c      | 82 +++++++++++++++++++++++++++++-------------------
 4 files changed, 91 insertions(+), 45 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 447c52beb691..48759b2f57d7 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -1,10 +1,10 @@
 /*
  * This file define a set of standard wireless extensions
  *
- * Version :	21	14.3.06
+ * Version :	22	16.3.07
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
  */
 
 #ifndef _LINUX_WIRELESS_H
@@ -85,7 +85,7 @@
  * (there is some stuff that will be added in the future...)
  * I just plan to increment with each new version.
  */
-#define WIRELESS_EXT	21
+#define WIRELESS_EXT	22
 
 /*
  * Changes :
@@ -221,6 +221,10 @@
  *	- Add IW_RETRY_SHORT/IW_RETRY_LONG retry modifiers
  *	- Power/Retry relative values no longer * 100000
  *	- Add explicit flag to tell stats are in 802.11k RCPI : IW_QUAL_RCPI
+ *
+ * V21 to V22
+ * ----------
+ *	- Prevent leaking of kernel space in stream on 64 bits.
  */
 
 /**************************** CONSTANTS ****************************/
@@ -1085,4 +1089,15 @@ struct iw_event
 #define IW_EV_POINT_LEN	(IW_EV_LCP_LEN + sizeof(struct iw_point) - \
 			 IW_EV_POINT_OFF)
 
+/* Size of the Event prefix when packed in stream */
+#define IW_EV_LCP_PK_LEN	(4)
+/* Size of the various events when packed in stream */
+#define IW_EV_CHAR_PK_LEN	(IW_EV_LCP_PK_LEN + IFNAMSIZ)
+#define IW_EV_UINT_PK_LEN	(IW_EV_LCP_PK_LEN + sizeof(__u32))
+#define IW_EV_FREQ_PK_LEN	(IW_EV_LCP_PK_LEN + sizeof(struct iw_freq))
+#define IW_EV_PARAM_PK_LEN	(IW_EV_LCP_PK_LEN + sizeof(struct iw_param))
+#define IW_EV_ADDR_PK_LEN	(IW_EV_LCP_PK_LEN + sizeof(struct sockaddr))
+#define IW_EV_QUAL_PK_LEN	(IW_EV_LCP_PK_LEN + sizeof(struct iw_quality))
+#define IW_EV_POINT_PK_LEN	(IW_EV_LCP_LEN + 4)
+
 #endif	/* _LINUX_WIRELESS_H */
diff --git a/include/net/iw_handler.h b/include/net/iw_handler.h
index 10559e937d27..8a830188354d 100644
--- a/include/net/iw_handler.h
+++ b/include/net/iw_handler.h
@@ -1,10 +1,10 @@
 /*
  * This file define the new driver API for Wireless Extensions
  *
- * Version :	7	18.3.05
+ * Version :	8	16.3.07
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 2001-2006 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 2001-2007 Jean Tourrilhes, All Rights Reserved.
  */
 
 #ifndef _IW_HANDLER_H
@@ -207,7 +207,7 @@
  * will be needed...
  * I just plan to increment with each new version.
  */
-#define IW_HANDLER_VERSION	7
+#define IW_HANDLER_VERSION	8
 
 /*
  * Changes :
@@ -239,6 +239,10 @@
  *	- Remove (struct iw_point *)->pointer from events and streams
  *	- Remove spy_offset from struct iw_handler_def
  *	- Add "check" version of event macros for ieee802.11 stack
+ *
+ * V7 to V8
+ * ----------
+ *	- Prevent leaking of kernel space in stream on 64 bits.
  */
 
 /**************************** CONSTANTS ****************************/
@@ -500,7 +504,11 @@ iwe_stream_add_event(char *	stream,		/* Stream of events */
 	/* Check if it's possible */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
-		memcpy(stream, (char *) iwe, event_len);
+		/* Beware of alignement issues on 64 bits */
+		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
+		memcpy(stream + IW_EV_LCP_LEN,
+		       ((char *) iwe) + IW_EV_LCP_LEN,
+		       event_len - IW_EV_LCP_LEN);
 		stream += event_len;
 	}
 	return stream;
@@ -521,10 +529,10 @@ iwe_stream_add_point(char *	stream,		/* Stream of events */
 	/* Check if it's possible */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
-		memcpy(stream, (char *) iwe, IW_EV_LCP_LEN);
+		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
 		memcpy(stream + IW_EV_LCP_LEN,
 		       ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
+		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
 		memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length);
 		stream += event_len;
 	}
@@ -574,7 +582,11 @@ iwe_stream_check_add_event(char *	stream,		/* Stream of events */
 	/* Check if it's possible, set error if not */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
-		memcpy(stream, (char *) iwe, event_len);
+		/* Beware of alignement issues on 64 bits */
+		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
+		memcpy(stream + IW_EV_LCP_LEN,
+		       ((char *) iwe) + IW_EV_LCP_LEN,
+		       event_len - IW_EV_LCP_LEN);
 		stream += event_len;
 	} else
 		*perr = -E2BIG;
@@ -598,10 +610,10 @@ iwe_stream_check_add_point(char *	stream,		/* Stream of events */
 	/* Check if it's possible */
 	if(likely((stream + event_len) < ends)) {
 		iwe->len = event_len;
-		memcpy(stream, (char *) iwe, IW_EV_LCP_LEN);
+		memcpy(stream, (char *) iwe, IW_EV_LCP_PK_LEN);
 		memcpy(stream + IW_EV_LCP_LEN,
 		       ((char *) iwe) + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
+		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
 		memcpy(stream + IW_EV_POINT_LEN, extra, iwe->u.data.length);
 		stream += event_len;
 	} else
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 6055074c4b81..33ea8eac7fe0 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -621,7 +621,8 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 		if (err < 0)
 			goto errout;
 
-		iw += IW_EV_POINT_OFF;
+		/* Payload is at an offset in buffer */
+		iw = iw_buf + IW_EV_POINT_OFF;
 	}
 #endif	/* CONFIG_NET_WIRELESS_RTNETLINK */
 
diff --git a/net/core/wireless.c b/net/core/wireless.c
index 9936ab11e6e0..b07fe270a508 100644
--- a/net/core/wireless.c
+++ b/net/core/wireless.c
@@ -2,7 +2,7 @@
  * This file implement the Wireless Extensions APIs.
  *
  * Authors :	Jean Tourrilhes - HPL - <jt@hpl.hp.com>
- * Copyright (c) 1997-2006 Jean Tourrilhes, All Rights Reserved.
+ * Copyright (c) 1997-2007 Jean Tourrilhes, All Rights Reserved.
  *
  * (As all part of the Linux kernel, this file is GPL)
  */
@@ -76,6 +76,9 @@
  *	o Change length in ESSID and NICK to strlen() instead of strlen()+1
  *	o Make standard_ioctl_num and standard_event_num unsigned
  *	o Remove (struct net_device *)->get_wireless_stats()
+ *
+ * v10 - 16.3.07 - Jean II
+ *	o Prevent leaking of kernel space in stream on 64 bits.
  */
 
 /***************************** INCLUDES *****************************/
@@ -427,6 +430,21 @@ static const int event_type_size[] = {
 	IW_EV_QUAL_LEN,			/* IW_HEADER_TYPE_QUAL */
 };
 
+/* Size (in bytes) of various events, as packed */
+static const int event_type_pk_size[] = {
+	IW_EV_LCP_PK_LEN,		/* IW_HEADER_TYPE_NULL */
+	0,
+	IW_EV_CHAR_PK_LEN,		/* IW_HEADER_TYPE_CHAR */
+	0,
+	IW_EV_UINT_PK_LEN,		/* IW_HEADER_TYPE_UINT */
+	IW_EV_FREQ_PK_LEN,		/* IW_HEADER_TYPE_FREQ */
+	IW_EV_ADDR_PK_LEN,		/* IW_HEADER_TYPE_ADDR */
+	0,
+	IW_EV_POINT_PK_LEN,		/* Without variable payload */
+	IW_EV_PARAM_PK_LEN,		/* IW_HEADER_TYPE_PARAM */
+	IW_EV_QUAL_PK_LEN,		/* IW_HEADER_TYPE_QUAL */
+};
+
 /************************ COMMON SUBROUTINES ************************/
 /*
  * Stuff that may be used in various place or doesn't fit in one
@@ -1217,7 +1235,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
 		/* Use our own copy of wrqu */
 		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_LEN);
+					     + IW_EV_LCP_PK_LEN);
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, wrqu, NULL);
@@ -1229,8 +1247,8 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 
 		/* Get a temp copy of wrqu (skip pointer) */
 		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_LEN,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
+		       ((char *) request) + IW_EV_LCP_PK_LEN,
+		       IW_EV_POINT_LEN - IW_EV_LCP_PK_LEN);
 
 		/* Calculate space needed by arguments. Always allocate
 		 * for max space. Easier, and won't last long... */
@@ -1240,7 +1258,7 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 		   (wrqu_point.data.length > descr->max_tokens))
 			extra_size = (wrqu_point.data.length
 				      * descr->token_size);
-		buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
+		buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
 		       dev->name, extra_size, buffer_size);
@@ -1254,15 +1272,15 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 
 		/* Put wrqu in the right place (just before extra).
 		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
+		 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
 		 */
-		memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
+		memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
 		       ((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
+		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
+		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
 
 		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
+		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
 
 		/* Call the handler */
 		ret = handler(dev, &info, wrqu, extra);
@@ -1270,11 +1288,11 @@ static int rtnetlink_standard_get(struct net_device *	dev,
 		/* Calculate real returned length */
 		extra_size = (wrqu->data.length * descr->token_size);
 		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_LEN;
+		request->len = extra_size + IW_EV_POINT_PK_LEN;
 
 		/* Put the iwe header where it should, i.e. scrap the
 		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
+		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
 
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
@@ -1331,10 +1349,10 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 #endif	/* WE_RTNETLINK_DEBUG */
 
 	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = &request->u;
+	wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
 
 	/* Check if wrqu is complete */
-	hdr_len = event_type_size[descr->header_type];
+	hdr_len = event_type_pk_size[descr->header_type];
 	if(request_len < hdr_len) {
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG
@@ -1359,7 +1377,7 @@ static inline int rtnetlink_standard_set(struct net_device *	dev,
 
 		/* Put wrqu in the right place (skip pointer) */
 		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
+		       wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
 		/* Don't forget about the event code... */
 		wrqu = &wrqu_point;
 
@@ -1483,7 +1501,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 		hdr_len = extra_size;
 		extra_size = 0;
 	} else {
-		hdr_len = IW_EV_POINT_LEN;
+		hdr_len = IW_EV_POINT_PK_LEN;
 	}
 
 	/* Check if wrqu is complete */
@@ -1514,7 +1532,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 		memcpy(buffer + IW_EV_POINT_OFF, request, request_len);
 		/* Use our own copy of wrqu */
 		wrqu = (union iwreq_data *) (buffer + IW_EV_POINT_OFF
-					     + IW_EV_LCP_LEN);
+					     + IW_EV_LCP_PK_LEN);
 
 		/* No extra arguments. Trivial to handle */
 		ret = handler(dev, &info, wrqu, (char *) wrqu);
@@ -1523,7 +1541,7 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 		char *	extra;
 
 		/* Buffer for full reply */
-		buffer_size = extra_size + IW_EV_POINT_LEN + IW_EV_POINT_OFF;
+		buffer_size = extra_size + IW_EV_POINT_PK_LEN + IW_EV_POINT_OFF;
 
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG "%s (WE.r) : Malloc %d bytes (%d bytes)\n",
@@ -1538,15 +1556,15 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 
 		/* Put wrqu in the right place (just before extra).
 		 * Leave space for IWE header and dummy pointer...
-		 * Note that IW_EV_LCP_LEN==4 bytes, so it's still aligned...
+		 * Note that IW_EV_LCP_PK_LEN==4 bytes, so it's still aligned.
 		 */
-		memcpy(buffer + IW_EV_LCP_LEN + IW_EV_POINT_OFF,
-		       ((char *) request) + IW_EV_LCP_LEN,
-		       IW_EV_POINT_LEN - IW_EV_LCP_LEN);
-		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_LEN);
+		memcpy(buffer + IW_EV_LCP_PK_LEN + IW_EV_POINT_OFF,
+		       ((char *) request) + IW_EV_LCP_PK_LEN,
+		       IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
+		wrqu = (union iwreq_data *) (buffer + IW_EV_LCP_PK_LEN);
 
 		/* Extra comes logically after that. Offset +12 bytes. */
-		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_LEN;
+		extra = buffer + IW_EV_POINT_OFF + IW_EV_POINT_PK_LEN;
 
 		/* Call the handler */
 		ret = handler(dev, &info, wrqu, extra);
@@ -1556,11 +1574,11 @@ static inline int rtnetlink_private_get(struct net_device *	dev,
 		if (!(descr->get_args & IW_PRIV_SIZE_FIXED))
 			extra_size = adjust_priv_size(descr->get_args, wrqu);
 		/* Re-adjust reply size */
-		request->len = extra_size + IW_EV_POINT_LEN;
+		request->len = extra_size + IW_EV_POINT_PK_LEN;
 
 		/* Put the iwe header where it should, i.e. scrap the
 		 * dummy pointer. */
-		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_LEN);
+		memcpy(buffer + IW_EV_POINT_OFF, request, IW_EV_LCP_PK_LEN);
 
 #ifdef WE_RTNETLINK_DEBUG
 		printk(KERN_DEBUG "%s (WE.r) : Reply 0x%04X, hdr_len %d, tokens %d, extra_size %d, buffer_size %d\n", dev->name, cmd, hdr_len, wrqu->data.length, extra_size, buffer_size);
@@ -1641,14 +1659,14 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 	/* Does it fits in wrqu ? */
 	if((descr->set_args & IW_PRIV_SIZE_FIXED) &&
 	   (extra_size <= IFNAMSIZ)) {
-		hdr_len = IW_EV_LCP_LEN + extra_size;
+		hdr_len = IW_EV_LCP_PK_LEN + extra_size;
 		extra_size = 0;
 	} else {
-		hdr_len = IW_EV_POINT_LEN;
+		hdr_len = IW_EV_POINT_PK_LEN;
 	}
 
 	/* Extract fixed header from request. This is properly aligned. */
-	wrqu = &request->u;
+	wrqu = (union iwreq_data *) (((char *) request) + IW_EV_LCP_PK_LEN);
 
 	/* Check if wrqu is complete */
 	if(request_len < hdr_len) {
@@ -1675,7 +1693,7 @@ static inline int rtnetlink_private_set(struct net_device *	dev,
 
 		/* Put wrqu in the right place (skip pointer) */
 		memcpy(((char *) &wrqu_point) + IW_EV_POINT_OFF,
-		       wrqu, IW_EV_POINT_LEN - IW_EV_LCP_LEN);
+		       wrqu, IW_EV_POINT_PK_LEN - IW_EV_LCP_PK_LEN);
 
 		/* Does it fits within bounds ? */
 		if(wrqu_point.data.length > (descr->set_args &
@@ -1738,7 +1756,7 @@ int wireless_rtnetlink_get(struct net_device *	dev,
 	iw_handler		handler;
 
 	/* Check length */
-	if(len < IW_EV_LCP_LEN) {
+	if(len < IW_EV_LCP_PK_LEN) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
 		       dev->name, len);
 		return -EINVAL;
@@ -1822,7 +1840,7 @@ int wireless_rtnetlink_set(struct net_device *	dev,
 	iw_handler		handler;
 
 	/* Check length */
-	if(len < IW_EV_LCP_LEN) {
+	if(len < IW_EV_LCP_PK_LEN) {
 		printk(KERN_DEBUG "%s (WE.r) : RtNetlink request too short (%d)\n",
 		       dev->name, len);
 		return -EINVAL;
-- 
cgit v1.2.3


From c01003c20563d1e75ec9828d21743919d2b43977 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Thu, 29 Mar 2007 11:46:52 -0700
Subject: [IFB]: Fix crash on input device removal

The input_device pointer is not refcounted, which means the device may
disappear while packets are queued, causing a crash when ifb passes packets
with a stale skb->dev pointer to netif_rx().

Fix by storing the interface index instead and do a lookup where neccessary.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ifb.c      | 35 +++++++++++++----------------------
 include/linux/skbuff.h |  5 +++--
 include/net/pkt_cls.h  |  7 +++++--
 net/core/dev.c         |  8 ++++----
 net/core/skbuff.c      |  2 +-
 net/sched/act_mirred.c |  2 +-
 6 files changed, 27 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/ifb.c b/drivers/net/ifb.c
index ca2b21f9d444..07b4c0d7a75c 100644
--- a/drivers/net/ifb.c
+++ b/drivers/net/ifb.c
@@ -96,17 +96,24 @@ static void ri_tasklet(unsigned long dev)
 		skb->tc_verd = SET_TC_NCLS(skb->tc_verd);
 		stats->tx_packets++;
 		stats->tx_bytes +=skb->len;
+
+		skb->dev = __dev_get_by_index(skb->iif);
+		if (!skb->dev) {
+			dev_kfree_skb(skb);
+			stats->tx_dropped++;
+			break;
+		}
+		skb->iif = _dev->ifindex;
+
 		if (from & AT_EGRESS) {
 			dp->st_rx_frm_egr++;
 			dev_queue_xmit(skb);
 		} else if (from & AT_INGRESS) {
-
 			dp->st_rx_frm_ing++;
+			skb_pull(skb, skb->dev->hard_header_len);
 			netif_rx(skb);
-		} else {
-			dev_kfree_skb(skb);
-			stats->tx_dropped++;
-		}
+		} else
+			BUG();
 	}
 
 	if (netif_tx_trylock(_dev)) {
@@ -157,26 +164,10 @@ static int ifb_xmit(struct sk_buff *skb, struct net_device *dev)
 	stats->rx_packets++;
 	stats->rx_bytes+=skb->len;
 
-	if (!from || !skb->input_dev) {
-dropped:
+	if (!(from & (AT_INGRESS|AT_EGRESS)) || !skb->iif) {
 		dev_kfree_skb(skb);
 		stats->rx_dropped++;
 		return ret;
-	} else {
-		/*
-		 * note we could be going
-		 * ingress -> egress or
-		 * egress -> ingress
-		*/
-		skb->dev = skb->input_dev;
-		skb->input_dev = dev;
-		if (from & AT_INGRESS) {
-			skb_pull(skb, skb->dev->hard_header_len);
-		} else {
-			if (!(from & AT_EGRESS)) {
-				goto dropped;
-			}
-		}
 	}
 
 	if (skb_queue_len(&dp->rq) >= dev->tx_queue_len) {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 4ff3940210d8..82f43ad478c7 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -188,7 +188,7 @@ enum {
  *	@sk: Socket we are owned by
  *	@tstamp: Time we arrived
  *	@dev: Device we arrived on/are leaving by
- *	@input_dev: Device we arrived on
+ *	@iif: ifindex of device we arrived on
  *	@h: Transport layer header
  *	@nh: Network layer header
  *	@mac: Link layer header
@@ -235,7 +235,8 @@ struct sk_buff {
 	struct sock		*sk;
 	struct skb_timeval	tstamp;
 	struct net_device	*dev;
-	struct net_device	*input_dev;
+	int			iif;
+	/* 4 byte hole on 64 bit*/
 
 	union {
 		struct tcphdr	*th;
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index b902d24a3256..02647fe3d74b 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -352,10 +352,13 @@ tcf_change_indev(struct tcf_proto *tp, char *indev, struct rtattr *indev_tlv)
 static inline int
 tcf_match_indev(struct sk_buff *skb, char *indev)
 {
+	struct net_device *dev;
+
 	if (indev[0]) {
-		if  (!skb->input_dev)
+		if  (!skb->iif)
 			return 0;
-		if (strcmp(indev, skb->input_dev->name))
+		dev = __dev_get_by_index(skb->iif);
+		if (!dev || strcmp(indev, dev->name))
 			return 0;
 	}
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 5984b55311a1..d44b8f1964fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1741,8 +1741,8 @@ static int ing_filter(struct sk_buff *skb)
 	if (dev->qdisc_ingress) {
 		__u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
 		if (MAX_RED_LOOP < ttl++) {
-			printk(KERN_WARNING "Redir loop detected Dropping packet (%s->%s)\n",
-				skb->input_dev->name, skb->dev->name);
+			printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
+				skb->iif, skb->dev->ifindex);
 			return TC_ACT_SHOT;
 		}
 
@@ -1775,8 +1775,8 @@ int netif_receive_skb(struct sk_buff *skb)
 	if (!skb->tstamp.off_sec)
 		net_timestamp(skb);
 
-	if (!skb->input_dev)
-		skb->input_dev = skb->dev;
+	if (!skb->iif)
+		skb->iif = skb->dev->ifindex;
 
 	orig_dev = skb_bond(skb);
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 702fa8f08747..87573ae35b02 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -496,7 +496,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
 	n->tc_verd = SET_TC_VERD(skb->tc_verd,0);
 	n->tc_verd = CLR_TC_OK2MUNGE(n->tc_verd);
 	n->tc_verd = CLR_TC_MUNGED(n->tc_verd);
-	C(input_dev);
+	C(iif);
 #endif
 	skb_copy_secmark(n, skb);
 #endif
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 68f26cb278f9..3e93683e9ab3 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -198,7 +198,7 @@ bad_mirred:
 		skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at);
 
 	skb2->dev = dev;
-	skb2->input_dev = skb->dev;
+	skb2->iif = skb->dev->ifindex;
 	dev_queue_xmit(skb2);
 	spin_unlock(&m->tcf_lock);
 	return m->tcf_action;
-- 
cgit v1.2.3


From 0c84ce268b69855919b6ac7edc8f11caf21e9c88 Mon Sep 17 00:00:00 2001
From: Kay Sievers <kay.sievers@vrfy.org>
Date: Sun, 1 Apr 2007 23:49:48 -0700
Subject: [PATCH] driver core: fix built-in drivers sysfs links

built-in drivers had broken sysfs links that caused bootup hangs for
certain driver unregistry sequences.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/device.h |  1 +
 kernel/module.c        | 18 ++++++++++++++----
 2 files changed, 15 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/device.h b/include/linux/device.h
index caad9bba9652..5cf30e95c8b6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -128,6 +128,7 @@ struct device_driver {
 
 	struct module		* owner;
 	const char 		* mod_name;	/* used for built-in modules */
+	struct module_kobject	* mkobj;
 
 	int	(*probe)	(struct device * dev);
 	int	(*remove)	(struct device * dev);
diff --git a/kernel/module.c b/kernel/module.c
index fbc51de6444e..dcdb32b8b13c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2384,8 +2384,13 @@ void module_add_driver(struct module *mod, struct device_driver *drv)
 
 		/* Lookup built-in module entry in /sys/modules */
 		mkobj = kset_find_obj(&module_subsys.kset, drv->mod_name);
-		if (mkobj)
+		if (mkobj) {
 			mk = container_of(mkobj, struct module_kobject, kobj);
+			/* remember our module structure */
+			drv->mkobj = mk;
+			/* kset_find_obj took a reference */
+			kobject_put(mkobj);
+		}
 	}
 
 	if (!mk)
@@ -2405,17 +2410,22 @@ EXPORT_SYMBOL(module_add_driver);
 
 void module_remove_driver(struct device_driver *drv)
 {
+	struct module_kobject *mk = NULL;
 	char *driver_name;
 
 	if (!drv)
 		return;
 
 	sysfs_remove_link(&drv->kobj, "module");
-	if (drv->owner && drv->owner->mkobj.drivers_dir) {
+
+	if (drv->owner)
+		mk = &drv->owner->mkobj;
+	else if (drv->mkobj)
+		mk = drv->mkobj;
+	if (mk && mk->drivers_dir) {
 		driver_name = make_driver_name(drv);
 		if (driver_name) {
-			sysfs_remove_link(drv->owner->mkobj.drivers_dir,
-					  driver_name);
+			sysfs_remove_link(mk->drivers_dir, driver_name);
 			kfree(driver_name);
 		}
 	}
-- 
cgit v1.2.3


From 1d64b9cb1dc2a7cd521444e3d908adeccd026356 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Sun, 1 Apr 2007 23:49:49 -0700
Subject: [PATCH] Fix microcode-related suspend problem

Fix the regression resulting from the recent change of suspend code
ordering that causes systems based on Intel x86 CPUs using the microcode
driver to hang during the resume.

The problem occurs since the microcode driver uses request_firmware() in
its CPU hotplug notifier, which is called after tasks has been frozen and
hangs.  It can be fixed by telling the microcode driver to use the
microcode stored in memory during the resume instead of trying to load it
from disk.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Adrian Bunk <bunk@stusta.de>
Cc: Tigran Aivazian <tigran@aivazian.fsnet.co.uk>
Cc: Pavel Machek <pavel@ucw.cz>
Cc: Maxim <maximlevitsky@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/i386/kernel/microcode.c | 71 +++++++++++++++++++++++++++++++++++++++++---
 include/linux/cpu.h          |  4 +++
 kernel/cpu.c                 | 32 ++++++++++----------
 3 files changed, 87 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/i386/kernel/microcode.c b/arch/i386/kernel/microcode.c
index b8f16633a6ec..cbe7ec8dbb9f 100644
--- a/arch/i386/kernel/microcode.c
+++ b/arch/i386/kernel/microcode.c
@@ -567,6 +567,53 @@ static int cpu_request_microcode(int cpu)
 	return error;
 }
 
+static int apply_microcode_on_cpu(int cpu)
+{
+	struct cpuinfo_x86 *c = cpu_data + cpu;
+	struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+	cpumask_t old;
+	unsigned int val[2];
+	int err = 0;
+
+	if (!uci->mc)
+		return -EINVAL;
+
+	old = current->cpus_allowed;
+	set_cpus_allowed(current, cpumask_of_cpu(cpu));
+
+	/* Check if the microcode we have in memory matches the CPU */
+	if (c->x86_vendor != X86_VENDOR_INTEL || c->x86 < 6 ||
+	    cpu_has(c, X86_FEATURE_IA64) || uci->sig != cpuid_eax(0x00000001))
+		err = -EINVAL;
+
+	if (!err && ((c->x86_model >= 5) || (c->x86 > 6))) {
+		/* get processor flags from MSR 0x17 */
+		rdmsr(MSR_IA32_PLATFORM_ID, val[0], val[1]);
+		if (uci->pf != (1 << ((val[1] >> 18) & 7)))
+			err = -EINVAL;
+	}
+
+	if (!err) {
+		wrmsr(MSR_IA32_UCODE_REV, 0, 0);
+		/* see notes above for revision 1.07.  Apparent chip bug */
+		sync_core();
+		/* get the current revision from MSR 0x8B */
+		rdmsr(MSR_IA32_UCODE_REV, val[0], val[1]);
+		if (uci->rev != val[1])
+			err = -EINVAL;
+	}
+
+	if (!err)
+		apply_microcode(cpu);
+	else
+		printk(KERN_ERR "microcode: Could not apply microcode to CPU%d:"
+			" sig=0x%x, pf=0x%x, rev=0x%x\n",
+			cpu, uci->sig, uci->pf, uci->rev);
+
+	set_cpus_allowed(current, old);
+	return err;
+}
+
 static void microcode_init_cpu(int cpu)
 {
 	cpumask_t old;
@@ -577,7 +624,8 @@ static void microcode_init_cpu(int cpu)
 	set_cpus_allowed(current, cpumask_of_cpu(cpu));
 	mutex_lock(&microcode_mutex);
 	collect_cpu_info(cpu);
-	if (uci->valid && system_state == SYSTEM_RUNNING)
+	if (uci->valid && system_state == SYSTEM_RUNNING &&
+	    !suspend_cpu_hotplug)
 		cpu_request_microcode(cpu);
 	mutex_unlock(&microcode_mutex);
 	set_cpus_allowed(current, old);
@@ -663,13 +711,24 @@ static int mc_sysdev_add(struct sys_device *sys_dev)
 		return 0;
 
 	pr_debug("Microcode:CPU %d added\n", cpu);
-	memset(uci, 0, sizeof(*uci));
+	/* If suspend_cpu_hotplug is set, the system is resuming and we should
+	 * use the data from before the suspend.
+	 */
+	if (suspend_cpu_hotplug) {
+		err = apply_microcode_on_cpu(cpu);
+		if (err)
+			microcode_fini_cpu(cpu);
+	}
+	if (!uci->valid)
+		memset(uci, 0, sizeof(*uci));
 
 	err = sysfs_create_group(&sys_dev->kobj, &mc_attr_group);
 	if (err)
 		return err;
 
-	microcode_init_cpu(cpu);
+	if (!uci->valid)
+		microcode_init_cpu(cpu);
+
 	return 0;
 }
 
@@ -680,7 +739,11 @@ static int mc_sysdev_remove(struct sys_device *sys_dev)
 	if (!cpu_online(cpu))
 		return 0;
 	pr_debug("Microcode:CPU %d removed\n", cpu);
-	microcode_fini_cpu(cpu);
+	/* If suspend_cpu_hotplug is set, the system is suspending and we should
+	 * keep the microcode in memory for the resume.
+	 */
+	if (!suspend_cpu_hotplug)
+		microcode_fini_cpu(cpu);
 	sysfs_remove_group(&sys_dev->kobj, &mc_attr_group);
 	return 0;
 }
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index 769ddc6df492..c22b0dfcbcd2 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -127,9 +127,13 @@ static inline int cpu_is_offline(int cpu) { return 0; }
 #endif		/* CONFIG_HOTPLUG_CPU */
 
 #ifdef CONFIG_SUSPEND_SMP
+extern int suspend_cpu_hotplug;
+
 extern int disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
+#define suspend_cpu_hotplug	0
+
 static inline int disable_nonboot_cpus(void) { return 0; }
 static inline void enable_nonboot_cpus(void) {}
 #endif
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 3d4206ada5c9..36e70845cfc3 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -254,6 +254,12 @@ int __cpuinit cpu_up(unsigned int cpu)
 }
 
 #ifdef CONFIG_SUSPEND_SMP
+/* Needed to prevent the microcode driver from requesting firmware in its CPU
+ * hotplug notifier during the suspend/resume.
+ */
+int suspend_cpu_hotplug;
+EXPORT_SYMBOL(suspend_cpu_hotplug);
+
 static cpumask_t frozen_cpus;
 
 int disable_nonboot_cpus(void)
@@ -261,16 +267,8 @@ int disable_nonboot_cpus(void)
 	int cpu, first_cpu, error = 0;
 
 	mutex_lock(&cpu_add_remove_lock);
-	first_cpu = first_cpu(cpu_present_map);
-	if (!cpu_online(first_cpu)) {
-		error = _cpu_up(first_cpu);
-		if (error) {
-			printk(KERN_ERR "Could not bring CPU%d up.\n",
-				first_cpu);
-			goto out;
-		}
-	}
-
+	suspend_cpu_hotplug = 1;
+	first_cpu = first_cpu(cpu_online_map);
 	/* We take down all of the non-boot CPUs in one shot to avoid races
 	 * with the userspace trying to use the CPU hotplug at the same time
 	 */
@@ -296,7 +294,7 @@ int disable_nonboot_cpus(void)
 	} else {
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
-out:
+	suspend_cpu_hotplug = 0;
 	mutex_unlock(&cpu_add_remove_lock);
 	return error;
 }
@@ -308,20 +306,22 @@ void enable_nonboot_cpus(void)
 	/* Allow everyone to use the CPU hotplug again */
 	mutex_lock(&cpu_add_remove_lock);
 	cpu_hotplug_disabled = 0;
-	mutex_unlock(&cpu_add_remove_lock);
 	if (cpus_empty(frozen_cpus))
-		return;
+		goto out;
 
+	suspend_cpu_hotplug = 1;
 	printk("Enabling non-boot CPUs ...\n");
 	for_each_cpu_mask(cpu, frozen_cpus) {
-		error = cpu_up(cpu);
+		error = _cpu_up(cpu);
 		if (!error) {
 			printk("CPU%d is up\n", cpu);
 			continue;
 		}
-		printk(KERN_WARNING "Error taking CPU%d up: %d\n",
-			cpu, error);
+		printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
 	}
 	cpus_clear(frozen_cpus);
+	suspend_cpu_hotplug = 0;
+out:
+	mutex_unlock(&cpu_add_remove_lock);
 }
 #endif
-- 
cgit v1.2.3


From 7152764700559b6a4041fdaba345df9a5cd962f0 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Mon, 2 Apr 2007 11:28:52 +0800
Subject: libata: reorder HSM_ST_FIRST for easier decoding (take 3)

patch 1/4:
  Reorder HSM_ST_FIRST, such that the task state transition is easier decoded with human eyes.

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 include/linux/libata.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/libata.h b/include/linux/libata.h
index e3f32f3189b2..c7b5e661fe59 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -315,11 +315,11 @@ enum {
 
 enum hsm_task_states {
 	HSM_ST_IDLE,		/* no command on going */
+	HSM_ST_FIRST,		/* (waiting the device to)
+				   write CDB or first data block */
 	HSM_ST,			/* (waiting the device to) transfer data */
 	HSM_ST_LAST,		/* (waiting the device to) complete command */
 	HSM_ST_ERR,		/* error */
-	HSM_ST_FIRST,		/* (waiting the device to)
-				   write CDB or first data block */
 };
 
 enum ata_completion_errors {
-- 
cgit v1.2.3


From 18d6e9d51891f91af4e7351cbab3cb180bb9f430 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Mon, 2 Apr 2007 11:34:15 +0800
Subject: libata: Limit max sector to 128 for TORiSAN DVD drives (take 3)

patch 3/4:
  The TORiSAN drive locks up when max sector == 256.
  Limit max sector to 128 for the TORiSAN DRD-N216 drives.
  (http://bugzilla.kernel.org/show_bug.cgi?id=6710)

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/ata/libata-core.c | 6 ++++++
 include/linux/ata.h       | 1 +
 include/linux/libata.h    | 1 +
 3 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index e07142b4bb54..84f6f8575966 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1784,6 +1784,9 @@ int ata_dev_configure(struct ata_device *dev)
 		dev->max_sectors = ATA_MAX_SECTORS;
 	}
 
+	if (ata_device_blacklisted(dev) & ATA_HORKAGE_MAX_SEC_128)
+		dev->max_sectors = min(ATA_MAX_SECTORS_128, dev->max_sectors);
+
 	if (ap->ops->dev_config)
 		ap->ops->dev_config(ap, dev);
 
@@ -3352,6 +3355,9 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "_NEC DV5800A", 	NULL,		ATA_HORKAGE_NODMA },
 	{ "SAMSUNG CD-ROM SN-124","N001",	ATA_HORKAGE_NODMA },
 
+	/* Weird ATAPI devices */
+	{ "TORiSAN DVD-ROM DRD-N216", NULL,	ATA_HORKAGE_MAX_SEC_128 },
+
 	/* Devices we expect to fail diagnostics */
 
 	/* Devices where NCQ should be avoided */
diff --git a/include/linux/ata.h b/include/linux/ata.h
index c331da2da5f7..6caeb98e29dd 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -40,6 +40,7 @@ enum {
 	ATA_MAX_DEVICES		= 2,	/* per bus/port */
 	ATA_MAX_PRD		= 256,	/* we could make these 256/256 */
 	ATA_SECT_SIZE		= 512,
+	ATA_MAX_SECTORS_128	= 128,
 	ATA_MAX_SECTORS		= 256,
 	ATA_MAX_SECTORS_LBA48	= 65535,/* TODO: 65536? */
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c7b5e661fe59..c04aec360406 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -311,6 +311,7 @@ enum {
 	ATA_HORKAGE_DIAGNOSTIC	= (1 << 0),	/* Failed boot diag */
 	ATA_HORKAGE_NODMA	= (1 << 1),	/* DMA problems */
 	ATA_HORKAGE_NONCQ	= (1 << 2),	/* Don't use NCQ */
+	ATA_HORKAGE_MAX_SEC_128	= (1 << 3),	/* Limit max sects to 128 */
 };
 
 enum hsm_task_states {
-- 
cgit v1.2.3


From 6f23a31d1cbe791a1ce86ffa9b23251ab0a1ef45 Mon Sep 17 00:00:00 2001
From: Albert Lee <albertcc@tw.ibm.com>
Date: Mon, 2 Apr 2007 11:39:25 +0800
Subject: libata: Limit ATAPI DMA to R/W commands only for TORiSAN DVD drives
 (take 3)

patch 4/4:

  Limit ATAPI DMA to R/W commands only for TORiSAN DRD-N216 DVD-ROM drives
  (http://bugzilla.kernel.org/show_bug.cgi?id=6710)

Signed-off-by: Albert Lee <albertcc@tw.ibm.com>
Signed-off-by: Jeff Garzik <jeff@garzik.org>
---
 drivers/ata/libata-core.c | 27 ++++++++++++++++++++++++++-
 include/linux/libata.h    |  1 +
 2 files changed, 27 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 84f6f8575966..cf2338cbe4ea 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1787,6 +1787,10 @@ int ata_dev_configure(struct ata_device *dev)
 	if (ata_device_blacklisted(dev) & ATA_HORKAGE_MAX_SEC_128)
 		dev->max_sectors = min(ATA_MAX_SECTORS_128, dev->max_sectors);
 
+	/* limit ATAPI DMA to R/W commands only */
+	if (ata_device_blacklisted(dev) & ATA_HORKAGE_DMA_RW_ONLY)
+		dev->horkage |= ATA_HORKAGE_DMA_RW_ONLY;
+
 	if (ap->ops->dev_config)
 		ap->ops->dev_config(ap, dev);
 
@@ -3356,7 +3360,8 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {
 	{ "SAMSUNG CD-ROM SN-124","N001",	ATA_HORKAGE_NODMA },
 
 	/* Weird ATAPI devices */
-	{ "TORiSAN DVD-ROM DRD-N216", NULL,	ATA_HORKAGE_MAX_SEC_128 },
+	{ "TORiSAN DVD-ROM DRD-N216", NULL,	ATA_HORKAGE_MAX_SEC_128 |
+						ATA_HORKAGE_DMA_RW_ONLY },
 
 	/* Devices we expect to fail diagnostics */
 
@@ -3680,6 +3685,26 @@ int ata_check_atapi_dma(struct ata_queued_cmd *qc)
 	struct ata_port *ap = qc->ap;
 	int rc = 0; /* Assume ATAPI DMA is OK by default */
 
+	/* some drives can only do ATAPI DMA on read/write */
+	if (unlikely(qc->dev->horkage & ATA_HORKAGE_DMA_RW_ONLY)) {
+		struct scsi_cmnd *cmd = qc->scsicmd;
+		u8 *scsicmd = cmd->cmnd;
+
+		switch (scsicmd[0]) {
+		case READ_10:
+		case WRITE_10:
+		case READ_12:
+		case WRITE_12:
+		case READ_6:
+		case WRITE_6:
+			/* atapi dma maybe ok */
+			break;
+		default:
+			/* turn off atapi dma */
+			return 1;
+		}
+	}
+
 	if (ap->ops->check_atapi_dma)
 		rc = ap->ops->check_atapi_dma(qc);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index c04aec360406..0cfbcb6f08eb 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -312,6 +312,7 @@ enum {
 	ATA_HORKAGE_NODMA	= (1 << 1),	/* DMA problems */
 	ATA_HORKAGE_NONCQ	= (1 << 2),	/* Don't use NCQ */
 	ATA_HORKAGE_MAX_SEC_128	= (1 << 3),	/* Limit max sects to 128 */
+	ATA_HORKAGE_DMA_RW_ONLY	= (1 << 4),	/* ATAPI DMA for RW only */
 };
 
 enum hsm_task_states {
-- 
cgit v1.2.3


From 5792a2856a63cdc568e08a7d6f9b2413d9217b3e Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Wed, 4 Apr 2007 19:08:18 -0700
Subject: [PATCH] md: avoid a deadlock when removing a device from an md array
 via sysfs

A device can be removed from an md array via e.g.
  echo remove > /sys/block/md3/md/dev-sde/state

This will try to remove the 'dev-sde' subtree which will deadlock
since
  commit e7b0d26a86943370c04d6833c6edba2a72a6e240

With this patch we run the kobject_del via schedule_work so as to
avoid the deadlock.

Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Neil Brown <neilb@suse.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/md/md.c           | 16 +++++++++++++++-
 include/linux/raid/md_k.h |  1 +
 2 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/md/md.c b/drivers/md/md.c
index 2a9b6a07e3a2..509171ca7fa8 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1378,6 +1378,12 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 	return err;
 }
 
+static void delayed_delete(struct work_struct *ws)
+{
+	mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
+	kobject_del(&rdev->kobj);
+}
+
 static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 {
 	char b[BDEVNAME_SIZE];
@@ -1390,7 +1396,12 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev)
 	printk(KERN_INFO "md: unbind<%s>\n", bdevname(rdev->bdev,b));
 	rdev->mddev = NULL;
 	sysfs_remove_link(&rdev->kobj, "block");
-	kobject_del(&rdev->kobj);
+
+	/* We need to delay this, otherwise we can deadlock when
+	 * writing to 'remove' to "dev/state"
+	 */
+	INIT_WORK(&rdev->del_work, delayed_delete);
+	schedule_work(&rdev->del_work);
 }
 
 /*
@@ -3389,6 +3400,9 @@ static int do_md_stop(mddev_t * mddev, int mode)
 				sysfs_remove_link(&mddev->kobj, nm);
 			}
 
+		/* make sure all delayed_delete calls have finished */
+		flush_scheduled_work();
+
 		export_array(mddev);
 
 		mddev->array_size = 0;
diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h
index 8245c282168b..de72c49747c8 100644
--- a/include/linux/raid/md_k.h
+++ b/include/linux/raid/md_k.h
@@ -104,6 +104,7 @@ struct mdk_rdev_s
 					   * for reporting to userspace and storing
 					   * in superblock.
 					   */
+	struct work_struct del_work;	/* used for delayed sysfs removal */
 };
 
 struct mddev_s
-- 
cgit v1.2.3


From 2363cc0264c42636e9e7622f78dde5c2f66beb8e Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Wed, 4 Apr 2007 19:08:22 -0700
Subject: [PATCH] remove protection of LANANA-reserved majors

Revert all this.  It can cause device-mapper to receive a different major from
earlier kernels and it turns out that the Amanda backup program (via GNU tar,
apparently) checks major numbers on files when performing incremental backups.

Which is a bit broken of Amanda (or tar), but this feature isn't important
enough to justify the churn.

Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 block/genhd.c          |  2 --
 drivers/base/core.c    | 14 --------------
 fs/char_dev.c          |  2 --
 include/linux/kdev_t.h |  2 --
 4 files changed, 20 deletions(-)

(limited to 'include/linux')

diff --git a/block/genhd.c b/block/genhd.c
index 050a1f0f3a86..441432a142f2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -62,8 +62,6 @@ int register_blkdev(unsigned int major, const char *name)
 	/* temporary */
 	if (major == 0) {
 		for (index = ARRAY_SIZE(major_names)-1; index > 0; index--) {
-			if (is_lanana_major(index))
-				continue;
 			if (major_names[index] == NULL)
 				break;
 		}
diff --git a/drivers/base/core.c b/drivers/base/core.c
index ad0f4a2f25c4..d7fcf823a42a 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -27,20 +27,6 @@
 int (*platform_notify)(struct device * dev) = NULL;
 int (*platform_notify_remove)(struct device * dev) = NULL;
 
-/*
- * Detect the LANANA-assigned LOCAL/EXPERIMENTAL majors
- */
-bool is_lanana_major(unsigned int major)
-{
-	if (major >= 60 && major <= 63)
-		return 1;
-	if (major >= 120 && major <= 127)
-		return 1;
-	if (major >= 240 && major <= 254)
-		return 1;
-	return 0;
-}
-
 /*
  * sysfs bindings for devices.
  */
diff --git a/fs/char_dev.c b/fs/char_dev.c
index 78ced721554d..164a45cdaf5f 100644
--- a/fs/char_dev.c
+++ b/fs/char_dev.c
@@ -109,8 +109,6 @@ __register_chrdev_region(unsigned int major, unsigned int baseminor,
 	/* temporary */
 	if (major == 0) {
 		for (i = ARRAY_SIZE(chrdevs)-1; i > 0; i--) {
-			if (is_lanana_major(i))
-				continue;
 			if (chrdevs[i] == NULL)
 				break;
 		}
diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h
index 4c2c3737e415..2dacab8beccb 100644
--- a/include/linux/kdev_t.h
+++ b/include/linux/kdev_t.h
@@ -87,8 +87,6 @@ static inline unsigned sysv_minor(u32 dev)
 	return dev & 0x3ffff;
 }
 
-bool is_lanana_major(unsigned int major);
-
 #else /* __KERNEL__ */
 
 /*
-- 
cgit v1.2.3


From 995f054f2a342f8505fed4f8395d12c0f5966414 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Sat, 7 Apr 2007 12:05:00 +0200
Subject: [PATCH] high-res timers: resume fix

Soeren Sonnenburg reported that upon resume he is getting
this backtrace:

 [<c0119637>] smp_apic_timer_interrupt+0x57/0x90
 [<c0142d30>] retrigger_next_event+0x0/0xb0
 [<c0104d30>] apic_timer_interrupt+0x28/0x30
 [<c0142d30>] retrigger_next_event+0x0/0xb0
 [<c0140068>] __kfifo_put+0x8/0x90
 [<c0130fe5>] on_each_cpu+0x35/0x60
 [<c0143538>] clock_was_set+0x18/0x20
 [<c0135cdc>] timekeeping_resume+0x7c/0xa0
 [<c02aabe1>] __sysdev_resume+0x11/0x80
 [<c02ab0c7>] sysdev_resume+0x47/0x80
 [<c02b0b05>] device_power_up+0x5/0x10

it turns out that on resume we mistakenly re-enable interrupts too
early.  Do the timer retrigger only on the current CPU.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Soeren Sonnenburg <kernel@nn7.de>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hrtimer.h |  3 +++
 kernel/hrtimer.c        | 12 ++++++++++++
 kernel/timer.c          |  2 +-
 3 files changed, 16 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5bdbc744e773..17c29dca8354 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -206,6 +206,7 @@ struct hrtimer_cpu_base {
 struct clock_event_device;
 
 extern void clock_was_set(void);
+extern void hres_timers_resume(void);
 extern void hrtimer_interrupt(struct clock_event_device *dev);
 
 /*
@@ -236,6 +237,8 @@ static inline ktime_t hrtimer_cb_get_time(struct hrtimer *timer)
  */
 static inline void clock_was_set(void) { }
 
+static inline void hres_timers_resume(void) { }
+
 /*
  * In non high resolution mode the time reference is taken from
  * the base softirq time variable.
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 067ba2c05328..b74860aaf5f1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -458,6 +458,18 @@ void clock_was_set(void)
 	on_each_cpu(retrigger_next_event, NULL, 0, 1);
 }
 
+/*
+ * During resume we might have to reprogram the high resolution timer
+ * interrupt (on the local CPU):
+ */
+void hres_timers_resume(void)
+{
+	WARN_ON_ONCE(num_online_cpus() > 1);
+
+	/* Retrigger the CPU local events: */
+	retrigger_next_event(NULL);
+}
+
 /*
  * Check, whether the timer is on the callback pending list
  */
diff --git a/kernel/timer.c b/kernel/timer.c
index 440048acaea1..dd6c2c1c561b 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1016,7 +1016,7 @@ static int timekeeping_resume(struct sys_device *dev)
 	clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
 
 	/* Resume hrtimers */
-	clock_was_set();
+	hres_timers_resume();
 
 	return 0;
 }
-- 
cgit v1.2.3


From 23450319e2890986c247ec0aa1442f060e657e6d Mon Sep 17 00:00:00 2001
From: Suleiman Souhlal <suleiman@google.com>
Date: Tue, 10 Apr 2007 22:38:37 +0200
Subject: ide: correctly prevent IDE timer expiry function to run if request
 was already handled

It is possible for the timer expiry function to run even though the
request has already been handled: ide_timer_expiry() only checks that
the handler is not NULL, but it is possible that we have handled a
request (thus clearing the handler) and then started a new request
(thus starting the timer again, and setting a handler).

A simple way to exhibit this is to set the DMA timeout to 1 jiffy and
run dd: The kernel will panic after a few minutes because
ide_timer_expiry() tries to add a timer when it's already active.

To fix this, we simply add a request generation count that gets
incremented at every interrupt, and check in ide_timer_expiry() that
we have not already handled a new interrupt before running the expiry
function.

Signed-off-by: Suleiman Souhlal <suleiman@google.com>
Signed-off-by: Bartlomiej Zolnierkiewicz <bzolnier@gmail.com>
---
 drivers/ide/ide-io.c   | 6 +++++-
 drivers/ide/ide-iops.c | 2 ++
 include/linux/ide.h    | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/ide/ide-io.c b/drivers/ide/ide-io.c
index 0e0280076fcd..8670112f1d39 100644
--- a/drivers/ide/ide-io.c
+++ b/drivers/ide/ide-io.c
@@ -1226,6 +1226,7 @@ static void ide_do_request (ide_hwgroup_t *hwgroup, int masked_irq)
 #endif
 				/* so that ide_timer_expiry knows what to do */
 				hwgroup->sleeping = 1;
+				hwgroup->req_gen_timer = hwgroup->req_gen;
 				mod_timer(&hwgroup->timer, sleep);
 				/* we purposely leave hwgroup->busy==1
 				 * while sleeping */
@@ -1411,7 +1412,8 @@ void ide_timer_expiry (unsigned long data)
 
 	spin_lock_irqsave(&ide_lock, flags);
 
-	if ((handler = hwgroup->handler) == NULL) {
+	if (((handler = hwgroup->handler) == NULL) ||
+	    (hwgroup->req_gen != hwgroup->req_gen_timer)) {
 		/*
 		 * Either a marginal timeout occurred
 		 * (got the interrupt just as timer expired),
@@ -1439,6 +1441,7 @@ void ide_timer_expiry (unsigned long data)
 				if ((wait = expiry(drive)) > 0) {
 					/* reset timer */
 					hwgroup->timer.expires  = jiffies + wait;
+					hwgroup->req_gen_timer = hwgroup->req_gen;
 					add_timer(&hwgroup->timer);
 					spin_unlock_irqrestore(&ide_lock, flags);
 					return;
@@ -1653,6 +1656,7 @@ irqreturn_t ide_intr (int irq, void *dev_id)
 		printk(KERN_ERR "%s: ide_intr: hwgroup->busy was 0 ??\n", drive->name);
 	}
 	hwgroup->handler = NULL;
+	hwgroup->req_gen++;
 	del_timer(&hwgroup->timer);
 	spin_unlock(&ide_lock);
 
diff --git a/drivers/ide/ide-iops.c b/drivers/ide/ide-iops.c
index 1ee53a551c3a..3caa176b3155 100644
--- a/drivers/ide/ide-iops.c
+++ b/drivers/ide/ide-iops.c
@@ -889,6 +889,7 @@ static void __ide_set_handler (ide_drive_t *drive, ide_handler_t *handler,
 	hwgroup->handler	= handler;
 	hwgroup->expiry		= expiry;
 	hwgroup->timer.expires	= jiffies + timeout;
+	hwgroup->req_gen_timer = hwgroup->req_gen;
 	add_timer(&hwgroup->timer);
 }
 
@@ -929,6 +930,7 @@ void ide_execute_command(ide_drive_t *drive, task_ioreg_t cmd, ide_handler_t *ha
 	hwgroup->handler	= handler;
 	hwgroup->expiry		= expiry;
 	hwgroup->timer.expires	= jiffies + timeout;
+	hwgroup->req_gen_timer = hwgroup->req_gen;
 	add_timer(&hwgroup->timer);
 	hwif->OUTBSYNC(drive, cmd, IDE_COMMAND_REG);
 	/* Drive takes 400nS to respond, we must avoid the IRQ being
diff --git a/include/linux/ide.h b/include/linux/ide.h
index 58564a199862..d3bbc7188b6a 100644
--- a/include/linux/ide.h
+++ b/include/linux/ide.h
@@ -861,6 +861,8 @@ typedef struct hwgroup_s {
 	int (*expiry)(ide_drive_t *);
 		/* ide_system_bus_speed */
 	int pio_clock;
+	int req_gen;
+	int req_gen_timer;
 
 	unsigned char cmd_buf[4];
 } ide_hwgroup_t;
-- 
cgit v1.2.3


From 5a6d41b32a17ca902ef50fdfa170d7f23264bad5 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Sat, 14 Apr 2007 19:10:12 -0400
Subject: NFS: Ensure PG_writeback is cleared when writeback fails

If the writebacks are cancelled via nfs_cancel_dirty_list, or due to the
memory allocation failing in nfs_flush_one/nfs_flush_multi, then we must
ensure that the PG_writeback flag is cleared.

Also ensure that we actually own the PG_writeback flag whenever we
schedule a new writeback by making nfs_set_page_writeback() return the
value of test_set_page_writeback().
The PG_writeback page flag ends up replacing the functionality of the
PG_FLUSHING nfs_page flag, so we rip that out too.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c           | 22 +++++++++++++++-------
 include/linux/nfs_page.h |  1 -
 2 files changed, 15 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 2867e6b7096f..e5d7cac569aa 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -218,9 +218,11 @@ int nfs_congestion_kb;
 #define NFS_CONGESTION_OFF_THRESH	\
 	(NFS_CONGESTION_ON_THRESH - (NFS_CONGESTION_ON_THRESH >> 2))
 
-static void nfs_set_page_writeback(struct page *page)
+static int nfs_set_page_writeback(struct page *page)
 {
-	if (!test_set_page_writeback(page)) {
+	int ret = test_set_page_writeback(page);
+
+	if (!ret) {
 		struct inode *inode = page->mapping->host;
 		struct nfs_server *nfss = NFS_SERVER(inode);
 
@@ -228,6 +230,7 @@ static void nfs_set_page_writeback(struct page *page)
 				NFS_CONGESTION_ON_THRESH)
 			set_bdi_congested(&nfss->backing_dev_info, WRITE);
 	}
+	return ret;
 }
 
 static void nfs_end_page_writeback(struct page *page)
@@ -277,10 +280,8 @@ static int nfs_page_mark_flush(struct page *page)
 		spin_lock(req_lock);
 	}
 	spin_unlock(req_lock);
-	if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
+	if (nfs_set_page_writeback(page) == 0)
 		nfs_mark_request_dirty(req);
-		nfs_set_page_writeback(page);
-	}
 	ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
 	nfs_unlock_request(req);
 	return ret;
@@ -424,7 +425,6 @@ nfs_mark_request_dirty(struct nfs_page *req)
 static void
 nfs_redirty_request(struct nfs_page *req)
 {
-	clear_bit(PG_FLUSHING, &req->wb_flags);
 	__set_page_dirty_nobuffers(req->wb_page);
 }
 
@@ -434,7 +434,11 @@ nfs_redirty_request(struct nfs_page *req)
 static inline int
 nfs_dirty_request(struct nfs_page *req)
 {
-	return test_bit(PG_FLUSHING, &req->wb_flags) == 0;
+	struct page *page = req->wb_page;
+
+	if (page == NULL)
+		return 0;
+	return !PageWriteback(req->wb_page);
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -500,6 +504,7 @@ static void nfs_cancel_dirty_list(struct list_head *head)
 	while(!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_inode_remove_request(req);
 		nfs_clear_page_writeback(req);
 	}
@@ -890,6 +895,7 @@ out_bad:
 		list_del(&data->pages);
 		nfs_writedata_release(data);
 	}
+	nfs_end_page_writeback(req->wb_page);
 	nfs_redirty_request(req);
 	nfs_clear_page_writeback(req);
 	return -ENOMEM;
@@ -935,6 +941,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
 	while (!list_empty(head)) {
 		struct nfs_page *req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
@@ -970,6 +977,7 @@ out_err:
 	while (!list_empty(head)) {
 		req = nfs_list_entry(head->next);
 		nfs_list_remove_request(req);
+		nfs_end_page_writeback(req->wb_page);
 		nfs_redirty_request(req);
 		nfs_clear_page_writeback(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 2e555d49c9b7..d111be639140 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -31,7 +31,6 @@
 #define PG_NEED_COMMIT		1
 #define PG_NEED_RESCHED		2
 #define PG_NEED_FLUSH		3
-#define PG_FLUSHING		4
 
 struct nfs_inode;
 struct nfs_page {
-- 
cgit v1.2.3


From b4dfa0b1fb39c7ffe74741d60668825de6a47b69 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 17 Apr 2007 12:28:27 -0700
Subject: [NET]: Get rid of alloc_skb_from_cache

Since this was added originally for Xen, and Xen has recently (~2.6.18)
stopped using this function, we can safely get rid of it.  Good timing
too since this function has started to bit rot.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/x86_64/kernel/functionlist |  1 -
 include/linux/skbuff.h          |  3 ---
 net/core/skbuff.c               | 55 -----------------------------------------
 3 files changed, 59 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86_64/kernel/functionlist b/arch/x86_64/kernel/functionlist
index 01fa23580c85..7ae18ec12454 100644
--- a/arch/x86_64/kernel/functionlist
+++ b/arch/x86_64/kernel/functionlist
@@ -514,7 +514,6 @@
 *(.text.dentry_open)
 *(.text.dentry_iput)
 *(.text.bio_alloc)
-*(.text.alloc_skb_from_cache)
 *(.text.wait_on_page_bit)
 *(.text.vfs_readdir)
 *(.text.vfs_lstat)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82f43ad478c7..0e86b6007a0a 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -346,9 +346,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size,
 	return __alloc_skb(size, priority, 1, -1);
 }
 
-extern struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
-					    unsigned int size,
-					    gfp_t priority);
 extern void	       kfree_skbmem(struct sk_buff *skb);
 extern struct sk_buff *skb_clone(struct sk_buff *skb,
 				 gfp_t priority);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 87573ae35b02..336958fbbcb2 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -196,61 +196,6 @@ nodata:
 	goto out;
 }
 
-/**
- *	alloc_skb_from_cache	-	allocate a network buffer
- *	@cp: kmem_cache from which to allocate the data area
- *           (object size must be big enough for @size bytes + skb overheads)
- *	@size: size to allocate
- *	@gfp_mask: allocation mask
- *
- *	Allocate a new &sk_buff. The returned buffer has no headroom and
- *	tail room of size bytes. The object has a reference count of one.
- *	The return is the buffer. On a failure the return is %NULL.
- *
- *	Buffers may only be allocated from interrupts using a @gfp_mask of
- *	%GFP_ATOMIC.
- */
-struct sk_buff *alloc_skb_from_cache(struct kmem_cache *cp,
-				     unsigned int size,
-				     gfp_t gfp_mask)
-{
-	struct sk_buff *skb;
-	u8 *data;
-
-	/* Get the HEAD */
-	skb = kmem_cache_alloc(skbuff_head_cache,
-			       gfp_mask & ~__GFP_DMA);
-	if (!skb)
-		goto out;
-
-	/* Get the DATA. */
-	size = SKB_DATA_ALIGN(size);
-	data = kmem_cache_alloc(cp, gfp_mask);
-	if (!data)
-		goto nodata;
-
-	memset(skb, 0, offsetof(struct sk_buff, truesize));
-	skb->truesize = size + sizeof(struct sk_buff);
-	atomic_set(&skb->users, 1);
-	skb->head = data;
-	skb->data = data;
-	skb->tail = data;
-	skb->end  = data + size;
-
-	atomic_set(&(skb_shinfo(skb)->dataref), 1);
-	skb_shinfo(skb)->nr_frags  = 0;
-	skb_shinfo(skb)->gso_size = 0;
-	skb_shinfo(skb)->gso_segs = 0;
-	skb_shinfo(skb)->gso_type = 0;
-	skb_shinfo(skb)->frag_list = NULL;
-out:
-	return skb;
-nodata:
-	kmem_cache_free(skbuff_head_cache, skb);
-	skb = NULL;
-	goto out;
-}
-
 /**
  *	__netdev_alloc_skb - allocate an skbuff for rx on a specific device
  *	@dev: network device to receive on
-- 
cgit v1.2.3


From c2ecba71717c4f60671175fd26083c35a4b9ad58 Mon Sep 17 00:00:00 2001
From: Pavel Emelianov <xemul@sw.ru>
Date: Tue, 17 Apr 2007 12:45:31 -0700
Subject: [NET]: Set a separate lockdep class for neighbour table's proxy_queue

Otherwise the following calltrace will lead to a wrong
lockdep warning:

  neigh_proxy_process()
    `- lock(neigh_table->proxy_queue.lock);
  arp_redo /* via tbl->proxy_redo */
  arp_process
  neigh_event_ns
  neigh_update
  skb_queue_purge
    `- lock(neighbor->arp_queue.lock);

This is not a deadlock actually, as neighbor table's proxy_queue
and the neighbor's arp_queue are different queues.

Lockdep thinks there is a deadlock as both queues are initialized
with skb_queue_head_init() and thus have a common class.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 7 +++++++
 net/core/neighbour.c   | 5 ++++-
 2 files changed, 11 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 0e86b6007a0a..5992f65b4184 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -619,6 +619,13 @@ static inline void skb_queue_head_init(struct sk_buff_head *list)
 	list->qlen = 0;
 }
 
+static inline void skb_queue_head_init_class(struct sk_buff_head *list,
+		struct lock_class_key *class)
+{
+	skb_queue_head_init(list);
+	lockdep_set_class(&list->lock, class);
+}
+
 /*
  *	Insert an sk_buff at the start of a list.
  *
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index cfc60019cf92..841e3f32cab1 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1331,6 +1331,8 @@ void neigh_parms_destroy(struct neigh_parms *parms)
 	kfree(parms);
 }
 
+static struct lock_class_key neigh_table_proxy_queue_class;
+
 void neigh_table_init_no_netlink(struct neigh_table *tbl)
 {
 	unsigned long now = jiffies;
@@ -1379,7 +1381,8 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl)
 	init_timer(&tbl->proxy_timer);
 	tbl->proxy_timer.data	  = (unsigned long)tbl;
 	tbl->proxy_timer.function = neigh_proxy_process;
-	skb_queue_head_init(&tbl->proxy_queue);
+	skb_queue_head_init_class(&tbl->proxy_queue,
+			&neigh_table_proxy_queue_class);
 
 	tbl->last_flush = now;
 	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
-- 
cgit v1.2.3


From 112654208bd6f092e064973b8fa680e37ffa74a6 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Mon, 16 Apr 2007 22:53:15 -0700
Subject: kernel-doc: fix plist.h comments

Make kernel-doc comments match macro names.
Correct parameter names in a few places.
Remove '#' from beginning of kernel-doc comment macro names.
Remove extra (erroneous) blank lines in kernel-doc.

Warning(plist.h:100): Cannot understand  * #PLIST_HEAD_INIT - static struct plist_head initializer on line 100 - I thought it was a doc line
Warning(plist.h:112): Cannot understand  * #PLIST_NODE_INIT - static struct plist_node initializer on line 112 - I thought it was a doc line
Warning(plist.h:103): No description found for parameter '_lock'
Warning(plist.h:129): No description found for parameter 'lock'
Warning(plist.h:158): No description found for parameter 'pos'
Warning(plist.h:169): No description found for parameter 'pos'
Warning(plist.h:169): No description found for parameter 'n'
Warning(plist.h:179): No description found for parameter 'mem'

This still leaves one warning & one error that need attention:
Error(plist.h:219): cannot understand prototype: '('
Warning(plist.h): no structured comments found

Acked-by: Inaky Perez-Gonzalez <inaky.perez-gonzalez@intel.com>
Cc: Daniel Walker <dwalker@mvista.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Oleg Nesterov <oleg@tv-sign.ru>
Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/plist.h | 54 ++++++++++++++++++++++-----------------------------
 1 file changed, 23 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/plist.h b/include/linux/plist.h
index b95818a037ad..85de2f055874 100644
--- a/include/linux/plist.h
+++ b/include/linux/plist.h
@@ -97,9 +97,9 @@ struct plist_node {
 #endif
 
 /**
- * #PLIST_HEAD_INIT - static struct plist_head initializer
- *
+ * PLIST_HEAD_INIT - static struct plist_head initializer
  * @head:	struct plist_head variable name
+ * @_lock:	lock to initialize for this list
  */
 #define PLIST_HEAD_INIT(head, _lock)			\
 {							\
@@ -109,8 +109,7 @@ struct plist_node {
 }
 
 /**
- * #PLIST_NODE_INIT - static struct plist_node initializer
- *
+ * PLIST_NODE_INIT - static struct plist_node initializer
  * @node:	struct plist_node variable name
  * @__prio:	initial node priority
  */
@@ -122,8 +121,8 @@ struct plist_node {
 
 /**
  * plist_head_init - dynamic struct plist_head initializer
- *
  * @head:	&struct plist_head pointer
+ * @lock:	list spinlock, remembered for debugging
  */
 static inline void
 plist_head_init(struct plist_head *head, spinlock_t *lock)
@@ -137,7 +136,6 @@ plist_head_init(struct plist_head *head, spinlock_t *lock)
 
 /**
  * plist_node_init - Dynamic struct plist_node initializer
- *
  * @node:	&struct plist_node pointer
  * @prio:	initial node priority
  */
@@ -152,49 +150,46 @@ extern void plist_del(struct plist_node *node, struct plist_head *head);
 
 /**
  * plist_for_each - iterate over the plist
- *
- * @pos1:	the type * to use as a loop counter.
- * @head:	the head for your list.
+ * @pos:	the type * to use as a loop counter
+ * @head:	the head for your list
  */
 #define plist_for_each(pos, head)	\
 	 list_for_each_entry(pos, &(head)->node_list, plist.node_list)
 
 /**
- * plist_for_each_entry_safe - iterate over a plist of given type safe
- * against removal of list entry
+ * plist_for_each_safe - iterate safely over a plist of given type
+ * @pos:	the type * to use as a loop counter
+ * @n:	another type * to use as temporary storage
+ * @head:	the head for your list
  *
- * @pos1:	the type * to use as a loop counter.
- * @n1:	another type * to use as temporary storage
- * @head:	the head for your list.
+ * Iterate over a plist of given type, safe against removal of list entry.
  */
 #define plist_for_each_safe(pos, n, head)	\
 	 list_for_each_entry_safe(pos, n, &(head)->node_list, plist.node_list)
 
 /**
  * plist_for_each_entry	- iterate over list of given type
- *
- * @pos:	the type * to use as a loop counter.
- * @head:	the head for your list.
- * @member:	the name of the list_struct within the struct.
+ * @pos:	the type * to use as a loop counter
+ * @head:	the head for your list
+ * @mem:	the name of the list_struct within the struct
  */
 #define plist_for_each_entry(pos, head, mem)	\
 	 list_for_each_entry(pos, &(head)->node_list, mem.plist.node_list)
 
 /**
- * plist_for_each_entry_safe - iterate over list of given type safe against
- * removal of list entry
- *
- * @pos:	the type * to use as a loop counter.
+ * plist_for_each_entry_safe - iterate safely over list of given type
+ * @pos:	the type * to use as a loop counter
  * @n:		another type * to use as temporary storage
- * @head:	the head for your list.
- * @m:		the name of the list_struct within the struct.
+ * @head:	the head for your list
+ * @m:		the name of the list_struct within the struct
+ *
+ * Iterate over list of given type, safe against removal of list entry.
  */
 #define plist_for_each_entry_safe(pos, n, head, m)	\
 	list_for_each_entry_safe(pos, n, &(head)->node_list, m.plist.node_list)
 
 /**
  * plist_head_empty - return !0 if a plist_head is empty
- *
  * @head:	&struct plist_head pointer
  */
 static inline int plist_head_empty(const struct plist_head *head)
@@ -204,7 +199,6 @@ static inline int plist_head_empty(const struct plist_head *head)
 
 /**
  * plist_node_empty - return !0 if plist_node is not on a list
- *
  * @node:	&struct plist_node pointer
  */
 static inline int plist_node_empty(const struct plist_node *node)
@@ -216,10 +210,9 @@ static inline int plist_node_empty(const struct plist_node *node)
 
 /**
  * plist_first_entry - get the struct for the first entry
- *
- * @ptr:	the &struct plist_head pointer.
- * @type:	the type of the struct this is embedded in.
- * @member:	the name of the list_struct within the struct.
+ * @head:	the &struct plist_head pointer
+ * @type:	the type of the struct this is embedded in
+ * @member:	the name of the list_struct within the struct
  */
 #ifdef CONFIG_DEBUG_PI_LIST
 # define plist_first_entry(head, type, member)	\
@@ -234,7 +227,6 @@ static inline int plist_node_empty(const struct plist_node *node)
 
 /**
  * plist_first - return the first node (and thus, highest priority)
- *
  * @head:	the &struct plist_head pointer
  *
  * Assumes the plist is _not_ empty.
-- 
cgit v1.2.3


From 93da28790c17345f4db10358dfb19b4c241d8ba3 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+lkml@arm.linux.org.uk>
Date: Tue, 17 Apr 2007 00:32:26 -0700
Subject: Provide dummy devm_ioport_* if !HAS_IOPORT

Provide an dummy implementation of devm_ioport_map() and
devm_ioport_unmap() to allow drivers (eg, pata_platform) to build for
platforms where CONFIG_NO_IOPORT is selected.

Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Jeff Garzik <jeff@garzik.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/io.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/io.h b/include/linux/io.h
index c244a0cc9319..09d351236379 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -33,9 +33,22 @@ int ioremap_page_range(unsigned long addr, unsigned long end,
 /*
  * Managed iomap interface
  */
+#ifdef CONFIG_HAS_IOPORT
 void __iomem * devm_ioport_map(struct device *dev, unsigned long port,
 			       unsigned int nr);
 void devm_ioport_unmap(struct device *dev, void __iomem *addr);
+#else
+static inline void __iomem *devm_ioport_map(struct device *dev,
+					     unsigned long port,
+					     unsigned int nr)
+{
+	return NULL;
+}
+
+static inline void devm_ioport_unmap(struct device *dev, void __iomem *addr)
+{
+}
+#endif
 
 void __iomem * devm_ioremap(struct device *dev, unsigned long offset,
 			    unsigned long size);
-- 
cgit v1.2.3


From 8e821cad12e80cd1a8a3fbadf91f62f17f32549e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 20 Apr 2007 16:12:34 -0400
Subject: NFS: clean up the unstable write code

Get rid of the inlined #ifdefs.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/nfs/write.c           | 117 ++++++++++++++++++++++++++++-------------------
 include/linux/nfs_page.h |  30 ------------
 2 files changed, 71 insertions(+), 76 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ad2e91b4904f..3ed4feb8c856 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -460,6 +460,43 @@ nfs_mark_request_commit(struct nfs_page *req)
 	inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
 	__mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
+
+static inline
+int nfs_write_need_commit(struct nfs_write_data *data)
+{
+	return data->verf.committed != NFS_FILE_SYNC;
+}
+
+static inline
+int nfs_reschedule_unstable_write(struct nfs_page *req)
+{
+	if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+		nfs_mark_request_commit(req);
+		return 1;
+	}
+	if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
+		nfs_redirty_request(req);
+		return 1;
+	}
+	return 0;
+}
+#else
+static inline void
+nfs_mark_request_commit(struct nfs_page *req)
+{
+}
+
+static inline
+int nfs_write_need_commit(struct nfs_write_data *data)
+{
+	return 0;
+}
+
+static inline
+int nfs_reschedule_unstable_write(struct nfs_page *req)
+{
+	return 0;
+}
 #endif
 
 /*
@@ -746,26 +783,12 @@ int nfs_updatepage(struct file *file, struct page *page,
 
 static void nfs_writepage_release(struct nfs_page *req)
 {
-	nfs_end_page_writeback(req->wb_page);
 
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-	if (!PageError(req->wb_page)) {
-		if (NFS_NEED_RESCHED(req)) {
-			nfs_redirty_request(req);
-			goto out;
-		} else if (NFS_NEED_COMMIT(req)) {
-			nfs_mark_request_commit(req);
-			goto out;
-		}
-	}
-	nfs_inode_remove_request(req);
-
-out:
-	nfs_clear_commit(req);
-	nfs_clear_reschedule(req);
-#else
-	nfs_inode_remove_request(req);
-#endif
+	if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) {
+		nfs_end_page_writeback(req->wb_page);
+		nfs_inode_remove_request(req);
+	} else
+		nfs_end_page_writeback(req->wb_page);
 	nfs_clear_page_writeback(req);
 }
 
@@ -1008,22 +1031,28 @@ static void nfs_writeback_done_partial(struct rpc_task *task, void *calldata)
 		nfs_set_pageerror(page);
 		req->wb_context->error = task->tk_status;
 		dprintk(", error = %d\n", task->tk_status);
-	} else {
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (data->verf.committed < NFS_FILE_SYNC) {
-			if (!NFS_NEED_COMMIT(req)) {
-				nfs_defer_commit(req);
-				memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-				dprintk(" defer commit\n");
-			} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
-				nfs_defer_reschedule(req);
-				dprintk(" server reboot detected\n");
-			}
-		} else
-#endif
-			dprintk(" OK\n");
+		goto out;
 	}
 
+	if (nfs_write_need_commit(data)) {
+		spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+		spin_lock(req_lock);
+		if (test_bit(PG_NEED_RESCHED, &req->wb_flags)) {
+			/* Do nothing we need to resend the writes */
+		} else if (!test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags)) {
+			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+			dprintk(" defer commit\n");
+		} else if (memcmp(&req->wb_verf, &data->verf, sizeof(req->wb_verf))) {
+			set_bit(PG_NEED_RESCHED, &req->wb_flags);
+			clear_bit(PG_NEED_COMMIT, &req->wb_flags);
+			dprintk(" server reboot detected\n");
+		}
+		spin_unlock(req_lock);
+	} else
+		dprintk(" OK\n");
+
+out:
 	if (atomic_dec_and_test(&req->wb_complete))
 		nfs_writepage_release(req);
 }
@@ -1064,25 +1093,21 @@ static void nfs_writeback_done_full(struct rpc_task *task, void *calldata)
 		if (task->tk_status < 0) {
 			nfs_set_pageerror(page);
 			req->wb_context->error = task->tk_status;
-			nfs_end_page_writeback(page);
-			nfs_inode_remove_request(req);
 			dprintk(", error = %d\n", task->tk_status);
-			goto next;
+			goto remove_request;
 		}
-		nfs_end_page_writeback(page);
 
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-		if (data->args.stable != NFS_UNSTABLE || data->verf.committed == NFS_FILE_SYNC) {
-			nfs_inode_remove_request(req);
-			dprintk(" OK\n");
+		if (nfs_write_need_commit(data)) {
+			memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
+			nfs_mark_request_commit(req);
+			nfs_end_page_writeback(page);
+			dprintk(" marked for commit\n");
 			goto next;
 		}
-		memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
-		nfs_mark_request_commit(req);
-		dprintk(" marked for commit\n");
-#else
+		dprintk(" OK\n");
+remove_request:
+		nfs_end_page_writeback(page);
 		nfs_inode_remove_request(req);
-#endif
 	next:
 		nfs_clear_page_writeback(req);
 	}
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index d111be639140..16b0266b14fd 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -49,8 +49,6 @@ struct nfs_page {
 };
 
 #define NFS_WBACK_BUSY(req)	(test_bit(PG_BUSY,&(req)->wb_flags))
-#define NFS_NEED_COMMIT(req)	(test_bit(PG_NEED_COMMIT,&(req)->wb_flags))
-#define NFS_NEED_RESCHED(req)	(test_bit(PG_NEED_RESCHED,&(req)->wb_flags))
 
 extern	struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
 					    struct inode *inode,
@@ -121,34 +119,6 @@ nfs_list_remove_request(struct nfs_page *req)
 	req->wb_list_head = NULL;
 }
 
-static inline int
-nfs_defer_commit(struct nfs_page *req)
-{
-	return !test_and_set_bit(PG_NEED_COMMIT, &req->wb_flags);
-}
-
-static inline void
-nfs_clear_commit(struct nfs_page *req)
-{
-	smp_mb__before_clear_bit();
-	clear_bit(PG_NEED_COMMIT, &req->wb_flags);
-	smp_mb__after_clear_bit();
-}
-
-static inline int
-nfs_defer_reschedule(struct nfs_page *req)
-{
-	return !test_and_set_bit(PG_NEED_RESCHED, &req->wb_flags);
-}
-
-static inline void
-nfs_clear_reschedule(struct nfs_page *req)
-{
-	smp_mb__before_clear_bit();
-	clear_bit(PG_NEED_RESCHED, &req->wb_flags);
-	smp_mb__after_clear_bit();
-}
-
 static inline struct nfs_page *
 nfs_list_entry(struct list_head *head)
 {
-- 
cgit v1.2.3


From 7e40f2ab0a7e36706ee78b78b3792f08f208cd44 Mon Sep 17 00:00:00 2001
From: Balbir Singh <balbir@in.ibm.com>
Date: Mon, 23 Apr 2007 14:41:05 -0700
Subject: Taskstats fix the structure members alignment issue

We broke the the alignment of members of taskstats to the 8 byte boundary
with the CSA patches.  In the current kernel, the taskstats structure is
not suitable for use by 32 bit applications in a 64 bit kernel.

On x86_64

Offsets of taskstats' members (64 bit kernel, 64 bit application)

@taskstats'offsetof[@taskstats'indices] = (
        0,      # version
        4,      # ac_exitcode
        8,      # ac_flag
        9,      # ac_nice
        16,     # cpu_count
        24,     # cpu_delay_total
        32,     # blkio_count
        40,     # blkio_delay_total
        48,     # swapin_count
        56,     # swapin_delay_total
        64,     # cpu_run_real_total
        72,     # cpu_run_virtual_total
        80,     # ac_comm
        112,    # ac_sched
        113,    # ac_pad
        116,    # ac_uid
        120,    # ac_gid
        124,    # ac_pid
        128,    # ac_ppid
        132,    # ac_btime
        136,    # ac_etime
        144,    # ac_utime
        152,    # ac_stime
        160,    # ac_minflt
        168,    # ac_majflt
        176,    # coremem
        184,    # virtmem
        192,    # hiwater_rss
        200,    # hiwater_vm
        208,    # read_char
        216,    # write_char
        224,    # read_syscalls
        232,    # write_syscalls
        240,    # read_bytes
        248,    # write_bytes
        256,    # cancelled_write_bytes
    );

Offsets of taskstats' members (64 bit kernel, 32 bit application)

@taskstats'offsetof[@taskstats'indices] = (
        0,      # version
        4,      # ac_exitcode
        8,      # ac_flag
        9,      # ac_nice
        12,     # cpu_count
        20,     # cpu_delay_total
        28,     # blkio_count
        36,     # blkio_delay_total
        44,     # swapin_count
        52,     # swapin_delay_total
        60,     # cpu_run_real_total
        68,     # cpu_run_virtual_total
        76,     # ac_comm
        108,    # ac_sched
        109,    # ac_pad
        112,    # ac_uid
        116,    # ac_gid
        120,    # ac_pid
        124,    # ac_ppid
        128,    # ac_btime
        132,    # ac_etime
        140,    # ac_utime
        148,    # ac_stime
        156,    # ac_minflt
        164,    # ac_majflt
        172,    # coremem
        180,    # virtmem
        188,    # hiwater_rss
        196,    # hiwater_vm
        204,    # read_char
        212,    # write_char
        220,    # read_syscalls
        228,    # write_syscalls
        236,    # read_bytes
        244,    # write_bytes
        252,    # cancelled_write_bytes
    );

This is one way to solve the problem without re-arranging structure members
is to pack the structure.  The patch adds an __attribute__((aligned(8))) to
the taskstats structure members so that 32 bit applications using taskstats
can work with a 64 bit kernel.

Using __attribute__((packed)) would break the 64 bit alignment of members.

The fix was tested on x86_64. After the fix, we got

Offsets of taskstats' members (64 bit kernel, 64 bit application)

@taskstats'offsetof[@taskstats'indices] = (
        0,      # version
        4,      # ac_exitcode
        8,      # ac_flag
        9,      # ac_nice
        16,     # cpu_count
        24,     # cpu_delay_total
        32,     # blkio_count
        40,     # blkio_delay_total
        48,     # swapin_count
        56,     # swapin_delay_total
        64,     # cpu_run_real_total
        72,     # cpu_run_virtual_total
        80,     # ac_comm
        112,    # ac_sched
        113,    # ac_pad
        120,    # ac_uid
        124,    # ac_gid
        128,    # ac_pid
        132,    # ac_ppid
        136,    # ac_btime
        144,    # ac_etime
        152,    # ac_utime
        160,    # ac_stime
        168,    # ac_minflt
        176,    # ac_majflt
        184,    # coremem
        192,    # virtmem
        200,    # hiwater_rss
        208,    # hiwater_vm
        216,    # read_char
        224,    # write_char
        232,    # read_syscalls
        240,    # write_syscalls
        248,    # read_bytes
        256,    # write_bytes
        264,    # cancelled_write_bytes
    );

Offsets of taskstats' members (64 bit kernel, 32 bit application)

@taskstats'offsetof[@taskstats'indices] = (
        0,      # version
        4,      # ac_exitcode
        8,      # ac_flag
        9,      # ac_nice
        16,     # cpu_count
        24,     # cpu_delay_total
        32,     # blkio_count
        40,     # blkio_delay_total
        48,     # swapin_count
        56,     # swapin_delay_total
        64,     # cpu_run_real_total
        72,     # cpu_run_virtual_total
        80,     # ac_comm
        112,    # ac_sched
        113,    # ac_pad
        120,    # ac_uid
        124,    # ac_gid
        128,    # ac_pid
        132,    # ac_ppid
        136,    # ac_btime
        144,    # ac_etime
        152,    # ac_utime
        160,    # ac_stime
        168,    # ac_minflt
        176,    # ac_majflt
        184,    # coremem
        192,    # virtmem
        200,    # hiwater_rss
        208,    # hiwater_vm
        216,    # read_char
        224,    # write_char
        232,    # read_syscalls
        240,    # write_syscalls
        248,    # read_bytes
        256,    # write_bytes
        264,    # cancelled_write_bytes
    );

Signed-off-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Cc: Jay Lan <jlan@engr.sgi.com>
Cc: Shailabh Nagar <nagar@watson.ibm.com>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/taskstats.h | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/taskstats.h b/include/linux/taskstats.h
index 3fced4798255..a46104a28f66 100644
--- a/include/linux/taskstats.h
+++ b/include/linux/taskstats.h
@@ -31,7 +31,7 @@
  */
 
 
-#define TASKSTATS_VERSION	3
+#define TASKSTATS_VERSION	4
 #define TS_COMM_LEN		32	/* should be >= TASK_COMM_LEN
 					 * in linux/sched.h */
 
@@ -66,7 +66,7 @@ struct taskstats {
 	/* Delay waiting for cpu, while runnable
 	 * count, delay_total NOT updated atomically
 	 */
-	__u64	cpu_count;
+	__u64	cpu_count __attribute__((aligned(8)));
 	__u64	cpu_delay_total;
 
 	/* Following four fields atomically updated using task->delays->lock */
@@ -101,14 +101,17 @@ struct taskstats {
 
 	/* Basic Accounting Fields start */
 	char	ac_comm[TS_COMM_LEN];	/* Command name */
-	__u8	ac_sched;		/* Scheduling discipline */
+	__u8	ac_sched __attribute__((aligned(8)));
+					/* Scheduling discipline */
 	__u8	ac_pad[3];
-	__u32	ac_uid;			/* User ID */
+	__u32	ac_uid __attribute__((aligned(8)));
+					/* User ID */
 	__u32	ac_gid;			/* Group ID */
 	__u32	ac_pid;			/* Process ID */
 	__u32	ac_ppid;		/* Parent process ID */
 	__u32	ac_btime;		/* Begin time [sec since 1970] */
-	__u64	ac_etime;		/* Elapsed time [usec] */
+	__u64	ac_etime __attribute__((aligned(8)));
+					/* Elapsed time [usec] */
 	__u64	ac_utime;		/* User CPU time [usec] */
 	__u64	ac_stime;		/* SYstem CPU time [usec] */
 	__u64	ac_minflt;		/* Minor Page Fault Count */
-- 
cgit v1.2.3


From 0bcbc92629044b5403719f77fb015e9005b1f504 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 24 Apr 2007 14:58:30 -0700
Subject: [IPV6]: Disallow RH0 by default.

A security issue is emerging.  Disallow Routing Header Type 0 by default
as we have been doing for IPv4.
Note: We allow RH2 by default because it is harmless.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt |  9 ++++++++
 include/linux/ipv6.h                   |  3 +++
 include/linux/sysctl.h                 |  1 +
 net/ipv6/addrconf.c                    | 11 ++++++++++
 net/ipv6/exthdrs.c                     | 40 +++++++++++++++++++++++++++++-----
 5 files changed, 58 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index d3aae1f9b4c1..702d1d8dd04a 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -851,6 +851,15 @@ accept_redirects - BOOLEAN
 	Functional default: enabled if local forwarding is disabled.
 			    disabled if local forwarding is enabled.
 
+accept_source_route - INTEGER
+	Accept source routing (routing extension header).
+
+	> 0: Accept routing header.
+	= 0: Accept only routing header type 2.
+	< 0: Do not accept routing header.
+
+	Default: 0
+
 autoconf - BOOLEAN
 	Autoconfigure addresses using Prefix Information in Router 
 	Advertisements.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index f8241130f5ea..713eb5eaa81f 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -177,6 +177,7 @@ struct ipv6_devconf {
 #endif
 #endif
 	__s32		proxy_ndp;
+	__s32		accept_source_route;
 	void		*sysctl;
 };
 
@@ -205,6 +206,8 @@ enum {
 	DEVCONF_RTR_PROBE_INTERVAL,
 	DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN,
 	DEVCONF_PROXY_NDP,
+	__DEVCONF_OPTIMISTIC_DAD,
+	DEVCONF_ACCEPT_SOURCE_ROUTE,
 	DEVCONF_MAX
 };
 
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 2c5fb38d9392..9a8970bf99a6 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -580,6 +580,7 @@ enum {
 	NET_IPV6_RTR_PROBE_INTERVAL=21,
 	NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22,
 	NET_IPV6_PROXY_NDP=23,
+	NET_IPV6_ACCEPT_SOURCE_ROUTE=25,
 	__NET_IPV6_MAX
 };
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 7552663aa125..452a82ce4796 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -172,6 +172,7 @@ struct ipv6_devconf ipv6_devconf __read_mostly = {
 #endif
 #endif
 	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 };
 
 static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
@@ -203,6 +204,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {
 #endif
 #endif
 	.proxy_ndp		= 0,
+	.accept_source_route	= 0,	/* we do not accept RH0 by default. */
 };
 
 /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
@@ -3356,6 +3358,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,
 #endif
 #endif
 	array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp;
+	array[DEVCONF_ACCEPT_SOURCE_ROUTE] = cnf->accept_source_route;
 }
 
 static inline size_t inet6_if_nlmsg_size(void)
@@ -3883,6 +3886,14 @@ static struct addrconf_sysctl_table
 			.mode		=	0644,
 			.proc_handler	=	&proc_dointvec,
 		},
+		{
+			.ctl_name	=	NET_IPV6_ACCEPT_SOURCE_ROUTE,
+			.procname	=	"accept_source_route",
+			.data		=	&ipv6_devconf.accept_source_route,
+			.maxlen		=	sizeof(int),
+			.mode		=	0644,
+			.proc_handler	=	&proc_dointvec,
+		},
 		{
 			.ctl_name	=	0,	/* sentinel */
 		}
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 28e0c6568272..6ed6a8cd6a68 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -362,10 +362,27 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 	struct inet6_skb_parm *opt = IP6CB(skb);
 	struct in6_addr *addr = NULL;
 	struct in6_addr daddr;
+	struct inet6_dev *idev;
 	int n, i;
-
 	struct ipv6_rt_hdr *hdr;
 	struct rt0_hdr *rthdr;
+	int accept_source_route = ipv6_devconf.accept_source_route;
+
+	if (accept_source_route < 0 ||
+	    ((idev = in6_dev_get(skb->dev)) == NULL)) {
+		kfree_skb(skb);
+		return -1;
+	}
+	if (idev->cnf.accept_source_route < 0) {
+		in6_dev_put(idev);
+		kfree_skb(skb);
+		return -1;
+	}
+
+	if (accept_source_route > idev->cnf.accept_source_route)
+		accept_source_route = idev->cnf.accept_source_route;
+
+	in6_dev_put(idev);
 
 	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
 	    !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) {
@@ -377,6 +394,22 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp)
 
 	hdr = (struct ipv6_rt_hdr *) skb->h.raw;
 
+	switch (hdr->type) {
+#ifdef CONFIG_IPV6_MIP6
+		break;
+#endif
+	case IPV6_SRCRT_TYPE_0:
+		if (accept_source_route <= 0)
+			break;
+		kfree_skb(skb);
+		return -1;
+	default:
+		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
+				 IPSTATS_MIB_INHDRERRORS);
+		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
+		return -1;
+	}
+
 	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr) ||
 	    skb->pkt_type != PACKET_HOST) {
 		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
@@ -434,11 +467,6 @@ looped_back:
 		}
 		break;
 #endif
-	default:
-		IP6_INC_STATS_BH(ip6_dst_idev(skb->dst),
-				 IPSTATS_MIB_INHDRERRORS);
-		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw);
-		return -1;
 	}
 
 	/*
-- 
cgit v1.2.3