From 17d9ddc72fb8bba0d4f67868c9c612e472a594a9 Mon Sep 17 00:00:00 2001
From: "Pallipadi, Venkatesh" <venkatesh.pallipadi@intel.com>
Date: Wed, 10 Feb 2010 15:23:44 -0800
Subject: rbtree: Add support for augmented rbtrees

Add support for augmented rbtrees in core rbtree code.

This will be used in subsequent patches, in x86 PAT code, which needs
interval trees to efficiently keep track of PAT ranges.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
LKML-Reference: <20100210232343.GA11465@linux-os.sc.intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/linux/rbtree.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h
index 9c295411d01f..8e33a256ea0e 100644
--- a/include/linux/rbtree.h
+++ b/include/linux/rbtree.h
@@ -110,6 +110,7 @@ struct rb_node
 struct rb_root
 {
 	struct rb_node *rb_node;
+	void (*augment_cb)(struct rb_node *node);
 };
 
 
@@ -129,7 +130,9 @@ static inline void rb_set_color(struct rb_node *rb, int color)
 	rb->rb_parent_color = (rb->rb_parent_color & ~1) | color;
 }
 
-#define RB_ROOT	(struct rb_root) { NULL, }
+#define RB_ROOT	(struct rb_root) { NULL, NULL, }
+#define RB_AUGMENT_ROOT(x)	(struct rb_root) { NULL, x}
+
 #define	rb_entry(ptr, type, member) container_of(ptr, type, member)
 
 #define RB_EMPTY_ROOT(root)	((root)->rb_node == NULL)
-- 
cgit v1.2.3


From 1449c5d0e8f25af6c903797a636696901122e4e8 Mon Sep 17 00:00:00 2001
From: H Hartley Sweeten <hartleys@visionengravers.com>
Date: Fri, 15 Jan 2010 11:09:32 -0700
Subject: mtd: quiet sparse noise in cfi.h

In the inline function cfi_build_cmd_addr, the cast of cmd_ofs to a
uint8_t produces a sparse warning of the type:

warning: cast truncates bits from constant value (2aa becomes aa)

Quiet the warning by masking cmd_ofs with 0xff and remove the cast.

Signed-off-by: H Hartley Sweeten <hsweeten@visionengravers.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/cfi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index df89f4275232..a4eefc5810dc 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -297,7 +297,7 @@ static inline uint32_t cfi_build_cmd_addr(uint32_t cmd_ofs,
 	 * and 32bit devices on 16 bit busses
 	 * set the low bit of the alternating bit sequence of the address.
 	 */
-	if (((type * interleave) > bankwidth) && ((uint8_t)cmd_ofs == 0xaa))
+	if (((type * interleave) > bankwidth) && ((cmd_ofs & 0xff) == 0xaa))
 		addr |= (type >> 1)*interleave;
 
 	return  addr;
-- 
cgit v1.2.3


From b520e412faaaad35641aeedd6059179f9f1b393c Mon Sep 17 00:00:00 2001
From: Ben Hutchings <bhutchings@solarflare.com>
Date: Fri, 29 Jan 2010 20:59:42 +0000
Subject: mtd: Replace static array of devices with an idr structure

Signed-off-by: Ben Hutchings <bhutchings@solarflare.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 151 +++++++++++++++++++++++++-----------------------
 drivers/mtd/mtdcore.h   |  12 +---
 include/linux/mtd/mtd.h |   1 -
 3 files changed, 81 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index 402d41723c3f..b3b98d1fffc3 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/mtd/compatmac.h>
 #include <linux/proc_fs.h>
+#include <linux/idr.h>
 
 #include <linux/mtd/mtd.h>
 #include "internal.h"
@@ -33,13 +34,18 @@ static struct class mtd_class = {
 	.resume = mtd_cls_resume,
 };
 
+static DEFINE_IDR(mtd_idr);
+
 /* These are exported solely for the purpose of mtd_blkdevs.c. You
    should not use them for _anything_ else */
 DEFINE_MUTEX(mtd_table_mutex);
-struct mtd_info *mtd_table[MAX_MTD_DEVICES];
-
 EXPORT_SYMBOL_GPL(mtd_table_mutex);
-EXPORT_SYMBOL_GPL(mtd_table);
+
+struct mtd_info *__mtd_next_device(int i)
+{
+	return idr_get_next(&mtd_idr, &i);
+}
+EXPORT_SYMBOL_GPL(__mtd_next_device);
 
 static LIST_HEAD(mtd_notifiers);
 
@@ -235,13 +241,13 @@ static struct device_type mtd_devtype = {
  *	Add a device to the list of MTD devices present in the system, and
  *	notify each currently active MTD 'user' of its arrival. Returns
  *	zero on success or 1 on failure, which currently will only happen
- *	if the number of present devices exceeds MAX_MTD_DEVICES (i.e. 16)
- *	or there's a sysfs error.
+ *	if there is insufficient memory or a sysfs error.
  */
 
 int add_mtd_device(struct mtd_info *mtd)
 {
-	int i;
+	struct mtd_notifier *not;
+	int i, error;
 
 	if (!mtd->backing_dev_info) {
 		switch (mtd->type) {
@@ -260,70 +266,73 @@ int add_mtd_device(struct mtd_info *mtd)
 	BUG_ON(mtd->writesize == 0);
 	mutex_lock(&mtd_table_mutex);
 
-	for (i=0; i < MAX_MTD_DEVICES; i++)
-		if (!mtd_table[i]) {
-			struct mtd_notifier *not;
-
-			mtd_table[i] = mtd;
-			mtd->index = i;
-			mtd->usecount = 0;
-
-			if (is_power_of_2(mtd->erasesize))
-				mtd->erasesize_shift = ffs(mtd->erasesize) - 1;
-			else
-				mtd->erasesize_shift = 0;
-
-			if (is_power_of_2(mtd->writesize))
-				mtd->writesize_shift = ffs(mtd->writesize) - 1;
-			else
-				mtd->writesize_shift = 0;
-
-			mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
-			mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
-
-			/* Some chips always power up locked. Unlock them now */
-			if ((mtd->flags & MTD_WRITEABLE)
-			    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
-				if (mtd->unlock(mtd, 0, mtd->size))
-					printk(KERN_WARNING
-					       "%s: unlock failed, "
-					       "writes may not work\n",
-					       mtd->name);
-			}
+	do {
+		if (!idr_pre_get(&mtd_idr, GFP_KERNEL))
+			goto fail_locked;
+		error = idr_get_new(&mtd_idr, mtd, &i);
+	} while (error == -EAGAIN);
 
-			/* Caller should have set dev.parent to match the
-			 * physical device.
-			 */
-			mtd->dev.type = &mtd_devtype;
-			mtd->dev.class = &mtd_class;
-			mtd->dev.devt = MTD_DEVT(i);
-			dev_set_name(&mtd->dev, "mtd%d", i);
-			dev_set_drvdata(&mtd->dev, mtd);
-			if (device_register(&mtd->dev) != 0) {
-				mtd_table[i] = NULL;
-				break;
-			}
+	if (error)
+		goto fail_locked;
 
-			if (MTD_DEVT(i))
-				device_create(&mtd_class, mtd->dev.parent,
-						MTD_DEVT(i) + 1,
-						NULL, "mtd%dro", i);
-
-			DEBUG(0, "mtd: Giving out device %d to %s\n",i, mtd->name);
-			/* No need to get a refcount on the module containing
-			   the notifier, since we hold the mtd_table_mutex */
-			list_for_each_entry(not, &mtd_notifiers, list)
-				not->add(mtd);
-
-			mutex_unlock(&mtd_table_mutex);
-			/* We _know_ we aren't being removed, because
-			   our caller is still holding us here. So none
-			   of this try_ nonsense, and no bitching about it
-			   either. :) */
-			__module_get(THIS_MODULE);
-			return 0;
-		}
+	mtd->index = i;
+	mtd->usecount = 0;
+
+	if (is_power_of_2(mtd->erasesize))
+		mtd->erasesize_shift = ffs(mtd->erasesize) - 1;
+	else
+		mtd->erasesize_shift = 0;
+
+	if (is_power_of_2(mtd->writesize))
+		mtd->writesize_shift = ffs(mtd->writesize) - 1;
+	else
+		mtd->writesize_shift = 0;
+
+	mtd->erasesize_mask = (1 << mtd->erasesize_shift) - 1;
+	mtd->writesize_mask = (1 << mtd->writesize_shift) - 1;
+
+	/* Some chips always power up locked. Unlock them now */
+	if ((mtd->flags & MTD_WRITEABLE)
+	    && (mtd->flags & MTD_POWERUP_LOCK) && mtd->unlock) {
+		if (mtd->unlock(mtd, 0, mtd->size))
+			printk(KERN_WARNING
+			       "%s: unlock failed, writes may not work\n",
+			       mtd->name);
+	}
+
+	/* Caller should have set dev.parent to match the
+	 * physical device.
+	 */
+	mtd->dev.type = &mtd_devtype;
+	mtd->dev.class = &mtd_class;
+	mtd->dev.devt = MTD_DEVT(i);
+	dev_set_name(&mtd->dev, "mtd%d", i);
+	dev_set_drvdata(&mtd->dev, mtd);
+	if (device_register(&mtd->dev) != 0)
+		goto fail_added;
+
+	if (MTD_DEVT(i))
+		device_create(&mtd_class, mtd->dev.parent,
+			      MTD_DEVT(i) + 1,
+			      NULL, "mtd%dro", i);
+
+	DEBUG(0, "mtd: Giving out device %d to %s\n", i, mtd->name);
+	/* No need to get a refcount on the module containing
+	   the notifier, since we hold the mtd_table_mutex */
+	list_for_each_entry(not, &mtd_notifiers, list)
+		not->add(mtd);
+
+	mutex_unlock(&mtd_table_mutex);
+	/* We _know_ we aren't being removed, because
+	   our caller is still holding us here. So none
+	   of this try_ nonsense, and no bitching about it
+	   either. :) */
+	__module_get(THIS_MODULE);
+	return 0;
 
+fail_added:
+	idr_remove(&mtd_idr, i);
+fail_locked:
 	mutex_unlock(&mtd_table_mutex);
 	return 1;
 }
@@ -344,7 +353,7 @@ int del_mtd_device (struct mtd_info *mtd)
 
 	mutex_lock(&mtd_table_mutex);
 
-	if (mtd_table[mtd->index] != mtd) {
+	if (idr_find(&mtd_idr, mtd->index) != mtd) {
 		ret = -ENODEV;
 	} else if (mtd->usecount) {
 		printk(KERN_NOTICE "Removing MTD device #%d (%s) with use count %d\n",
@@ -360,7 +369,7 @@ int del_mtd_device (struct mtd_info *mtd)
 		list_for_each_entry(not, &mtd_notifiers, list)
 			not->remove(mtd);
 
-		mtd_table[mtd->index] = NULL;
+		idr_remove(&mtd_idr, mtd->index);
 
 		module_put(THIS_MODULE);
 		ret = 0;
@@ -448,8 +457,8 @@ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num)
 				break;
 			}
 		}
-	} else if (num >= 0 && num < MAX_MTD_DEVICES) {
-		ret = mtd_table[num];
+	} else if (num >= 0) {
+		ret = idr_find(&mtd_idr, num);
 		if (mtd && mtd != ret)
 			ret = NULL;
 	}
diff --git a/drivers/mtd/mtdcore.h b/drivers/mtd/mtdcore.h
index e2f93a300738..6a64fdebc898 100644
--- a/drivers/mtd/mtdcore.h
+++ b/drivers/mtd/mtdcore.h
@@ -8,17 +8,7 @@
    should not use them for _anything_ else */
 
 extern struct mutex mtd_table_mutex;
-extern struct mtd_info *mtd_table[MAX_MTD_DEVICES];
-
-static inline struct mtd_info *__mtd_next_device(int i)
-{
-	while (i < MAX_MTD_DEVICES) {
-		if (mtd_table[i])
-			return mtd_table[i];
-		i++;
-	}
-	return NULL;
-}
+extern struct mtd_info *__mtd_next_device(int i);
 
 #define mtd_for_each_device(mtd)			\
 	for ((mtd) = __mtd_next_device(0);		\
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 0f32a9b6ff55..ba53ecca107c 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -20,7 +20,6 @@
 
 #define MTD_CHAR_MAJOR 90
 #define MTD_BLOCK_MAJOR 31
-#define MAX_MTD_DEVICES 32
 
 #define MTD_ERASE_PENDING      	0x01
 #define MTD_ERASING		0x02
-- 
cgit v1.2.3


From 91f8026603d4443d1b24ee3552c5a58682bbae27 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@linux-foundation.org>
Date: Tue, 2 Feb 2010 14:43:10 -0800
Subject: JFFS2: avoid using C++ keyword `new' in userspace-visible header

Addresses http://bugzilla.kernel.org/show_bug.cgi?id=14995

Reported-by: R. Diez <rdiezmail-kernelbugzilla@yahoo.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 fs/jffs2/fs.c         | 10 +++++-----
 fs/jffs2/nodelist.h   |  8 ++++----
 include/linux/jffs2.h |  4 ++--
 3 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index 3451a81b2142..86e0821fc989 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -313,8 +313,8 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 	case S_IFBLK:
 	case S_IFCHR:
 		/* Read the device numbers from the media */
-		if (f->metadata->size != sizeof(jdev.old) &&
-		    f->metadata->size != sizeof(jdev.new)) {
+		if (f->metadata->size != sizeof(jdev.old_id) &&
+		    f->metadata->size != sizeof(jdev.new_id)) {
 			printk(KERN_NOTICE "Device node has strange size %d\n", f->metadata->size);
 			goto error_io;
 		}
@@ -325,10 +325,10 @@ struct inode *jffs2_iget(struct super_block *sb, unsigned long ino)
 			printk(KERN_NOTICE "Read device numbers for inode %lu failed\n", (unsigned long)inode->i_ino);
 			goto error;
 		}
-		if (f->metadata->size == sizeof(jdev.old))
-			rdev = old_decode_dev(je16_to_cpu(jdev.old));
+		if (f->metadata->size == sizeof(jdev.old_id))
+			rdev = old_decode_dev(je16_to_cpu(jdev.old_id));
 		else
-			rdev = new_decode_dev(je32_to_cpu(jdev.new));
+			rdev = new_decode_dev(je32_to_cpu(jdev.new_id));
 
 	case S_IFSOCK:
 	case S_IFIFO:
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 507ed6ec1847..36d7a849ee2c 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -312,11 +312,11 @@ static inline int jffs2_blocks_use_vmalloc(struct jffs2_sb_info *c)
 static inline int jffs2_encode_dev(union jffs2_device_node *jdev, dev_t rdev)
 {
 	if (old_valid_dev(rdev)) {
-		jdev->old = cpu_to_je16(old_encode_dev(rdev));
-		return sizeof(jdev->old);
+		jdev->old_id = cpu_to_je16(old_encode_dev(rdev));
+		return sizeof(jdev->old_id);
 	} else {
-		jdev->new = cpu_to_je32(new_encode_dev(rdev));
-		return sizeof(jdev->new);
+		jdev->new_id = cpu_to_je32(new_encode_dev(rdev));
+		return sizeof(jdev->new_id);
 	}
 }
 
diff --git a/include/linux/jffs2.h b/include/linux/jffs2.h
index 2b32d638147d..0874ab59ffef 100644
--- a/include/linux/jffs2.h
+++ b/include/linux/jffs2.h
@@ -215,8 +215,8 @@ union jffs2_node_union
 
 /* Data payload for device nodes. */
 union jffs2_device_node {
-	jint16_t old;
-	jint32_t new;
+	jint16_t old_id;
+	jint32_t new_id;
 };
 
 #endif /* __LINUX_JFFS2_H__ */
-- 
cgit v1.2.3


From 7d70f334ad2bf1b3aaa1f0699c0f442e14bcc9e0 Mon Sep 17 00:00:00 2001
From: Vimal Singh <vimal.newwork@gmail.com>
Date: Mon, 8 Feb 2010 15:50:49 +0530
Subject: mtd: nand: add lock/unlock routines

Add NAND lock/unlock routines. At least Micron parts
support this.

Signed-off-by: Vimal Singh <vimalsingh@ti.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 164 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/mtd/nand.h     |  10 +++
 2 files changed, 174 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 2dfeb4bea83a..ed62e1ee0f81 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -863,6 +863,168 @@ static int nand_wait(struct mtd_info *mtd, struct nand_chip *chip)
 	return status;
 }
 
+/**
+ * __nand_unlock - [REPLACEABLE] unlocks specified locked blocks
+ *
+ * @param mtd - mtd info
+ * @param ofs - offset to start unlock from
+ * @param len - length to unlock
+ * @param invert - when = 0, unlock the range of blocks within the lower
+ *                      and upper boundary address
+ *                 when = 1, unlock the range of blocks outside the boundaries
+ *                      of the lower and upper boundary address
+ *
+ * @return - unlock status
+ */
+static int __nand_unlock(struct mtd_info *mtd, loff_t ofs,
+					uint64_t len, int invert)
+{
+	int ret = 0;
+	int status, page;
+	struct nand_chip *chip = mtd->priv;
+
+	/* Submit address of first page to unlock */
+	page = ofs >> chip->page_shift;
+	chip->cmdfunc(mtd, NAND_CMD_UNLOCK1, -1, page & chip->pagemask);
+
+	/* Submit address of last page to unlock */
+	page = (ofs + len) >> chip->page_shift;
+	chip->cmdfunc(mtd, NAND_CMD_UNLOCK2, -1,
+				(page | invert) & chip->pagemask);
+
+	/* Call wait ready function */
+	status = chip->waitfunc(mtd, chip);
+	udelay(1000);
+	/* See if device thinks it succeeded */
+	if (status & 0x01) {
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Error status = 0x%08x\n",
+					__func__, status);
+		ret = -EIO;
+	}
+
+	return ret;
+}
+
+/**
+ * nand_unlock - [REPLACEABLE] unlocks specified locked blocks
+ *
+ * @param mtd - mtd info
+ * @param ofs - offset to start unlock from
+ * @param len - length to unlock
+ *
+ * @return - unlock status
+ */
+int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	int ret = 0;
+	int chipnr;
+	struct nand_chip *chip = mtd->priv;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: start = 0x%012llx, len = %llu\n",
+			__func__, (unsigned long long)ofs, len);
+
+	if (check_offs_len(mtd, ofs, len))
+		ret = -EINVAL;
+
+	/* Align to last block address if size addresses end of the device */
+	if (ofs + len == mtd->size)
+		len -= mtd->erasesize;
+
+	nand_get_device(chip, mtd, FL_UNLOCKING);
+
+	/* Shift to get chip number */
+	chipnr = ofs >> chip->chip_shift;
+
+	chip->select_chip(mtd, chipnr);
+
+	/* Check, if it is write protected */
+	if (nand_check_wp(mtd)) {
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Device is write protected!!!\n",
+					__func__);
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = __nand_unlock(mtd, ofs, len, 0);
+
+out:
+	/* de-select the NAND device */
+	chip->select_chip(mtd, -1);
+
+	nand_release_device(mtd);
+
+	return ret;
+}
+
+/**
+ * nand_lock - [REPLACEABLE] locks all blocks present in the device
+ *
+ * @param mtd - mtd info
+ * @param ofs - offset to start lock from
+ * @param len - length to lock
+ *
+ * @return - lock status
+ *
+ * This feature is not supported in many NAND parts. 'Micron' NAND parts
+ * do have this feature, but it only allows locking all blocks, not a
+ * specified range of blocks.
+ *
+ * Implementing 'lock' feature by making use of 'unlock', for now.
+ */
+int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
+{
+	int ret = 0;
+	int chipnr, status, page;
+	struct nand_chip *chip = mtd->priv;
+
+	DEBUG(MTD_DEBUG_LEVEL3, "%s: start = 0x%012llx, len = %llu\n",
+			__func__, (unsigned long long)ofs, len);
+
+	if (check_offs_len(mtd, ofs, len))
+		ret = -EINVAL;
+
+	nand_get_device(chip, mtd, FL_LOCKING);
+
+	/* Shift to get chip number */
+	chipnr = ofs >> chip->chip_shift;
+
+	chip->select_chip(mtd, chipnr);
+
+	/* Check, if it is write protected */
+	if (nand_check_wp(mtd)) {
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Device is write protected!!!\n",
+					__func__);
+		status = MTD_ERASE_FAILED;
+		ret = -EIO;
+		goto out;
+	}
+
+	/* Submit address of first page to lock */
+	page = ofs >> chip->page_shift;
+	chip->cmdfunc(mtd, NAND_CMD_LOCK, -1, page & chip->pagemask);
+
+	/* Call wait ready function */
+	status = chip->waitfunc(mtd, chip);
+	udelay(1000);
+	/* See if device thinks it succeeded */
+	if (status & 0x01) {
+		DEBUG(MTD_DEBUG_LEVEL0, "%s: Error status = 0x%08x\n",
+					__func__, status);
+		ret = -EIO;
+		goto out;
+	}
+
+	ret = __nand_unlock(mtd, ofs, len, 0x1);
+
+out:
+	/* de-select the NAND device */
+	chip->select_chip(mtd, -1);
+
+	nand_release_device(mtd);
+
+	return ret;
+}
+
 /**
  * nand_read_page_raw - [Intern] read raw page data without ecc
  * @mtd:	mtd info structure
@@ -3089,6 +3251,8 @@ void nand_release(struct mtd_info *mtd)
 		kfree(chip->buffers);
 }
 
+EXPORT_SYMBOL_GPL(nand_lock);
+EXPORT_SYMBOL_GPL(nand_unlock);
 EXPORT_SYMBOL_GPL(nand_scan);
 EXPORT_SYMBOL_GPL(nand_scan_ident);
 EXPORT_SYMBOL_GPL(nand_scan_tail);
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index ccab9dfc5217..48bc2c54302c 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -38,6 +38,12 @@ extern void nand_release (struct mtd_info *mtd);
 /* Internal helper for board drivers which need to override command function */
 extern void nand_wait_ready(struct mtd_info *mtd);
 
+/* locks all blocks present in the device */
+extern int nand_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+
+/* unlocks specified locked blocks */
+extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
+
 /* The maximum number of NAND chips in an array */
 #define NAND_MAX_CHIPS		8
 
@@ -82,6 +88,10 @@ extern void nand_wait_ready(struct mtd_info *mtd);
 #define NAND_CMD_ERASE2		0xd0
 #define NAND_CMD_RESET		0xff
 
+#define NAND_CMD_LOCK		0x2a
+#define NAND_CMD_UNLOCK1	0x23
+#define NAND_CMD_UNLOCK2	0x24
+
 /* Extended commands for large page devices */
 #define NAND_CMD_READSTART	0x30
 #define NAND_CMD_RNDOUTSTART	0xE0
-- 
cgit v1.2.3


From f3e69c6584be2db1ccd5292d6a1d7c566d265701 Mon Sep 17 00:00:00 2001
From: Guillaume LECERF <glecerf@gmail.com>
Date: Tue, 15 Dec 2009 23:01:06 +0100
Subject: mtd: move more manufacturers to the common cfi.h header file

Move MANUFACTURER_MACRONIX and MANUFACTURER_SST definitions to the
include/linux/mtd/cfi.h header file and rename them to CFI_MFR_MACRONIX and
CFI_MFR_SST.

All references in drivers/mtd/chips/cfi_cmdset_0002.c are updated to reflect
this.

Signed-off-by: Guillaume LECERF <glecerf@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0002.c | 14 +++++---------
 include/linux/mtd/cfi.h             | 12 +++++++-----
 2 files changed, 12 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index 1ebdcdd72d84..ea2a7f66ddf9 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -43,10 +43,6 @@
 
 #define MAX_WORD_RETRIES 3
 
-#define MANUFACTURER_AMD	0x0001
-#define MANUFACTURER_ATMEL	0x001F
-#define MANUFACTURER_MACRONIX	0x00C2
-#define MANUFACTURER_SST	0x00BF
 #define SST49LF004B	        0x0060
 #define SST49LF040B	        0x0050
 #define SST49LF008A		0x005a
@@ -168,7 +164,7 @@ static void fixup_amd_bootblock(struct mtd_info *mtd, void* param)
 			 * This reduces the risk of false detection due to
 			 * the 8-bit device ID.
 			 */
-			(cfi->mfr == MANUFACTURER_MACRONIX)) {
+			(cfi->mfr == CFI_MFR_MACRONIX)) {
 			DEBUG(MTD_DEBUG_LEVEL1,
 				"%s: Macronix MX29LV400C with bottom boot block"
 				" detected\n", map->name);
@@ -286,7 +282,7 @@ static struct cfi_fixup cfi_fixup_table[] = {
 	{ CFI_MFR_ATMEL, CFI_ID_ANY, fixup_convert_atmel_pri, NULL },
 #ifdef AMD_BOOTLOC_BUG
 	{ CFI_MFR_AMD, CFI_ID_ANY, fixup_amd_bootblock, NULL },
-	{ MANUFACTURER_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
+	{ CFI_MFR_MACRONIX, CFI_ID_ANY, fixup_amd_bootblock, NULL },
 #endif
 	{ CFI_MFR_AMD, 0x0050, fixup_use_secsi, NULL, },
 	{ CFI_MFR_AMD, 0x0053, fixup_use_secsi, NULL, },
@@ -304,9 +300,9 @@ static struct cfi_fixup cfi_fixup_table[] = {
 	{ 0, 0, NULL, NULL }
 };
 static struct cfi_fixup jedec_fixup_table[] = {
-	{ MANUFACTURER_SST, SST49LF004B, fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_SST, SST49LF040B, fixup_use_fwh_lock, NULL, },
-	{ MANUFACTURER_SST, SST49LF008A, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_SST, SST49LF004B, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_SST, SST49LF040B, fixup_use_fwh_lock, NULL, },
+	{ CFI_MFR_SST, SST49LF008A, fixup_use_fwh_lock, NULL, },
 	{ 0, 0, NULL, NULL }
 };
 
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index a4eefc5810dc..cee05b1e62b1 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -518,11 +518,13 @@ struct cfi_fixup {
 #define CFI_MFR_ANY 0xffff
 #define CFI_ID_ANY  0xffff
 
-#define CFI_MFR_AMD	0x0001
-#define CFI_MFR_INTEL	0x0089
-#define CFI_MFR_ATMEL	0x001F
-#define CFI_MFR_SAMSUNG	0x00EC
-#define CFI_MFR_ST	0x0020 /* STMicroelectronics */
+#define CFI_MFR_AMD		0x0001
+#define CFI_MFR_ATMEL		0x001F
+#define CFI_MFR_INTEL		0x0089
+#define CFI_MFR_MACRONIX	0x00C2
+#define CFI_MFR_SAMSUNG		0x00EC
+#define CFI_MFR_SST		0x00BF
+#define CFI_MFR_ST		0x0020 /* STMicroelectronics */
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
 
-- 
cgit v1.2.3


From 3bd456576f22acd55fb6c3d3d4261131821f5a3b Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:28 +0200
Subject: mtd: create unlocked versions of {get,put}_mtd_device

Use these only when the caller already holds mtd_table_mutex.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtdcore.c   | 60 ++++++++++++++++++++++++++++++++-----------------
 include/linux/mtd/mtd.h |  3 ++-
 2 files changed, 41 insertions(+), 22 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtdcore.c b/drivers/mtd/mtdcore.c
index b3b98d1fffc3..67669a76eaf5 100644
--- a/drivers/mtd/mtdcore.c
+++ b/drivers/mtd/mtdcore.c
@@ -463,27 +463,38 @@ struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num)
 			ret = NULL;
 	}
 
-	if (!ret)
-		goto out_unlock;
-
-	if (!try_module_get(ret->owner))
-		goto out_unlock;
-
-	if (ret->get_device) {
-		err = ret->get_device(ret);
-		if (err)
-			goto out_put;
+	if (!ret) {
+		ret = ERR_PTR(err);
+		goto out;
 	}
 
-	ret->usecount++;
+	err = __get_mtd_device(ret);
+	if (err)
+		ret = ERR_PTR(err);
+out:
 	mutex_unlock(&mtd_table_mutex);
 	return ret;
+}
 
-out_put:
-	module_put(ret->owner);
-out_unlock:
-	mutex_unlock(&mtd_table_mutex);
-	return ERR_PTR(err);
+
+int __get_mtd_device(struct mtd_info *mtd)
+{
+	int err;
+
+	if (!try_module_get(mtd->owner))
+		return -ENODEV;
+
+	if (mtd->get_device) {
+
+		err = mtd->get_device(mtd);
+
+		if (err) {
+			module_put(mtd->owner);
+			return err;
+		}
+	}
+	mtd->usecount++;
+	return 0;
 }
 
 /**
@@ -534,14 +545,19 @@ out_unlock:
 
 void put_mtd_device(struct mtd_info *mtd)
 {
-	int c;
-
 	mutex_lock(&mtd_table_mutex);
-	c = --mtd->usecount;
+	__put_mtd_device(mtd);
+	mutex_unlock(&mtd_table_mutex);
+
+}
+
+void __put_mtd_device(struct mtd_info *mtd)
+{
+	--mtd->usecount;
+	BUG_ON(mtd->usecount < 0);
+
 	if (mtd->put_device)
 		mtd->put_device(mtd);
-	mutex_unlock(&mtd_table_mutex);
-	BUG_ON(c < 0);
 
 	module_put(mtd->owner);
 }
@@ -579,7 +595,9 @@ EXPORT_SYMBOL_GPL(add_mtd_device);
 EXPORT_SYMBOL_GPL(del_mtd_device);
 EXPORT_SYMBOL_GPL(get_mtd_device);
 EXPORT_SYMBOL_GPL(get_mtd_device_nm);
+EXPORT_SYMBOL_GPL(__get_mtd_device);
 EXPORT_SYMBOL_GPL(put_mtd_device);
+EXPORT_SYMBOL_GPL(__put_mtd_device);
 EXPORT_SYMBOL_GPL(register_mtd_user);
 EXPORT_SYMBOL_GPL(unregister_mtd_user);
 EXPORT_SYMBOL_GPL(default_mtd_writev);
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index ba53ecca107c..11d8e68d17c0 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -289,8 +289,9 @@ extern int add_mtd_device(struct mtd_info *mtd);
 extern int del_mtd_device (struct mtd_info *mtd);
 
 extern struct mtd_info *get_mtd_device(struct mtd_info *mtd, int num);
+extern int __get_mtd_device(struct mtd_info *mtd);
+extern void __put_mtd_device(struct mtd_info *mtd);
 extern struct mtd_info *get_mtd_device_nm(const char *name);
-
 extern void put_mtd_device(struct mtd_info *mtd);
 
 
-- 
cgit v1.2.3


From a863862257b7dd08d855bafcb0aedd9ad848ed91 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:29 +0200
Subject: mtd: blktrans: remove mtd_blkcore_priv, switch to per device queue
 and thread

This is the biggest change. To make hotplug possible and keep this layer
clean, mtd_blktrans_dev now contains everything needed for a single mtd
block translation device. Also remove some very old leftovers.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtd_blkdevs.c    | 124 ++++++++++++++++++++-----------------------
 include/linux/mtd/blktrans.h |  10 ++--
 2 files changed, 63 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 2f8c202dbd86..6a572625bfc0 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -14,7 +14,6 @@
 #include <linux/mtd/mtd.h>
 #include <linux/blkdev.h>
 #include <linux/blkpg.h>
-#include <linux/freezer.h>
 #include <linux/spinlock.h>
 #include <linux/hdreg.h>
 #include <linux/init.h>
@@ -26,11 +25,6 @@
 
 static LIST_HEAD(blktrans_majors);
 
-struct mtd_blkcore_priv {
-	struct task_struct *thread;
-	struct request_queue *rq;
-	spinlock_t queue_lock;
-};
 
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			       struct mtd_blktrans_dev *dev,
@@ -61,7 +55,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 				return -EIO;
 		rq_flush_dcache_pages(req);
 		return 0;
-
 	case WRITE:
 		if (!tr->writesect)
 			return -EIO;
@@ -71,7 +64,6 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 			if (tr->writesect(dev, block, buf))
 				return -EIO;
 		return 0;
-
 	default:
 		printk(KERN_NOTICE "Unknown request %u\n", rq_data_dir(req));
 		return -EIO;
@@ -80,14 +72,13 @@ static int do_blktrans_request(struct mtd_blktrans_ops *tr,
 
 static int mtd_blktrans_thread(void *arg)
 {
-	struct mtd_blktrans_ops *tr = arg;
-	struct request_queue *rq = tr->blkcore_priv->rq;
+	struct mtd_blktrans_dev *dev = arg;
+	struct request_queue *rq = dev->rq;
 	struct request *req = NULL;
 
 	spin_lock_irq(rq->queue_lock);
 
 	while (!kthread_should_stop()) {
-		struct mtd_blktrans_dev *dev;
 		int res;
 
 		if (!req && !(req = blk_fetch_request(rq))) {
@@ -98,13 +89,10 @@ static int mtd_blktrans_thread(void *arg)
 			continue;
 		}
 
-		dev = req->rq_disk->private_data;
-		tr = dev->tr;
-
 		spin_unlock_irq(rq->queue_lock);
 
 		mutex_lock(&dev->lock);
-		res = do_blktrans_request(tr, dev, req);
+		res = do_blktrans_request(dev->tr, dev, req);
 		mutex_unlock(&dev->lock);
 
 		spin_lock_irq(rq->queue_lock);
@@ -123,8 +111,8 @@ static int mtd_blktrans_thread(void *arg)
 
 static void mtd_blktrans_request(struct request_queue *rq)
 {
-	struct mtd_blktrans_ops *tr = rq->queuedata;
-	wake_up_process(tr->blkcore_priv->thread);
+	struct mtd_blktrans_dev *dev = rq->queuedata;
+	wake_up_process(dev->thread);
 }
 
 
@@ -214,6 +202,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	struct mtd_blktrans_dev *d;
 	int last_devnum = -1;
 	struct gendisk *gd;
+	int ret;
 
 	if (mutex_trylock(&mtd_table_mutex)) {
 		mutex_unlock(&mtd_table_mutex);
@@ -239,6 +228,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 		}
 		last_devnum = d->devnum;
 	}
+
+	ret = -EBUSY;
 	if (new->devnum == -1)
 		new->devnum = last_devnum+1;
 
@@ -247,7 +238,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	 * with this number. */
 	if (new->devnum > (MINORMASK >> tr->part_bits) ||
 	    (tr->part_bits && new->devnum >= 27 * 26))
-		return -EBUSY;
+		goto error1;
 
 	list_add_tail(&new->list, &tr->devs);
  added:
@@ -255,11 +246,16 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	if (!tr->writesect)
 		new->readonly = 1;
 
+
+	/* Create gendisk */
+	ret = -ENOMEM;
 	gd = alloc_disk(1 << tr->part_bits);
-	if (!gd) {
-		list_del(&new->list);
-		return -ENOMEM;
-	}
+
+	if (!gd)
+		goto error2;
+
+	new->disk = gd;
+	gd->private_data = new;
 	gd->major = tr->major;
 	gd->first_minor = (new->devnum) << tr->part_bits;
 	gd->fops = &mtd_blktrans_ops;
@@ -277,21 +273,49 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 		snprintf(gd->disk_name, sizeof(gd->disk_name),
 			 "%s%d", tr->name, new->devnum);
 
-	/* 2.5 has capacity in units of 512 bytes while still
-	   having BLOCK_SIZE_BITS set to 10. Just to keep us amused. */
 	set_capacity(gd, (new->size * tr->blksize) >> 9);
 
-	gd->private_data = new;
-	new->blkcore_priv = gd;
-	gd->queue = tr->blkcore_priv->rq;
+
+	/* Create the request queue */
+	spin_lock_init(&new->queue_lock);
+	new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
+
+	if (!new->rq)
+		goto error3;
+
+	new->rq->queuedata = new;
+	blk_queue_logical_block_size(new->rq, tr->blksize);
+
+	if (tr->discard)
+		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
+					new->rq);
+
+	gd->queue = new->rq;
+
+	/* Create processing thread */
+	/* TODO: workqueue ? */
+	new->thread = kthread_run(mtd_blktrans_thread, new,
+			"%s%d", tr->name, new->mtd->index);
+	if (IS_ERR(new->thread)) {
+		ret = PTR_ERR(new->thread);
+		goto error4;
+	}
 	gd->driverfs_dev = &new->mtd->dev;
 
 	if (new->readonly)
 		set_disk_ro(gd, 1);
 
 	add_disk(gd);
-
 	return 0;
+error4:
+	blk_cleanup_queue(new->rq);
+error3:
+	put_disk(new->disk);
+error2:
+	list_del(&new->list);
+error1:
+	kfree(new);
+	return ret;
 }
 
 int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
@@ -303,9 +327,13 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 
 	list_del(&old->list);
 
-	del_gendisk(old->blkcore_priv);
-	put_disk(old->blkcore_priv);
+	/* stop new requests to arrive */
+	del_gendisk(old->disk);
 
+	/* Stop the thread */
+	kthread_stop(old->thread);
+
+	blk_cleanup_queue(old->rq);
 	return 0;
 }
 
@@ -347,9 +375,6 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	if (!blktrans_notifier.list.next)
 		register_mtd_user(&blktrans_notifier);
 
-	tr->blkcore_priv = kzalloc(sizeof(*tr->blkcore_priv), GFP_KERNEL);
-	if (!tr->blkcore_priv)
-		return -ENOMEM;
 
 	mutex_lock(&mtd_table_mutex);
 
@@ -357,39 +382,12 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	if (ret) {
 		printk(KERN_WARNING "Unable to register %s block device on major %d: %d\n",
 		       tr->name, tr->major, ret);
-		kfree(tr->blkcore_priv);
 		mutex_unlock(&mtd_table_mutex);
 		return ret;
 	}
-	spin_lock_init(&tr->blkcore_priv->queue_lock);
-
-	tr->blkcore_priv->rq = blk_init_queue(mtd_blktrans_request, &tr->blkcore_priv->queue_lock);
-	if (!tr->blkcore_priv->rq) {
-		unregister_blkdev(tr->major, tr->name);
-		kfree(tr->blkcore_priv);
-		mutex_unlock(&mtd_table_mutex);
-		return -ENOMEM;
-	}
-
-	tr->blkcore_priv->rq->queuedata = tr;
-	blk_queue_logical_block_size(tr->blkcore_priv->rq, tr->blksize);
-	if (tr->discard)
-		queue_flag_set_unlocked(QUEUE_FLAG_DISCARD,
-					tr->blkcore_priv->rq);
 
 	tr->blkshift = ffs(tr->blksize) - 1;
 
-	tr->blkcore_priv->thread = kthread_run(mtd_blktrans_thread, tr,
-			"%sd", tr->name);
-	if (IS_ERR(tr->blkcore_priv->thread)) {
-		ret = PTR_ERR(tr->blkcore_priv->thread);
-		blk_cleanup_queue(tr->blkcore_priv->rq);
-		unregister_blkdev(tr->major, tr->name);
-		kfree(tr->blkcore_priv);
-		mutex_unlock(&mtd_table_mutex);
-		return ret;
-	}
-
 	INIT_LIST_HEAD(&tr->devs);
 	list_add(&tr->list, &blktrans_majors);
 
@@ -408,8 +406,6 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
 
 	mutex_lock(&mtd_table_mutex);
 
-	/* Clean up the kernel thread */
-	kthread_stop(tr->blkcore_priv->thread);
 
 	/* Remove it from the list of active majors */
 	list_del(&tr->list);
@@ -417,13 +413,9 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
 	list_for_each_entry_safe(dev, next, &tr->devs, list)
 		tr->remove_dev(dev);
 
-	blk_cleanup_queue(tr->blkcore_priv->rq);
 	unregister_blkdev(tr->major, tr->name);
-
 	mutex_unlock(&mtd_table_mutex);
 
-	kfree(tr->blkcore_priv);
-
 	BUG_ON(!list_empty(&tr->devs));
 	return 0;
 }
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index 8b4aa0523db7..a4b392868b54 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -24,11 +24,13 @@ struct mtd_blktrans_dev {
 	int devnum;
 	unsigned long size;
 	int readonly;
-	void *blkcore_priv; /* gendisk in 2.5, devfs_handle in 2.4 */
+	struct gendisk *disk;
+	struct task_struct *thread;
+	struct request_queue *rq;
+	spinlock_t queue_lock;
+	void *priv;
 };
 
-struct blkcore_priv; /* Differs for 2.4 and 2.5 kernels; private */
-
 struct mtd_blktrans_ops {
 	char *name;
 	int major;
@@ -60,8 +62,6 @@ struct mtd_blktrans_ops {
 	struct list_head devs;
 	struct list_head list;
 	struct module *owner;
-
-	struct mtd_blkcore_priv *blkcore_priv;
 };
 
 extern int register_mtd_blktrans(struct mtd_blktrans_ops *tr);
-- 
cgit v1.2.3


From 048d87199566663e4edc4880df3703c04bcf41d9 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:30 +0200
Subject: mtd: blktrans: Hotplug fixes

* Add locking where it was missing.

* Don't do a get_mtd_device in blktrans_open because it would lead to a
  deadlock; instead do that in add_mtd_blktrans_dev.

* Only free the mtd_blktrans_dev structure when the last user exits.

* Flush request queue on device removal.

* Track users, and call tr->release in del_mtd_blktrans_dev.
  Due to that, ->open and ->release aren't called more than once.

Now it is safe to call del_mtd_blktrans_dev while the device is still in use.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/ftl.c            |   1 -
 drivers/mtd/inftlcore.c      |   1 -
 drivers/mtd/mtd_blkdevs.c    | 202 +++++++++++++++++++++++++++++++------------
 drivers/mtd/mtdblock.c       |   2 -
 drivers/mtd/mtdblock_ro.c    |   1 -
 drivers/mtd/nftlcore.c       |   1 -
 drivers/mtd/rfd_ftl.c        |   1 -
 drivers/mtd/ssfdc.c          |   1 -
 include/linux/mtd/blktrans.h |   3 +
 9 files changed, 151 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/ftl.c b/drivers/mtd/ftl.c
index e56d6b42f020..62da9eb7032b 100644
--- a/drivers/mtd/ftl.c
+++ b/drivers/mtd/ftl.c
@@ -1082,7 +1082,6 @@ static void ftl_remove_dev(struct mtd_blktrans_dev *dev)
 {
 	del_mtd_blktrans_dev(dev);
 	ftl_freepart((partition_t *)dev);
-	kfree(dev);
 }
 
 static struct mtd_blktrans_ops ftl_tr = {
diff --git a/drivers/mtd/inftlcore.c b/drivers/mtd/inftlcore.c
index 8aca5523a337..015a7fe1b6ee 100755
--- a/drivers/mtd/inftlcore.c
+++ b/drivers/mtd/inftlcore.c
@@ -139,7 +139,6 @@ static void inftl_remove_dev(struct mtd_blktrans_dev *dev)
 
 	kfree(inftl->PUtable);
 	kfree(inftl->VUtable);
-	kfree(inftl);
 }
 
 /*
diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 6a572625bfc0..646cc84ae692 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -24,6 +24,40 @@
 #include "mtdcore.h"
 
 static LIST_HEAD(blktrans_majors);
+static DEFINE_MUTEX(blktrans_ref_mutex);
+
+void blktrans_dev_release(struct kref *kref)
+{
+	struct mtd_blktrans_dev *dev =
+		container_of(kref, struct mtd_blktrans_dev, ref);
+
+	dev->disk->private_data = NULL;
+	put_disk(dev->disk);
+	list_del(&dev->list);
+	kfree(dev);
+}
+
+static struct mtd_blktrans_dev *blktrans_dev_get(struct gendisk *disk)
+{
+	struct mtd_blktrans_dev *dev;
+
+	mutex_lock(&blktrans_ref_mutex);
+	dev = disk->private_data;
+
+	if (!dev)
+		goto unlock;
+	kref_get(&dev->ref);
+unlock:
+	mutex_unlock(&blktrans_ref_mutex);
+	return dev;
+}
+
+void blktrans_dev_put(struct mtd_blktrans_dev *dev)
+{
+	mutex_lock(&blktrans_ref_mutex);
+	kref_put(&dev->ref, blktrans_dev_release);
+	mutex_unlock(&blktrans_ref_mutex);
+}
 
 
 static int do_blktrans_request(struct mtd_blktrans_ops *tr,
@@ -111,81 +145,112 @@ static int mtd_blktrans_thread(void *arg)
 
 static void mtd_blktrans_request(struct request_queue *rq)
 {
-	struct mtd_blktrans_dev *dev = rq->queuedata;
-	wake_up_process(dev->thread);
-}
+	struct mtd_blktrans_dev *dev;
+	struct request *req = NULL;
+
+	dev = rq->queuedata;
 
+	if (!dev)
+		while ((req = blk_fetch_request(rq)) != NULL)
+			__blk_end_request_all(req, -ENODEV);
+	else
+		wake_up_process(dev->thread);
+}
 
 static int blktrans_open(struct block_device *bdev, fmode_t mode)
 {
-	struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
-	struct mtd_blktrans_ops *tr = dev->tr;
-	int ret = -ENODEV;
-
-	if (!get_mtd_device(NULL, dev->mtd->index))
-		goto out;
-
-	if (!try_module_get(tr->owner))
-		goto out_tr;
-
-	/* FIXME: Locking. A hot pluggable device can go away
-	   (del_mtd_device can be called for it) without its module
-	   being unloaded. */
-	dev->mtd->usecount++;
-
-	ret = 0;
-	if (tr->open && (ret = tr->open(dev))) {
-		dev->mtd->usecount--;
-		put_mtd_device(dev->mtd);
-	out_tr:
-		module_put(tr->owner);
+	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
+	int ret;
+
+	if (!dev)
+		return -ERESTARTSYS;
+
+	mutex_lock(&dev->lock);
+
+	if (!dev->mtd) {
+		ret = -ENXIO;
+		goto unlock;
 	}
- out:
+
+	ret = !dev->open++ && dev->tr->open ? dev->tr->open(dev) : 0;
+
+	/* Take another reference on the device so it won't go away till
+		last release */
+	if (!ret)
+		kref_get(&dev->ref);
+unlock:
+	mutex_unlock(&dev->lock);
+	blktrans_dev_put(dev);
 	return ret;
 }
 
 static int blktrans_release(struct gendisk *disk, fmode_t mode)
 {
-	struct mtd_blktrans_dev *dev = disk->private_data;
-	struct mtd_blktrans_ops *tr = dev->tr;
-	int ret = 0;
+	struct mtd_blktrans_dev *dev = blktrans_dev_get(disk);
+	int ret = -ENXIO;
 
-	if (tr->release)
-		ret = tr->release(dev);
+	if (!dev)
+		return ret;
 
-	if (!ret) {
-		dev->mtd->usecount--;
-		put_mtd_device(dev->mtd);
-		module_put(tr->owner);
-	}
+	mutex_lock(&dev->lock);
+
+	/* Release one reference, we sure its not the last one here*/
+	kref_put(&dev->ref, blktrans_dev_release);
 
+	if (!dev->mtd)
+		goto unlock;
+
+	ret = !--dev->open && dev->tr->release ? dev->tr->release(dev) : 0;
+unlock:
+	mutex_unlock(&dev->lock);
+	blktrans_dev_put(dev);
 	return ret;
 }
 
 static int blktrans_getgeo(struct block_device *bdev, struct hd_geometry *geo)
 {
-	struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
+	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
+	int ret = -ENXIO;
+
+	if (!dev)
+		return ret;
+
+	mutex_lock(&dev->lock);
 
-	if (dev->tr->getgeo)
-		return dev->tr->getgeo(dev, geo);
-	return -ENOTTY;
+	if (!dev->mtd)
+		goto unlock;
+
+	ret = dev->tr->getgeo ? dev->tr->getgeo(dev, geo) : 0;
+unlock:
+	mutex_unlock(&dev->lock);
+	blktrans_dev_put(dev);
+	return ret;
 }
 
 static int blktrans_ioctl(struct block_device *bdev, fmode_t mode,
 			      unsigned int cmd, unsigned long arg)
 {
-	struct mtd_blktrans_dev *dev = bdev->bd_disk->private_data;
-	struct mtd_blktrans_ops *tr = dev->tr;
+	struct mtd_blktrans_dev *dev = blktrans_dev_get(bdev->bd_disk);
+	int ret = -ENXIO;
+
+	if (!dev)
+		return ret;
+
+	mutex_lock(&dev->lock);
+
+	if (!dev->mtd)
+		goto unlock;
 
 	switch (cmd) {
 	case BLKFLSBUF:
-		if (tr->flush)
-			return tr->flush(dev);
-		/* The core code did the work, we had nothing to do. */
-		return 0;
+		ret = dev->tr->flush ? dev->tr->flush(dev) : 0;
 	default:
-		return -ENOTTY;
+		ret = -ENOTTY;
 	}
+unlock:
+	mutex_unlock(&dev->lock);
+	blktrans_dev_put(dev);
+	return ret;
 }
 
 static const struct block_device_operations mtd_blktrans_ops = {
@@ -209,6 +274,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 		BUG();
 	}
 
+	mutex_lock(&blktrans_ref_mutex);
 	list_for_each_entry(d, &tr->devs, list) {
 		if (new->devnum == -1) {
 			/* Use first free number */
@@ -220,6 +286,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 			}
 		} else if (d->devnum == new->devnum) {
 			/* Required number taken */
+			mutex_unlock(&blktrans_ref_mutex);
 			return -EBUSY;
 		} else if (d->devnum > new->devnum) {
 			/* Required number was free */
@@ -237,16 +304,20 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	 * minor numbers and that the disk naming code below can cope
 	 * with this number. */
 	if (new->devnum > (MINORMASK >> tr->part_bits) ||
-	    (tr->part_bits && new->devnum >= 27 * 26))
+	    (tr->part_bits && new->devnum >= 27 * 26)) {
+		mutex_unlock(&blktrans_ref_mutex);
 		goto error1;
+	}
 
 	list_add_tail(&new->list, &tr->devs);
  added:
+	mutex_unlock(&blktrans_ref_mutex);
+
 	mutex_init(&new->lock);
+	kref_init(&new->ref);
 	if (!tr->writesect)
 		new->readonly = 1;
 
-
 	/* Create gendisk */
 	ret = -ENOMEM;
 	gd = alloc_disk(1 << tr->part_bits);
@@ -275,7 +346,6 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	set_capacity(gd, (new->size * tr->blksize) >> 9);
 
-
 	/* Create the request queue */
 	spin_lock_init(&new->queue_lock);
 	new->rq = blk_init_queue(mtd_blktrans_request, &new->queue_lock);
@@ -292,6 +362,9 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 
 	gd->queue = new->rq;
 
+	__get_mtd_device(new->mtd);
+	__module_get(tr->owner);
+
 	/* Create processing thread */
 	/* TODO: workqueue ? */
 	new->thread = kthread_run(mtd_blktrans_thread, new,
@@ -308,6 +381,8 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 	add_disk(gd);
 	return 0;
 error4:
+	module_put(tr->owner);
+	__put_mtd_device(new->mtd);
 	blk_cleanup_queue(new->rq);
 error3:
 	put_disk(new->disk);
@@ -320,20 +395,41 @@ error1:
 
 int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 {
+	unsigned long flags;
+
 	if (mutex_trylock(&mtd_table_mutex)) {
 		mutex_unlock(&mtd_table_mutex);
 		BUG();
 	}
 
-	list_del(&old->list);
-
-	/* stop new requests to arrive */
+	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
 	/* Stop the thread */
 	kthread_stop(old->thread);
 
+	/* Kill current requests */
+	spin_lock_irqsave(&old->queue_lock, flags);
+	old->rq->queuedata = NULL;
+	blk_start_queue(old->rq);
+	spin_unlock_irqrestore(&old->queue_lock, flags);
 	blk_cleanup_queue(old->rq);
+
+	/* Ask trans driver for release to the mtd device */
+	mutex_lock(&old->lock);
+	if (old->open && old->tr->release) {
+		old->tr->release(old);
+		old->open = 0;
+	}
+
+	__put_mtd_device(old->mtd);
+	module_put(old->tr->owner);
+
+	/* At that point, we don't touch the mtd anymore */
+	old->mtd = NULL;
+
+	mutex_unlock(&old->lock);
+	blktrans_dev_put(old);
 	return 0;
 }
 
@@ -396,7 +492,6 @@ int register_mtd_blktrans(struct mtd_blktrans_ops *tr)
 			tr->add_mtd(tr, mtd);
 
 	mutex_unlock(&mtd_table_mutex);
-
 	return 0;
 }
 
@@ -406,7 +501,6 @@ int deregister_mtd_blktrans(struct mtd_blktrans_ops *tr)
 
 	mutex_lock(&mtd_table_mutex);
 
-
 	/* Remove it from the list of active majors */
 	list_del(&tr->list);
 
diff --git a/drivers/mtd/mtdblock.c b/drivers/mtd/mtdblock.c
index 69f6bf2e0a8c..8e5da1e46076 100644
--- a/drivers/mtd/mtdblock.c
+++ b/drivers/mtd/mtdblock.c
@@ -354,9 +354,7 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 static void mtdblock_remove_dev(struct mtd_blktrans_dev *dev)
 {
 	struct mtdblk_dev *mtdblk = container_of(dev, struct mtdblk_dev, mbd);
-
 	del_mtd_blktrans_dev(dev);
-	kfree(mtdblk);
 }
 
 static struct mtd_blktrans_ops mtdblock_tr = {
diff --git a/drivers/mtd/mtdblock_ro.c b/drivers/mtd/mtdblock_ro.c
index 852165f8b1c3..54ff2880cf65 100644
--- a/drivers/mtd/mtdblock_ro.c
+++ b/drivers/mtd/mtdblock_ro.c
@@ -49,7 +49,6 @@ static void mtdblock_add_mtd(struct mtd_blktrans_ops *tr, struct mtd_info *mtd)
 static void mtdblock_remove_dev(struct mtd_blktrans_dev *dev)
 {
 	del_mtd_blktrans_dev(dev);
-	kfree(dev);
 }
 
 static struct mtd_blktrans_ops mtdblock_tr = {
diff --git a/drivers/mtd/nftlcore.c b/drivers/mtd/nftlcore.c
index 1002e1882996..a4578bf903aa 100644
--- a/drivers/mtd/nftlcore.c
+++ b/drivers/mtd/nftlcore.c
@@ -126,7 +126,6 @@ static void nftl_remove_dev(struct mtd_blktrans_dev *dev)
 	del_mtd_blktrans_dev(dev);
 	kfree(nftl->ReplUnitTable);
 	kfree(nftl->EUNtable);
-	kfree(nftl);
 }
 
 /*
diff --git a/drivers/mtd/rfd_ftl.c b/drivers/mtd/rfd_ftl.c
index d2aa9c46530f..63b83c0d9a13 100644
--- a/drivers/mtd/rfd_ftl.c
+++ b/drivers/mtd/rfd_ftl.c
@@ -817,7 +817,6 @@ static void rfd_ftl_remove_dev(struct mtd_blktrans_dev *dev)
 	vfree(part->sector_map);
 	kfree(part->header_cache);
 	kfree(part->blocks);
-	kfree(part);
 }
 
 static struct mtd_blktrans_ops rfd_ftl_tr = {
diff --git a/drivers/mtd/ssfdc.c b/drivers/mtd/ssfdc.c
index 3f67e00d98e0..81c4ecdc11f5 100644
--- a/drivers/mtd/ssfdc.c
+++ b/drivers/mtd/ssfdc.c
@@ -375,7 +375,6 @@ static void ssfdcr_remove_dev(struct mtd_blktrans_dev *dev)
 
 	del_mtd_blktrans_dev(dev);
 	kfree(ssfdc->logic_block_map);
-	kfree(ssfdc);
 }
 
 static int ssfdcr_readsect(struct mtd_blktrans_dev *dev,
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index a4b392868b54..d89b8fbba4c9 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -9,6 +9,7 @@
 #define __MTD_TRANS_H__
 
 #include <linux/mutex.h>
+#include <linux/kref.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -24,6 +25,8 @@ struct mtd_blktrans_dev {
 	int devnum;
 	unsigned long size;
 	int readonly;
+	int open;
+	struct kref ref;
 	struct gendisk *disk;
 	struct task_struct *thread;
 	struct request_queue *rq;
-- 
cgit v1.2.3


From 026ec57886b67c092bf7baecd029a7c1c4998c28 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:33 +0200
Subject: mtd: blktrans: allow FTL drivers to export sysfs attributes

This patch adds the ability to export sysfs attributes below
the block disk device.

This can be used to pass information about the FTL to udev,
and could include the vendor, serial, version, etc.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/mtd_blkdevs.c    | 8 ++++++++
 include/linux/mtd/blktrans.h | 2 ++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 646cc84ae692..9dd23d6acbb6 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -379,6 +379,10 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
 		set_disk_ro(gd, 1);
 
 	add_disk(gd);
+
+	if (new->disk_attributes)
+		sysfs_create_group(&disk_to_dev(gd)->kobj,
+					new->disk_attributes);
 	return 0;
 error4:
 	module_put(tr->owner);
@@ -405,6 +409,10 @@ int del_mtd_blktrans_dev(struct mtd_blktrans_dev *old)
 	/* Stop new requests to arrive */
 	del_gendisk(old->disk);
 
+	if (old->disk_attributes)
+		sysfs_remove_group(&disk_to_dev(old->disk)->kobj,
+						old->disk_attributes);
+
 	/* Stop the thread */
 	kthread_stop(old->thread);
 
diff --git a/include/linux/mtd/blktrans.h b/include/linux/mtd/blktrans.h
index d89b8fbba4c9..b481ccd7ff3c 100644
--- a/include/linux/mtd/blktrans.h
+++ b/include/linux/mtd/blktrans.h
@@ -10,6 +10,7 @@
 
 #include <linux/mutex.h>
 #include <linux/kref.h>
+#include <linux/sysfs.h>
 
 struct hd_geometry;
 struct mtd_info;
@@ -28,6 +29,7 @@ struct mtd_blktrans_dev {
 	int open;
 	struct kref ref;
 	struct gendisk *disk;
+	struct attribute_group *disk_attributes;
 	struct task_struct *thread;
 	struct request_queue *rq;
 	spinlock_t queue_lock;
-- 
cgit v1.2.3


From b64d39d8b03fea88417d53715ccbebf71d4dcc9f Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:37 +0200
Subject: mtd: nand: make reads using MTD_OOB_RAW affect only ECC validation

This changes the behavior of MTD_OOB_RAW. It used to read both OOB and
data to the data buffer, however you would still need to specify the
dummy oob buffer.

This is only used in one place, but makes it hard to read data+oob
without ECC test, thus I removed that behavior, and fixed the user.

Now MTD_OOB_RAW behaves just like MTD_OOB_PLACE, but doesn't do ECC
validation.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 19 +++++++------------
 drivers/mtd/nand/nand_bbt.c  | 26 ++++++++++++++++++++++----
 include/linux/mtd/mtd.h      |  4 +---
 3 files changed, 30 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 138674183c1c..51dfea1b3ce6 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -1474,18 +1474,13 @@ static int nand_do_read_ops(struct mtd_info *mtd, loff_t from,
 
 			if (unlikely(oob)) {
 
-				/* Raw mode does data:oob:data:oob */
-				if (ops->mode != MTD_OOB_RAW) {
-					int toread = min(oobreadlen,
-								max_oobsize);
-					if (toread) {
-						oob = nand_transfer_oob(chip,
-							oob, ops, toread);
-						oobreadlen -= toread;
-					}
-				} else
-					buf = nand_transfer_oob(chip,
-						buf, ops, mtd->oobsize);
+				int toread = min(oobreadlen, max_oobsize);
+
+				if (toread) {
+					oob = nand_transfer_oob(chip,
+						oob, ops, toread);
+					oobreadlen -= toread;
+				}
 			}
 
 			if (!(chip->options & NAND_NO_READRDY)) {
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 55c23e5cd210..387c45c366fe 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -237,15 +237,33 @@ static int scan_read_raw(struct mtd_info *mtd, uint8_t *buf, loff_t offs,
 			 size_t len)
 {
 	struct mtd_oob_ops ops;
+	int res;
 
 	ops.mode = MTD_OOB_RAW;
 	ops.ooboffs = 0;
 	ops.ooblen = mtd->oobsize;
-	ops.oobbuf = buf;
-	ops.datbuf = buf;
-	ops.len = len;
 
-	return mtd->read_oob(mtd, offs, &ops);
+
+	while (len > 0) {
+		if (len <= mtd->writesize) {
+			ops.oobbuf = buf + len;
+			ops.datbuf = buf;
+			ops.len = len;
+			return mtd->read_oob(mtd, offs, &ops);
+		} else {
+			ops.oobbuf = buf + mtd->writesize;
+			ops.datbuf = buf;
+			ops.len = mtd->writesize;
+			res = mtd->read_oob(mtd, offs, &ops);
+
+			if (res)
+				return res;
+		}
+
+		buf += mtd->oobsize + mtd->writesize;
+		len -= mtd->writesize;
+	}
+	return 0;
 }
 
 /*
diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h
index 11d8e68d17c0..5326435a7571 100644
--- a/include/linux/mtd/mtd.h
+++ b/include/linux/mtd/mtd.h
@@ -60,9 +60,7 @@ struct mtd_erase_region_info {
  * MTD_OOB_PLACE:	oob data are placed at the given offset
  * MTD_OOB_AUTO:	oob data are automatically placed at the free areas
  *			which are defined by the ecclayout
- * MTD_OOB_RAW:		mode to read raw data+oob in one chunk. The oob data
- *			is inserted into the data. Thats a raw image of the
- *			flash contents.
+ * MTD_OOB_RAW:		mode to read oob and data without doing ECC checking
  */
 typedef enum {
 	MTD_OOB_PLACE,
-- 
cgit v1.2.3


From e0b58d0a7005cd4b9c7fa4694a437a2d86719c13 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:38 +0200
Subject: mtd: nand: add ->badblockbits for minimum number of set bits in bad
 block byte

This can be used to protect against bitflips in that field, but now mostly
for smartmedia.

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 13 +++++++++----
 include/linux/mtd/nand.h     |  1 +
 2 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 51dfea1b3ce6..ba29a29bd743 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -364,14 +364,18 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
 		bad = cpu_to_le16(chip->read_word(mtd));
 		if (chip->badblockpos & 0x1)
 			bad >>= 8;
-		if ((bad & 0xFF) != 0xff)
-			res = 1;
+		else
+			bad &= 0xFF;
 	} else {
 		chip->cmdfunc(mtd, NAND_CMD_READOOB, chip->badblockpos, page);
-		if (chip->read_byte(mtd) != 0xff)
-			res = 1;
+		bad = chip->read_byte(mtd);
 	}
 
+	if (likely(chip->badblockbits == 8))
+		res = bad != 0xFF;
+	else
+		res = hweight8(bad) < chip->badblockbits;
+
 	if (getchip)
 		nand_release_device(mtd);
 
@@ -2884,6 +2888,7 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	/* Set the bad block position */
 	chip->badblockpos = mtd->writesize > 512 ?
 		NAND_LARGE_BADBLOCK_POS : NAND_SMALL_BADBLOCK_POS;
+	chip->badblockbits = 8;
 
 	/* Get chip options, preserve non chip based options */
 	chip->options &= ~NAND_CHIPOPTIONS_MSK;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 48bc2c54302c..f2d4a1ac14b8 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -401,6 +401,7 @@ struct nand_chip {
 	int		subpagesize;
 	uint8_t		cellinfo;
 	int		badblockpos;
+	int		badblockbits;
 
 	flstate_t	state;
 
-- 
cgit v1.2.3


From 5e81e88a4c140586d9212999cea683bcd66a15c6 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Fri, 26 Feb 2010 18:32:56 +0000
Subject: mtd: nand: Allow caller to pass alternative ID table to
 nand_scan_ident()

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/atmel_nand.c    |  2 +-
 drivers/mtd/nand/bcm_umi_nand.c  |  2 +-
 drivers/mtd/nand/cafe_nand.c     |  2 +-
 drivers/mtd/nand/davinci_nand.c  |  2 +-
 drivers/mtd/nand/fsl_elbc_nand.c |  2 +-
 drivers/mtd/nand/mxc_nand.c      |  2 +-
 drivers/mtd/nand/nand_base.c     | 29 +++++++++++++++--------------
 drivers/mtd/nand/s3c2410.c       |  3 ++-
 drivers/mtd/nand/sh_flctl.c      |  2 +-
 drivers/mtd/nand/sm_common.c     |  2 +-
 drivers/mtd/nand/socrates_nand.c |  2 +-
 drivers/mtd/nand/txx9ndfmc.c     |  2 +-
 include/linux/mtd/nand.h         |  4 +++-
 13 files changed, 30 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c
index 524e6c9e0672..04d30887ca7f 100644
--- a/drivers/mtd/nand/atmel_nand.c
+++ b/drivers/mtd/nand/atmel_nand.c
@@ -474,7 +474,7 @@ static int __init atmel_nand_probe(struct platform_device *pdev)
 	}
 
 	/* first scan to find the device and get the page size */
-	if (nand_scan_ident(mtd, 1)) {
+	if (nand_scan_ident(mtd, 1, NULL)) {
 		res = -ENXIO;
 		goto err_scan_ident;
 	}
diff --git a/drivers/mtd/nand/bcm_umi_nand.c b/drivers/mtd/nand/bcm_umi_nand.c
index 087bcd745bb7..5ff90b7e565e 100644
--- a/drivers/mtd/nand/bcm_umi_nand.c
+++ b/drivers/mtd/nand/bcm_umi_nand.c
@@ -446,7 +446,7 @@ static int __devinit bcm_umi_nand_probe(struct platform_device *pdev)
 	 * layout we'll be using.
 	 */
 
-	err = nand_scan_ident(board_mtd, 1);
+	err = nand_scan_ident(board_mtd, 1, NULL);
 	if (err) {
 		printk(KERN_ERR "nand_scan failed: %d\n", err);
 		iounmap(bcm_umi_io_base);
diff --git a/drivers/mtd/nand/cafe_nand.c b/drivers/mtd/nand/cafe_nand.c
index 67e2b33f7eff..01a6fe1c7805 100644
--- a/drivers/mtd/nand/cafe_nand.c
+++ b/drivers/mtd/nand/cafe_nand.c
@@ -761,7 +761,7 @@ static int __devinit cafe_nand_probe(struct pci_dev *pdev,
 		cafe_readl(cafe, GLOBAL_CTRL), cafe_readl(cafe, GLOBAL_IRQ_MASK));
 
 	/* Scan to find existence of the device */
-	if (nand_scan_ident(mtd, 2)) {
+	if (nand_scan_ident(mtd, 2, NULL)) {
 		err = -ENXIO;
 		goto out_irq;
 	}
diff --git a/drivers/mtd/nand/davinci_nand.c b/drivers/mtd/nand/davinci_nand.c
index e2eeaf1e51a3..45bb931c0848 100644
--- a/drivers/mtd/nand/davinci_nand.c
+++ b/drivers/mtd/nand/davinci_nand.c
@@ -690,7 +690,7 @@ static int __init nand_davinci_probe(struct platform_device *pdev)
 	spin_unlock_irq(&davinci_nand_lock);
 
 	/* Scan to find existence of the device(s) */
-	ret = nand_scan_ident(&info->mtd, pdata->mask_chipsel ? 2 : 1);
+	ret = nand_scan_ident(&info->mtd, pdata->mask_chipsel ? 2 : 1, NULL);
 	if (ret < 0) {
 		dev_dbg(&pdev->dev, "no NAND chip(s) found\n");
 		goto err_scan;
diff --git a/drivers/mtd/nand/fsl_elbc_nand.c b/drivers/mtd/nand/fsl_elbc_nand.c
index 1b8328fbb9dc..3f38fb8e6666 100644
--- a/drivers/mtd/nand/fsl_elbc_nand.c
+++ b/drivers/mtd/nand/fsl_elbc_nand.c
@@ -891,7 +891,7 @@ static int __devinit fsl_elbc_chip_probe(struct fsl_elbc_ctrl *ctrl,
 	if (ret)
 		goto err;
 
-	ret = nand_scan_ident(&priv->mtd, 1);
+	ret = nand_scan_ident(&priv->mtd, 1, NULL);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/mxc_nand.c b/drivers/mtd/nand/mxc_nand.c
index 06cc378196b5..474a09e53131 100644
--- a/drivers/mtd/nand/mxc_nand.c
+++ b/drivers/mtd/nand/mxc_nand.c
@@ -819,7 +819,7 @@ static int __init mxcnd_probe(struct platform_device *pdev)
 	}
 
 	/* first scan to find the device and get the page size */
-	if (nand_scan_ident(mtd, 1)) {
+	if (nand_scan_ident(mtd, 1, NULL)) {
 		err = -ENXIO;
 		goto escan;
 	}
diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index ba29a29bd743..1c4823696be2 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2766,10 +2766,10 @@ static void nand_set_defaults(struct nand_chip *chip, int busw)
  */
 static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 						  struct nand_chip *chip,
-						  int busw, int *maf_id)
+						  int busw, int *maf_id,
+						  struct nand_flash_dev *type)
 {
-	struct nand_flash_dev *type = NULL;
-	int i, dev_id, maf_idx;
+	int dev_id, maf_idx;
 	int tmp_id, tmp_manf;
 
 	/* Select the device */
@@ -2808,15 +2808,14 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 		return ERR_PTR(-ENODEV);
 	}
 
-	/* Lookup the flash id */
-	for (i = 0; nand_flash_ids[i].name != NULL; i++) {
-		if (dev_id == nand_flash_ids[i].id) {
-			type =  &nand_flash_ids[i];
-			break;
-		}
-	}
-
 	if (!type)
+		type = nand_flash_ids;
+
+	for (; type->name != NULL; type++)
+		if (dev_id == type->id)
+                        break;
+
+	if (!type->name)
 		return ERR_PTR(-ENODEV);
 
 	if (!mtd->name)
@@ -2926,13 +2925,15 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
  * nand_scan_ident - [NAND Interface] Scan for the NAND device
  * @mtd:	     MTD device structure
  * @maxchips:	     Number of chips to scan for
+ * @table:	     Alternative NAND ID table
  *
  * This is the first phase of the normal nand_scan() function. It
  * reads the flash ID and sets up MTD fields accordingly.
  *
  * The mtd->owner field must be set to the module of the caller.
  */
-int nand_scan_ident(struct mtd_info *mtd, int maxchips)
+int nand_scan_ident(struct mtd_info *mtd, int maxchips,
+		    struct nand_flash_dev *table)
 {
 	int i, busw, nand_maf_id;
 	struct nand_chip *chip = mtd->priv;
@@ -2944,7 +2945,7 @@ int nand_scan_ident(struct mtd_info *mtd, int maxchips)
 	nand_set_defaults(chip, busw);
 
 	/* Read the flash type */
-	type = nand_get_flash_type(mtd, chip, busw, &nand_maf_id);
+	type = nand_get_flash_type(mtd, chip, busw, &nand_maf_id, table);
 
 	if (IS_ERR(type)) {
 		if (!(chip->options & NAND_SCAN_SILENT_NODEV))
@@ -3235,7 +3236,7 @@ int nand_scan(struct mtd_info *mtd, int maxchips)
 		BUG();
 	}
 
-	ret = nand_scan_ident(mtd, maxchips);
+	ret = nand_scan_ident(mtd, maxchips, NULL);
 	if (!ret)
 		ret = nand_scan_tail(mtd);
 	return ret;
diff --git a/drivers/mtd/nand/s3c2410.c b/drivers/mtd/nand/s3c2410.c
index c41ad2285c63..dc02dcd0c08f 100644
--- a/drivers/mtd/nand/s3c2410.c
+++ b/drivers/mtd/nand/s3c2410.c
@@ -1013,7 +1013,8 @@ static int s3c24xx_nand_probe(struct platform_device *pdev)
 		s3c2410_nand_init_chip(info, nmtd, sets);
 
 		nmtd->scan_res = nand_scan_ident(&nmtd->mtd,
-						 (sets) ? sets->nr_chips : 1);
+						 (sets) ? sets->nr_chips : 1,
+						 NULL);
 
 		if (nmtd->scan_res == 0) {
 			s3c2410_nand_update_chip(info, nmtd);
diff --git a/drivers/mtd/nand/sh_flctl.c b/drivers/mtd/nand/sh_flctl.c
index 4260ab78f95c..dbc09a81866e 100644
--- a/drivers/mtd/nand/sh_flctl.c
+++ b/drivers/mtd/nand/sh_flctl.c
@@ -825,7 +825,7 @@ static int __init flctl_probe(struct platform_device *pdev)
 	nand->select_chip = flctl_select_chip;
 	nand->cmdfunc = flctl_cmdfunc;
 
-	ret = nand_scan_ident(flctl_mtd, 1);
+	ret = nand_scan_ident(flctl_mtd, 1, NULL);
 	if (ret)
 		goto err;
 
diff --git a/drivers/mtd/nand/sm_common.c b/drivers/mtd/nand/sm_common.c
index 07b6f725723f..f52bb3949275 100644
--- a/drivers/mtd/nand/sm_common.c
+++ b/drivers/mtd/nand/sm_common.c
@@ -75,7 +75,7 @@ int sm_register_device(struct mtd_info *mtd)
 	chip->options |= NAND_SKIP_BBTSCAN | NAND_SMARTMEDIA;
 
 	/* Scan for card properties */
-	ret = nand_scan_ident(mtd, 1);
+	ret = nand_scan_ident(mtd, 1, NULL);
 
 	if (ret)
 		return ret;
diff --git a/drivers/mtd/nand/socrates_nand.c b/drivers/mtd/nand/socrates_nand.c
index 65748ea2b348..b37cbde6e7db 100644
--- a/drivers/mtd/nand/socrates_nand.c
+++ b/drivers/mtd/nand/socrates_nand.c
@@ -220,7 +220,7 @@ static int __devinit socrates_nand_probe(struct of_device *ofdev,
 	dev_set_drvdata(&ofdev->dev, host);
 
 	/* first scan to find the device and get the page size */
-	if (nand_scan_ident(mtd, 1)) {
+	if (nand_scan_ident(mtd, 1, NULL)) {
 		res = -ENXIO;
 		goto out;
 	}
diff --git a/drivers/mtd/nand/txx9ndfmc.c b/drivers/mtd/nand/txx9ndfmc.c
index 863513c3b69a..054a41c0ef4a 100644
--- a/drivers/mtd/nand/txx9ndfmc.c
+++ b/drivers/mtd/nand/txx9ndfmc.c
@@ -274,7 +274,7 @@ static int txx9ndfmc_nand_scan(struct mtd_info *mtd)
 	struct nand_chip *chip = mtd->priv;
 	int ret;
 
-	ret = nand_scan_ident(mtd, 1);
+	ret = nand_scan_ident(mtd, 1, NULL);
 	if (!ret) {
 		if (mtd->writesize >= 512) {
 			chip->ecc.size = mtd->writesize;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index f2d4a1ac14b8..d152bdf9161f 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -25,11 +25,13 @@
 #include <linux/mtd/bbm.h>
 
 struct mtd_info;
+struct nand_flash_dev;
 /* Scan and identify a NAND device */
 extern int nand_scan (struct mtd_info *mtd, int max_chips);
 /* Separate phases of nand_scan(), allowing board driver to intervene
  * and override command or ECC setup according to flash type */
-extern int nand_scan_ident(struct mtd_info *mtd, int max_chips);
+extern int nand_scan_ident(struct mtd_info *mtd, int max_chips,
+			   struct nand_flash_dev *table);
 extern int nand_scan_tail(struct mtd_info *mtd);
 
 /* Free resources held by the NAND device */
-- 
cgit v1.2.3


From 93edbad69b0491d794c2ec86bcc65c69eac676e3 Mon Sep 17 00:00:00 2001
From: Maxim Levitsky <maximlevitsky@gmail.com>
Date: Mon, 22 Feb 2010 20:39:40 +0200
Subject: mtd: Workaround wrong write protect status on some xD cards

Signed-off-by: Maxim Levitsky <maximlevitsky@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 8 +++++++-
 include/linux/mtd/nand.h     | 6 ++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 1c4823696be2..b9dc65c7253c 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -434,6 +434,11 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
 static int nand_check_wp(struct mtd_info *mtd)
 {
 	struct nand_chip *chip = mtd->priv;
+
+	/* broken xD cards report WP despite being writable */
+	if (chip->options & NAND_BROKEN_XD)
+		return 0;
+
 	/* Check the WP bit */
 	chip->cmdfunc(mtd, NAND_CMD_STATUS, -1, -1);
 	return (chip->read_byte(mtd) & NAND_STATUS_WP) ? 0 : 1;
@@ -3175,7 +3180,8 @@ int nand_scan_tail(struct mtd_info *mtd)
 
 	/* Fill in remaining MTD driver data */
 	mtd->type = MTD_NANDFLASH;
-	mtd->flags = MTD_CAP_NANDFLASH;
+	mtd->flags = (chip->options & NAND_ROM) ? MTD_CAP_ROM :
+						MTD_CAP_NANDFLASH;
 	mtd->erase = nand_erase;
 	mtd->point = NULL;
 	mtd->unpoint = NULL;
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index d152bdf9161f..8bdacb885f90 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -182,6 +182,12 @@ typedef enum {
 /* Chip does not allow subpage writes */
 #define NAND_NO_SUBPAGE_WRITE	0x00000200
 
+/* Device is one of 'new' xD cards that expose fake nand command set */
+#define NAND_BROKEN_XD		0x00000400
+
+/* Device behaves just like nand, but is readonly */
+#define NAND_ROM		0x00000800
+
 /* Options valid for Samsung large page devices */
 #define NAND_SAMSUNG_LP_OPTIONS \
 	(NAND_NO_PADDING | NAND_CACHEPRG | NAND_COPYBACK)
-- 
cgit v1.2.3


From 4abc14a733f9002c05623db755aaafdd27fa7a91 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Wed, 20 Jan 2010 14:52:23 +0100
Subject: iommu-api: Rename ->{un}map function pointers to ->{un}map_range

The new function pointer names match better with the
top-level functions of the iommu-api which are using them.
Main intention of this change is to make the ->{un}map
pointer names free for two new mapping functions.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 4 ++--
 drivers/base/iommu.c        | 4 ++--
 drivers/pci/intel-iommu.c   | 4 ++--
 include/linux/iommu.h       | 8 ++++----
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index adb0ba025702..59cae7c4df54 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2515,8 +2515,8 @@ static struct iommu_ops amd_iommu_ops = {
 	.domain_destroy = amd_iommu_domain_destroy,
 	.attach_dev = amd_iommu_attach_device,
 	.detach_dev = amd_iommu_detach_device,
-	.map = amd_iommu_map_range,
-	.unmap = amd_iommu_unmap_range,
+	.map_range = amd_iommu_map_range,
+	.unmap_range = amd_iommu_unmap_range,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.domain_has_cap = amd_iommu_domain_has_cap,
 };
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index 8ad4ffea6920..f4c86c429297 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -83,14 +83,14 @@ EXPORT_SYMBOL_GPL(iommu_detach_device);
 int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
 		    phys_addr_t paddr, size_t size, int prot)
 {
-	return iommu_ops->map(domain, iova, paddr, size, prot);
+	return iommu_ops->map_range(domain, iova, paddr, size, prot);
 }
 EXPORT_SYMBOL_GPL(iommu_map_range);
 
 void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
 		      size_t size)
 {
-	iommu_ops->unmap(domain, iova, size);
+	iommu_ops->unmap_range(domain, iova, size);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap_range);
 
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 417312528ddf..a714e3db13c1 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3714,8 +3714,8 @@ static struct iommu_ops intel_iommu_ops = {
 	.domain_destroy = intel_iommu_domain_destroy,
 	.attach_dev	= intel_iommu_attach_device,
 	.detach_dev	= intel_iommu_detach_device,
-	.map		= intel_iommu_map_range,
-	.unmap		= intel_iommu_unmap_range,
+	.map_range	= intel_iommu_map_range,
+	.unmap_range	= intel_iommu_unmap_range,
 	.iova_to_phys	= intel_iommu_iova_to_phys,
 	.domain_has_cap = intel_iommu_domain_has_cap,
 };
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 3af4ffd591b9..0f18f37a6503 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -36,10 +36,10 @@ struct iommu_ops {
 	void (*domain_destroy)(struct iommu_domain *domain);
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
-	int (*map)(struct iommu_domain *domain, unsigned long iova,
-		   phys_addr_t paddr, size_t size, int prot);
-	void (*unmap)(struct iommu_domain *domain, unsigned long iova,
-		      size_t size);
+	int (*map_range)(struct iommu_domain *domain, unsigned long iova,
+			 phys_addr_t paddr, size_t size, int prot);
+	void (*unmap_range)(struct iommu_domain *domain, unsigned long iova,
+			    size_t size);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
-- 
cgit v1.2.3


From cefc53c7f494240d4813c80154c7617452d1904d Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Fri, 8 Jan 2010 13:35:09 +0100
Subject: iommu-api: Add iommu_map and iommu_unmap functions

These two functions provide support for mapping and
unmapping physical addresses to io virtual addresses. The
difference from iommu_{un}map_range() is that the new
functions take a gfp_order parameter instead of a size. This
makes it easier for the IOMMU backend implementations to
detect whether a given range can be mapped by larger page sizes.
These new functions should replace the old ones in the long
term.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/base/iommu.c  | 31 +++++++++++++++++++++++++++++++
 include/linux/iommu.h | 16 ++++++++++++++++
 2 files changed, 47 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index f4c86c429297..cf7cbec116ed 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -107,3 +107,34 @@ int iommu_domain_has_cap(struct iommu_domain *domain,
 	return iommu_ops->domain_has_cap(domain, cap);
 }
 EXPORT_SYMBOL_GPL(iommu_domain_has_cap);
+
+int iommu_map(struct iommu_domain *domain, unsigned long iova,
+	      phys_addr_t paddr, int gfp_order, int prot)
+{
+	unsigned long invalid_mask;
+	size_t size;
+
+	size         = 0x1000UL << gfp_order;
+	invalid_mask = size - 1;
+
+	BUG_ON((iova | paddr) & invalid_mask);
+
+	return iommu_ops->map_range(domain, iova, paddr, size, prot);
+}
+EXPORT_SYMBOL_GPL(iommu_map);
+
+int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
+{
+	unsigned long invalid_mask;
+	size_t size;
+
+	size         = 0x1000UL << gfp_order;
+	invalid_mask = size - 1;
+
+	BUG_ON(iova & invalid_mask);
+
+	iommu_ops->unmap_range(domain, iova, size);
+
+	return gfp_order;
+}
+EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 0f18f37a6503..6d0035bb1a0c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -60,6 +60,10 @@ extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
 			   phys_addr_t paddr, size_t size, int prot);
 extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
 			      size_t size);
+extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
+		     phys_addr_t paddr, int gfp_order, int prot);
+extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+		       int gfp_order);
 extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 				      unsigned long iova);
 extern int iommu_domain_has_cap(struct iommu_domain *domain,
@@ -108,6 +112,18 @@ static inline void iommu_unmap_range(struct iommu_domain *domain,
 {
 }
 
+static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
+			    phys_addr_t paddr, int gfp_order, int prot)
+{
+	return -ENODEV;
+}
+
+static inline int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
+			      int gfp_order)
+{
+	return -ENODEV;
+}
+
 static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 					     unsigned long iova)
 {
-- 
cgit v1.2.3


From 67651786948c360c3122b8a17cb1e59209d50880 Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 21 Jan 2010 16:32:27 +0100
Subject: iommu-api: Add ->{un}map callbacks to iommu_ops

This patch adds new callbacks for mapping and unmapping
pages to the iommu_ops structure. These callbacks are aware
of page sizes which makes them different to the
->{un}map_range callbacks.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 drivers/base/iommu.c  | 6 ++++++
 include/linux/iommu.h | 4 ++++
 2 files changed, 10 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index cf7cbec116ed..55d37e4609eb 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -119,6 +119,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 
 	BUG_ON((iova | paddr) & invalid_mask);
 
+	if (iommu_ops->map)
+		return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
+
 	return iommu_ops->map_range(domain, iova, paddr, size, prot);
 }
 EXPORT_SYMBOL_GPL(iommu_map);
@@ -133,6 +136,9 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 
 	BUG_ON(iova & invalid_mask);
 
+	if (iommu_ops->unmap)
+		return iommu_ops->unmap(domain, iova, gfp_order);
+
 	iommu_ops->unmap_range(domain, iova, size);
 
 	return gfp_order;
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 6d0035bb1a0c..5a7a3d888dac 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -36,6 +36,10 @@ struct iommu_ops {
 	void (*domain_destroy)(struct iommu_domain *domain);
 	int (*attach_dev)(struct iommu_domain *domain, struct device *dev);
 	void (*detach_dev)(struct iommu_domain *domain, struct device *dev);
+	int (*map)(struct iommu_domain *domain, unsigned long iova,
+		   phys_addr_t paddr, int gfp_order, int prot);
+	int (*unmap)(struct iommu_domain *domain, unsigned long iova,
+		     int gfp_order);
 	int (*map_range)(struct iommu_domain *domain, unsigned long iova,
 			 phys_addr_t paddr, size_t size, int prot);
 	void (*unmap_range)(struct iommu_domain *domain, unsigned long iova,
-- 
cgit v1.2.3


From 12c7389abe5786349d3ea6da1961cf78d0c1c7cd Mon Sep 17 00:00:00 2001
From: Joerg Roedel <joerg.roedel@amd.com>
Date: Thu, 21 Jan 2010 11:50:28 +0100
Subject: iommu-api: Remove iommu_{un}map_range functions

These functions are no longer used and can be removed
safely. Their functionality is now provided by the
iommu_{un}map functions which are also capable of multiple
page sizes.

Signed-off-by: Joerg Roedel <joerg.roedel@amd.com>
---
 arch/x86/kernel/amd_iommu.c | 48 ---------------------------------------------
 drivers/base/iommu.c        | 26 ++----------------------
 include/linux/iommu.h       | 20 -------------------
 3 files changed, 2 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c
index 0e068c9ca5f5..d8da9988edd9 100644
--- a/arch/x86/kernel/amd_iommu.c
+++ b/arch/x86/kernel/amd_iommu.c
@@ -2506,52 +2506,6 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
 	return ret;
 }
 
-static int amd_iommu_map_range(struct iommu_domain *dom,
-			       unsigned long iova, phys_addr_t paddr,
-			       size_t size, int iommu_prot)
-{
-	struct protection_domain *domain = dom->priv;
-	unsigned long i,  npages = iommu_num_pages(paddr, size, PAGE_SIZE);
-	int prot = 0;
-	int ret;
-
-	if (iommu_prot & IOMMU_READ)
-		prot |= IOMMU_PROT_IR;
-	if (iommu_prot & IOMMU_WRITE)
-		prot |= IOMMU_PROT_IW;
-
-	iova  &= PAGE_MASK;
-	paddr &= PAGE_MASK;
-
-	for (i = 0; i < npages; ++i) {
-		ret = iommu_map_page(domain, iova, paddr, prot, PAGE_SIZE);
-		if (ret)
-			return ret;
-
-		iova  += PAGE_SIZE;
-		paddr += PAGE_SIZE;
-	}
-
-	return 0;
-}
-
-static void amd_iommu_unmap_range(struct iommu_domain *dom,
-				  unsigned long iova, size_t size)
-{
-
-	struct protection_domain *domain = dom->priv;
-	unsigned long i,  npages = iommu_num_pages(iova, size, PAGE_SIZE);
-
-	iova  &= PAGE_MASK;
-
-	for (i = 0; i < npages; ++i) {
-		iommu_unmap_page(domain, iova, PAGE_SIZE);
-		iova  += PAGE_SIZE;
-	}
-
-	iommu_flush_tlb_pde(domain);
-}
-
 static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
 			 phys_addr_t paddr, int gfp_order, int iommu_prot)
 {
@@ -2616,8 +2570,6 @@ static struct iommu_ops amd_iommu_ops = {
 	.detach_dev = amd_iommu_detach_device,
 	.map = amd_iommu_map,
 	.unmap = amd_iommu_unmap,
-	.map_range = amd_iommu_map_range,
-	.unmap_range = amd_iommu_unmap_range,
 	.iova_to_phys = amd_iommu_iova_to_phys,
 	.domain_has_cap = amd_iommu_domain_has_cap,
 };
diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c
index 55d37e4609eb..6e6b6a11b3ce 100644
--- a/drivers/base/iommu.c
+++ b/drivers/base/iommu.c
@@ -80,20 +80,6 @@ void iommu_detach_device(struct iommu_domain *domain, struct device *dev)
 }
 EXPORT_SYMBOL_GPL(iommu_detach_device);
 
-int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
-		    phys_addr_t paddr, size_t size, int prot)
-{
-	return iommu_ops->map_range(domain, iova, paddr, size, prot);
-}
-EXPORT_SYMBOL_GPL(iommu_map_range);
-
-void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
-		      size_t size)
-{
-	iommu_ops->unmap_range(domain, iova, size);
-}
-EXPORT_SYMBOL_GPL(iommu_unmap_range);
-
 phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain,
 			       unsigned long iova)
 {
@@ -119,10 +105,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
 
 	BUG_ON((iova | paddr) & invalid_mask);
 
-	if (iommu_ops->map)
-		return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
-
-	return iommu_ops->map_range(domain, iova, paddr, size, prot);
+	return iommu_ops->map(domain, iova, paddr, gfp_order, prot);
 }
 EXPORT_SYMBOL_GPL(iommu_map);
 
@@ -136,11 +119,6 @@ int iommu_unmap(struct iommu_domain *domain, unsigned long iova, int gfp_order)
 
 	BUG_ON(iova & invalid_mask);
 
-	if (iommu_ops->unmap)
-		return iommu_ops->unmap(domain, iova, gfp_order);
-
-	iommu_ops->unmap_range(domain, iova, size);
-
-	return gfp_order;
+	return iommu_ops->unmap(domain, iova, gfp_order);
 }
 EXPORT_SYMBOL_GPL(iommu_unmap);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 5a7a3d888dac..be22ad83689c 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -40,10 +40,6 @@ struct iommu_ops {
 		   phys_addr_t paddr, int gfp_order, int prot);
 	int (*unmap)(struct iommu_domain *domain, unsigned long iova,
 		     int gfp_order);
-	int (*map_range)(struct iommu_domain *domain, unsigned long iova,
-			 phys_addr_t paddr, size_t size, int prot);
-	void (*unmap_range)(struct iommu_domain *domain, unsigned long iova,
-			    size_t size);
 	phys_addr_t (*iova_to_phys)(struct iommu_domain *domain,
 				    unsigned long iova);
 	int (*domain_has_cap)(struct iommu_domain *domain,
@@ -60,10 +56,6 @@ extern int iommu_attach_device(struct iommu_domain *domain,
 			       struct device *dev);
 extern void iommu_detach_device(struct iommu_domain *domain,
 				struct device *dev);
-extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova,
-			   phys_addr_t paddr, size_t size, int prot);
-extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova,
-			      size_t size);
 extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
 		     phys_addr_t paddr, int gfp_order, int prot);
 extern int iommu_unmap(struct iommu_domain *domain, unsigned long iova,
@@ -104,18 +96,6 @@ static inline void iommu_detach_device(struct iommu_domain *domain,
 {
 }
 
-static inline int iommu_map_range(struct iommu_domain *domain,
-				  unsigned long iova, phys_addr_t paddr,
-				  size_t size, int prot)
-{
-	return -ENODEV;
-}
-
-static inline void iommu_unmap_range(struct iommu_domain *domain,
-				     unsigned long iova, size_t size)
-{
-}
-
 static inline int iommu_map(struct iommu_domain *domain, unsigned long iova,
 			    phys_addr_t paddr, int gfp_order, int prot)
 {
-- 
cgit v1.2.3


From 63e690caf24e8f43ba019fe1107669746b072d80 Mon Sep 17 00:00:00 2001
From: Andrea Gelmini <andrea.gelmini@gelma.net>
Date: Mon, 8 Mar 2010 13:13:07 +0100
Subject: netfilter: include/linux/netfilter/nf_conntrack_tuple_common.h:
 Checkpatch cleanup

include/linux/netfilter/nf_conntrack_tuple_common.h:5: ERROR: open brace '{' following enum go on the same line

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_tuple_common.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_tuple_common.h b/include/linux/netfilter/nf_conntrack_tuple_common.h
index 8e145f0d61cb..2ea22b018a87 100644
--- a/include/linux/netfilter/nf_conntrack_tuple_common.h
+++ b/include/linux/netfilter/nf_conntrack_tuple_common.h
@@ -1,8 +1,7 @@
 #ifndef _NF_CONNTRACK_TUPLE_COMMON_H
 #define _NF_CONNTRACK_TUPLE_COMMON_H
 
-enum ip_conntrack_dir
-{
+enum ip_conntrack_dir {
 	IP_CT_DIR_ORIGINAL,
 	IP_CT_DIR_REPLY,
 	IP_CT_DIR_MAX
-- 
cgit v1.2.3


From 7b4df05537f4e6c0c3524055ece7f99b5c98cc87 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 8 Mar 2010 13:17:01 +0100
Subject: netfilter: remove stale declaration for ip6_masked_addrcmp()

Commit f2ffd9ee... ("[NETFILTER]: Move ip6_masked_addrcmp to
include/net/ipv6.h") replaced ip6_masked_addrcmp() with
ipv6_masked_addr_cmp().  Function definition went away.

Let's remove its declaration from the header file as well.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_ipv6/ip6_tables.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h
index e5ba03d783c6..18442ff19c07 100644
--- a/include/linux/netfilter_ipv6/ip6_tables.h
+++ b/include/linux/netfilter_ipv6/ip6_tables.h
@@ -316,10 +316,6 @@ extern int ip6t_ext_hdr(u8 nexthdr);
 extern int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,
 			 int target, unsigned short *fragoff);
 
-extern int ip6_masked_addrcmp(const struct in6_addr *addr1,
-			      const struct in6_addr *mask,
-			      const struct in6_addr *addr2);
-
 #define IP6T_ALIGN(s) XT_ALIGN(s)
 
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From a636ee7fb35b731ba2b331f6294e809bb6be09c8 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 9 Mar 2010 06:57:53 +0000
Subject: driver core: Early dev_name() support.

Presently early platform devices suffer from the fact they are unable to
use dev_xxx() calls early on due to dev_name() and others being
unavailable at the time ->probe() is called.

This implements early init_name construction from the matched name/id
pair following the semantics of the late device/driver match. As a
result, matched IDs (inclusive of requested ones) are preserved when the
handoff from the early platform code happens at kobject initialization
time.

Since we still require kmalloc slabs to be available at this point, using
kstrdup() for establishing the init_name works fine. This subsequently
needs to be tested from dev_name() prior to the init_name being cleared
by the driver core. We don't kfree() since others will already have a
handle on the string long before the kobject initialization takes place.

This is also needed to permit drivers to use the clock framework early,
without having to manually construct their own device IDs from the match
id/name pair locally (needed by the early console and timer code on sh
and arm).

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c | 19 +++++++++++++++++++
 include/linux/device.h  |  4 ++++
 2 files changed, 23 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 1ba9d617d241..d2d4926c5c4c 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -1239,6 +1239,25 @@ static int __init early_platform_driver_probe_id(char *class_str,
 		}
 
 		if (match) {
+			/*
+			 * Set up a sensible init_name to enable
+			 * dev_name() and others to be used before the
+			 * rest of the driver core is initialized.
+			 */
+			if (!match->dev.init_name) {
+				char buf[32];
+
+				if (match->id != -1)
+					snprintf(buf, sizeof(buf), "%s.%d",
+						 match->name, match->id);
+				else
+					snprintf(buf, sizeof(buf), "%s",
+						 match->name);
+
+				match->dev.init_name = kstrdup(buf, GFP_KERNEL);
+				if (!match->dev.init_name)
+					return -ENOMEM;
+			}
 			if (epdrv->pdrv->probe(match))
 				pr_warning("%s: unable to probe %s early.\n",
 					   class_str, match->name);
diff --git a/include/linux/device.h b/include/linux/device.h
index 182192892d45..241b96bcd7ad 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -451,6 +451,10 @@ struct device {
 
 static inline const char *dev_name(const struct device *dev)
 {
+	/* Use the init name until the kobject becomes available */
+	if (dev->init_name)
+		return dev->init_name;
+
 	return kobject_name(&dev->kobj);
 }
 
-- 
cgit v1.2.3


From ca037701a025334e724e5c61b3b1082940c8b981 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Tue, 2 Mar 2010 19:52:12 +0100
Subject: perf, x86: Add PEBS infrastructure

This patch implements support for Intel Precise Event Based Sampling,
which is an alternative counter mode in which the counter triggers a
hardware assist to collect information on events. The hardware assist
takes a trap-like snapshot of a subset of the machine registers.

This data is written to the Intel Debug-Store, which can be programmed
with a data threshold at which to raise a PMI.

With the PEBS hardware assist being trap-like, the reported IP is always
one instruction after the actual instruction that triggered the event.

This implements a simple PEBS model that always takes a single PEBS event
at a time. This is done so that the interaction with the rest of the
system is as expected (freq adjust, period randomization, lbr,
callchains, etc.).

It adds an ABI element: perf_event_attr::precise, which indicates that we
wish to use this (constrained, but precise) mode.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.392111285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c          | 223 +++++-------
 arch/x86/kernel/cpu/perf_event_intel.c    | 150 ++------
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 557 ++++++++++++++++++++++++++++++
 include/linux/perf_event.h                |   3 +-
 4 files changed, 671 insertions(+), 262 deletions(-)
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_ds.c

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1d665a0b202c..0c03d5c1671f 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -31,45 +31,6 @@
 
 static u64 perf_event_mask __read_mostly;
 
-/* The maximal number of PEBS events: */
-#define MAX_PEBS_EVENTS	4
-
-/* The size of a BTS record in bytes: */
-#define BTS_RECORD_SIZE		24
-
-/* The size of a per-cpu BTS buffer in bytes: */
-#define BTS_BUFFER_SIZE		(BTS_RECORD_SIZE * 2048)
-
-/* The BTS overflow threshold in bytes from the end of the buffer: */
-#define BTS_OVFL_TH		(BTS_RECORD_SIZE * 128)
-
-
-/*
- * Bits in the debugctlmsr controlling branch tracing.
- */
-#define X86_DEBUGCTL_TR			(1 << 6)
-#define X86_DEBUGCTL_BTS		(1 << 7)
-#define X86_DEBUGCTL_BTINT		(1 << 8)
-#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
-#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
-
-/*
- * A debug store configuration.
- *
- * We only support architectures that use 64bit fields.
- */
-struct debug_store {
-	u64	bts_buffer_base;
-	u64	bts_index;
-	u64	bts_absolute_maximum;
-	u64	bts_interrupt_threshold;
-	u64	pebs_buffer_base;
-	u64	pebs_index;
-	u64	pebs_absolute_maximum;
-	u64	pebs_interrupt_threshold;
-	u64	pebs_event_reset[MAX_PEBS_EVENTS];
-};
-
 struct event_constraint {
 	union {
 		unsigned long	idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -88,17 +49,29 @@ struct amd_nb {
 };
 
 struct cpu_hw_events {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	struct perf_event	*events[X86_PMC_IDX_MAX]; /* in counter order */
 	unsigned long		active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	unsigned long		interrupts;
 	int			enabled;
-	struct debug_store	*ds;
 
 	int			n_events;
 	int			n_added;
 	int			assign[X86_PMC_IDX_MAX]; /* event to counter assignment */
 	u64			tags[X86_PMC_IDX_MAX];
 	struct perf_event	*event_list[X86_PMC_IDX_MAX]; /* in enabled order */
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	struct debug_store	*ds;
+	u64			pebs_enabled;
+
+	/*
+	 * AMD specific bits
+	 */
 	struct amd_nb		*amd_nb;
 };
 
@@ -112,12 +85,24 @@ struct cpu_hw_events {
 #define EVENT_CONSTRAINT(c, n, m)	\
 	__EVENT_CONSTRAINT(c, n, m, HWEIGHT(n))
 
+/*
+ * Constraint on the Event code.
+ */
 #define INTEL_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
+/*
+ * Constraint on the Event code + UMask + fixed-mask
+ */
 #define FIXED_EVENT_CONSTRAINT(c, n)	\
 	EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK)
 
+/*
+ * Constraint on the Event code + UMask
+ */
+#define PEBS_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
+
 #define EVENT_CONSTRAINT_END		\
 	EVENT_CONSTRAINT(0, 0, 0)
 
@@ -128,6 +113,9 @@ struct cpu_hw_events {
  * struct x86_pmu - generic x86 pmu
  */
 struct x86_pmu {
+	/*
+	 * Generic x86 PMC bits
+	 */
 	const char	*name;
 	int		version;
 	int		(*handle_irq)(struct pt_regs *);
@@ -146,10 +134,6 @@ struct x86_pmu {
 	u64		event_mask;
 	int		apic;
 	u64		max_period;
-	u64		intel_ctrl;
-	void		(*enable_bts)(u64 config);
-	void		(*disable_bts)(void);
-
 	struct event_constraint *
 			(*get_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
@@ -162,6 +146,19 @@ struct x86_pmu {
 	void		(*cpu_starting)(int cpu);
 	void		(*cpu_dying)(int cpu);
 	void		(*cpu_dead)(int cpu);
+
+	/*
+	 * Intel Arch Perfmon v2+
+	 */
+	u64		intel_ctrl;
+
+	/*
+	 * Intel DebugStore bits
+	 */
+	int		bts, pebs;
+	int		pebs_record_size;
+	void		(*drain_pebs)(struct pt_regs *regs);
+	struct event_constraint *pebs_constraints;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -293,110 +290,14 @@ static void release_pmc_hardware(void)
 #endif
 }
 
-static inline bool bts_available(void)
-{
-	return x86_pmu.enable_bts != NULL;
-}
-
-static void init_debug_store_on_cpu(int cpu)
-{
-	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-	if (!ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
-		     (u32)((u64)(unsigned long)ds),
-		     (u32)((u64)(unsigned long)ds >> 32));
-}
-
-static void fini_debug_store_on_cpu(int cpu)
-{
-	if (!per_cpu(cpu_hw_events, cpu).ds)
-		return;
-
-	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
-}
-
-static void release_bts_hardware(void)
-{
-	int cpu;
-
-	if (!bts_available())
-		return;
-
-	get_online_cpus();
-
-	for_each_online_cpu(cpu)
-		fini_debug_store_on_cpu(cpu);
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
-		kfree(ds);
-	}
-
-	put_online_cpus();
-}
-
-static int reserve_bts_hardware(void)
-{
-	int cpu, err = 0;
-
-	if (!bts_available())
-		return 0;
-
-	get_online_cpus();
-
-	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-		void *buffer;
-
-		err = -ENOMEM;
-		buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-		if (unlikely(!buffer))
-			break;
-
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds)) {
-			kfree(buffer);
-			break;
-		}
-
-		ds->bts_buffer_base = (u64)(unsigned long)buffer;
-		ds->bts_index = ds->bts_buffer_base;
-		ds->bts_absolute_maximum =
-			ds->bts_buffer_base + BTS_BUFFER_SIZE;
-		ds->bts_interrupt_threshold =
-			ds->bts_absolute_maximum - BTS_OVFL_TH;
-
-		per_cpu(cpu_hw_events, cpu).ds = ds;
-		err = 0;
-	}
-
-	if (err)
-		release_bts_hardware();
-	else {
-		for_each_online_cpu(cpu)
-			init_debug_store_on_cpu(cpu);
-	}
-
-	put_online_cpus();
-
-	return err;
-}
+static int reserve_ds_buffers(void);
+static void release_ds_buffers(void);
 
 static void hw_perf_event_destroy(struct perf_event *event)
 {
 	if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) {
 		release_pmc_hardware();
-		release_bts_hardware();
+		release_ds_buffers();
 		mutex_unlock(&pmc_reserve_mutex);
 	}
 }
@@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 			if (!reserve_pmc_hardware())
 				err = -EBUSY;
 			else
-				err = reserve_bts_hardware();
+				err = reserve_ds_buffers();
 		}
 		if (!err)
 			atomic_inc(&active_events);
@@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!bts_available())
+		if (!x86_pmu.bts)
 			return -EOPNOTSUPP;
 
 		/* BTS is currently only allowed for user-mode. */
@@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event)
 void perf_event_print_debug(void)
 {
 	u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed;
+	u64 pebs;
 	struct cpu_hw_events *cpuc;
 	unsigned long flags;
 	int cpu, idx;
@@ -1012,12 +914,14 @@ void perf_event_print_debug(void)
 		rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status);
 		rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow);
 		rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed);
+		rdmsrl(MSR_IA32_PEBS_ENABLE, pebs);
 
 		pr_info("\n");
 		pr_info("CPU#%d: ctrl:       %016llx\n", cpu, ctrl);
 		pr_info("CPU#%d: status:     %016llx\n", cpu, status);
 		pr_info("CPU#%d: overflow:   %016llx\n", cpu, overflow);
 		pr_info("CPU#%d: fixed:      %016llx\n", cpu, fixed);
+		pr_info("CPU#%d: pebs:       %016llx\n", cpu, pebs);
 	}
 	pr_info("CPU#%d: active:       %016llx\n", cpu, *(u64 *)cpuc->active_mask);
 
@@ -1333,6 +1237,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
 static int __cpuinit
@@ -1464,6 +1369,32 @@ static const struct pmu pmu = {
 	.unthrottle	= x86_pmu_unthrottle,
 };
 
+/*
+ * validate that we can schedule this event
+ */
+static int validate_event(struct perf_event *event)
+{
+	struct cpu_hw_events *fake_cpuc;
+	struct event_constraint *c;
+	int ret = 0;
+
+	fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
+	if (!fake_cpuc)
+		return -ENOMEM;
+
+	c = x86_pmu.get_event_constraints(fake_cpuc, event);
+
+	if (!c || !c->weight)
+		ret = -ENOSPC;
+
+	if (x86_pmu.put_event_constraints)
+		x86_pmu.put_event_constraints(fake_cpuc, event);
+
+	kfree(fake_cpuc);
+
+	return ret;
+}
+
 /*
  * validate a single event group
  *
@@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
 
 		if (event->group_leader != event)
 			err = validate_group(event);
+		else
+			err = validate_event(event);
 
 		event->pmu = tmp;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 84bfde64a337..11446412e4c7 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event)
 	return hw_event & CORE_EVNTSEL_MASK;
 }
 
-static void intel_pmu_enable_bts(u64 config)
-{
-	unsigned long debugctlmsr;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr |= X86_DEBUGCTL_TR;
-	debugctlmsr |= X86_DEBUGCTL_BTS;
-	debugctlmsr |= X86_DEBUGCTL_BTINT;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
-
-	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
-		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static void intel_pmu_disable_bts(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	unsigned long debugctlmsr;
-
-	if (!cpuc->ds)
-		return;
-
-	debugctlmsr = get_debugctlmsr();
-
-	debugctlmsr &=
-		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
-		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
-
-	update_debugctlmsr(debugctlmsr);
-}
-
 static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -514,6 +478,8 @@ static void intel_pmu_disable_all(void)
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
 		intel_pmu_disable_bts();
+
+	intel_pmu_pebs_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -531,6 +497,8 @@ static void intel_pmu_enable_all(void)
 
 		intel_pmu_enable_bts(event->hw.config);
 	}
+
+	intel_pmu_pebs_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -547,8 +515,7 @@ static inline void intel_pmu_ack_status(u64 ack)
 	wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack);
 }
 
-static inline void
-intel_pmu_disable_fixed(struct hw_perf_event *hwc)
+static void intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, mask;
@@ -560,68 +527,7 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc)
 	(void)checking_wrmsrl(hwc->config_base, ctrl_val);
 }
 
-static void intel_pmu_drain_bts_buffer(void)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct bts_record {
-		u64	from;
-		u64	to;
-		u64	flags;
-	};
-	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
-	struct bts_record *at, *top;
-	struct perf_output_handle handle;
-	struct perf_event_header header;
-	struct perf_sample_data data;
-	struct pt_regs regs;
-
-	if (!event)
-		return;
-
-	if (!ds)
-		return;
-
-	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
-	top = (struct bts_record *)(unsigned long)ds->bts_index;
-
-	if (top <= at)
-		return;
-
-	ds->bts_index = ds->bts_buffer_base;
-
-	perf_sample_data_init(&data, 0);
-
-	data.period	= event->hw.last_period;
-	regs.ip		= 0;
-
-	/*
-	 * Prepare a generic sample, i.e. fill in the invariant fields.
-	 * We will overwrite the from and to address before we output
-	 * the sample.
-	 */
-	perf_prepare_sample(&header, &data, event, &regs);
-
-	if (perf_output_begin(&handle, event,
-			      header.size * (top - at), 1, 1))
-		return;
-
-	for (; at < top; at++) {
-		data.ip		= at->from;
-		data.addr	= at->to;
-
-		perf_output_sample(&handle, &header, &data, event);
-	}
-
-	perf_output_end(&handle);
-
-	/* There's new data available. */
-	event->hw.interrupts++;
-	event->pending_kill = POLL_IN;
-}
-
-static inline void
-intel_pmu_disable_event(struct perf_event *event)
+static void intel_pmu_disable_event(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
 
@@ -637,10 +543,12 @@ intel_pmu_disable_event(struct perf_event *event)
 	}
 
 	x86_pmu_disable_event(event);
+
+	if (unlikely(event->attr.precise))
+		intel_pmu_pebs_disable(hwc);
 }
 
-static inline void
-intel_pmu_enable_fixed(struct hw_perf_event *hwc)
+static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
 {
 	int idx = hwc->idx - X86_PMC_IDX_FIXED;
 	u64 ctrl_val, bits, mask;
@@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
+	if (unlikely(event->attr.precise))
+		intel_pmu_pebs_enable(hwc);
+
 	__x86_pmu_enable_event(hwc);
 }
 
@@ -762,6 +673,13 @@ again:
 
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
+
+	/*
+	 * PEBS overflow sets bit 62 in the global status register
+	 */
+	if (__test_and_clear_bit(62, (unsigned long *)&status))
+		x86_pmu.drain_pebs(regs);
+
 	for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) {
 		struct perf_event *event = cpuc->events[bit];
 
@@ -791,22 +709,18 @@ done:
 	return 1;
 }
 
-static struct event_constraint bts_constraint =
-	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
-
 static struct event_constraint *
-intel_special_constraints(struct perf_event *event)
+intel_bts_constraints(struct perf_event *event)
 {
-	unsigned int hw_event;
-
-	hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int hw_event, bts_event;
 
-	if (unlikely((hw_event ==
-		      x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) &&
-		     (event->hw.sample_period == 1))) {
+	hw_event = hwc->config & INTEL_ARCH_EVENT_MASK;
+	bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS);
 
+	if (unlikely(hw_event == bts_event && hwc->sample_period == 1))
 		return &bts_constraint;
-	}
+
 	return NULL;
 }
 
@@ -815,7 +729,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 {
 	struct event_constraint *c;
 
-	c = intel_special_constraints(event);
+	c = intel_bts_constraints(event);
+	if (c)
+		return c;
+
+	c = intel_pebs_constraints(event);
 	if (c)
 		return c;
 
@@ -864,8 +782,6 @@ static __initconst struct x86_pmu intel_pmu = {
 	 * the generic event period:
 	 */
 	.max_period		= (1ULL << 31) - 1,
-	.enable_bts		= intel_pmu_enable_bts,
-	.disable_bts		= intel_pmu_disable_bts,
 	.get_event_constraints	= intel_get_event_constraints,
 
 	.cpu_starting		= init_debug_store_on_cpu,
@@ -915,6 +831,8 @@ static __init int intel_pmu_init(void)
 	if (version > 1)
 		x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3);
 
+	intel_ds_init();
+
 	/*
 	 * Install the hw-cache-events table:
 	 */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
new file mode 100644
index 000000000000..0d994ef213b9
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -0,0 +1,557 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+/* The maximal number of PEBS events: */
+#define MAX_PEBS_EVENTS		4
+
+/* The size of a BTS record in bytes: */
+#define BTS_RECORD_SIZE		24
+
+#define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
+#define PEBS_BUFFER_SIZE	PAGE_SIZE
+
+/*
+ * pebs_record_32 for p4 and core not supported
+
+struct pebs_record_32 {
+	u32 flags, ip;
+	u32 ax, bc, cx, dx;
+	u32 si, di, bp, sp;
+};
+
+ */
+
+struct pebs_record_core {
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+};
+
+struct pebs_record_nhm {
+	u64 flags, ip;
+	u64 ax, bx, cx, dx;
+	u64 si, di, bp, sp;
+	u64 r8,  r9,  r10, r11;
+	u64 r12, r13, r14, r15;
+	u64 status, dla, dse, lat;
+};
+
+/*
+ * Bits in the debugctlmsr controlling branch tracing.
+ */
+#define X86_DEBUGCTL_TR			(1 << 6)
+#define X86_DEBUGCTL_BTS		(1 << 7)
+#define X86_DEBUGCTL_BTINT		(1 << 8)
+#define X86_DEBUGCTL_BTS_OFF_OS		(1 << 9)
+#define X86_DEBUGCTL_BTS_OFF_USR	(1 << 10)
+
+/*
+ * A debug store configuration.
+ *
+ * We only support architectures that use 64bit fields.
+ */
+struct debug_store {
+	u64	bts_buffer_base;
+	u64	bts_index;
+	u64	bts_absolute_maximum;
+	u64	bts_interrupt_threshold;
+	u64	pebs_buffer_base;
+	u64	pebs_index;
+	u64	pebs_absolute_maximum;
+	u64	pebs_interrupt_threshold;
+	u64	pebs_event_reset[MAX_PEBS_EVENTS];
+};
+
+static void init_debug_store_on_cpu(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds)
+		return;
+
+	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA,
+		     (u32)((u64)(unsigned long)ds),
+		     (u32)((u64)(unsigned long)ds >> 32));
+}
+
+static void fini_debug_store_on_cpu(int cpu)
+{
+	if (!per_cpu(cpu_hw_events, cpu).ds)
+		return;
+
+	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
+}
+
+static void release_ds_buffers(void)
+{
+	int cpu;
+
+	if (!x86_pmu.bts && !x86_pmu.pebs)
+		return;
+
+	get_online_cpus();
+
+	for_each_online_cpu(cpu)
+		fini_debug_store_on_cpu(cpu);
+
+	for_each_possible_cpu(cpu) {
+		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+		if (!ds)
+			continue;
+
+		per_cpu(cpu_hw_events, cpu).ds = NULL;
+
+		kfree((void *)(unsigned long)ds->pebs_buffer_base);
+		kfree((void *)(unsigned long)ds->bts_buffer_base);
+		kfree(ds);
+	}
+
+	put_online_cpus();
+}
+
+/* Allocate the DS area and the BTS/PEBS buffers of every possible cpu. */
+static int reserve_ds_buffers(void)
+{
+	int cpu, err = 0;
+
+	if (!x86_pmu.bts && !x86_pmu.pebs)
+		return 0;
+
+	get_online_cpus();
+
+	for_each_possible_cpu(cpu) {
+		struct debug_store *ds;
+		void *buffer;
+		int max, thresh;
+
+		err = -ENOMEM;
+		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+		if (unlikely(!ds))
+			break;	/* nothing allocated for this cpu yet */
+		per_cpu(cpu_hw_events, cpu).ds = ds;
+
+		if (x86_pmu.bts) {
+			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+			if (unlikely(!buffer))
+				break;
+
+			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+			thresh = max / 16;
+
+			ds->bts_buffer_base = (u64)(unsigned long)buffer;
+			ds->bts_index = ds->bts_buffer_base;
+			ds->bts_absolute_maximum = ds->bts_buffer_base +
+				max * BTS_RECORD_SIZE;
+			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+				thresh * BTS_RECORD_SIZE;
+		}
+
+		if (x86_pmu.pebs) {
+			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
+			if (unlikely(!buffer))
+				break;
+
+			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+			ds->pebs_index = ds->pebs_buffer_base;
+			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+				max * x86_pmu.pebs_record_size;
+			/*
+			 * Always use single record PEBS
+			 */
+			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+				x86_pmu.pebs_record_size;
+		}
+
+		err = 0;
+	}
+
+	/* A failed pass is undone for all cpus, not only the failing one. */
+	if (err)
+		release_ds_buffers();
+	else {
+		for_each_online_cpu(cpu)
+			init_debug_store_on_cpu(cpu);
+	}
+
+	put_online_cpus();
+
+	return err;
+}
+
+/*
+ * BTS
+ */
+
+static struct event_constraint bts_constraint =
+	EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0);
+
+static void intel_pmu_enable_bts(u64 config)
+{
+	unsigned long debugctlmsr;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr |= X86_DEBUGCTL_TR;
+	debugctlmsr |= X86_DEBUGCTL_BTS;
+	debugctlmsr |= X86_DEBUGCTL_BTINT;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_OS))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS;
+
+	if (!(config & ARCH_PERFMON_EVENTSEL_USR))
+		debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR;
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_disable_bts(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long debugctlmsr;
+
+	if (!cpuc->ds)
+		return;
+
+	debugctlmsr = get_debugctlmsr();
+
+	debugctlmsr &=
+		~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT |
+		  X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR);
+
+	update_debugctlmsr(debugctlmsr);
+}
+
+static void intel_pmu_drain_bts_buffer(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct bts_record {
+		u64	from;
+		u64	to;
+		u64	flags;
+	};
+	struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS];
+	struct bts_record *at, *top;
+	struct perf_output_handle handle;
+	struct perf_event_header header;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!event)
+		return;
+
+	if (!ds)
+		return;
+
+	at  = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
+	top = (struct bts_record *)(unsigned long)ds->bts_index;
+
+	if (top <= at)
+		return;
+
+	ds->bts_index = ds->bts_buffer_base;
+
+	perf_sample_data_init(&data, 0);
+	data.period = event->hw.last_period;
+	regs.ip     = 0;
+
+	/*
+	 * Prepare a generic sample, i.e. fill in the invariant fields.
+	 * We will overwrite the from and to address before we output
+	 * the sample.
+	 */
+	perf_prepare_sample(&header, &data, event, &regs);
+
+	if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1))
+		return;
+
+	for (; at < top; at++) {
+		data.ip		= at->from;
+		data.addr	= at->to;
+
+		perf_output_sample(&handle, &header, &data, event);
+	}
+
+	perf_output_end(&handle);
+
+	/* There's new data available. */
+	event->hw.interrupts++;
+	event->pending_kill = POLL_IN;
+}
+
+/*
+ * PEBS
+ */
+
+static struct event_constraint intel_core_pebs_events[] = {
+	PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */
+	PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */
+	PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
+	PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */
+	PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
+	PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_nehalem_pebs_events[] = {
+	PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */
+	PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETIRED.ANY */
+	PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */
+	PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */
+	PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */
+	PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */
+	PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint *
+intel_pebs_constraints(struct perf_event *event)
+{
+	struct event_constraint *c;
+
+	if (!event->attr.precise)
+		return NULL;
+
+	if (x86_pmu.pebs_constraints) {
+		for_each_event_constraint(c, x86_pmu.pebs_constraints) {
+			if ((event->hw.config & c->cmask) == c->code)
+				return c;
+		}
+	}
+
+	return &emptyconstraint;
+}
+
+/* Set the counter's PEBS bit in the per-cpu mask and write it to the MSR. */
+static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
+
+	cpuc->pebs_enabled |= 1ULL << hwc->idx;
+	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+}
+
+/* Clear the counter's PEBS bit in the per-cpu mask and write it to the MSR. */
+static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
+	wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+
+	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+}
+
+static void intel_pmu_pebs_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->pebs_enabled)
+		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
+}
+
+static void intel_pmu_pebs_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->pebs_enabled)
+		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
+}
+
+static int intel_pmu_save_and_restart(struct perf_event *event);
+static void intel_pmu_disable_event(struct perf_event *event);
+
+static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct perf_event *event = cpuc->events[0]; /* PMC0 only */
+	struct pebs_record_core *at, *top;
+	struct perf_sample_data data;
+	struct pt_regs regs;
+	int n;
+
+	if (!event || !ds || !x86_pmu.pebs)
+		return;
+
+	intel_pmu_pebs_disable_all();
+
+	at  = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_core *)(unsigned long)ds->pebs_index;
+
+	if (top <= at)
+		goto out;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	if (!intel_pmu_save_and_restart(event))
+		goto out;
+
+	perf_sample_data_init(&data, 0);
+	data.period = event->hw.last_period;
+
+	n = top - at;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ON_ONCE(n > 1);
+
+	/*
+	 * We use the interrupt regs as a base because the PEBS record
+	 * does not contain a full regs set, specifically it seems to
+	 * lack segment descriptors, which get used by things like
+	 * user_mode().
+	 *
+	 * In the simple case fix up only the IP and BP,SP regs, for
+	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
+	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
+	 */
+	regs = *iregs;
+	regs.ip = at->ip;
+	regs.bp = at->bp;
+	regs.sp = at->sp;
+
+	if (perf_event_overflow(event, 1, &data, &regs))
+		intel_pmu_disable_event(event);
+
+out:
+	intel_pmu_pebs_enable_all();
+}
+
+/* Drain the pebs_record_nhm entries and report each one as an overflow. */
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct debug_store *ds = cpuc->ds;
+	struct pebs_record_nhm *at, *top;
+	struct perf_sample_data data;
+	struct perf_event *event = NULL;
+	struct pt_regs regs;
+	int bit, n;
+
+	if (!ds || !x86_pmu.pebs)
+		return;
+
+	intel_pmu_pebs_disable_all();
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
+
+	if (top <= at)
+		goto out;
+
+	ds->pebs_index = ds->pebs_buffer_base;
+
+	n = top - at;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ON_ONCE(n > MAX_PEBS_EVENTS);
+
+	for ( ; at < top; at++) {
+		event = NULL;	/* do not reuse the previous record's event */
+		for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) {
+			if (cpuc->events[bit] && cpuc->events[bit]->attr.precise)
+				event = cpuc->events[bit];
+		}
+
+		if (!event)
+			continue;
+
+		if (!intel_pmu_save_and_restart(event))
+			continue;
+
+		perf_sample_data_init(&data, 0);
+		data.period = event->hw.last_period;
+
+		/*
+		 * See the comment in intel_pmu_drain_pebs_core()
+		 */
+		regs = *iregs;
+		regs.ip = at->ip;
+		regs.bp = at->bp;
+		regs.sp = at->sp;
+
+		if (perf_event_overflow(event, 1, &data, &regs))
+			intel_pmu_disable_event(event);
+	}
+out:
+	intel_pmu_pebs_enable_all();
+}
+
+/*
+ * BTS, PEBS probe and setup
+ */
+
+static void intel_ds_init(void)
+{
+	/*
+	 * No support for 32bit formats
+	 */
+	if (!boot_cpu_has(X86_FEATURE_DTES64))
+		return;
+
+	x86_pmu.bts  = boot_cpu_has(X86_FEATURE_BTS);
+	x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS);
+	if (x86_pmu.pebs) {
+		int format = 0;
+
+		if (x86_pmu.version > 1) {
+			u64 capabilities;
+			/*
+			 * v2+ has a PEBS format field
+			 */
+			rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+			format = (capabilities >> 8) & 0xf;
+		}
+
+		switch (format) {
+		case 0:
+			printk(KERN_CONT "PEBS v0, ");
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_core);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_core;
+			x86_pmu.pebs_constraints = intel_core_pebs_events;
+			break;
+
+		case 1:
+			printk(KERN_CONT "PEBS v1, ");
+			x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
+			x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
+			break;
+
+		default:
+			printk(KERN_CONT "PEBS unknown format: %d, ", format);
+			x86_pmu.pebs = 0;
+			break;
+		}
+	}
+}
+
+#else /* CONFIG_CPU_SUP_INTEL */
+
+static int reserve_ds_buffers(void)
+{
+	return 0;	/* no DS buffers to reserve without CONFIG_CPU_SUP_INTEL */
+}
+
+static void release_ds_buffers(void)
+{
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 80acbf3d5de1..42307b50c787 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -203,8 +203,9 @@ struct perf_event_attr {
 				enable_on_exec :  1, /* next exec enables     */
 				task           :  1, /* trace fork/exit       */
 				watermark      :  1, /* wakeup_watermark      */
+				precise        :  1, /* OoO invariant counter */
 
-				__reserved_1   : 49;
+				__reserved_1   : 48;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
-- 
cgit v1.2.3


From caff2befffe899e63df5cc760b7ed01cfd902685 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Mar 2010 12:02:30 +0100
Subject: perf, x86: Implement simple LBR support

Implement simple support for the Intel Last-Branch-Record; it supports all
hardware that implements FREEZE_LBRS_ON_PMI, but does not (yet) implement
the LBR config register.

The Intel LBR is a FIFO of From,To addresses describing the last few
branches the hardware took.

This patch does not add perf interface to the LBR, but merely provides an
interface for internal use.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.544191154@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c           |  18 +++
 arch/x86/kernel/cpu/perf_event_intel.c     |  13 ++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 228 +++++++++++++++++++++++++++++
 include/linux/perf_event.h                 |  11 ++
 4 files changed, 270 insertions(+)
 create mode 100644 arch/x86/kernel/cpu/perf_event_intel_lbr.c

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0c03d5c1671f..1badff6b6b28 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -48,6 +48,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+#define MAX_LBR_ENTRIES		16
+
 struct cpu_hw_events {
 	/*
 	 * Generic x86 PMC bits
@@ -69,6 +71,14 @@ struct cpu_hw_events {
 	struct debug_store	*ds;
 	u64			pebs_enabled;
 
+	/*
+	 * Intel LBR bits
+	 */
+	int				lbr_users;
+	void				*lbr_context;
+	struct perf_branch_stack	lbr_stack;
+	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
+
 	/*
 	 * AMD specific bits
 	 */
@@ -159,6 +169,13 @@ struct x86_pmu {
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
+
+	/*
+	 * Intel LBR
+	 */
+	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
+	int		lbr_nr;			   /* hardware stack size */
+	int		lbr_format;		   /* hardware format     */
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -1237,6 +1254,7 @@ undo:
 
 #include "perf_event_amd.c"
 #include "perf_event_p6.c"
+#include "perf_event_intel_lbr.c"
 #include "perf_event_intel_ds.c"
 #include "perf_event_intel.c"
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 11446412e4c7..44f6ed42a934 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void)
 		intel_pmu_disable_bts();
 
 	intel_pmu_pebs_disable_all();
+	intel_pmu_lbr_disable_all();
 }
 
 static void intel_pmu_enable_all(void)
@@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void)
 	}
 
 	intel_pmu_pebs_enable_all();
+	intel_pmu_lbr_enable_all();
 }
 
 static inline u64 intel_pmu_get_status(void)
@@ -674,6 +676,8 @@ again:
 	inc_irq_stat(apic_perf_irqs);
 	ack = status;
 
+	intel_pmu_lbr_read();
+
 	/*
 	 * PEBS overflow sets bit 62 in the global status register
 	 */
@@ -848,6 +852,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_core();
+
 		x86_pmu.event_constraints = intel_core2_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
@@ -857,13 +863,18 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
+
 	case 28: /* Atom */
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_atom();
+
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
@@ -873,6 +884,8 @@ static __init int intel_pmu_init(void)
 		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
+		intel_pmu_lbr_init_nhm();
+
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
 		pr_cont("Westmere events, ");
 		break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
new file mode 100644
index 000000000000..ea3e99ed82ce
--- /dev/null
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -0,0 +1,228 @@
+#ifdef CONFIG_CPU_SUP_INTEL
+
+enum {
+	LBR_FORMAT_32		= 0x00,
+	LBR_FORMAT_LIP		= 0x01,
+	LBR_FORMAT_EIP		= 0x02,
+	LBR_FORMAT_EIP_FLAGS	= 0x03,
+};
+
+/*
+ * We only support LBR implementations that have FREEZE_LBRS_ON_PMI
+ * otherwise it becomes near impossible to get a reliable stack.
+ */
+
+#define X86_DEBUGCTL_LBR               		(1 << 0)
+#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI		(1 << 11)
+
+static void __intel_pmu_lbr_enable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void __intel_pmu_lbr_disable(void)
+{
+	u64 debugctl;
+
+	rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+	debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI);
+	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+}
+
+static void intel_pmu_lbr_reset_32(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++)
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+}
+
+static void intel_pmu_lbr_reset_64(void)
+{
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		wrmsrl(x86_pmu.lbr_from + i, 0);
+		wrmsrl(x86_pmu.lbr_to   + i, 0);
+	}
+}
+
+static void intel_pmu_lbr_reset(void)
+{
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_reset_32();
+	else
+		intel_pmu_lbr_reset_64();
+}
+
+static void intel_pmu_lbr_enable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	WARN_ON(cpuc->enabled);
+
+	/*
+	 * Reset the LBR stack if this is the first LBR user or
+	 * we changed task context so as to avoid data leaks.
+	 */
+
+	if (!cpuc->lbr_users ||
+	    (event->ctx->task && cpuc->lbr_context != event->ctx)) {
+		intel_pmu_lbr_reset();
+		cpuc->lbr_context = event->ctx;
+	}
+
+	cpuc->lbr_users++;
+}
+
+static void intel_pmu_lbr_disable(struct perf_event *event)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!x86_pmu.lbr_nr)
+		return;
+
+	cpuc->lbr_users--;
+
+	BUG_ON(cpuc->lbr_users < 0);
+	WARN_ON(cpuc->enabled);
+}
+
+static void intel_pmu_lbr_enable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_enable();
+}
+
+static void intel_pmu_lbr_disable_all(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (cpuc->lbr_users)
+		__intel_pmu_lbr_disable();
+}
+
+static inline u64 intel_pmu_lbr_tos(void)
+{
+	u64 tos;
+
+	rdmsrl(x86_pmu.lbr_tos, tos);
+
+	return tos;
+}
+
+static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1; /* lbr_nr is 4, 8 or 16 */
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask; /* i below tos */
+		union {
+			struct {
+				u32 from;
+				u32 to;
+			};
+			u64     lbr;
+		} msr_lastbranch;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr);
+
+		cpuc->lbr_entries[i].from  = msr_lastbranch.from;
+		cpuc->lbr_entries[i].to    = msr_lastbranch.to;
+		cpuc->lbr_entries[i].flags = 0;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+#define LBR_FROM_FLAG_MISPRED  (1ULL << 63)
+
+/*
+ * Due to lack of segmentation in Linux the effective address (offset)
+ * is the same as the linear address, allowing us to merge the LIP and EIP
+ * LBR formats.
+ */
+static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc)
+{
+	unsigned long mask = x86_pmu.lbr_nr - 1; /* lbr_nr is 4, 8 or 16 */
+	u64 tos = intel_pmu_lbr_tos();
+	int i;
+
+	for (i = 0; i < x86_pmu.lbr_nr; i++) {
+		unsigned long lbr_idx = (tos - i) & mask; /* i below tos */
+		u64 from, to, flags = 0;
+
+		rdmsrl(x86_pmu.lbr_from + lbr_idx, from);
+		rdmsrl(x86_pmu.lbr_to   + lbr_idx, to);
+
+		if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) {
+			flags = !!(from & LBR_FROM_FLAG_MISPRED);
+			from = (u64)((((s64)from) << 1) >> 1); /* drop bit 63 */
+		}
+
+		cpuc->lbr_entries[i].from  = from;
+		cpuc->lbr_entries[i].to    = to;
+		cpuc->lbr_entries[i].flags = flags;
+	}
+	cpuc->lbr_stack.nr = i;
+}
+
+static void intel_pmu_lbr_read(void)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+
+	if (!cpuc->lbr_users)
+		return;
+
+	if (x86_pmu.lbr_format == LBR_FORMAT_32)
+		intel_pmu_lbr_read_32(cpuc);
+	else
+		intel_pmu_lbr_read_64(cpuc);
+}
+
+static int intel_pmu_lbr_format(void)
+{
+	u64 capabilities;
+
+	rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities);
+	return capabilities & 0x1f;
+}
+
+static void intel_pmu_lbr_init_core(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr     = 4;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+static void intel_pmu_lbr_init_nhm(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr     = 16;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x680;
+	x86_pmu.lbr_to     = 0x6c0;
+}
+
+static void intel_pmu_lbr_init_atom(void)
+{
+	x86_pmu.lbr_format = intel_pmu_lbr_format();
+	x86_pmu.lbr_nr	   = 8;
+	x86_pmu.lbr_tos    = 0x01c9;
+	x86_pmu.lbr_from   = 0x40;
+	x86_pmu.lbr_to     = 0x60;
+}
+
+#endif /* CONFIG_CPU_SUP_INTEL */
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 42307b50c787..ab4fd9ede264 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -467,6 +467,17 @@ struct perf_raw_record {
 	void				*data;
 };
 
+struct perf_branch_entry {
+	__u64				from;
+	__u64				to;
+	__u64				flags;
+};
+
+struct perf_branch_stack {
+	__u64				nr;
+	struct perf_branch_entry	entries[0];
+};
+
 struct task_struct;
 
 /**
-- 
cgit v1.2.3


From ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 3 Mar 2010 13:12:23 +0100
Subject: perf, x86: use LBR for PEBS IP+1 fixup

Use the LBR to fix up the PEBS IP+1 issue.

As said, PEBS reports the next instruction, here we use the LBR to find
the last branch and from that construct the actual IP. If the IP matches
the LBR-TO, we use LBR-FROM, otherwise we use the LBR-TO address as the
beginning of the last basic block and decode forward.

Once we find a match to the current IP, we use the previous location.

This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which
conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction
that caused the event (barring CPU errata).

The fixup can fail due to various reasons:

 1) LBR contains invalid data (quite possible)
 2) part of the basic block got paged out
 3) the reported IP isn't part of the basic block (see 1)

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
Cc: Masami Hiramatsu <mhiramat@redhat.com>
Cc: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Cc: paulus@samba.org
Cc: eranian@google.com
Cc: robert.richter@amd.com
Cc: fweisbec@gmail.com
LKML-Reference: <20100304140100.619375431@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h         | 19 +++++++
 arch/x86/kernel/cpu/perf_event.c          | 70 +++++++++++++-------------
 arch/x86/kernel/cpu/perf_event_intel.c    |  4 +-
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 84 ++++++++++++++++++++++++++++++-
 include/linux/perf_event.h                |  6 +++
 5 files changed, 144 insertions(+), 39 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index db6109a885a7..a9038c951619 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void);
 
 #define PERF_EVENT_INDEX_OFFSET			0
 
+/*
+ * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups.
+ * This flag is otherwise unused and ABI specified to be 0, so nobody should
+ * care what we do with it.
+ */
+#define PERF_EFLAGS_EXACT	(1UL << 3)
+
+#define perf_misc_flags(regs)				\
+({	int misc = 0;					\
+	if (user_mode(regs))				\
+		misc |= PERF_RECORD_MISC_USER;		\
+	else						\
+		misc |= PERF_RECORD_MISC_KERNEL;	\
+	if (regs->flags & PERF_EFLAGS_EXACT)		\
+		misc |= PERF_RECORD_MISC_EXACT;		\
+	misc; })
+
+#define perf_instruction_pointer(regs)	((regs)->ip)
+
 #else
 static inline void init_hw_perf_events(void)		{ }
 static inline void perf_events_lapic_init(void)	{ }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 1badff6b6b28..5cb4e8dcee4b 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -29,6 +29,41 @@
 #include <asm/stacktrace.h>
 #include <asm/nmi.h>
 
+/*
+ * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
+ */
+static unsigned long
+copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
+{
+	unsigned long offset, addr = (unsigned long)from;
+	int type = in_nmi() ? KM_NMI : KM_IRQ0;
+	unsigned long size, len = 0;
+	struct page *page;
+	void *map;
+	int ret;
+
+	do {
+		ret = __get_user_pages_fast(addr, 1, 0, &page);
+		if (!ret)
+			break;
+
+		offset = addr & (PAGE_SIZE - 1);
+		size = min(PAGE_SIZE - offset, n - len);
+
+		map = kmap_atomic(page, type);
+		memcpy(to, map+offset, size);
+		kunmap_atomic(map, type);
+		put_page(page);
+
+		len  += size;
+		to   += size;
+		addr += size;
+
+	} while (len < n);
+
+	return len;
+}
+
 static u64 perf_event_mask __read_mostly;
 
 struct event_constraint {
@@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
 	dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry);
 }
 
-/*
- * best effort, GUP based copy_from_user() that assumes IRQ or NMI context
- */
-static unsigned long
-copy_from_user_nmi(void *to, const void __user *from, unsigned long n)
-{
-	unsigned long offset, addr = (unsigned long)from;
-	int type = in_nmi() ? KM_NMI : KM_IRQ0;
-	unsigned long size, len = 0;
-	struct page *page;
-	void *map;
-	int ret;
-
-	do {
-		ret = __get_user_pages_fast(addr, 1, 0, &page);
-		if (!ret)
-			break;
-
-		offset = addr & (PAGE_SIZE - 1);
-		size = min(PAGE_SIZE - offset, n - len);
-
-		map = kmap_atomic(page, type);
-		memcpy(to, map+offset, size);
-		kunmap_atomic(map, type);
-		put_page(page);
-
-		len  += size;
-		to   += size;
-		addr += size;
-
-	} while (len < n);
-
-	return len;
-}
-
 static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
 {
 	unsigned long bytes;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 44f6ed42a934..7eb78be3b229 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 	x86_pmu_disable_event(event);
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_disable(hwc);
+		intel_pmu_pebs_disable(event);
 }
 
 static void intel_pmu_enable_fixed(struct hw_perf_event *hwc)
@@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 	}
 
 	if (unlikely(event->attr.precise))
-		intel_pmu_pebs_enable(hwc);
+		intel_pmu_pebs_enable(event);
 
 	__x86_pmu_enable_event(hwc);
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0d994ef213b9..50e6ff3281fc 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
-static void intel_pmu_pebs_enable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
 	val |= 1ULL << hwc->idx;
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
+
+	intel_pmu_lbr_enable(event);
 }
 
-static void intel_pmu_pebs_disable(struct hw_perf_event *hwc)
+static void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	struct hw_perf_event *hwc = &event->hw;
 	u64 val = cpuc->pebs_enabled;
 
 	val &= ~(1ULL << hwc->idx);
 	wrmsrl(MSR_IA32_PEBS_ENABLE, val);
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
+
+	intel_pmu_lbr_disable(event);
 }
 
 static void intel_pmu_pebs_enable_all(void)
@@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, 0);
 }
 
+#include <asm/insn.h>
+
+#define MAX_INSN_SIZE	16
+
+static inline bool kernel_ip(unsigned long ip)
+{
+#ifdef CONFIG_X86_32
+	return ip > PAGE_OFFSET;
+#else
+	return (long)ip < 0;
+#endif
+}
+
+static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs)
+{
+	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+	unsigned long from = cpuc->lbr_entries[0].from;
+	unsigned long old_to, to = cpuc->lbr_entries[0].to;
+	unsigned long ip = regs->ip;
+
+	if (!cpuc->lbr_stack.nr || !from || !to)
+		return 0;
+
+	if (ip < to)
+		return 0;
+
+	/*
+	 * We sampled a branch insn, rewind using the LBR stack
+	 */
+	if (ip == to) {
+		regs->ip = from;
+		return 1;
+	}
+
+	do {
+		struct insn insn;
+		u8 buf[MAX_INSN_SIZE];
+		void *kaddr;
+
+		old_to = to;
+		if (!kernel_ip(ip)) {
+			int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to);
+
+			bytes = copy_from_user_nmi(buf, (void __user *)to, size);
+			if (bytes != size)
+				return 0;
+
+			kaddr = buf;
+		} else
+			kaddr = (void *)to;
+
+		kernel_insn_init(&insn, kaddr);
+		insn_get_length(&insn);
+		to += insn.length;
+	} while (to < ip);
+
+	if (to == ip) {
+		regs->ip = old_to;
+		return 1;
+	}
+
+	return 0;
+}
+
 static int intel_pmu_save_and_restart(struct perf_event *event);
 static void intel_pmu_disable_event(struct perf_event *event);
 
@@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	regs.bp = at->bp;
 	regs.sp = at->sp;
 
+	if (intel_pmu_pebs_fixup_ip(&regs))
+		regs.flags |= PERF_EFLAGS_EXACT;
+	else
+		regs.flags &= ~PERF_EFLAGS_EXACT;
+
 	if (perf_event_overflow(event, 1, &data, &regs))
 		intel_pmu_disable_event(event);
 
@@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 		regs.bp = at->bp;
 		regs.sp = at->sp;
 
+		if (intel_pmu_pebs_fixup_ip(&regs))
+			regs.flags |= PERF_EFLAGS_EXACT;
+		else
+			regs.flags &= ~PERF_EFLAGS_EXACT;
+
 		if (perf_event_overflow(event, 1, &data, &regs))
 			intel_pmu_disable_event(event);
 	}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index ab4fd9ede264..be85f7c4a94f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -294,6 +294,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_USER			(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
 
+#define PERF_RECORD_MISC_EXACT			(1 << 14)
+/*
+ * Reserve the last bit to indicate some extended misc field
+ */
+#define PERF_RECORD_MISC_EXT_RESERVED		(1 << 15)
+
 struct perf_event_header {
 	__u32	type;
 	__u16	misc;
-- 
cgit v1.2.3


From 41acab8851a0408c1d5ad6c21a07456f88b54d40 Mon Sep 17 00:00:00 2001
From: Lucas De Marchi <lucas.de.marchi@gmail.com>
Date: Wed, 10 Mar 2010 23:37:45 -0300
Subject: sched: Implement group scheduler statistics in one struct

Put all statistic fields of sched_entity in one struct, sched_statistics,
and embed it into sched_entity.

This change allows us to memset the sched_statistics to 0 when needed (for
instance when forking), avoiding bugs from uninitialized fields.

Signed-off-by: Lucas De Marchi <lucas.de.marchi@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268275065-18542-1-git-send-email-lucas.de.marchi@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  54 +++++++++++++--------------
 kernel/sched.c        |  47 +++++------------------
 kernel/sched_debug.c  | 101 +++++++++++++++++++-------------------------------
 kernel/sched_fair.c   |  65 ++++++++++++++++----------------
 kernel/sched_rt.c     |   2 +-
 5 files changed, 106 insertions(+), 163 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4b1753f7e48e..8cc863d66477 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1127,36 +1127,8 @@ struct load_weight {
 	unsigned long weight, inv_weight;
 };
 
-/*
- * CFS stats for a schedulable entity (task, task-group etc)
- *
- * Current field usage histogram:
- *
- *     4 se->block_start
- *     4 se->run_node
- *     4 se->sleep_start
- *     6 se->load.weight
- */
-struct sched_entity {
-	struct load_weight	load;		/* for load-balancing */
-	struct rb_node		run_node;
-	struct list_head	group_node;
-	unsigned int		on_rq;
-
-	u64			exec_start;
-	u64			sum_exec_runtime;
-	u64			vruntime;
-	u64			prev_sum_exec_runtime;
-
-	u64			last_wakeup;
-	u64			avg_overlap;
-
-	u64			nr_migrations;
-
-	u64			start_runtime;
-	u64			avg_wakeup;
-
 #ifdef CONFIG_SCHEDSTATS
+struct sched_statistics {
 	u64			wait_start;
 	u64			wait_max;
 	u64			wait_count;
@@ -1188,6 +1160,30 @@ struct sched_entity {
 	u64			nr_wakeups_affine_attempts;
 	u64			nr_wakeups_passive;
 	u64			nr_wakeups_idle;
+};
+#endif
+
+struct sched_entity {
+	struct load_weight	load;		/* for load-balancing */
+	struct rb_node		run_node;
+	struct list_head	group_node;
+	unsigned int		on_rq;
+
+	u64			exec_start;
+	u64			sum_exec_runtime;
+	u64			vruntime;
+	u64			prev_sum_exec_runtime;
+
+	u64			last_wakeup;
+	u64			avg_overlap;
+
+	u64			nr_migrations;
+
+	u64			start_runtime;
+	u64			avg_wakeup;
+
+#ifdef CONFIG_SCHEDSTATS
+	struct sched_statistics statistics;
 #endif
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched.c b/kernel/sched.c
index 2c1db81f80eb..a4aa071f08f3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2437,15 +2437,15 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 
 out_activate:
 #endif /* CONFIG_SMP */
-	schedstat_inc(p, se.nr_wakeups);
+	schedstat_inc(p, se.statistics.nr_wakeups);
 	if (wake_flags & WF_SYNC)
-		schedstat_inc(p, se.nr_wakeups_sync);
+		schedstat_inc(p, se.statistics.nr_wakeups_sync);
 	if (orig_cpu != cpu)
-		schedstat_inc(p, se.nr_wakeups_migrate);
+		schedstat_inc(p, se.statistics.nr_wakeups_migrate);
 	if (cpu == this_cpu)
-		schedstat_inc(p, se.nr_wakeups_local);
+		schedstat_inc(p, se.statistics.nr_wakeups_local);
 	else
-		schedstat_inc(p, se.nr_wakeups_remote);
+		schedstat_inc(p, se.statistics.nr_wakeups_remote);
 	activate_task(rq, p, 1);
 	success = 1;
 
@@ -2532,36 +2532,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
-	p->se.wait_start			= 0;
-	p->se.wait_max				= 0;
-	p->se.wait_count			= 0;
-	p->se.wait_sum				= 0;
-
-	p->se.sleep_start			= 0;
-	p->se.sleep_max				= 0;
-	p->se.sum_sleep_runtime			= 0;
-
-	p->se.block_start			= 0;
-	p->se.block_max				= 0;
-	p->se.exec_max				= 0;
-	p->se.slice_max				= 0;
-
-	p->se.nr_migrations_cold		= 0;
-	p->se.nr_failed_migrations_affine	= 0;
-	p->se.nr_failed_migrations_running	= 0;
-	p->se.nr_failed_migrations_hot		= 0;
-	p->se.nr_forced_migrations		= 0;
-
-	p->se.nr_wakeups			= 0;
-	p->se.nr_wakeups_sync			= 0;
-	p->se.nr_wakeups_migrate		= 0;
-	p->se.nr_wakeups_local			= 0;
-	p->se.nr_wakeups_remote			= 0;
-	p->se.nr_wakeups_affine			= 0;
-	p->se.nr_wakeups_affine_attempts	= 0;
-	p->se.nr_wakeups_passive		= 0;
-	p->se.nr_wakeups_idle			= 0;
-
+	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 
 	INIT_LIST_HEAD(&p->rt.run_list);
@@ -7910,9 +7881,9 @@ void normalize_rt_tasks(void)
 
 		p->se.exec_start		= 0;
 #ifdef CONFIG_SCHEDSTATS
-		p->se.wait_start		= 0;
-		p->se.sleep_start		= 0;
-		p->se.block_start		= 0;
+		p->se.statistics.wait_start	= 0;
+		p->se.statistics.sleep_start	= 0;
+		p->se.statistics.block_start	= 0;
 #endif
 
 		if (!rt_task(p)) {
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 67f95aada4b9..ad9df4422763 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -70,16 +70,16 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu,
 	PN(se->vruntime);
 	PN(se->sum_exec_runtime);
 #ifdef CONFIG_SCHEDSTATS
-	PN(se->wait_start);
-	PN(se->sleep_start);
-	PN(se->block_start);
-	PN(se->sleep_max);
-	PN(se->block_max);
-	PN(se->exec_max);
-	PN(se->slice_max);
-	PN(se->wait_max);
-	PN(se->wait_sum);
-	P(se->wait_count);
+	PN(se->statistics.wait_start);
+	PN(se->statistics.sleep_start);
+	PN(se->statistics.block_start);
+	PN(se->statistics.sleep_max);
+	PN(se->statistics.block_max);
+	PN(se->statistics.exec_max);
+	PN(se->statistics.slice_max);
+	PN(se->statistics.wait_max);
+	PN(se->statistics.wait_sum);
+	P(se->statistics.wait_count);
 #endif
 	P(se->load.weight);
 #undef PN
@@ -104,7 +104,7 @@ print_task(struct seq_file *m, struct rq *rq, struct task_struct *p)
 	SEQ_printf(m, "%9Ld.%06ld %9Ld.%06ld %9Ld.%06ld",
 		SPLIT_NS(p->se.vruntime),
 		SPLIT_NS(p->se.sum_exec_runtime),
-		SPLIT_NS(p->se.sum_sleep_runtime));
+		SPLIT_NS(p->se.statistics.sum_sleep_runtime));
 #else
 	SEQ_printf(m, "%15Ld %15Ld %15Ld.%06ld %15Ld.%06ld %15Ld.%06ld",
 		0LL, 0LL, 0LL, 0L, 0LL, 0L, 0LL, 0L);
@@ -413,34 +413,34 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	nr_switches = p->nvcsw + p->nivcsw;
 
 #ifdef CONFIG_SCHEDSTATS
-	PN(se.wait_start);
-	PN(se.sleep_start);
-	PN(se.block_start);
-	PN(se.sleep_max);
-	PN(se.block_max);
-	PN(se.exec_max);
-	PN(se.slice_max);
-	PN(se.wait_max);
-	PN(se.wait_sum);
-	P(se.wait_count);
-	PN(se.iowait_sum);
-	P(se.iowait_count);
+	PN(se.statistics.wait_start);
+	PN(se.statistics.sleep_start);
+	PN(se.statistics.block_start);
+	PN(se.statistics.sleep_max);
+	PN(se.statistics.block_max);
+	PN(se.statistics.exec_max);
+	PN(se.statistics.slice_max);
+	PN(se.statistics.wait_max);
+	PN(se.statistics.wait_sum);
+	P(se.statistics.wait_count);
+	PN(se.statistics.iowait_sum);
+	P(se.statistics.iowait_count);
 	P(sched_info.bkl_count);
 	P(se.nr_migrations);
-	P(se.nr_migrations_cold);
-	P(se.nr_failed_migrations_affine);
-	P(se.nr_failed_migrations_running);
-	P(se.nr_failed_migrations_hot);
-	P(se.nr_forced_migrations);
-	P(se.nr_wakeups);
-	P(se.nr_wakeups_sync);
-	P(se.nr_wakeups_migrate);
-	P(se.nr_wakeups_local);
-	P(se.nr_wakeups_remote);
-	P(se.nr_wakeups_affine);
-	P(se.nr_wakeups_affine_attempts);
-	P(se.nr_wakeups_passive);
-	P(se.nr_wakeups_idle);
+	P(se.statistics.nr_migrations_cold);
+	P(se.statistics.nr_failed_migrations_affine);
+	P(se.statistics.nr_failed_migrations_running);
+	P(se.statistics.nr_failed_migrations_hot);
+	P(se.statistics.nr_forced_migrations);
+	P(se.statistics.nr_wakeups);
+	P(se.statistics.nr_wakeups_sync);
+	P(se.statistics.nr_wakeups_migrate);
+	P(se.statistics.nr_wakeups_local);
+	P(se.statistics.nr_wakeups_remote);
+	P(se.statistics.nr_wakeups_affine);
+	P(se.statistics.nr_wakeups_affine_attempts);
+	P(se.statistics.nr_wakeups_passive);
+	P(se.statistics.nr_wakeups_idle);
 
 	{
 		u64 avg_atom, avg_per_cpu;
@@ -491,32 +491,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 void proc_sched_set_task(struct task_struct *p)
 {
 #ifdef CONFIG_SCHEDSTATS
-	p->se.wait_max				= 0;
-	p->se.wait_sum				= 0;
-	p->se.wait_count			= 0;
-	p->se.iowait_sum			= 0;
-	p->se.iowait_count			= 0;
-	p->se.sleep_max				= 0;
-	p->se.sum_sleep_runtime			= 0;
-	p->se.block_max				= 0;
-	p->se.exec_max				= 0;
-	p->se.slice_max				= 0;
-	p->se.nr_migrations			= 0;
-	p->se.nr_migrations_cold		= 0;
-	p->se.nr_failed_migrations_affine	= 0;
-	p->se.nr_failed_migrations_running	= 0;
-	p->se.nr_failed_migrations_hot		= 0;
-	p->se.nr_forced_migrations		= 0;
-	p->se.nr_wakeups			= 0;
-	p->se.nr_wakeups_sync			= 0;
-	p->se.nr_wakeups_migrate		= 0;
-	p->se.nr_wakeups_local			= 0;
-	p->se.nr_wakeups_remote			= 0;
-	p->se.nr_wakeups_affine			= 0;
-	p->se.nr_wakeups_affine_attempts	= 0;
-	p->se.nr_wakeups_passive		= 0;
-	p->se.nr_wakeups_idle			= 0;
-	p->sched_info.bkl_count			= 0;
+	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
 #endif
 	p->se.sum_exec_runtime			= 0;
 	p->se.prev_sum_exec_runtime		= 0;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 3e1fd96c6cf9..8ad164bbdac1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -505,7 +505,8 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 {
 	unsigned long delta_exec_weighted;
 
-	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
+	schedstat_set(curr->statistics.exec_max,
+		      max((u64)delta_exec, curr->statistics.exec_max));
 
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
@@ -548,7 +549,7 @@ static void update_curr(struct cfs_rq *cfs_rq)
 static inline void
 update_stats_wait_start(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	schedstat_set(se->wait_start, rq_of(cfs_rq)->clock);
+	schedstat_set(se->statistics.wait_start, rq_of(cfs_rq)->clock);
 }
 
 /*
@@ -567,18 +568,18 @@ static void update_stats_enqueue(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 update_stats_wait_end(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	schedstat_set(se->wait_max, max(se->wait_max,
-			rq_of(cfs_rq)->clock - se->wait_start));
-	schedstat_set(se->wait_count, se->wait_count + 1);
-	schedstat_set(se->wait_sum, se->wait_sum +
-			rq_of(cfs_rq)->clock - se->wait_start);
+	schedstat_set(se->statistics.wait_max, max(se->statistics.wait_max,
+			rq_of(cfs_rq)->clock - se->statistics.wait_start));
+	schedstat_set(se->statistics.wait_count, se->statistics.wait_count + 1);
+	schedstat_set(se->statistics.wait_sum, se->statistics.wait_sum +
+			rq_of(cfs_rq)->clock - se->statistics.wait_start);
 #ifdef CONFIG_SCHEDSTATS
 	if (entity_is_task(se)) {
 		trace_sched_stat_wait(task_of(se),
-			rq_of(cfs_rq)->clock - se->wait_start);
+			rq_of(cfs_rq)->clock - se->statistics.wait_start);
 	}
 #endif
-	schedstat_set(se->wait_start, 0);
+	schedstat_set(se->statistics.wait_start, 0);
 }
 
 static inline void
@@ -657,39 +658,39 @@ static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	if (entity_is_task(se))
 		tsk = task_of(se);
 
-	if (se->sleep_start) {
-		u64 delta = rq_of(cfs_rq)->clock - se->sleep_start;
+	if (se->statistics.sleep_start) {
+		u64 delta = rq_of(cfs_rq)->clock - se->statistics.sleep_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->sleep_max))
-			se->sleep_max = delta;
+		if (unlikely(delta > se->statistics.sleep_max))
+			se->statistics.sleep_max = delta;
 
-		se->sleep_start = 0;
-		se->sum_sleep_runtime += delta;
+		se->statistics.sleep_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
 			account_scheduler_latency(tsk, delta >> 10, 1);
 			trace_sched_stat_sleep(tsk, delta);
 		}
 	}
-	if (se->block_start) {
-		u64 delta = rq_of(cfs_rq)->clock - se->block_start;
+	if (se->statistics.block_start) {
+		u64 delta = rq_of(cfs_rq)->clock - se->statistics.block_start;
 
 		if ((s64)delta < 0)
 			delta = 0;
 
-		if (unlikely(delta > se->block_max))
-			se->block_max = delta;
+		if (unlikely(delta > se->statistics.block_max))
+			se->statistics.block_max = delta;
 
-		se->block_start = 0;
-		se->sum_sleep_runtime += delta;
+		se->statistics.block_start = 0;
+		se->statistics.sum_sleep_runtime += delta;
 
 		if (tsk) {
 			if (tsk->in_iowait) {
-				se->iowait_sum += delta;
-				se->iowait_count++;
+				se->statistics.iowait_sum += delta;
+				se->statistics.iowait_count++;
 				trace_sched_stat_iowait(tsk, delta);
 			}
 
@@ -826,9 +827,9 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 			struct task_struct *tsk = task_of(se);
 
 			if (tsk->state & TASK_INTERRUPTIBLE)
-				se->sleep_start = rq_of(cfs_rq)->clock;
+				se->statistics.sleep_start = rq_of(cfs_rq)->clock;
 			if (tsk->state & TASK_UNINTERRUPTIBLE)
-				se->block_start = rq_of(cfs_rq)->clock;
+				se->statistics.block_start = rq_of(cfs_rq)->clock;
 		}
 #endif
 	}
@@ -912,7 +913,7 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * when there are only lesser-weight tasks around):
 	 */
 	if (rq_of(cfs_rq)->load.weight >= 2*se->load.weight) {
-		se->slice_max = max(se->slice_max,
+		se->statistics.slice_max = max(se->statistics.slice_max,
 			se->sum_exec_runtime - se->prev_sum_exec_runtime);
 	}
 #endif
@@ -1306,7 +1307,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	if (sync && balanced)
 		return 1;
 
-	schedstat_inc(p, se.nr_wakeups_affine_attempts);
+	schedstat_inc(p, se.statistics.nr_wakeups_affine_attempts);
 	tl_per_task = cpu_avg_load_per_task(this_cpu);
 
 	if (balanced ||
@@ -1318,7 +1319,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 		 * there is no bad imbalance.
 		 */
 		schedstat_inc(sd, ttwu_move_affine);
-		schedstat_inc(p, se.nr_wakeups_affine);
+		schedstat_inc(p, se.statistics.nr_wakeups_affine);
 
 		return 1;
 	}
@@ -1844,13 +1845,13 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	 * 3) are cache-hot on their current CPU.
 	 */
 	if (!cpumask_test_cpu(this_cpu, &p->cpus_allowed)) {
-		schedstat_inc(p, se.nr_failed_migrations_affine);
+		schedstat_inc(p, se.statistics.nr_failed_migrations_affine);
 		return 0;
 	}
 	*all_pinned = 0;
 
 	if (task_running(rq, p)) {
-		schedstat_inc(p, se.nr_failed_migrations_running);
+		schedstat_inc(p, se.statistics.nr_failed_migrations_running);
 		return 0;
 	}
 
@@ -1866,14 +1867,14 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 #ifdef CONFIG_SCHEDSTATS
 		if (tsk_cache_hot) {
 			schedstat_inc(sd, lb_hot_gained[idle]);
-			schedstat_inc(p, se.nr_forced_migrations);
+			schedstat_inc(p, se.statistics.nr_forced_migrations);
 		}
 #endif
 		return 1;
 	}
 
 	if (tsk_cache_hot) {
-		schedstat_inc(p, se.nr_failed_migrations_hot);
+		schedstat_inc(p, se.statistics.nr_failed_migrations_hot);
 		return 0;
 	}
 	return 1;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index c4fb42a66cab..0335e87f5204 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -613,7 +613,7 @@ static void update_curr_rt(struct rq *rq)
 	if (unlikely((s64)delta_exec < 0))
 		delta_exec = 0;
 
-	schedstat_set(curr->se.exec_max, max(curr->se.exec_max, delta_exec));
+	schedstat_set(curr->se.statistics.exec_max, max(curr->se.statistics.exec_max, delta_exec));
 
 	curr->se.sum_exec_runtime += delta_exec;
 	account_group_exec_runtime(curr, delta_exec);
-- 
cgit v1.2.3


From 39c0cbe2150cbd848a25ba6cdb271d1ad46818ad Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 11 Mar 2010 17:17:13 +0100
Subject: sched: Rate-limit nohz

Entering nohz code on every micro-idle is costing ~10% throughput for netperf
TCP_RR when scheduling cross-cpu.  Rate limiting entry fixes this, but raises
ticks a bit.  On my Q6600, an idle box goes from ~85 interrupts/sec to 128.

The higher the context switch rate, the more nohz entry costs.  With this patch
and some cycle recovery patches in my tree, max cross cpu context switch rate is
improved by ~16%, a large portion of which is this ratelimiting.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268301003.6785.28.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h    |  6 ++++++
 kernel/sched.c           | 12 ++++++++++++
 kernel/time/tick-sched.c |  3 +++
 3 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8cc863d66477..13efe7dac5fa 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -271,11 +271,17 @@ extern cpumask_var_t nohz_cpu_mask;
 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ)
 extern int select_nohz_load_balancer(int cpu);
 extern int get_nohz_load_balancer(void);
+extern int nohz_ratelimit(int cpu);
 #else
 static inline int select_nohz_load_balancer(int cpu)
 {
 	return 0;
 }
+
+static inline int nohz_ratelimit(int cpu)
+{
+	return 0;
+}
 #endif
 
 /*
diff --git a/kernel/sched.c b/kernel/sched.c
index a4aa071f08f3..60b1bbe2ad1b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -492,6 +492,7 @@ struct rq {
 	#define CPU_LOAD_IDX_MAX 5
 	unsigned long cpu_load[CPU_LOAD_IDX_MAX];
 #ifdef CONFIG_NO_HZ
+	u64 nohz_stamp;
 	unsigned char in_nohz_recently;
 #endif
 	/* capture load from *all* tasks on this cpu: */
@@ -1228,6 +1229,17 @@ void wake_up_idle_cpu(int cpu)
 	if (!tsk_is_polling(rq->idle))
 		smp_send_reschedule(cpu);
 }
+
+int nohz_ratelimit(int cpu)
+{
+	struct rq *rq = cpu_rq(cpu);
+	u64 diff = rq->clock - rq->nohz_stamp;
+
+	rq->nohz_stamp = rq->clock;
+
+	return diff < (NSEC_PER_SEC / HZ) >> 1;
+}
+
 #endif /* CONFIG_NO_HZ */
 
 static u64 sched_avg_period(void)
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f992762d7f51..f25735a767af 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -262,6 +262,9 @@ void tick_nohz_stop_sched_tick(int inidle)
 		goto end;
 	}
 
+	if (nohz_ratelimit(cpu))
+		goto end;
+
 	ts->idle_calls++;
 	/* Read jiffies and the time when jiffies were updated last */
 	do {
-- 
cgit v1.2.3


From b42e0c41a422a212ddea0666d5a3a0e3c35206db Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 11 Mar 2010 17:15:38 +0100
Subject: sched: Remove avg_wakeup

Testing the load which led to this heuristic (nfs4 kbuild) shows that it has
outlived its usefulness.  With intervening load balancing changes, I cannot
see any difference with/without, so recover those fastpath cycles.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268301062.6785.29.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |  3 ---
 kernel/sched.c          | 26 ++++----------------------
 kernel/sched_debug.c    |  1 -
 kernel/sched_fair.c     | 31 -------------------------------
 kernel/sched_features.h |  6 ------
 5 files changed, 4 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 13efe7dac5fa..70c560f5ada0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1185,9 +1185,6 @@ struct sched_entity {
 
 	u64			nr_migrations;
 
-	u64			start_runtime;
-	u64			avg_wakeup;
-
 #ifdef CONFIG_SCHEDSTATS
 	struct sched_statistics statistics;
 #endif
diff --git a/kernel/sched.c b/kernel/sched.c
index 60b1bbe2ad1b..35a8626ace7d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1880,9 +1880,6 @@ static void update_avg(u64 *avg, u64 sample)
 static void
 enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 {
-	if (wakeup)
-		p->se.start_runtime = p->se.sum_exec_runtime;
-
 	sched_info_queued(p);
 	p->sched_class->enqueue_task(rq, p, wakeup, head);
 	p->se.on_rq = 1;
@@ -1890,17 +1887,11 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep) {
-		if (p->se.last_wakeup) {
-			update_avg(&p->se.avg_overlap,
-				p->se.sum_exec_runtime - p->se.last_wakeup);
-			p->se.last_wakeup = 0;
-		} else {
-			update_avg(&p->se.avg_wakeup,
-				sysctl_sched_wakeup_granularity);
-		}
+	if (sleep && p->se.last_wakeup) {
+		update_avg(&p->se.avg_overlap,
+			p->se.sum_exec_runtime - p->se.last_wakeup);
+		p->se.last_wakeup = 0;
 	}
-
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
@@ -2466,13 +2457,6 @@ out_activate:
 	 */
 	if (!in_interrupt()) {
 		struct sched_entity *se = &current->se;
-		u64 sample = se->sum_exec_runtime;
-
-		if (se->last_wakeup)
-			sample -= se->last_wakeup;
-		else
-			sample -= se->start_runtime;
-		update_avg(&se->avg_wakeup, sample);
 
 		se->last_wakeup = se->sum_exec_runtime;
 	}
@@ -2540,8 +2524,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.nr_migrations		= 0;
 	p->se.last_wakeup		= 0;
 	p->se.avg_overlap		= 0;
-	p->se.start_runtime		= 0;
-	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index ad9df4422763..20b95a420fec 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -408,7 +408,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
 	PN(se.avg_overlap);
-	PN(se.avg_wakeup);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8ad164bbdac1..6fc62854422c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1592,42 +1592,11 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 }
 #endif /* CONFIG_SMP */
 
-/*
- * Adaptive granularity
- *
- * se->avg_wakeup gives the average time a task runs until it does a wakeup,
- * with the limit of wakeup_gran -- when it never does a wakeup.
- *
- * So the smaller avg_wakeup is the faster we want this task to preempt,
- * but we don't want to treat the preemptee unfairly and therefore allow it
- * to run for at least the amount of time we'd like to run.
- *
- * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
- *
- * NOTE: we use *nr_running to scale with load, this nicely matches the
- *       degrading latency on load.
- */
-static unsigned long
-adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
-{
-	u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
-	u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
-	u64 gran = 0;
-
-	if (this_run < expected_wakeup)
-		gran = expected_wakeup - this_run;
-
-	return min_t(s64, gran, sysctl_sched_wakeup_granularity);
-}
-
 static unsigned long
 wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
 {
 	unsigned long gran = sysctl_sched_wakeup_granularity;
 
-	if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
-		gran = adaptive_gran(curr, se);
-
 	/*
 	 * Since its curr running now, convert the gran from real-time
 	 * to virtual-time in his units.
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index d5059fd761d9..96ef5dbc66e1 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -30,12 +30,6 @@ SCHED_FEAT(START_DEBIT, 1)
  */
 SCHED_FEAT(WAKEUP_PREEMPT, 1)
 
-/*
- * Compute wakeup_gran based on task behaviour, clipped to
- *  [0, sched_wakeup_gran_ns]
- */
-SCHED_FEAT(ADAPTIVE_GRAN, 1)
-
 /*
  * When converting the wakeup granularity to virtual time, do it such
  * that heavier tasks preempting a lighter task have an edge.
-- 
cgit v1.2.3


From e12f31d3e5d36328c7fbd0fce40a95e70b59152c Mon Sep 17 00:00:00 2001
From: Mike Galbraith <efault@gmx.de>
Date: Thu, 11 Mar 2010 17:15:51 +0100
Subject: sched: Remove avg_overlap

Both avg_overlap and avg_wakeup had an inherent problem in that their accuracy
was detrimentally affected by cross-cpu wakeups, this because we are missing
the necessary call to update_curr().  This can't be fixed without increasing
overhead in our already too fat fastpath.

Additionally, with recent load balancing changes making us prefer to place tasks
in an idle cache domain (which is good for compute bound loads), communicating
tasks suffer when a sync wakeup, which would enable affine placement, is turned
into a non-sync wakeup by SYNC_LESS.  With one task on the runqueue, wake_affine()
rejects the affine wakeup request, leaving the unfortunate where placed, taking
frequent cache misses.

Remove it, and recover some fastpath cycles.

Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1268301121.6785.30.camel@marge.simson.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |  3 ---
 kernel/sched.c          | 33 ---------------------------------
 kernel/sched_debug.c    |  1 -
 kernel/sched_fair.c     | 18 ------------------
 kernel/sched_features.h | 16 ----------------
 5 files changed, 71 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 70c560f5ada0..8604884cee87 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1180,9 +1180,6 @@ struct sched_entity {
 	u64			vruntime;
 	u64			prev_sum_exec_runtime;
 
-	u64			last_wakeup;
-	u64			avg_overlap;
-
 	u64			nr_migrations;
 
 #ifdef CONFIG_SCHEDSTATS
diff --git a/kernel/sched.c b/kernel/sched.c
index 35a8626ace7d..68ed6f4f3c13 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1887,11 +1887,6 @@ enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
 
 static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
 {
-	if (sleep && p->se.last_wakeup) {
-		update_avg(&p->se.avg_overlap,
-			p->se.sum_exec_runtime - p->se.last_wakeup);
-		p->se.last_wakeup = 0;
-	}
 	sched_info_dequeued(p);
 	p->sched_class->dequeue_task(rq, p, sleep);
 	p->se.on_rq = 0;
@@ -2452,15 +2447,6 @@ out_activate:
 	activate_task(rq, p, 1);
 	success = 1;
 
-	/*
-	 * Only attribute actual wakeups done by this task.
-	 */
-	if (!in_interrupt()) {
-		struct sched_entity *se = &current->se;
-
-		se->last_wakeup = se->sum_exec_runtime;
-	}
-
 out_running:
 	trace_sched_wakeup(rq, p, success);
 	check_preempt_curr(rq, p, wake_flags);
@@ -2522,8 +2508,6 @@ static void __sched_fork(struct task_struct *p)
 	p->se.sum_exec_runtime		= 0;
 	p->se.prev_sum_exec_runtime	= 0;
 	p->se.nr_migrations		= 0;
-	p->se.last_wakeup		= 0;
-	p->se.avg_overlap		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->se.statistics, 0, sizeof(p->se.statistics));
@@ -3594,23 +3578,6 @@ static inline void schedule_debug(struct task_struct *prev)
 
 static void put_prev_task(struct rq *rq, struct task_struct *prev)
 {
-	if (prev->state == TASK_RUNNING) {
-		u64 runtime = prev->se.sum_exec_runtime;
-
-		runtime -= prev->se.prev_sum_exec_runtime;
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
-
-		/*
-		 * In order to avoid avg_overlap growing stale when we are
-		 * indeed overlapping and hence not getting put to sleep, grow
-		 * the avg_overlap on preemption.
-		 *
-		 * We use the average preemption runtime because that
-		 * correlates to the amount of cache footprint a task can
-		 * build up.
-		 */
-		update_avg(&prev->se.avg_overlap, runtime);
-	}
 	prev->sched_class->put_prev_task(rq, prev);
 }
 
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 20b95a420fec..8a46a719f367 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -407,7 +407,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.exec_start);
 	PN(se.vruntime);
 	PN(se.sum_exec_runtime);
-	PN(se.avg_overlap);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6fc62854422c..c3b69d4b5d65 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1241,7 +1241,6 @@ static inline unsigned long effective_load(struct task_group *tg, int cpu,
 
 static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 {
-	struct task_struct *curr = current;
 	unsigned long this_load, load;
 	int idx, this_cpu, prev_cpu;
 	unsigned long tl_per_task;
@@ -1256,18 +1255,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
 	load	  = source_load(prev_cpu, idx);
 	this_load = target_load(this_cpu, idx);
 
-	if (sync) {
-	       if (sched_feat(SYNC_LESS) &&
-		   (curr->se.avg_overlap > sysctl_sched_migration_cost ||
-		    p->se.avg_overlap > sysctl_sched_migration_cost))
-		       sync = 0;
-	} else {
-		if (sched_feat(SYNC_MORE) &&
-		    (curr->se.avg_overlap < sysctl_sched_migration_cost &&
-		     p->se.avg_overlap < sysctl_sched_migration_cost))
-			sync = 1;
-	}
-
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
 	 * effect of the currently running task from the load
@@ -1711,11 +1698,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	if (sched_feat(WAKEUP_SYNC) && sync)
 		goto preempt;
 
-	if (sched_feat(WAKEUP_OVERLAP) &&
-			se->avg_overlap < sysctl_sched_migration_cost &&
-			pse->avg_overlap < sysctl_sched_migration_cost)
-		goto preempt;
-
 	if (!sched_feat(WAKEUP_PREEMPT))
 		return;
 
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index 96ef5dbc66e1..c545e048dfed 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -41,12 +41,6 @@ SCHED_FEAT(ASYM_GRAN, 1)
  */
 SCHED_FEAT(WAKEUP_SYNC, 0)
 
-/*
- * Wakeup preempt based on task behaviour. Tasks that do not overlap
- * don't get preempted.
- */
-SCHED_FEAT(WAKEUP_OVERLAP, 0)
-
 /*
  * Use the SYNC wakeup hint, pipes and the likes use this to indicate
  * the remote end is likely to consume the data we just wrote, and
@@ -63,16 +57,6 @@ SCHED_FEAT(SYNC_WAKEUPS, 1)
  */
 SCHED_FEAT(AFFINE_WAKEUPS, 1)
 
-/*
- * Weaken SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_LESS, 1)
-
-/*
- * Add SYNC hint based on overlap
- */
-SCHED_FEAT(SYNC_MORE, 0)
-
 /*
  * Prefer to schedule the task we woke last (assuming it failed
  * wakeup-preemption), since its likely going to consume data we
-- 
cgit v1.2.3


From ca26308c2223cb424c297a70fb7b6c7530a563ba Mon Sep 17 00:00:00 2001
From: Miguel Aguilar <miguel.aguilar@ridgerun.com>
Date: Thu, 11 Mar 2010 09:32:21 -0600
Subject: MFD: DaVinci Voice Codec

This is the MFD driver for the DaVinci Voice codec, it has two clients:

* Voice codec interface
* Voice codec CQ93VC

Signed-off-by: Miguel Aguilar <miguel.aguilar@ridgerun.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/Kconfig                    |   4 +
 drivers/mfd/Makefile                   |   1 +
 drivers/mfd/davinci_voicecodec.c       | 189 +++++++++++++++++++++++++++++++++
 include/linux/mfd/davinci_voicecodec.h | 126 ++++++++++++++++++++++
 4 files changed, 320 insertions(+)
 create mode 100644 drivers/mfd/davinci_voicecodec.c
 create mode 100644 include/linux/mfd/davinci_voicecodec.h

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index 951fa9b93fbe..20e322912a59 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -53,6 +53,10 @@ config MFD_SH_MOBILE_SDHI
 	  This driver supports the SDHI hardware block found in many
 	  SuperH Mobile SoCs.
 
+config MFD_DAVINCI_VOICECODEC
+	tristate
+	select MFD_CORE
+
 config MFD_DM355EVM_MSP
 	bool "DaVinci DM355 EVM microcontroller"
 	depends on I2C && MACH_DAVINCI_DM355_EVM
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 22715add99a7..4fbf8f89a49b 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -12,6 +12,7 @@ obj-$(CONFIG_HTC_EGPIO)		+= htc-egpio.o
 obj-$(CONFIG_HTC_PASIC3)	+= htc-pasic3.o
 obj-$(CONFIG_HTC_I2CPLD)	+= htc-i2cpld.o
 
+obj-$(CONFIG_MFD_DAVINCI_VOICECODEC)	+= davinci_voicecodec.o
 obj-$(CONFIG_MFD_DM355EVM_MSP)	+= dm355evm_msp.o
 
 obj-$(CONFIG_MFD_T7L66XB)	+= t7l66xb.o tmio_core.o
diff --git a/drivers/mfd/davinci_voicecodec.c b/drivers/mfd/davinci_voicecodec.c
new file mode 100644
index 000000000000..9886aa8de250
--- /dev/null
+++ b/drivers/mfd/davinci_voicecodec.c
@@ -0,0 +1,206 @@
+/*
+ * DaVinci Voice Codec Core Interface for TI platforms
+ *
+ * Copyright (C) 2010 Texas Instruments, Inc
+ *
+ * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/clk.h>
+
+#include <sound/pcm.h>
+
+#include <linux/mfd/davinci_voicecodec.h>
+
+/* Read the 32-bit voice codec register at byte offset @reg. */
+u32 davinci_vc_read(struct davinci_vc *davinci_vc, int reg)
+{
+	return __raw_readl(davinci_vc->base + reg);
+}
+
+/* Write @val to the 32-bit voice codec register at byte offset @reg. */
+void davinci_vc_write(struct davinci_vc *davinci_vc,
+					   int reg, u32 val)
+{
+	__raw_writel(val, davinci_vc->base + reg);
+}
+
+/*
+ * Set up the voice codec core: allocate the private data, enable the
+ * clock, claim and map the register window, record the TX/RX DMA
+ * channels and FIFO addresses, then register the "davinci_vcif" and
+ * "cq93vc" MFD client cells.
+ *
+ * Returns 0 on success or a negative errno; on failure every resource
+ * acquired so far is released through the fail* labels.
+ */
+static int __init davinci_vc_probe(struct platform_device *pdev)
+{
+	struct davinci_vc *davinci_vc;
+	struct resource *res, *mem;
+	struct mfd_cell *cell = NULL;
+	int ret;
+
+	davinci_vc = kzalloc(sizeof(struct davinci_vc), GFP_KERNEL);
+	if (!davinci_vc) {
+		dev_dbg(&pdev->dev,
+			    "could not allocate memory for private data\n");
+		return -ENOMEM;
+	}
+
+	davinci_vc->clk = clk_get(&pdev->dev, NULL);
+	if (IS_ERR(davinci_vc->clk)) {
+		dev_dbg(&pdev->dev,
+			    "could not get the clock for voice codec\n");
+		ret = -ENODEV;
+		goto fail1;
+	}
+	clk_enable(davinci_vc->clk);
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no mem resource\n");
+		ret = -ENODEV;
+		goto fail2;
+	}
+
+	davinci_vc->pbase = res->start;
+	davinci_vc->base_size = resource_size(res);
+
+	mem = request_mem_region(davinci_vc->pbase, davinci_vc->base_size,
+				 pdev->name);
+	if (!mem) {
+		dev_err(&pdev->dev, "VCIF region already claimed\n");
+		ret = -EBUSY;
+		goto fail2;
+	}
+
+	davinci_vc->base = ioremap(davinci_vc->pbase, davinci_vc->base_size);
+	if (!davinci_vc->base) {
+		dev_err(&pdev->dev, "can't ioremap mem resource.\n");
+		ret = -ENOMEM;
+		goto fail3;
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+	if (!res) {
+		dev_err(&pdev->dev, "no DMA resource\n");
+		ret = -ENXIO;
+		goto fail4;
+	}
+
+	davinci_vc->davinci_vcif.dma_tx_channel = res->start;
+	davinci_vc->davinci_vcif.dma_tx_addr =
+		(dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_WFIFO);
+
+	res = platform_get_resource(pdev, IORESOURCE_DMA, 1);
+	if (!res) {
+		dev_err(&pdev->dev, "no DMA resource\n");
+		ret = -ENXIO;
+		goto fail4;
+	}
+
+	davinci_vc->davinci_vcif.dma_rx_channel = res->start;
+	davinci_vc->davinci_vcif.dma_rx_addr =
+		(dma_addr_t)(io_v2p(davinci_vc->base) + DAVINCI_VC_RFIFO);
+
+	davinci_vc->dev = &pdev->dev;
+	davinci_vc->pdev = pdev;
+
+	/* Voice codec interface client */
+	cell = &davinci_vc->cells[DAVINCI_VC_VCIF_CELL];
+	cell->name = "davinci_vcif";
+	cell->driver_data = davinci_vc;
+
+	/* Voice codec CQ93VC client */
+	cell = &davinci_vc->cells[DAVINCI_VC_CQ93VC_CELL];
+	cell->name = "cq93vc";
+	cell->driver_data = davinci_vc;
+
+	ret = mfd_add_devices(&pdev->dev, pdev->id, davinci_vc->cells,
+			      DAVINCI_VC_CELLS, NULL, 0);
+	if (ret != 0) {
+		dev_err(&pdev->dev, "fail to register client devices\n");
+		goto fail4;
+	}
+
+	/* davinci_vc_remove() retrieves the private data through drvdata */
+	platform_set_drvdata(pdev, davinci_vc);
+
+	return 0;
+
+fail4:
+	iounmap(davinci_vc->base);
+fail3:
+	release_mem_region(davinci_vc->pbase, davinci_vc->base_size);
+fail2:
+	clk_disable(davinci_vc->clk);
+	clk_put(davinci_vc->clk);
+	davinci_vc->clk = NULL;
+fail1:
+	kfree(davinci_vc);
+
+	return ret;
+}
+
+/* Tear down everything davinci_vc_probe() set up, in reverse order. */
+static int __devexit davinci_vc_remove(struct platform_device *pdev)
+{
+	struct davinci_vc *davinci_vc = platform_get_drvdata(pdev);
+
+	mfd_remove_devices(&pdev->dev);
+
+	iounmap(davinci_vc->base);
+	release_mem_region(davinci_vc->pbase, davinci_vc->base_size);
+
+	clk_disable(davinci_vc->clk);
+	clk_put(davinci_vc->clk);
+	davinci_vc->clk = NULL;
+
+	kfree(davinci_vc);
+
+	return 0;
+}
+
+static struct platform_driver davinci_vc_driver = {
+	.driver	= {
+		.name = "davinci_voicecodec",
+		.owner = THIS_MODULE,
+	},
+	.remove	= __devexit_p(davinci_vc_remove),
+};
+
+static int __init davinci_vc_init(void)
+{
+	return platform_driver_probe(&davinci_vc_driver, davinci_vc_probe);
+}
+module_init(davinci_vc_init);
+
+static void __exit davinci_vc_exit(void)
+{
+	platform_driver_unregister(&davinci_vc_driver);
+}
+module_exit(davinci_vc_exit);
+
+MODULE_AUTHOR("Miguel Aguilar");
+MODULE_DESCRIPTION("Texas Instruments DaVinci Voice Codec Core Interface");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/mfd/davinci_voicecodec.h b/include/linux/mfd/davinci_voicecodec.h
new file mode 100644
index 000000000000..0ab61320ffa8
--- /dev/null
+++ b/include/linux/mfd/davinci_voicecodec.h
@@ -0,0 +1,126 @@
+/*
+ * DaVinci Voice Codec Core Interface for TI platforms
+ *
+ * Copyright (C) 2010 Texas Instruments, Inc
+ *
+ * Author: Miguel Aguilar <miguel.aguilar@ridgerun.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef __LINUX_MFD_DAVINCI_VOICECODEC_H_
+#define __LINUX_MFD_DAVINCI_VOICECODEC_H_
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/mfd/core.h>
+
+#include <mach/edma.h>
+
+/*
+ * Register values.
+ */
+#define DAVINCI_VC_PID			0x00
+#define DAVINCI_VC_CTRL			0x04
+#define DAVINCI_VC_INTEN		0x08
+#define DAVINCI_VC_INTSTATUS		0x0c
+#define DAVINCI_VC_INTCLR		0x10
+#define DAVINCI_VC_EMUL_CTRL		0x14
+#define DAVINCI_VC_RFIFO		0x20
+#define DAVINCI_VC_WFIFO		0x24
+#define DAVINCI_VC_FIFOSTAT		0x28
+#define DAVINCI_VC_TST_CTRL		0x2C
+#define DAVINCI_VC_REG05		0x94
+#define DAVINCI_VC_REG09		0xA4
+#define DAVINCI_VC_REG12		0xB0
+
+/* DAVINCI_VC_CTRL bit fields */
+#define DAVINCI_VC_CTRL_MASK		0x5500
+#define DAVINCI_VC_CTRL_RSTADC		BIT(0)
+#define DAVINCI_VC_CTRL_RSTDAC		BIT(1)
+#define DAVINCI_VC_CTRL_RD_BITS_8	BIT(4)
+#define DAVINCI_VC_CTRL_RD_UNSIGNED	BIT(5)
+#define DAVINCI_VC_CTRL_WD_BITS_8	BIT(6)
+#define DAVINCI_VC_CTRL_WD_UNSIGNED	BIT(7)
+#define DAVINCI_VC_CTRL_RFIFOEN		BIT(8)
+#define DAVINCI_VC_CTRL_RFIFOCL		BIT(9)
+#define DAVINCI_VC_CTRL_RFIFOMD_WORD_1	BIT(10)
+#define DAVINCI_VC_CTRL_WFIFOEN		BIT(12)
+#define DAVINCI_VC_CTRL_WFIFOCL		BIT(13)
+#define DAVINCI_VC_CTRL_WFIFOMD_WORD_1	BIT(14)
+
+/* DAVINCI_VC_INT bit fields */
+#define DAVINCI_VC_INT_MASK		0x3F
+#define DAVINCI_VC_INT_RDRDY_MASK	BIT(0)
+#define DAVINCI_VC_INT_RERROVF_MASK	BIT(1)
+#define DAVINCI_VC_INT_RERRUDR_MASK	BIT(2)
+#define DAVINCI_VC_INT_WDREQ_MASK	BIT(3)
+#define DAVINCI_VC_INT_WERROVF_MASKBIT	BIT(4)
+#define DAVINCI_VC_INT_WERRUDR_MASK	BIT(5)
+
+/* DAVINCI_VC_REG05 bit fields */
+#define DAVINCI_VC_REG05_PGA_GAIN	0x07
+
+/* DAVINCI_VC_REG09 bit fields */
+#define DAVINCI_VC_REG09_MUTE		0x40
+#define DAVINCI_VC_REG09_DIG_ATTEN	0x3F
+
+/* DAVINCI_VC_REG12 bit fields */
+#define DAVINCI_VC_REG12_POWER_ALL_ON	0xFD
+#define DAVINCI_VC_REG12_POWER_ALL_OFF	0x00
+
+#define DAVINCI_VC_CELLS		2
+
+enum davinci_vc_cells {
+	DAVINCI_VC_VCIF_CELL,
+	DAVINCI_VC_CQ93VC_CELL,
+};
+
+struct davinci_vcif {
+	struct platform_device	*pdev;
+	u32 dma_tx_channel;
+	u32 dma_rx_channel;
+	dma_addr_t dma_tx_addr;
+	dma_addr_t dma_rx_addr;
+};
+
+struct cq93vc {
+	struct platform_device *pdev;
+	struct snd_soc_codec *codec;
+	u32 sysclk;
+};
+
+struct davinci_vc;
+
+struct davinci_vc {
+	/* Device data */
+	struct device *dev;
+	struct platform_device *pdev;
+	struct clk *clk;
+
+	/* Memory resources */
+	void __iomem *base;
+	resource_size_t pbase;
+	size_t base_size;
+
+	/* MFD cells */
+	struct mfd_cell cells[DAVINCI_VC_CELLS];
+
+	/* Client devices */
+	struct davinci_vcif davinci_vcif;
+	struct cq93vc cq93vc;
+};
+
+#endif
-- 
cgit v1.2.3


From 7e847894039d7590321de306fca2b1ae58662f29 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 11 Mar 2010 21:13:20 +0100
Subject: linux/usb/audio.h: split header

- Split the audio.h file in two to clearly denote the differences
  between the standards.
- Add many more defines to audio-v2.h. Most of them are not currently
  used.
- Replaced a magic value with a proper define

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v2.h | 319 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/audio.h    |  50 +------
 sound/usb/card.c             |   3 +-
 sound/usb/endpoint.c         |   1 +
 sound/usb/format.c           |   1 +
 sound/usb/pcm.c              |   5 +-
 6 files changed, 333 insertions(+), 46 deletions(-)
 create mode 100644 include/linux/usb/audio-v2.h

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
new file mode 100644
index 000000000000..3b8560d233bd
--- /dev/null
+++ b/include/linux/usb/audio-v2.h
@@ -0,0 +1,319 @@
+/*
+ * Copyright (c) 2010 Daniel Mack <daniel@caiaq.de>
+ *
+ * This software is distributed under the terms of the GNU General Public
+ * License ("GPL") version 2, as published by the Free Software Foundation.
+ *
+ * This file holds USB constants and structures defined
+ * by the USB Device Class Definition for Audio Devices in version 2.0.
+ * Comments below reference relevant sections of the documents contained
+ * in http://www.usb.org/developers/devclass_docs/Audio2.0_final.zip
+ */
+
+#ifndef __LINUX_USB_AUDIO_V2_H
+#define __LINUX_USB_AUDIO_V2_H
+
+#include <linux/types.h>
+
+/* v1.0 and v2.0 of this standard have many things in common. For the rest
+ * of the definitions, please refer to audio.h */
+
+/* 4.7.2.1 Clock Source Descriptor */
+
+struct uac_clock_source_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bClockID;
+	__u8 bmAttributes;
+	__u8 bmControls;
+	__u8 bAssocTerminal;
+	__u8 iClockSource;
+} __attribute__((packed));
+
+/* 4.7.2.2 Clock Selector Descriptor */
+
+struct uac_clock_selector_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bClockID;
+	__u8 bNrInPins;
+	__u8 bmControls;
+	__u8 baCSourceID[];
+} __attribute__((packed));
+
+/* 4.9.2 Class-Specific AS Interface Descriptor */
+
+struct uac_as_header_descriptor_v2 {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bTerminalLink;
+	__u8 bmControls;
+	__u8 bFormatType;
+	__u32 bmFormats;
+	__u8 bNrChannels;
+	__u32 bmChannelConfig;
+	__u8 iChannelNames;
+} __attribute__((packed));
+
+
+/* A.7 Audio Function Category Codes */
+#define UAC2_FUNCTION_SUBCLASS_UNDEFINED	0x00
+#define UAC2_FUNCTION_DESKTOP_SPEAKER		0x01
+#define UAC2_FUNCTION_HOME_THEATER		0x02
+#define UAC2_FUNCTION_MICROPHONE		0x03
+#define UAC2_FUNCTION_HEADSET			0x04
+#define UAC2_FUNCTION_TELEPHONE			0x05
+#define UAC2_FUNCTION_CONVERTER			0x06
+#define UAC2_FUNCTION_SOUND_RECORDER		0x07
+#define UAC2_FUNCTION_IO_BOX			0x08
+#define UAC2_FUNCTION_MUSICAL_INSTRUMENT	0x09
+#define UAC2_FUNCTION_PRO_AUDIO			0x0a
+#define UAC2_FUNCTION_AUDIO_VIDEO		0x0b
+#define UAC2_FUNCTION_CONTROL_PANEL		0x0c
+#define UAC2_FUNCTION_OTHER			0xff
+
+/* A.9 Audio Class-Specific AC Interface Descriptor Subtypes */
+/* see audio.h for the rest, which is identical to v1 */
+#define UAC2_EFFECT_UNIT			0x07
+#define UAC2_PROCESSING_UNIT_V2		0x08
+#define UAC2_EXTENSION_UNIT_V2		0x09
+#define UAC2_CLOCK_SOURCE		0x0a
+#define UAC2_CLOCK_SELECTOR		0x0b
+#define UAC2_CLOCK_MULTIPLIER		0x0c
+#define UAC2_SAMPLE_RATE_CONVERTER	0x0d
+
+/* A.10 Audio Class-Specific AS Interface Descriptor Subtypes */
+/* see audio.h for the rest, which is identical to v1 */
+#define UAC2_ENCODER			0x03
+#define UAC2_DECODER			0x04
+
+/* A.11 Effect Unit Effect Types */
+#define UAC2_EFFECT_UNDEFINED		0x00
+#define UAC2_EFFECT_PARAM_EQ		0x01
+#define UAC2_EFFECT_REVERB		0x02
+#define UAC2_EFFECT_MOD_DELAY		0x03
+#define UAC2_EFFECT_DYN_RANGE_COMP	0x04
+
+/* A.12 Processing Unit Process Types */
+#define UAC2_PROCESS_UNDEFINED		0x00
+#define UAC2_PROCESS_UP_DOWNMIX		0x01
+#define UAC2_PROCESS_DOLBY_PROLOCIC	0x02
+#define UAC2_PROCESS_STEREO_EXTENDER	0x03
+
+/* A.14 Audio Class-Specific Request Codes */
+#define UAC2_CS_CUR			0x01
+#define UAC2_CS_RANGE			0x02
+
+/* A.15 Encoder Type Codes */
+#define UAC2_ENCODER_UNDEFINED		0x00
+#define UAC2_ENCODER_OTHER		0x01
+#define UAC2_ENCODER_MPEG		0x02
+#define UAC2_ENCODER_AC3		0x03
+#define UAC2_ENCODER_WMA		0x04
+#define UAC2_ENCODER_DTS		0x05
+
+/* A.16 Decoder Type Codes */
+#define UAC2_DECODER_UNDEFINED		0x00
+#define UAC2_DECODER_OTHER		0x01
+#define UAC2_DECODER_MPEG		0x02
+#define UAC2_DECODER_AC3		0x03
+#define UAC2_DECODER_WMA		0x04
+#define UAC2_DECODER_DTS		0x05
+
+/* A.17.1 Clock Source Control Selectors */
+#define UAC2_CS_UNDEFINED		0x00
+#define UAC2_CS_CONTROL_SAM_FREQ	0x01
+#define UAC2_CS_CONTROL_CLOCK_VALID	0x02
+
+/* A.17.2 Clock Selector Control Selectors */
+#define UAC2_CX_UNDEFINED		0x00
+#define UAC2_CX_CLOCK_SELECTOR		0x01
+
+/* A.17.3 Clock Multiplier Control Selectors */
+#define UAC2_CM_UNDEFINED		0x00
+#define UAC2_CM_NUMERATOR		0x01
+#define UAC2_CM_DENOMINTATOR		0x02
+
+/* A.17.4 Terminal Control Selectors */
+#define UAC2_TE_UNDEFINED		0x00
+#define UAC2_TE_COPY_PROTECT		0x01
+#define UAC2_TE_CONNECTOR		0x02
+#define UAC2_TE_OVERLOAD		0x03
+#define UAC2_TE_CLUSTER			0x04
+#define UAC2_TE_UNDERFLOW		0x05
+#define UAC2_TE_OVERFLOW		0x06
+#define UAC2_TE_LATENCY			0x07
+
+/* A.17.5 Mixer Control Selectors */
+#define UAC2_MU_UNDEFINED		0x00
+#define UAC2_MU_MIXER			0x01
+#define UAC2_MU_CLUSTER			0x02
+#define UAC2_MU_UNDERFLOW		0x03
+#define UAC2_MU_OVERFLOW		0x04
+#define UAC2_MU_LATENCY			0x05
+
+/* A.17.6 Selector Control Selectors */
+#define UAC2_SU_UNDEFINED		0x00
+#define UAC2_SU_SELECTOR		0x01
+#define UAC2_SU_LATENCY			0x02
+
+/* A.17.7 Feature Unit Control Selectors */
+/* see audio.h for the rest, which is identical to v1 */
+#define UAC2_FU_INPUT_GAIN		0x0b
+#define UAC2_FU_INPUT_GAIN_PAD		0x0c
+#define UAC2_FU_PHASE_INVERTER		0x0d
+#define UAC2_FU_UNDERFLOW		0x0e
+#define UAC2_FU_OVERFLOW		0x0f
+#define UAC2_FU_LATENCY			0x10
+
+/* A.17.8.1 Parametric Equalizer Section Effect Unit Control Selectors */
+#define UAC2_PE_UNDEFINED		0x00
+#define UAC2_PE_ENABLE			0x01
+#define UAC2_PE_CENTERFREQ		0x02
+#define UAC2_PE_QFACTOR			0x03
+#define UAC2_PE_GAIN			0x04
+#define UAC2_PE_UNDERFLOW		0x05
+#define UAC2_PE_OVERFLOW		0x06
+#define UAC2_PE_LATENCY			0x07
+
+/* A.17.8.2 Reverberation Effect Unit Control Selectors */
+#define UAC2_RV_UNDEFINED		0x00
+#define UAC2_RV_ENABLE			0x01
+#define UAC2_RV_TYPE			0x02
+#define UAC2_RV_LEVEL			0x03
+#define UAC2_RV_TIME			0x04
+#define UAC2_RV_FEEDBACK		0x05
+#define UAC2_RV_PREDELAY		0x06
+#define UAC2_RV_DENSITY			0x07
+#define UAC2_RV_HIFREQ_ROLLOFF		0x08
+#define UAC2_RV_UNDERFLOW		0x09
+#define UAC2_RV_OVERFLOW		0x0a
+#define UAC2_RV_LATENCY			0x0b
+
+/* A.17.8.3 Modulation Delay Effect Control Selectors */
+#define UAC2_MD_UNDEFINED		0x00
+#define UAC2_MD_ENABLE			0x01
+#define UAC2_MD_BALANCE			0x02
+#define UAC2_MD_RATE			0x03
+#define UAC2_MD_DEPTH			0x04
+#define UAC2_MD_TIME			0x05
+#define UAC2_MD_FEEDBACK		0x06
+#define UAC2_MD_UNDERFLOW		0x07
+#define UAC2_MD_OVERFLOW		0x08
+#define UAC2_MD_LATENCY			0x09
+
+/* A.17.8.4 Dynamic Range Compressor Effect Unit Control Selectors */
+#define UAC2_DR_UNDEFINED		0x00
+#define UAC2_DR_ENABLE			0x01
+#define UAC2_DR_COMPRESSION_RATE	0x02
+#define UAC2_DR_MAXAMPL			0x03
+#define UAC2_DR_THRESHOLD		0x04
+#define UAC2_DR_ATTACK_TIME		0x05
+#define UAC2_DR_RELEASE_TIME		0x06
+#define UAC2_DR_UNDEFLOW		0x07
+#define UAC2_DR_OVERFLOW		0x08
+#define UAC2_DR_LATENCY			0x09
+
+/* A.17.9.1 Up/Down-mix Processing Unit Control Selectors */
+#define UAC2_UD_UNDEFINED		0x00
+#define UAC2_UD_ENABLE			0x01
+#define UAC2_UD_MODE_SELECT		0x02
+#define UAC2_UD_CLUSTER			0x03
+#define UAC2_UD_UNDERFLOW		0x04
+#define UAC2_UD_OVERFLOW		0x05
+#define UAC2_UD_LATENCY			0x06
+
+/* A.17.9.2 Dolby Prologic[tm] Processing Unit Control Selectors */
+#define UAC2_DP_UNDEFINED		0x00
+#define UAC2_DP_ENABLE			0x01
+#define UAC2_DP_MODE_SELECT		0x02
+#define UAC2_DP_CLUSTER			0x03
+#define UAC2_DP_UNDERFFLOW		0x04
+#define UAC2_DP_OVERFLOW		0x05
+#define UAC2_DP_LATENCY			0x06
+
+/* A.17.9.3 Stereo Extender Processing Unit Control Selectors */
+#define UAC2_ST_EXT_UNDEFINED		0x00
+#define UAC2_ST_EXT_ENABLE		0x01
+#define UAC2_ST_EXT_WIDTH		0x02
+#define UAC2_ST_EXT_UNDEFLOW		0x03
+#define UAC2_ST_EXT_OVERFLOW		0x04
+#define UAC2_ST_EXT_LATENCY		0x05
+
+/* A.17.10 Extension Unit Control Selectors */
+#define UAC2_XU_UNDEFINED		0x00
+#define UAC2_XU_ENABLE			0x01
+#define UAC2_XU_CLUSTER			0x02
+#define UAC2_XU_UNDERFLOW		0x03
+#define UAC2_XU_OVERFLOW		0x04
+#define UAC2_XU_LATENCY			0x05
+
+/* A.17.11 AudioStreaming Interface Control Selectors */
+#define UAC2_AS_UNDEFINED		0x00
+#define UAC2_AS_ACT_ALT_SETTING		0x01
+#define UAC2_AS_VAL_ALT_SETTINGS	0x02
+#define UAC2_AS_AUDIO_DATA_FORMAT	0x03
+
+/* A.17.12 Encoder Control Selectors */
+#define UAC2_EN_UNDEFINED		0x00
+#define UAC2_EN_BIT_RATE		0x01
+#define UAC2_EN_QUALITY			0x02
+#define UAC2_EN_VBR			0x03
+#define UAC2_EN_TYPE			0x04
+#define UAC2_EN_UNDERFLOW		0x05
+#define UAC2_EN_OVERFLOW		0x06
+#define UAC2_EN_ENCODER_ERROR		0x07
+#define UAC2_EN_PARAM1			0x08
+#define UAC2_EN_PARAM2			0x09
+#define UAC2_EN_PARAM3			0x0a
+#define UAC2_EN_PARAM4			0x0b
+#define UAC2_EN_PARAM5			0x0c
+#define UAC2_EN_PARAM6			0x0d
+#define UAC2_EN_PARAM7			0x0e
+#define UAC2_EN_PARAM8			0x0f
+
+/* A.17.13.1 MPEG Decoder Control Selectors */
+#define UAC2_MPEG_UNDEFINED		0x00
+#define UAC2_MPEG_DUAL_CHANNEL		0x01
+#define UAC2_MPEG_SECOND_STEREO		0x02
+#define UAC2_MPEG_MULTILINGUAL		0x03
+#define UAC2_MPEG_DYN_RANGE		0x04
+#define UAC2_MPEG_SCALING		0x05
+#define UAC2_MPEG_HILO_SCALING		0x06
+#define UAC2_MPEG_UNDERFLOW		0x07
+#define UAC2_MPEG_OVERFLOW		0x08
+#define UAC2_MPEG_DECODER_ERROR		0x09
+
+/* A.17.13.2 AC3 Decoder Control Selectors */
+#define UAC2_AC3_UNDEFINED		0x00
+#define UAC2_AC3_MODE			0x01
+#define UAC2_AC3_DYN_RANGE		0x02
+#define UAC2_AC3_SCALING		0x03
+#define UAC2_AC3_HILO_SCALING		0x04
+#define UAC2_AC3_UNDERFLOW		0x05
+#define UAC2_AC3_OVERFLOW		0x06
+#define UAC2_AC3_DECODER_ERROR		0x07
+
+/* A.17.13.3 WMA Decoder Control Selectors */
+#define UAC2_WMA_UNDEFINED		0x00
+#define UAC2_WMA_UNDERFLOW		0x01
+#define UAC2_WMA_OVERFLOW		0x02
+#define UAC2_WMA_DECODER_ERROR		0x03
+
+/* A.17.13.4 DTS Decoder Control Selectors */
+#define UAC2_DTS_UNDEFINED		0x00
+#define UAC2_DTS_UNDERFLOW		0x01
+#define UAC2_DTS_OVERFLOW		0x02
+#define UAC2_DTS_DECODER_ERROR		0x03
+
+/* A.17.14 Endpoint Control Selectors */
+#define UAC2_EP_CS_UNDEFINED		0x00
+#define UAC2_EP_CS_PITCH		0x01
+#define UAC2_EP_CS_DATA_OVERRUN		0x02
+#define UAC2_EP_CS_DATA_UNDERRUN	0x03
+
+#endif /* __LINUX_USB_AUDIO_V2_H */
+
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 4d3e450e2b03..cdad728543ae 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -13,6 +13,9 @@
  * Comments below reference relevant sections of that document:
  *
  * http://www.usb.org/developers/devclass_docs/audio10.pdf
+ *
+ * Types and defines in this file are either specific to version 1.0 of
+ * this standard or common for newer versions.
  */
 
 #ifndef __LINUX_USB_AUDIO_H
@@ -20,14 +23,15 @@
 
 #include <linux/types.h>
 
+/* bInterfaceProtocol values to denote the version of the standard used */
+#define UAC_VERSION_1			0x00
+#define UAC_VERSION_2			0x20
+
 /* A.2 Audio Interface Subclass Codes */
 #define USB_SUBCLASS_AUDIOCONTROL	0x01
 #define USB_SUBCLASS_AUDIOSTREAMING	0x02
 #define USB_SUBCLASS_MIDISTREAMING	0x03
 
-#define UAC_VERSION_1			0x00
-#define UAC_VERSION_2			0x20
-
 /* A.5 Audio Class-Specific AC Interface Descriptor Subtypes */
 #define UAC_HEADER			0x01
 #define UAC_INPUT_TERMINAL		0x02
@@ -38,15 +42,6 @@
 #define UAC_PROCESSING_UNIT_V1		0x07
 #define UAC_EXTENSION_UNIT_V1		0x08
 
-/* UAC v2.0 types */
-#define UAC_EFFECT_UNIT			0x07
-#define UAC_PROCESSING_UNIT_V2		0x08
-#define UAC_EXTENSION_UNIT_V2		0x09
-#define UAC_CLOCK_SOURCE		0x0a
-#define UAC_CLOCK_SELECTOR		0x0b
-#define UAC_CLOCK_MULTIPLIER		0x0c
-#define UAC_SAMPLE_RATE_CONVERTER	0x0d
-
 /* A.6 Audio Class-Specific AS Interface Descriptor Subtypes */
 #define UAC_AS_GENERAL			0x01
 #define UAC_FORMAT_TYPE			0x02
@@ -78,10 +73,6 @@
 
 #define UAC_GET_STAT			0xff
 
-/* Audio class v2.0 handles all the parameter calls differently */
-#define UAC2_CS_CUR			0x01
-#define UAC2_CS_RANGE			0x02
-
 /* MIDI - A.1 MS Class-Specific Interface Descriptor Subtypes */
 #define UAC_MS_HEADER			0x01
 #define UAC_MIDI_IN_JACK		0x02
@@ -200,19 +191,6 @@ struct uac_as_header_descriptor_v1 {
 	__le16 wFormatTag;		/* The Audio Data Format */
 } __attribute__ ((packed));
 
-struct uac_as_header_descriptor_v2 {
-	__u8 bLength;
-	__u8 bDescriptorType;
-	__u8 bDescriptorSubtype;
-	__u8 bTerminalLink;
-	__u8 bmControls;
-	__u8 bFormatType;
-	__u32 bmFormats;
-	__u8 bNrChannels;
-	__u32 bmChannelConfig;
-	__u8 iChannelNames;
-} __attribute__((packed));
-
 #define UAC_DT_AS_HEADER_SIZE		7
 
 /* Formats - A.1.1 Audio Data Format Type I Codes */
@@ -277,7 +255,6 @@ struct uac_format_type_i_ext_descriptor {
 	__u8 bSideBandProtocol;
 } __attribute__((packed));
 
-
 /* Formats - Audio Data Format Type I Codes */
 
 #define UAC_FORMAT_TYPE_II_MPEG	0x1001
@@ -336,19 +313,6 @@ struct uac_iso_endpoint_descriptor {
 #define UAC_EP_CS_ATTR_PITCH_CONTROL	0x02
 #define UAC_EP_CS_ATTR_FILL_MAX		0x80
 
-/* Audio class v2.0: CLOCK_SOURCE descriptor */
-
-struct uac_clock_source_descriptor {
-	__u8 bLength;
-	__u8 bDescriptorType;
-	__u8 bDescriptorSubtype;
-	__u8 bClockID;
-	__u8 bmAttributes;
-	__u8 bmControls;
-	__u8 bAssocTerminal;
-	__u8 iClockSource;
-} __attribute__((packed));
-
 /* A.10.2 Feature Unit Control Selectors */
 
 struct uac_feature_unit_descriptor {
diff --git a/sound/usb/card.c b/sound/usb/card.c
index 426aabc729d9..78d12ff00e8a 100644
--- a/sound/usb/card.c
+++ b/sound/usb/card.c
@@ -45,6 +45,7 @@
 #include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
 
 #include <sound/core.h>
 #include <sound/info.h>
@@ -250,7 +251,7 @@ static int snd_usb_create_streams(struct snd_usb_audio *chip, int ctrlif)
 		 * clock selectors and sample rate conversion units. */
 
 		cs = snd_usb_find_csint_desc(host_iface->extra, host_iface->extralen,
-						NULL, UAC_CLOCK_SOURCE);
+						NULL, UAC2_CLOCK_SOURCE);
 
 		if (!cs) {
 			snd_printk(KERN_ERR "CLOCK_SOURCE descriptor not found\n");
diff --git a/sound/usb/endpoint.c b/sound/usb/endpoint.c
index 91850f84cad6..b1309cdc1ac0 100644
--- a/sound/usb/endpoint.c
+++ b/sound/usb/endpoint.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/usb.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
 
 #include <sound/core.h>
 #include <sound/pcm.h>
diff --git a/sound/usb/format.c b/sound/usb/format.c
index b613e0aaeb63..0e04efe9551e 100644
--- a/sound/usb/format.c
+++ b/sound/usb/format.c
@@ -18,6 +18,7 @@
 #include <linux/init.h>
 #include <linux/usb.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
 
 #include <sound/core.h>
 #include <sound/pcm.h>
diff --git a/sound/usb/pcm.c b/sound/usb/pcm.c
index e0f3f87f99a0..630e2203b344 100644
--- a/sound/usb/pcm.c
+++ b/sound/usb/pcm.c
@@ -17,6 +17,7 @@
 #include <linux/init.h>
 #include <linux/usb.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
 
 #include <sound/core.h>
 #include <sound/pcm.h>
@@ -215,7 +216,7 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface,
 	data[3] = rate >> 24;
 	if ((err = snd_usb_ctl_msg(dev, usb_sndctrlpipe(dev, 0), UAC2_CS_CUR,
 				   USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_OUT,
-				   0x0100, chip->clock_id << 8,
+				   UAC2_CS_CONTROL_SAM_FREQ << 8, chip->clock_id << 8,
 				   data, sizeof(data), 1000)) < 0) {
 		snd_printk(KERN_ERR "%d:%d:%d: cannot set freq %d (v2)\n",
 			   dev->devnum, iface, fmt->altsetting, rate);
@@ -223,7 +224,7 @@ static int set_sample_rate_v2(struct snd_usb_audio *chip, int iface,
 	}
 	if ((err = snd_usb_ctl_msg(dev, usb_rcvctrlpipe(dev, 0), UAC2_CS_CUR,
 				   USB_TYPE_CLASS | USB_RECIP_INTERFACE | USB_DIR_IN,
-				   0x0100, chip->clock_id << 8,
+				   UAC2_CS_CONTROL_SAM_FREQ << 8, chip->clock_id << 8,
 				   data, sizeof(data), 1000)) < 0) {
 		snd_printk(KERN_WARNING "%d:%d:%d: cannot get freq (v2)\n",
 			   dev->devnum, iface, fmt->altsetting);
-- 
cgit v1.2.3


From 99fc86450c439039d2ef88d06b222fd51a779176 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 11 Mar 2010 21:13:24 +0100
Subject: ALSA: usb-mixer: parse descriptors with structs

Introduce a number of new structs for mixer, selector, feature and
processing units and some static inline helpers to access fields which
have dynamic offsets. Use them in mixer.c to parse the descriptors. This
is necessary for the upcoming audio v2 parsers.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h | 129 ++++++++++++++++++++++++++++++++++++++++++----
 sound/usb/mixer.c         |  87 +++++++++++++++++--------------
 2 files changed, 166 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index cdad728543ae..bc78a83d0f48 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -181,6 +181,125 @@ struct uac_feature_unit_descriptor_##ch {			\
 	__u8  iFeature;						\
 } __attribute__ ((packed))
 
+/* 4.3.2.3 Mixer Unit Descriptor */
+struct uac_mixer_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUnitID;
+	__u8 bNrInPins;
+	__u8 baSourceID[];
+} __attribute__ ((packed));
+
+static inline __u8 uac_mixer_unit_bNrChannels(struct uac_mixer_unit_descriptor *desc)
+{
+	return desc->baSourceID[desc->bNrInPins];
+}
+
+static inline __u16 uac_mixer_unit_wChannelConfig(struct uac_mixer_unit_descriptor *desc)
+{
+	return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
+		desc->baSourceID[desc->bNrInPins + 1];
+}
+
+static inline __u8 uac_mixer_unit_iChannelNames(struct uac_mixer_unit_descriptor *desc)
+{
+	return desc->baSourceID[desc->bNrInPins + 3];
+}
+
+static inline __u8 *uac_mixer_unit_bmControls(struct uac_mixer_unit_descriptor *desc)
+{
+	return &desc->baSourceID[desc->bNrInPins + 4];
+}
+
+static inline __u8 uac_mixer_unit_iMixer(struct uac_mixer_unit_descriptor *desc)
+{
+	__u8 *raw = (__u8 *) desc;
+	return raw[desc->bLength - 1];
+}
+
+/* 4.3.2.4 Selector Unit Descriptor */
+struct uac_selector_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUintID;
+	__u8 bNrInPins;
+	__u8 baSourceID[];
+} __attribute__ ((packed));
+
+static inline __u8 uac_selector_unit_iSelector(struct uac_selector_unit_descriptor *desc)
+{
+	__u8 *raw = (__u8 *) desc;
+	return raw[desc->bLength - 1];
+}
+
+/* 4.3.2.5 Feature Unit Descriptor */
+struct uac_feature_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUnitID;
+	__u8 bSourceID;
+	__u8 bControlSize;
+	__u8 bmaControls[0]; /* variable length */
+} __attribute__((packed));
+
+static inline __u8 uac_feature_unit_iFeature(struct uac_feature_unit_descriptor *desc)
+{
+	__u8 *raw = (__u8 *) desc;
+	return raw[desc->bLength - 1];
+}
+
+/* 4.3.2.6 Processing Unit Descriptors */
+struct uac_processing_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUnitID;
+	__le16 wProcessType;	/* little-endian on the wire; read via le16_to_cpu() */
+	__u8 bNrInPins;
+	__u8 baSourceID[];	/* bNrInPins source IDs; later fields sit at variable offsets behind them */
+} __attribute__ ((packed));
+
+static inline __u8 uac_processing_unit_bNrChannels(struct uac_processing_unit_descriptor *desc)
+{
+	return desc->baSourceID[desc->bNrInPins];
+}
+
+static inline __u16 uac_processing_unit_wChannelConfig(struct uac_processing_unit_descriptor *desc)
+{
+	return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
+		desc->baSourceID[desc->bNrInPins + 1];
+}
+
+static inline __u8 uac_processing_unit_iChannelNames(struct uac_processing_unit_descriptor *desc)
+{
+	return desc->baSourceID[desc->bNrInPins + 3];
+}
+
+static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_descriptor *desc)
+{
+	return desc->baSourceID[desc->bNrInPins + 4];
+}
+
+static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc)
+{
+	return &desc->baSourceID[desc->bNrInPins + 5];
+}
+
+static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc)
+{
+	__u8 control_size = uac_processing_unit_bControlSize(desc);
+	return desc->baSourceID[desc->bNrInPins + control_size];
+}
+
+static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc)
+{
+	__u8 control_size = uac_processing_unit_bControlSize(desc);
+	return &desc->baSourceID[desc->bNrInPins + control_size + 1];
+}
+
 /* 4.5.2 Class-Specific AS Interface Descriptor */
 struct uac_as_header_descriptor_v1 {
 	__u8  bLength;			/* in bytes: 7 */
@@ -315,16 +434,6 @@ struct uac_iso_endpoint_descriptor {
 
 /* A.10.2 Feature Unit Control Selectors */
 
-struct uac_feature_unit_descriptor {
-	__u8 bLength;
-	__u8 bDescriptorType;
-	__u8 bDescriptorSubtype;
-	__u8 bUnitID;
-	__u8 bSourceID;
-	__u8 bControlSize;
-	__u8 controls[0]; /* variable length */
-} __attribute__((packed));
-
 #define UAC_FU_CONTROL_UNDEFINED	0x00
 #define UAC_MUTE_CONTROL		0x01
 #define UAC_VOLUME_CONTROL		0x02
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 4e7c2fd9e3b4..994b0385235c 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -860,13 +860,14 @@ static size_t append_ctl_name(struct snd_kcontrol *kctl, const char *str)
 	return strlcat(kctl->id.name, str, sizeof(kctl->id.name));
 }
 
-static void build_feature_ctl(struct mixer_build *state, unsigned char *desc,
+static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
 			      unsigned int ctl_mask, int control,
 			      struct usb_audio_term *iterm, int unitid)
 {
+	struct uac_feature_unit_descriptor *desc = raw_desc;
 	unsigned int len = 0;
 	int mapped_name = 0;
-	int nameid = desc[desc[0] - 1];
+	int nameid = uac_feature_unit_iFeature(desc);
 	struct snd_kcontrol *kctl;
 	struct usb_mixer_elem_info *cval;
 	const struct usbmix_name_map *map;
@@ -1032,7 +1033,7 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 
 	channels = (ftr->bLength - 7) / csize - 1;
 
-	master_bits = snd_usb_combine_bytes(ftr->controls, csize);
+	master_bits = snd_usb_combine_bytes(ftr->bmaControls, csize);
 	/* master configuration quirks */
 	switch (state->chip->usb_id) {
 	case USB_ID(0x08bb, 0x2702):
@@ -1043,14 +1044,14 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 		break;
 	}
 	if (channels > 0)
-		first_ch_bits = snd_usb_combine_bytes(ftr->controls + csize, csize);
+		first_ch_bits = snd_usb_combine_bytes(ftr->bmaControls + csize, csize);
 	else
 		first_ch_bits = 0;
 	/* check all control types */
 	for (i = 0; i < 10; i++) {
 		unsigned int ch_bits = 0;
 		for (j = 0; j < channels; j++) {
-			unsigned int mask = snd_usb_combine_bytes(ftr->controls + csize * (j+1), csize);
+			unsigned int mask = snd_usb_combine_bytes(ftr->bmaControls + csize * (j+1), csize);
 			if (mask & (1 << i))
 				ch_bits |= (1 << j);
 		}
@@ -1075,13 +1076,13 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
  * input channel number (zero based) is given in control field instead.
  */
 
-static void build_mixer_unit_ctl(struct mixer_build *state, unsigned char *desc,
+static void build_mixer_unit_ctl(struct mixer_build *state,
+				 struct uac_mixer_unit_descriptor *desc,
 				 int in_pin, int in_ch, int unitid,
 				 struct usb_audio_term *iterm)
 {
 	struct usb_mixer_elem_info *cval;
-	unsigned int input_pins = desc[4];
-	unsigned int num_outs = desc[5 + input_pins];
+	unsigned int num_outs = uac_mixer_unit_bNrChannels(desc);
 	unsigned int i, len;
 	struct snd_kcontrol *kctl;
 	const struct usbmix_name_map *map;
@@ -1099,7 +1100,7 @@ static void build_mixer_unit_ctl(struct mixer_build *state, unsigned char *desc,
 	cval->control = in_ch + 1; /* based on 1 */
 	cval->val_type = USB_MIXER_S16;
 	for (i = 0; i < num_outs; i++) {
-		if (check_matrix_bitmap(desc + 9 + input_pins, in_ch, i, num_outs)) {
+		if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc), in_ch, i, num_outs)) {
 			cval->cmask |= (1 << i);
 			cval->channels++;
 		}
@@ -1132,18 +1133,19 @@ static void build_mixer_unit_ctl(struct mixer_build *state, unsigned char *desc,
 /*
  * parse a mixer unit
  */
-static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, unsigned char *desc)
+static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, void *raw_desc)
 {
+	struct uac_mixer_unit_descriptor *desc = raw_desc;
 	struct usb_audio_term iterm;
 	int input_pins, num_ins, num_outs;
 	int pin, ich, err;
 
-	if (desc[0] < 11 || ! (input_pins = desc[4]) || ! (num_outs = desc[5 + input_pins])) {
+	if (desc->bLength < 11 || ! (input_pins = desc->bNrInPins) || ! (num_outs = uac_mixer_unit_bNrChannels(desc))) {
 		snd_printk(KERN_ERR "invalid MIXER UNIT descriptor %d\n", unitid);
 		return -EINVAL;
 	}
 	/* no bmControls field (e.g. Maya44) -> ignore */
-	if (desc[0] <= 10 + input_pins) {
+	if (desc->bLength <= 10 + input_pins) {
 		snd_printdd(KERN_INFO "MU %d has no bmControls field\n", unitid);
 		return 0;
 	}
@@ -1151,10 +1153,10 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, unsigne
 	num_ins = 0;
 	ich = 0;
 	for (pin = 0; pin < input_pins; pin++) {
-		err = parse_audio_unit(state, desc[5 + pin]);
+		err = parse_audio_unit(state, desc->baSourceID[pin]);
 		if (err < 0)
 			return err;
-		err = check_input_term(state, desc[5 + pin], &iterm);
+		err = check_input_term(state, desc->baSourceID[pin], &iterm);
 		if (err < 0)
 			return err;
 		num_ins += iterm.channels;
@@ -1162,7 +1164,7 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, unsigne
 			int och, ich_has_controls = 0;
 
 			for (och = 0; och < num_outs; ++och) {
-				if (check_matrix_bitmap(desc + 9 + input_pins,
+				if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc),
 							ich, och, num_outs)) {
 					ich_has_controls = 1;
 					break;
@@ -1323,9 +1325,10 @@ static struct procunit_info extunits[] = {
 /*
  * build a processing/extension unit
  */
-static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned char *dsc, struct procunit_info *list, char *name)
+static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw_desc, struct procunit_info *list, char *name)
 {
-	int num_ins = dsc[6];
+	struct uac_processing_unit_descriptor *desc = raw_desc;
+	int num_ins = desc->bNrInPins;
 	struct usb_mixer_elem_info *cval;
 	struct snd_kcontrol *kctl;
 	int i, err, nameid, type, len;
@@ -1340,17 +1343,17 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned
 		0, NULL, default_value_info
 	};
 
-	if (dsc[0] < 13 || dsc[0] < 13 + num_ins || dsc[0] < num_ins + dsc[11 + num_ins]) {
+	if (desc->bLength < 13 || desc->bLength < 13 + num_ins || desc->bLength < num_ins + uac_processing_unit_bControlSize(desc)) {
 		snd_printk(KERN_ERR "invalid %s descriptor (id %d)\n", name, unitid);
 		return -EINVAL;
 	}
 
 	for (i = 0; i < num_ins; i++) {
-		if ((err = parse_audio_unit(state, dsc[7 + i])) < 0)
+		if ((err = parse_audio_unit(state, desc->baSourceID[i])) < 0)
 			return err;
 	}
 
-	type = combine_word(&dsc[4]);
+	type = le16_to_cpu(desc->wProcessType);
 	for (info = list; info && info->type; info++)
 		if (info->type == type)
 			break;
@@ -1358,8 +1361,9 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned
 		info = &default_info;
 
 	for (valinfo = info->values; valinfo->control; valinfo++) {
-		/* FIXME: bitmap might be longer than 8bit */
-		if (! (dsc[12 + num_ins] & (1 << (valinfo->control - 1))))
+		__u8 *controls = uac_processing_unit_bmControls(desc);
+
+		if (! (controls[valinfo->control / 8] & (1 << ((valinfo->control % 8) - 1))))
 			continue;
 		map = find_map(state, unitid, valinfo->control);
 		if (check_ignored_ctl(map))
@@ -1377,9 +1381,10 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned
 
 		/* get min/max values */
 		if (type == USB_PROC_UPDOWN && cval->control == USB_PROC_UPDOWN_MODE_SEL) {
+			__u8 *control_spec = uac_processing_unit_specific(desc);
 			/* FIXME: hard-coded */
 			cval->min = 1;
-			cval->max = dsc[15];
+			cval->max = control_spec[0];
 			cval->res = 1;
 			cval->initialized = 1;
 		} else {
@@ -1409,7 +1414,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned
 		else if (info->name)
 			strlcpy(kctl->id.name, info->name, sizeof(kctl->id.name));
 		else {
-			nameid = dsc[12 + num_ins + dsc[11 + num_ins]];
+			nameid = uac_processing_unit_iProcessing(desc);
 			len = 0;
 			if (nameid)
 				len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, sizeof(kctl->id.name));
@@ -1428,14 +1433,16 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, unsigned
 }
 
 
-static int parse_audio_processing_unit(struct mixer_build *state, int unitid, unsigned char *desc)
+static int parse_audio_processing_unit(struct mixer_build *state, int unitid, void *raw_desc)
 {
-	return build_audio_procunit(state, unitid, desc, procunits, "Processing Unit");
+	return build_audio_procunit(state, unitid, raw_desc, procunits, "Processing Unit");
 }
 
-static int parse_audio_extension_unit(struct mixer_build *state, int unitid, unsigned char *desc)
+static int parse_audio_extension_unit(struct mixer_build *state, int unitid, void *raw_desc)
 {
-	return build_audio_procunit(state, unitid, desc, extunits, "Extension Unit");
+	/* Note that we parse extension units with processing unit descriptors.
+	 * That's ok as the layout is the same */
+	return build_audio_procunit(state, unitid, raw_desc, extunits, "Extension Unit");
 }
 
 
@@ -1537,9 +1544,9 @@ static void usb_mixer_selector_elem_free(struct snd_kcontrol *kctl)
 /*
  * parse a selector unit
  */
-static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsigned char *desc)
+static int parse_audio_selector_unit(struct mixer_build *state, int unitid, void *raw_desc)
 {
-	unsigned int num_ins = desc[4];
+	struct uac_selector_unit_descriptor *desc = raw_desc;
 	unsigned int i, nameid, len;
 	int err;
 	struct usb_mixer_elem_info *cval;
@@ -1547,17 +1554,17 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsi
 	const struct usbmix_name_map *map;
 	char **namelist;
 
-	if (! num_ins || desc[0] < 5 + num_ins) {
+	if (!desc->bNrInPins || desc->bLength < 5 + desc->bNrInPins) {
 		snd_printk(KERN_ERR "invalid SELECTOR UNIT descriptor %d\n", unitid);
 		return -EINVAL;
 	}
 
-	for (i = 0; i < num_ins; i++) {
-		if ((err = parse_audio_unit(state, desc[5 + i])) < 0)
+	for (i = 0; i < desc->bNrInPins; i++) {
+		if ((err = parse_audio_unit(state, desc->baSourceID[i])) < 0)
 			return err;
 	}
 
-	if (num_ins == 1) /* only one ? nonsense! */
+	if (desc->bNrInPins == 1) /* only one ? nonsense! */
 		return 0;
 
 	map = find_map(state, unitid, 0);
@@ -1574,18 +1581,18 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsi
 	cval->val_type = USB_MIXER_U8;
 	cval->channels = 1;
 	cval->min = 1;
-	cval->max = num_ins;
+	cval->max = desc->bNrInPins;
 	cval->res = 1;
 	cval->initialized = 1;
 
-	namelist = kmalloc(sizeof(char *) * num_ins, GFP_KERNEL);
+	namelist = kmalloc(sizeof(char *) * desc->bNrInPins, GFP_KERNEL);
 	if (! namelist) {
 		snd_printk(KERN_ERR "cannot malloc\n");
 		kfree(cval);
 		return -ENOMEM;
 	}
 #define MAX_ITEM_NAME_LEN	64
-	for (i = 0; i < num_ins; i++) {
+	for (i = 0; i < desc->bNrInPins; i++) {
 		struct usb_audio_term iterm;
 		len = 0;
 		namelist[i] = kmalloc(MAX_ITEM_NAME_LEN, GFP_KERNEL);
@@ -1599,7 +1606,7 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsi
 		}
 		len = check_mapped_selector_name(state, unitid, i, namelist[i],
 						 MAX_ITEM_NAME_LEN);
-		if (! len && check_input_term(state, desc[5 + i], &iterm) >= 0)
+		if (! len && check_input_term(state, desc->baSourceID[i], &iterm) >= 0)
 			len = get_term_name(state, &iterm, namelist[i], MAX_ITEM_NAME_LEN, 0);
 		if (! len)
 			sprintf(namelist[i], "Input %d", i);
@@ -1615,7 +1622,7 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsi
 	kctl->private_value = (unsigned long)namelist;
 	kctl->private_free = usb_mixer_selector_elem_free;
 
-	nameid = desc[desc[0] - 1];
+	nameid = uac_selector_unit_iSelector(desc);
 	len = check_mapped_name(map, kctl->id.name, sizeof(kctl->id.name));
 	if (len)
 		;
@@ -1634,7 +1641,7 @@ static int parse_audio_selector_unit(struct mixer_build *state, int unitid, unsi
 	}
 
 	snd_printdd(KERN_INFO "[%d] SU [%s] items = %d\n",
-		    cval->id, kctl->id.name, num_ins);
+		    cval->id, kctl->id.name, desc->bNrInPins);
 	if ((err = add_control_to_empty(state, kctl)) < 0)
 		return err;
 
-- 
cgit v1.2.3


From 23caaf19b11eda7054348452e1618d4512a86907 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Thu, 11 Mar 2010 21:13:25 +0100
Subject: ALSA: usb-mixer: Add support for Audio Class v2.0

USB Audio Class v2.0 compliant devices have different descriptors and a
different way of setting/getting min/max/res/cur properties. This patch
adds support for them.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v2.h |  47 +++++++
 include/linux/usb/audio.h    |  71 +++++++---
 sound/usb/mixer.c            | 322 ++++++++++++++++++++++++++++++++-----------
 sound/usb/mixer.h            |   3 +
 4 files changed, 343 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index 3b8560d233bd..0952231e6c3f 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -43,6 +43,53 @@ struct uac_clock_selector_descriptor {
 	__u8 baCSourceID[];
 } __attribute__((packed));
 
+/* 4.7.2.4 Input terminal descriptor */
+
+struct uac2_input_terminal_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bTerminalID;
+	__u16 wTerminalType;
+	__u8 bAssocTerminal;
+	__u8 bCSourceID;
+	__u8 bNrChannels;
+	__u32 bmChannelConfig;
+	__u8 iChannelNames;
+	__u16 bmControls;
+	__u8 iTerminal;
+} __attribute__((packed));
+
+/* 4.7.2.5 Output terminal descriptor */
+
+struct uac2_output_terminal_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bTerminalID;
+	__u16 wTerminalType;
+	__u8 bAssocTerminal;
+	__u8 bSourceID;
+	__u8 bCSourceID;
+	__u16 bmControls;
+	__u8 iTerminal;
+} __attribute__((packed));
+
+
+
+/* 4.7.2.8 Feature Unit Descriptor */
+
+struct uac2_feature_unit_descriptor {
+	__u8 bLength;
+	__u8 bDescriptorType;
+	__u8 bDescriptorSubtype;
+	__u8 bUnitID;
+	__u8 bSourceID;
+	/* bmaControls is actually u32,
+	 * but u8 is needed for the hybrid parser */
+	__u8 bmaControls[0]; /* variable length */
+} __attribute__((packed));
+
 /* 4.9.2 Class-Specific AS Interface Descriptor */
 
 struct uac_as_header_descriptor_v2 {
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index bc78a83d0f48..905a87caf3fb 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -196,20 +196,33 @@ static inline __u8 uac_mixer_unit_bNrChannels(struct uac_mixer_unit_descriptor *
 	return desc->baSourceID[desc->bNrInPins];
 }
 
-static inline __u16 uac_mixer_unit_wChannelConfig(struct uac_mixer_unit_descriptor *desc)
+static inline __u32 uac_mixer_unit_wChannelConfig(struct uac_mixer_unit_descriptor *desc,
+						  int protocol)
 {
-	return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
-		desc->baSourceID[desc->bNrInPins + 1];
+	if (protocol == UAC_VERSION_1)
+		return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
+			desc->baSourceID[desc->bNrInPins + 1];
+	else
+		return  (desc->baSourceID[desc->bNrInPins + 4] << 24) |
+			(desc->baSourceID[desc->bNrInPins + 3] << 16) |
+			(desc->baSourceID[desc->bNrInPins + 2] << 8)  |
+			(desc->baSourceID[desc->bNrInPins + 1]);
 }
 
-static inline __u8 uac_mixer_unit_iChannelNames(struct uac_mixer_unit_descriptor *desc)
+static inline __u8 uac_mixer_unit_iChannelNames(struct uac_mixer_unit_descriptor *desc,
+						int protocol)
 {
-	return desc->baSourceID[desc->bNrInPins + 3];
+	return (protocol == UAC_VERSION_1) ?
+		desc->baSourceID[desc->bNrInPins + 3] :
+		desc->baSourceID[desc->bNrInPins + 5];
 }
 
-static inline __u8 *uac_mixer_unit_bmControls(struct uac_mixer_unit_descriptor *desc)
+static inline __u8 *uac_mixer_unit_bmControls(struct uac_mixer_unit_descriptor *desc,
+					      int protocol)
 {
-	return &desc->baSourceID[desc->bNrInPins + 4];
+	return (protocol == UAC_VERSION_1) ?
+		&desc->baSourceID[desc->bNrInPins + 4] :
+		&desc->baSourceID[desc->bNrInPins + 6];
 }
 
 static inline __u8 uac_mixer_unit_iMixer(struct uac_mixer_unit_descriptor *desc)
@@ -267,36 +280,54 @@ static inline __u8 uac_processing_unit_bNrChannels(struct uac_processing_unit_de
 	return desc->baSourceID[desc->bNrInPins];
 }
 
-static inline __u16 uac_processing_unit_wChannelConfig(struct uac_processing_unit_descriptor *desc)
+static inline __u32 uac_processing_unit_wChannelConfig(struct uac_processing_unit_descriptor *desc,
+						       int protocol)
 {
-	return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
-		desc->baSourceID[desc->bNrInPins + 1];
+	if (protocol == UAC_VERSION_1)
+		return (desc->baSourceID[desc->bNrInPins + 2] << 8) |
+			desc->baSourceID[desc->bNrInPins + 1];
+	else
+		return  (desc->baSourceID[desc->bNrInPins + 4] << 24) |
+			(desc->baSourceID[desc->bNrInPins + 3] << 16) |
+			(desc->baSourceID[desc->bNrInPins + 2] << 8)  |
+			(desc->baSourceID[desc->bNrInPins + 1]);
 }
 
-static inline __u8 uac_processing_unit_iChannelNames(struct uac_processing_unit_descriptor *desc)
+static inline __u8 uac_processing_unit_iChannelNames(struct uac_processing_unit_descriptor *desc,
+						     int protocol)
 {
-	return desc->baSourceID[desc->bNrInPins + 3];
+	return (protocol == UAC_VERSION_1) ?
+		desc->baSourceID[desc->bNrInPins + 3] :
+		desc->baSourceID[desc->bNrInPins + 5];
 }
 
-static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_descriptor *desc)
+static inline __u8 uac_processing_unit_bControlSize(struct uac_processing_unit_descriptor *desc,
+						    int protocol)
 {
-	return desc->baSourceID[desc->bNrInPins + 4];
+	return (protocol == UAC_VERSION_1) ?
+		desc->baSourceID[desc->bNrInPins + 4] :
+		desc->baSourceID[desc->bNrInPins + 6];
 }
 
-static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc)
+static inline __u8 *uac_processing_unit_bmControls(struct uac_processing_unit_descriptor *desc,
+						   int protocol)
 {
-	return &desc->baSourceID[desc->bNrInPins + 5];
+	return (protocol == UAC_VERSION_1) ?
+		&desc->baSourceID[desc->bNrInPins + 5] :
+		&desc->baSourceID[desc->bNrInPins + 7];
 }
 
-static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc)
+static inline __u8 uac_processing_unit_iProcessing(struct uac_processing_unit_descriptor *desc,
+						   int protocol)
 {
-	__u8 control_size = uac_processing_unit_bControlSize(desc);
-	return desc->baSourceID[desc->bNrInPins + control_size];
+	/* iProcessing is the byte right behind the bControlSize bytes of bmControls */
+	return uac_processing_unit_bmControls(desc, protocol)[uac_processing_unit_bControlSize(desc, protocol)];
 }
 
-static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc)
+static inline __u8 *uac_processing_unit_specific(struct uac_processing_unit_descriptor *desc,
+						 int protocol)
 {
-	__u8 control_size = uac_processing_unit_bControlSize(desc);
-	return &desc->baSourceID[desc->bNrInPins + control_size + 1];
+	/* process-specific bytes start after bmControls[bControlSize] and the iProcessing byte */
+	return uac_processing_unit_bmControls(desc, protocol) + uac_processing_unit_bControlSize(desc, protocol) + 1;
 }
 
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 994b0385235c..1deef623c081 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -33,6 +33,7 @@
 #include <linux/string.h>
 #include <linux/usb.h>
 #include <linux/usb/audio.h>
+#include <linux/usb/audio-v2.h>
 
 #include <sound/core.h>
 #include <sound/control.h>
@@ -197,6 +198,7 @@ static int check_mapped_selector_name(struct mixer_build *state, int unitid,
 
 /*
  * find an audio control unit with the given unit id
+ * this doesn't return any clock related units, so they need to be handled elsewhere
  */
 static void *find_audio_control_unit(struct mixer_build *state, unsigned char unit)
 {
@@ -205,7 +207,7 @@ static void *find_audio_control_unit(struct mixer_build *state, unsigned char un
 	p = NULL;
 	while ((p = snd_usb_find_desc(state->buffer, state->buflen, p,
 				      USB_DT_CS_INTERFACE)) != NULL) {
-		if (p[0] >= 4 && p[2] >= UAC_INPUT_TERMINAL && p[2] <= UAC_EXTENSION_UNIT_V1 && p[3] == unit)
+		if (p[0] >= 4 && p[2] >= UAC_INPUT_TERMINAL && p[2] <= UAC2_EXTENSION_UNIT_V2 && p[3] == unit)
 			return p;
 	}
 	return NULL;
@@ -302,7 +304,7 @@ static int get_abs_value(struct usb_mixer_elem_info *cval, int val)
  * retrieve a mixer value
  */
 
-static int get_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret)
+static int get_ctl_value_v1(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret)
 {
 	unsigned char buf[2];
 	int val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1;
@@ -324,6 +326,58 @@ static int get_ctl_value(struct usb_mixer_elem_info *cval, int request, int vali
 	return -EINVAL;
 }
 
+static int get_ctl_value_v2(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret)
+{
+	unsigned char buf[14]; /* enough space for one range of 4 bytes */
+	unsigned char *val;
+	int ret;
+	__u8 bRequest;
+
+	bRequest = (request == UAC_GET_CUR) ?
+		UAC2_CS_CUR : UAC2_CS_RANGE;
+
+	ret = snd_usb_ctl_msg(cval->mixer->chip->dev,
+			      usb_rcvctrlpipe(cval->mixer->chip->dev, 0),
+			      bRequest,
+			      USB_RECIP_INTERFACE | USB_TYPE_CLASS | USB_DIR_IN,
+			      validx, cval->mixer->ctrlif | (cval->id << 8),
+			      buf, sizeof(buf), 1000);
+
+	if (ret < 0) {
+		snd_printdd(KERN_ERR "cannot get ctl value: req = %#x, wValue = %#x, wIndex = %#x, type = %d\n",
+			    request, validx, cval->mixer->ctrlif | (cval->id << 8), cval->val_type);
+		return ret;
+	}
+
+	switch (request) {
+	case UAC_GET_CUR:
+		val = buf;
+		break;
+	case UAC_GET_MIN:
+		val = buf + sizeof(__u16);
+		break;
+	case UAC_GET_MAX:
+		val = buf + sizeof(__u16) * 2;
+		break;
+	case UAC_GET_RES:
+		val = buf + sizeof(__u16) * 3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	*value_ret = convert_signed_value(cval, snd_usb_combine_bytes(val, sizeof(__u16)));
+
+	return 0;
+}
+
+static int get_ctl_value(struct usb_mixer_elem_info *cval, int request, int validx, int *value_ret)
+{
+	return (cval->mixer->protocol == UAC_VERSION_1) ?
+		get_ctl_value_v1(cval, request, validx, value_ret) :
+		get_ctl_value_v2(cval, request, validx, value_ret);
+}
+
 static int get_cur_ctl_value(struct usb_mixer_elem_info *cval, int validx, int *value)
 {
 	return get_ctl_value(cval, UAC_GET_CUR, validx, value);
@@ -348,8 +402,7 @@ static int get_cur_mix_value(struct usb_mixer_elem_info *cval,
 	err = get_cur_mix_raw(cval, channel, value);
 	if (err < 0) {
 		if (!cval->mixer->ignore_ctl_error)
-			snd_printd(KERN_ERR "cannot get current value for "
-				   "control %d ch %d: err = %d\n",
+			snd_printd(KERN_ERR "cannot get current value for control %d ch %d: err = %d\n",
 				   cval->control, channel, err);
 		return err;
 	}
@@ -367,8 +420,22 @@ int snd_usb_mixer_set_ctl_value(struct usb_mixer_elem_info *cval,
 				int request, int validx, int value_set)
 {
 	unsigned char buf[2];
-	int val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1;
-	int timeout = 10;
+	int val_len, timeout = 10;
+
+	if (cval->mixer->protocol == UAC_VERSION_1) {
+		val_len = cval->val_type >= USB_MIXER_S16 ? 2 : 1;
+	} else { /* UAC_VERSION_2 */
+		/* audio class v2 controls are always 2 bytes in size */
+		val_len = sizeof(__u16);
+
+		/* FIXME */
+		if (request != UAC_SET_CUR) {
+			snd_printdd(KERN_WARNING "RANGE setting not yet supported\n");
+			return -EINVAL;
+		}
+
+		request = UAC2_CS_CUR;
+	}
 
 	value_set = convert_bytes_value(cval, value_set);
 	buf[0] = value_set & 0xff;
@@ -564,46 +631,65 @@ static int get_term_name(struct mixer_build *state, struct usb_audio_term *iterm
  */
 static int check_input_term(struct mixer_build *state, int id, struct usb_audio_term *term)
 {
-	unsigned char *p1;
+	void *p1;
 
 	memset(term, 0, sizeof(*term));
 	while ((p1 = find_audio_control_unit(state, id)) != NULL) {
+		unsigned char *hdr = p1;
 		term->id = id;
-		switch (p1[2]) {
+		switch (hdr[2]) {
 		case UAC_INPUT_TERMINAL:
-			term->type = combine_word(p1 + 4);
-			term->channels = p1[7];
-			term->chconfig = combine_word(p1 + 8);
-			term->name = p1[11];
+			if (state->mixer->protocol == UAC_VERSION_1) {
+				struct uac_input_terminal_descriptor *d = p1;
+				term->type = le16_to_cpu(d->wTerminalType);
+				term->channels = d->bNrChannels;
+				term->chconfig = le16_to_cpu(d->wChannelConfig);
+				term->name = d->iTerminal;
+			} else { /* UAC_VERSION_2 */
+				struct uac2_input_terminal_descriptor *d = p1;
+				term->type = le16_to_cpu(d->wTerminalType);
+				term->channels = d->bNrChannels;
+				term->chconfig = le32_to_cpu(d->bmChannelConfig);
+				term->name = d->iTerminal;
+			}
 			return 0;
-		case UAC_FEATURE_UNIT:
-			id = p1[4];
+		case UAC_FEATURE_UNIT: {
+			/* the header is the same for v1 and v2 */
+			struct uac_feature_unit_descriptor *d = p1;
+			id = d->bUnitID;
 			break; /* continue to parse */
-		case UAC_MIXER_UNIT:
-			term->type = p1[2] << 16; /* virtual type */
-			term->channels = p1[5 + p1[4]];
-			term->chconfig = combine_word(p1 + 6 + p1[4]);
-			term->name = p1[p1[0] - 1];
+		}
+		case UAC_MIXER_UNIT: {
+			struct uac_mixer_unit_descriptor *d = p1;
+			term->type = d->bDescriptorSubtype << 16; /* virtual type */
+			term->channels = uac_mixer_unit_bNrChannels(d);
+			term->chconfig = uac_mixer_unit_wChannelConfig(d, state->mixer->protocol);
+			term->name = uac_mixer_unit_iMixer(d);
 			return 0;
-		case UAC_SELECTOR_UNIT:
+		}
+		case UAC_SELECTOR_UNIT: {
+			struct uac_selector_unit_descriptor *d = p1;
 			/* call recursively to retrieve the channel info */
-			if (check_input_term(state, p1[5], term) < 0)
+			if (check_input_term(state, d->baSourceID[0], term) < 0)
 				return -ENODEV;
-			term->type = p1[2] << 16; /* virtual type */
+			term->type = d->bDescriptorSubtype << 16; /* virtual type */
 			term->id = id;
-			term->name = p1[9 + p1[0] - 1];
+			term->name = uac_selector_unit_iSelector(d);
 			return 0;
+		}
 		case UAC_PROCESSING_UNIT_V1:
-		case UAC_EXTENSION_UNIT_V1:
-			if (p1[6] == 1) {
-				id = p1[7];
+		case UAC_EXTENSION_UNIT_V1: {
+			struct uac_processing_unit_descriptor *d = p1;
+			if (d->bNrInPins) {
+				id = d->baSourceID[0];
 				break; /* continue to parse */
 			}
-			term->type = p1[2] << 16; /* virtual type */
-			term->channels = p1[7 + p1[6]];
-			term->chconfig = combine_word(p1 + 8 + p1[6]);
-			term->name = p1[12 + p1[6] + p1[11 + p1[6]]];
+			term->type = d->bDescriptorSubtype << 16; /* virtual type */
+			term->channels = uac_processing_unit_bNrChannels(d);
+			term->chconfig = uac_processing_unit_wChannelConfig(d, state->mixer->protocol);
+			term->name = uac_processing_unit_iProcessing(d, state->mixer->protocol);
 			return 0;
+		}
 		default:
 			return -ENODEV;
 		}
@@ -850,6 +936,15 @@ static struct snd_kcontrol_new usb_feature_unit_ctl = {
 	.put = mixer_ctl_feature_put,
 };
 
+/* the read-only variant */
+static struct snd_kcontrol_new usb_feature_unit_ctl_ro = {
+	.iface = SNDRV_CTL_ELEM_IFACE_MIXER,
+	.name = "", /* will be filled later manually */
+	.info = mixer_ctl_feature_info,
+	.get = mixer_ctl_feature_get,
+	.put = NULL,
+};
+
 
 /*
  * build a feature control
@@ -862,7 +957,8 @@ static size_t append_ctl_name(struct snd_kcontrol *kctl, const char *str)
 
 static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
 			      unsigned int ctl_mask, int control,
-			      struct usb_audio_term *iterm, int unitid)
+			      struct usb_audio_term *iterm, int unitid,
+			      int read_only)
 {
 	struct uac_feature_unit_descriptor *desc = raw_desc;
 	unsigned int len = 0;
@@ -906,7 +1002,11 @@ static void build_feature_ctl(struct mixer_build *state, void *raw_desc,
 	/* get min/max values */
 	get_min_max(cval, 0);
 
-	kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval);
+	if (read_only)
+		kctl = snd_ctl_new1(&usb_feature_unit_ctl_ro, cval);
+	else
+		kctl = snd_ctl_new1(&usb_feature_unit_ctl, cval);
+
 	if (! kctl) {
 		snd_printk(KERN_ERR "cannot malloc kcontrol\n");
 		kfree(cval);
@@ -1016,24 +1116,34 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 	struct usb_audio_term iterm;
 	unsigned int master_bits, first_ch_bits;
 	int err, csize;
-	struct uac_feature_unit_descriptor *ftr = _ftr;
+	struct uac_feature_unit_descriptor *hdr = _ftr;
+	__u8 *bmaControls;
+
+	if (state->mixer->protocol == UAC_VERSION_1) {
+		csize = hdr->bControlSize;
+		channels = (hdr->bLength - 7) / csize - 1;
+		bmaControls = hdr->bmaControls;
+	} else {
+		struct uac2_feature_unit_descriptor *ftr = _ftr;
+		csize = 4;
+		channels = (hdr->bLength - 6) / 4;
+		bmaControls = ftr->bmaControls;
+	}
 
-	if (ftr->bLength < 7 || ! (csize = ftr->bControlSize) || ftr->bLength < 7 + csize) {
+	if (hdr->bLength < 7 || !csize || hdr->bLength < 7 + csize) {
 		snd_printk(KERN_ERR "usbaudio: unit %u: invalid UAC_FEATURE_UNIT descriptor\n", unitid);
 		return -EINVAL;
 	}
 
 	/* parse the source unit */
-	if ((err = parse_audio_unit(state, ftr->bSourceID)) < 0)
+	if ((err = parse_audio_unit(state, hdr->bSourceID)) < 0)
 		return err;
 
 	/* determine the input source type and name */
-	if (check_input_term(state, ftr->bSourceID, &iterm) < 0)
+	if (check_input_term(state, hdr->bSourceID, &iterm) < 0)
 		return -EINVAL;
 
-	channels = (ftr->bLength - 7) / csize - 1;
-
-	master_bits = snd_usb_combine_bytes(ftr->bmaControls, csize);
+	master_bits = snd_usb_combine_bytes(bmaControls, csize);
 	/* master configuration quirks */
 	switch (state->chip->usb_id) {
 	case USB_ID(0x08bb, 0x2702):
@@ -1044,21 +1154,54 @@ static int parse_audio_feature_unit(struct mixer_build *state, int unitid, void
 		break;
 	}
 	if (channels > 0)
-		first_ch_bits = snd_usb_combine_bytes(ftr->bmaControls + csize, csize);
+		first_ch_bits = snd_usb_combine_bytes(bmaControls + csize, csize);
 	else
 		first_ch_bits = 0;
-	/* check all control types */
-	for (i = 0; i < 10; i++) {
-		unsigned int ch_bits = 0;
-		for (j = 0; j < channels; j++) {
-			unsigned int mask = snd_usb_combine_bytes(ftr->bmaControls + csize * (j+1), csize);
-			if (mask & (1 << i))
-				ch_bits |= (1 << j);
+
+	if (state->mixer->protocol == UAC_VERSION_1) {
+		/* check all control types */
+		for (i = 0; i < 10; i++) {
+			unsigned int ch_bits = 0;
+			for (j = 0; j < channels; j++) {
+				unsigned int mask = snd_usb_combine_bytes(bmaControls + csize * (j+1), csize);
+				if (mask & (1 << i))
+					ch_bits |= (1 << j);
+			}
+			/* audio class v1 controls are never read-only */
+			if (ch_bits & 1) /* the first channel must be set (for ease of programming) */
+				build_feature_ctl(state, _ftr, ch_bits, i, &iterm, unitid, 0);
+			if (master_bits & (1 << i))
+				build_feature_ctl(state, _ftr, 0, i, &iterm, unitid, 0);
+		}
+	} else { /* UAC_VERSION_2 */
+		for (i = 0; i < 30/2; i++) {
+			/* From the USB Audio spec v2.0:
+			   bmaControls() is a (ch+1)-element array of 4-byte bitmaps,
+			   each containing a set of bit pairs. If a Control is present,
+			   it must be Host readable. If a certain Control is not
+			   present then the bit pair must be set to 0b00.
+			   If a Control is present but read-only, the bit pair must be
+			   set to 0b01. If a Control is also Host programmable, the bit
+			   pair must be set to 0b11. The value 0b10 is not allowed. */
+			unsigned int ch_bits = 0;
+			unsigned int ch_read_only = 0;
+
+			for (j = 0; j < channels; j++) {
+				unsigned int mask = snd_usb_combine_bytes(bmaControls + csize * (j+1), csize);
+				if (mask & (1 << (i * 2))) {
+					ch_bits |= (1 << j);
+					if (~mask & (1 << ((i * 2) + 1)))
+						ch_read_only |= (1 << j);
+				}
+			}
+
+			/* FIXME: the whole unit is read-only if any of the channels is marked read-only */
+			if (ch_bits & 1) /* the first channel must be set (for ease of programming) */
+				build_feature_ctl(state, _ftr, ch_bits, i, &iterm, unitid, !!ch_read_only);
+			if (master_bits & (1 << i * 2))
+				build_feature_ctl(state, _ftr, 0, i, &iterm, unitid,
+						  ~master_bits & (1 << ((i * 2) + 1)));
 		}
-		if (ch_bits & 1) /* the first channel must be set (for ease of programming) */
-			build_feature_ctl(state, _ftr, ch_bits, i, &iterm, unitid);
-		if (master_bits & (1 << i))
-			build_feature_ctl(state, _ftr, 0, i, &iterm, unitid);
 	}
 
 	return 0;
@@ -1100,7 +1243,7 @@ static void build_mixer_unit_ctl(struct mixer_build *state,
 	cval->control = in_ch + 1; /* based on 1 */
 	cval->val_type = USB_MIXER_S16;
 	for (i = 0; i < num_outs; i++) {
-		if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc), in_ch, i, num_outs)) {
+		if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc, state->mixer->protocol), in_ch, i, num_outs)) {
 			cval->cmask |= (1 << i);
 			cval->channels++;
 		}
@@ -1164,7 +1307,7 @@ static int parse_audio_mixer_unit(struct mixer_build *state, int unitid, void *r
 			int och, ich_has_controls = 0;
 
 			for (och = 0; och < num_outs; ++och) {
-				if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc),
+				if (check_matrix_bitmap(uac_mixer_unit_bmControls(desc, state->mixer->protocol),
 							ich, och, num_outs)) {
 					ich_has_controls = 1;
 					break;
@@ -1343,7 +1486,8 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw
 		0, NULL, default_value_info
 	};
 
-	if (desc->bLength < 13 || desc->bLength < 13 + num_ins || desc->bLength < num_ins + uac_processing_unit_bControlSize(desc)) {
+	if (desc->bLength < 13 || desc->bLength < 13 + num_ins ||
+	    desc->bLength < num_ins + uac_processing_unit_bControlSize(desc, state->mixer->protocol)) {
 		snd_printk(KERN_ERR "invalid %s descriptor (id %d)\n", name, unitid);
 		return -EINVAL;
 	}
@@ -1361,7 +1505,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw
 		info = &default_info;
 
 	for (valinfo = info->values; valinfo->control; valinfo++) {
-		__u8 *controls = uac_processing_unit_bmControls(desc);
+		__u8 *controls = uac_processing_unit_bmControls(desc, state->mixer->protocol);
 
 		if (! (controls[valinfo->control / 8] & (1 << ((valinfo->control % 8) - 1))))
 			continue;
@@ -1381,7 +1525,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw
 
 		/* get min/max values */
 		if (type == USB_PROC_UPDOWN && cval->control == USB_PROC_UPDOWN_MODE_SEL) {
-			__u8 *control_spec = uac_processing_unit_specific(desc);
+			__u8 *control_spec = uac_processing_unit_specific(desc, state->mixer->protocol);
 			/* FIXME: hard-coded */
 			cval->min = 1;
 			cval->max = control_spec[0];
@@ -1414,7 +1558,7 @@ static int build_audio_procunit(struct mixer_build *state, int unitid, void *raw
 		else if (info->name)
 			strlcpy(kctl->id.name, info->name, sizeof(kctl->id.name));
 		else {
-			nameid = uac_processing_unit_iProcessing(desc);
+			nameid = uac_processing_unit_iProcessing(desc, state->mixer->protocol);
 			len = 0;
 			if (nameid)
 				len = snd_usb_copy_string_desc(state, nameid, kctl->id.name, sizeof(kctl->id.name));
@@ -1676,9 +1820,17 @@ static int parse_audio_unit(struct mixer_build *state, int unitid)
 	case UAC_FEATURE_UNIT:
 		return parse_audio_feature_unit(state, unitid, p1);
 	case UAC_PROCESSING_UNIT_V1:
-		return parse_audio_processing_unit(state, unitid, p1);
+	/*   UAC2_EFFECT_UNIT has the same value */
+		if (state->mixer->protocol == UAC_VERSION_1)
+			return parse_audio_processing_unit(state, unitid, p1);
+		else
+			return 0; /* FIXME - effect units not implemented yet */
 	case UAC_EXTENSION_UNIT_V1:
-		return parse_audio_extension_unit(state, unitid, p1);
+	/*   UAC2_PROCESSING_UNIT_V2 has the same value */
+		if (state->mixer->protocol == UAC_VERSION_1)
+			return parse_audio_extension_unit(state, unitid, p1);
+		else /* UAC_VERSION_2 */
+			return parse_audio_processing_unit(state, unitid, p1);
 	default:
 		snd_printk(KERN_ERR "usbaudio: unit %u: unexpected type 0x%02x\n", unitid, p1[2]);
 		return -EINVAL;
@@ -1711,11 +1863,11 @@ static int snd_usb_mixer_dev_free(struct snd_device *device)
  */
 static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 {
-	struct uac_output_terminal_descriptor_v1 *desc;
 	struct mixer_build state;
 	int err;
 	const struct usbmix_ctl_map *map;
 	struct usb_host_interface *hostif;
+	void *p;
 
 	hostif = &usb_ifnum_to_if(mixer->chip->dev, mixer->ctrlif)->altsetting[0];
 	memset(&state, 0, sizeof(state));
@@ -1734,18 +1886,35 @@ static int snd_usb_mixer_controls(struct usb_mixer_interface *mixer)
 		}
 	}
 
-	desc = NULL;
-	while ((desc = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, desc, UAC_OUTPUT_TERMINAL)) != NULL) {
-		if (desc->bLength < 9)
-			continue; /* invalid descriptor? */
-		set_bit(desc->bTerminalID, state.unitbitmap);  /* mark terminal ID as visited */
-		state.oterm.id = desc->bTerminalID;
-		state.oterm.type = le16_to_cpu(desc->wTerminalType);
-		state.oterm.name = desc->iTerminal;
-		err = parse_audio_unit(&state, desc->bSourceID);
-		if (err < 0)
-			return err;
+	p = NULL;
+	while ((p = snd_usb_find_csint_desc(hostif->extra, hostif->extralen, p, UAC_OUTPUT_TERMINAL)) != NULL) {
+		if (mixer->protocol == UAC_VERSION_1) {
+			struct uac_output_terminal_descriptor_v1 *desc = p;
+
+			if (desc->bLength < sizeof(*desc))
+				continue; /* invalid descriptor? */
+			set_bit(desc->bTerminalID, state.unitbitmap);  /* mark terminal ID as visited */
+			state.oterm.id = desc->bTerminalID;
+			state.oterm.type = le16_to_cpu(desc->wTerminalType);
+			state.oterm.name = desc->iTerminal;
+			err = parse_audio_unit(&state, desc->bSourceID);
+			if (err < 0)
+				return err;
+		} else { /* UAC_VERSION_2 */
+			struct uac2_output_terminal_descriptor *desc = p;
+
+			if (desc->bLength < sizeof(*desc))
+				continue; /* invalid descriptor? */
+			set_bit(desc->bTerminalID, state.unitbitmap);  /* mark terminal ID as visited */
+			state.oterm.id = desc->bTerminalID;
+			state.oterm.type = le16_to_cpu(desc->wTerminalType);
+			state.oterm.name = desc->iTerminal;
+			err = parse_audio_unit(&state, desc->bSourceID);
+			if (err < 0)
+				return err;
+		}
 	}
+
 	return 0;
 }
 
@@ -1868,7 +2037,7 @@ int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif,
 	struct usb_mixer_interface *mixer;
 	struct snd_info_entry *entry;
 	struct usb_host_interface *host_iface;
-	int err, protocol;
+	int err;
 
 	strcpy(chip->card->mixername, "USB Mixer");
 
@@ -1886,14 +2055,7 @@ int snd_usb_create_mixer(struct snd_usb_audio *chip, int ctrlif,
 	}
 
 	host_iface = &usb_ifnum_to_if(chip->dev, ctrlif)->altsetting[0];
-	protocol = host_iface->desc.bInterfaceProtocol;
-
-	/* FIXME! */
-	if (protocol != UAC_VERSION_1) {
-		snd_printk(KERN_WARNING "mixer interface protocol 0x%02x not yet supported\n",
-					protocol);
-		return 0;
-	}
+	mixer->protocol = host_iface->desc.bInterfaceProtocol;
 
 	if ((err = snd_usb_mixer_controls(mixer)) < 0 ||
 	    (err = snd_usb_mixer_status_create(mixer)) < 0)
diff --git a/sound/usb/mixer.h b/sound/usb/mixer.h
index 63101ae201cc..130123854a6c 100644
--- a/sound/usb/mixer.h
+++ b/sound/usb/mixer.h
@@ -10,6 +10,9 @@ struct usb_mixer_interface {
 	/* array[MAX_ID_ELEMS], indexed by unit id */
 	struct usb_mixer_elem_info **id_elems;
 
+	/* the usb audio specification version this interface complies with */
+	int protocol;
+
 	/* Sound Blaster remote control stuff */
 	const struct rc_config *rc_cfg;
 	u32 rc_code;
-- 
cgit v1.2.3


From 3e21ec224f0acbd2e9beeb3dea04edc1d36dbe1f Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Thu, 11 Mar 2010 06:32:44 -0500
Subject: kobj: add comment and multiple inclusion protection

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/kobj_map.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kobj_map.h b/include/linux/kobj_map.h
index 73717ed9ea79..18ca75ffcc5a 100644
--- a/include/linux/kobj_map.h
+++ b/include/linux/kobj_map.h
@@ -1,3 +1,10 @@
+/*
+ * kobj_map.h
+ */
+
+#ifndef _KOBJ_MAP_H_
+#define _KOBJ_MAP_H_
+
 #include <linux/mutex.h>
 
 typedef struct kobject *kobj_probe_t(dev_t, int *, void *);
@@ -8,3 +15,5 @@ int kobj_map(struct kobj_map *, dev_t, unsigned long, struct module *,
 void kobj_unmap(struct kobj_map *, dev_t, unsigned long);
 struct kobject *kobj_lookup(struct kobj_map *, dev_t, int *);
 struct kobj_map *kobj_map_init(kobj_probe_t *, struct mutex *);
+
+#endif /* _KOBJ_MAP_H_ */
-- 
cgit v1.2.3


From 932fb06b0898f5883200f1da2e00075f0d70ba9c Mon Sep 17 00:00:00 2001
From: "Robert P. J. Day" <rpjday@crashcourse.ca>
Date: Sat, 13 Mar 2010 07:58:13 -0500
Subject: kobj: kref.h incorrectly describes itself as kref.c.

Signed-off-by: Robert P. J. Day <rpjday@crashcourse.ca>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/kref.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index b0cb0ebad9e6..baf4b9e4b194 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -1,5 +1,5 @@
 /*
- * kref.c - library routines for handling generic reference counted objects
+ * kref.h - library routines for handling generic reference counted objects
  *
  * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com>
  * Copyright (C) 2004 IBM Corp.
-- 
cgit v1.2.3


From 0a9627f2649a02bea165cfd529d7bcb625c2fcad Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Tue, 16 Mar 2010 08:03:29 +0000
Subject: rps: Receive Packet Steering

This patch implements software receive side packet steering (RPS).  RPS
distributes the load of received packet processing across multiple CPUs.

Problem statement: Protocol processing done in the NAPI context for received
packets is serialized per device queue and becomes a bottleneck under high
packet load.  This substantially limits pps that can be achieved on a single
queue NIC and provides no scaling with multiple cores.

This solution queues packets early on in the receive path on the backlog queues
of other CPUs.   This allows protocol processing (e.g. IP and TCP) to be
performed on packets in parallel.   For each device (or each receive queue in
a multi-queue device) a mask of CPUs is set to indicate the CPUs that can
process packets. A CPU is selected on a per packet basis by hashing contents
of the packet header (e.g. the TCP or UDP 4-tuple) and using the result to index
into the CPU mask.  The IPI mechanism is used to raise networking receive
softirqs between CPUs.  This effectively emulates in software what a multi-queue
NIC can provide, but is generic requiring no device support.

Many devices now provide a hash over the 4-tuple on a per packet basis
(e.g. the Toeplitz hash).  This patch allows drivers to set the HW reported hash
in an skb field, and that value in turn is used to index into the RPS maps.
Using the HW generated hash can avoid cache misses on the packet when
steering it to a remote CPU.

The CPU mask is set on a per device and per queue basis in the sysfs variable
/sys/class/net/<device>/queues/rx-<n>/rps_cpus.  This is a set of canonical
bit maps for receive queues in the device (numbered by <n>).  If a device
does not support multi-queue, a single variable is used for the device (rx-0).

Generally, we have found this technique increases pps capabilities of a single
queue device with good CPU utilization.  Optimal settings for the CPU mask
seem to depend on architectures and cache hierarchy.  Below are some results
running 500 instances of netperf TCP_RR test with 1 byte req. and resp.
Results show cumulative transaction rate and system CPU utilization.

e1000e on 8 core Intel
   Without RPS: 108K tps at 33% CPU
   With RPS:    311K tps at 64% CPU

forcedeth on 16 core AMD
   Without RPS: 156K tps at 15% CPU
   With RPS:    404K tps at 49% CPU

bnx2x on 16 core AMD
   Without RPS: 567K tps at 61% CPU (4 HW RX queues)
   Without RPS: 738K tps at 96% CPU (8 HW RX queues)
   With RPS:    854K tps at 76% CPU (4 HW RX queues)

Caveats:
- The benefits of this patch are dependent on architecture and cache hierarchy.
Tuning the masks to get best performance is probably necessary.
- This patch adds overhead in the path for processing a single packet.  In
a lightly loaded server this overhead may eliminate the advantages of
increased parallelism, and possibly cause some relative performance degradation.
We have found that masks that are cache aware (share same caches with
the interrupting CPU) mitigate much of this.
- The RPS masks can be changed dynamically, however whenever the mask is changed
this introduces the possibility of generating out of order packets.  It's
probably best not to change the masks too frequently.

Signed-off-by: Tom Herbert <therbert@google.com>

 include/linux/netdevice.h |   32 ++++-
 include/linux/skbuff.h    |    3 +
 net/core/dev.c            |  335 +++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c      |  225 ++++++++++++++++++++++++++++++-
 net/core/skbuff.c         |    2 +
 5 files changed, 538 insertions(+), 59 deletions(-)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  32 ++++-
 include/linux/skbuff.h    |   3 +
 net/core/dev.c            | 335 ++++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c      | 225 ++++++++++++++++++++++++++++++-
 net/core/skbuff.c         |   2 +
 5 files changed, 538 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c79a88be7c33..de1a52bcb9e0 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -223,6 +223,7 @@ struct netif_rx_stats {
 	unsigned dropped;
 	unsigned time_squeeze;
 	unsigned cpu_collision;
+	unsigned received_rps;
 };
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
@@ -530,6 +531,24 @@ struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+/*
+ * This structure holds an RPS map which can be of variable length.  The
+ * map is an array of CPUs.
+ */
+struct rps_map {
+	unsigned int len;
+	struct rcu_head rcu;
+	u16 cpus[0];
+};
+#define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
+
+/* This structure contains an instance of an RX queue. */
+struct netdev_rx_queue {
+	struct rps_map *rps_map;
+	struct kobject kobj;
+	struct netdev_rx_queue *first;
+	atomic_t count;
+} ____cacheline_aligned_in_smp;
 
 /*
  * This structure defines the management hooks for network devices.
@@ -878,6 +897,13 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+	struct kset		*queues_kset;
+
+	struct netdev_rx_queue	*_rx;
+
+	/* Number of RX queues allocated at alloc_netdev_mq() time  */
+	unsigned int		num_rx_queues;
+
 	struct netdev_queue	rx_queue;
 
 	struct netdev_queue	*_tx ____cacheline_aligned_in_smp;
@@ -1311,14 +1337,16 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
-	struct sk_buff_head	input_pkt_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
+	/* Elements below can be accessed between CPUs for RPS */
+	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
 
-DECLARE_PER_CPU(struct softnet_data,softnet_data);
+DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 #define HAVE_NETIF_QUEUE
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 03f816a9b659..def10b064f29 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -300,6 +300,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@nfct_reasm: netfilter conntrack re-assembly pointer
  *	@nf_bridge: Saved data about a bridged frame - see br_netfilter.c
  *	@skb_iif: ifindex of device we arrived on
+ *	@rxhash: the packet hash computed on receive
  *	@queue_mapping: Queue mapping for multiqueue devices
  *	@tc_index: Traffic control index
  *	@tc_verd: traffic control verdict
@@ -375,6 +376,8 @@ struct sk_buff {
 #endif
 #endif
 
+	__u32			rxhash;
+
 	kmemcheck_bitfield_begin(flags2);
 	__u16			queue_mapping:16;
 #ifdef CONFIG_IPV6_NDISC_NODETYPE
diff --git a/net/core/dev.c b/net/core/dev.c
index bcc490cc9452..17b168671501 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1931,7 +1931,7 @@ out_kfree_skb:
 	return rc;
 }
 
-static u32 skb_tx_hashrnd;
+static u32 hashrnd __read_mostly;
 
 u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 {
@@ -1949,7 +1949,7 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
 	else
 		hash = skb->protocol;
 
-	hash = jhash_1word(hash, skb_tx_hashrnd);
+	hash = jhash_1word(hash, hashrnd);
 
 	return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
 }
@@ -1959,10 +1959,9 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
 	if (unlikely(queue_index >= dev->real_num_tx_queues)) {
 		if (net_ratelimit()) {
-			WARN(1, "%s selects TX queue %d, but "
+			netdev_warn(dev, "selects TX queue %d, but "
 			     "real number of TX queues is %d\n",
-			     dev->name, queue_index,
-			     dev->real_num_tx_queues);
+			     queue_index, dev->real_num_tx_queues);
 		}
 		return 0;
 	}
@@ -2175,6 +2174,172 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
+/*
+ * get_rps_cpu is called from netif_rx and netif_receive_skb.  It returns the
+ * target CPU from the receiving queue's RPS map for a given skb, or -1 if none.
+ */
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+{
+	struct ipv6hdr *ip6;
+	struct iphdr *ip;
+	struct netdev_rx_queue *rxqueue;
+	struct rps_map *map;
+	int cpu = -1;
+	u8 ip_proto;
+	u32 addr1, addr2, ports, ihl;
+
+	rcu_read_lock();
+
+	if (skb_rx_queue_recorded(skb)) {
+		u16 index = skb_get_rx_queue(skb);
+		if (unlikely(index >= dev->num_rx_queues)) {
+			if (net_ratelimit()) {
+				netdev_warn(dev, "received packet on queue "
+				    "%u, but number of RX queues is %u\n",
+				     index, dev->num_rx_queues);
+			}
+			goto done;
+		}
+		rxqueue = dev->_rx + index;
+	} else
+		rxqueue = dev->_rx;
+
+	if (!rxqueue->rps_map)
+		goto done;
+
+	if (skb->rxhash)
+		goto got_hash; /* Skip hash computation on packet header */
+
+	switch (skb->protocol) {
+	case __constant_htons(ETH_P_IP):
+		if (!pskb_may_pull(skb, sizeof(*ip)))
+			goto done;
+
+		ip = (struct iphdr *) skb->data;
+		ip_proto = ip->protocol;
+		addr1 = ip->saddr;
+		addr2 = ip->daddr;
+		ihl = ip->ihl;
+		break;
+	case __constant_htons(ETH_P_IPV6):
+		if (!pskb_may_pull(skb, sizeof(*ip6)))
+			goto done;
+
+		ip6 = (struct ipv6hdr *) skb->data;
+		ip_proto = ip6->nexthdr;
+		addr1 = ip6->saddr.s6_addr32[3];
+		addr2 = ip6->daddr.s6_addr32[3];
+		ihl = (40 >> 2);
+		break;
+	default:
+		goto done;
+	}
+	ports = 0;
+	switch (ip_proto) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+	case IPPROTO_DCCP:
+	case IPPROTO_ESP:
+	case IPPROTO_AH:
+	case IPPROTO_SCTP:
+	case IPPROTO_UDPLITE:
+		if (pskb_may_pull(skb, (ihl * 4) + 4))
+			ports = *((u32 *) (skb->data + (ihl * 4)));
+		break;
+
+	default:
+		break;
+	}
+
+	skb->rxhash = jhash_3words(addr1, addr2, ports, hashrnd);
+	if (!skb->rxhash)
+		skb->rxhash = 1;
+
+got_hash:
+	map = rcu_dereference(rxqueue->rps_map);
+	if (map) {
+		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+
+		if (cpu_online(tcpu)) {
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
+done:
+	rcu_read_unlock();
+	return cpu;
+}
+
+/*
+ * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
+ * to be sent to kick remote softirq processing.  There are two masks since
+ * the sending of IPIs must be done with interrupts enabled.  The select field
+ * indicates the current mask that enqueue_to_backlog uses to schedule IPIs.
+ * select is flipped before net_rps_action is called while still under lock,
+ * net_rps_action then uses the non-selected mask to send the IPIs and clears
+ * it without conflicting with enqueue_to_backlog operation.
+ */
+struct rps_remote_softirq_cpus {
+	cpumask_t mask[2];
+	int select;
+};
+static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
+
+/* Called from hardirq (IPI) context */
+static void trigger_softirq(void *data)
+{
+	struct softnet_data *queue = data;
+	__napi_schedule(&queue->backlog);
+	__get_cpu_var(netdev_rx_stat).received_rps++;
+}
+
+/*
+ * enqueue_to_backlog is called to queue an skb to a per CPU backlog
+ * queue (may be a remote CPU queue).
+ */
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+{
+	struct softnet_data *queue;
+	unsigned long flags;
+
+	queue = &per_cpu(softnet_data, cpu);
+
+	local_irq_save(flags);
+	__get_cpu_var(netdev_rx_stat).total++;
+
+	spin_lock(&queue->input_pkt_queue.lock);
+	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (queue->input_pkt_queue.qlen) {
+enqueue:
+			__skb_queue_tail(&queue->input_pkt_queue, skb);
+			spin_unlock_irqrestore(&queue->input_pkt_queue.lock,
+			    flags);
+			return NET_RX_SUCCESS;
+		}
+
+		/* Schedule NAPI for backlog device */
+		if (napi_schedule_prep(&queue->backlog)) {
+			if (cpu != smp_processor_id()) {
+				struct rps_remote_softirq_cpus *rcpus =
+				    &__get_cpu_var(rps_remote_softirq_cpus);
+
+				cpu_set(cpu, rcpus->mask[rcpus->select]);
+				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+			} else
+				__napi_schedule(&queue->backlog);
+		}
+		goto enqueue;
+	}
+
+	spin_unlock(&queue->input_pkt_queue.lock);
+
+	__get_cpu_var(netdev_rx_stat).dropped++;
+	local_irq_restore(flags);
+
+	kfree_skb(skb);
+	return NET_RX_DROP;
+}
 
 /**
  *	netif_rx	-	post buffer to the network code
@@ -2193,8 +2358,7 @@ DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 int netif_rx(struct sk_buff *skb)
 {
-	struct softnet_data *queue;
-	unsigned long flags;
+	int cpu;
 
 	/* if netpoll wants it, pretend we never saw it */
 	if (netpoll_rx(skb))
@@ -2203,31 +2367,11 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-	/*
-	 * The code is rearranged so that the path is the most
-	 * short when CPU is congested, but is still operating.
-	 */
-	local_irq_save(flags);
-	queue = &__get_cpu_var(softnet_data);
-
-	__get_cpu_var(netdev_rx_stat).total++;
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
-enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
-			local_irq_restore(flags);
-			return NET_RX_SUCCESS;
-		}
-
-		napi_schedule(&queue->backlog);
-		goto enqueue;
-	}
-
-	__get_cpu_var(netdev_rx_stat).dropped++;
-	local_irq_restore(flags);
+	cpu = get_rps_cpu(skb->dev, skb);
+	if (cpu < 0)
+		cpu = smp_processor_id();
 
-	kfree_skb(skb);
-	return NET_RX_DROP;
+	return enqueue_to_backlog(skb, cpu);
 }
 EXPORT_SYMBOL(netif_rx);
 
@@ -2464,22 +2608,7 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
-/**
- *	netif_receive_skb - process receive buffer from network
- *	@skb: buffer to process
- *
- *	netif_receive_skb() is the main receive data processing function.
- *	It always succeeds. The buffer may be dropped during processing
- *	for congestion control or by the protocol layers.
- *
- *	This function may only be called from softirq context and interrupts
- *	should be enabled.
- *
- *	Return values (usually ignored):
- *	NET_RX_SUCCESS: no congestion
- *	NET_RX_DROP: packet was dropped
- */
-int netif_receive_skb(struct sk_buff *skb)
+int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
 	struct net_device *orig_dev;
@@ -2588,6 +2717,33 @@ out:
 	rcu_read_unlock();
 	return ret;
 }
+
+/**
+ *	netif_receive_skb - process receive buffer from network
+ *	@skb: buffer to process
+ *
+ *	netif_receive_skb() is the main receive data processing function.
+ *	It always succeeds. The buffer may be dropped during processing
+ *	for congestion control or by the protocol layers.
+ *
+ *	This function may only be called from softirq context and interrupts
+ *	should be enabled.
+ *
+ *	Return values (usually ignored):
+ *	NET_RX_SUCCESS: no congestion
+ *	NET_RX_DROP: packet was dropped
+ */
+int netif_receive_skb(struct sk_buff *skb)
+{
+	int cpu;
+
+	cpu = get_rps_cpu(skb->dev, skb);
+
+	if (cpu < 0)
+		return __netif_receive_skb(skb);
+	else
+		return enqueue_to_backlog(skb, cpu);
+}
 EXPORT_SYMBOL(netif_receive_skb);
 
 /* Network device is going away, flush any packets still pending  */
@@ -2914,16 +3070,16 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	do {
 		struct sk_buff *skb;
 
-		local_irq_disable();
+		spin_lock_irq(&queue->input_pkt_queue.lock);
 		skb = __skb_dequeue(&queue->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			local_irq_enable();
+			spin_unlock_irq(&queue->input_pkt_queue.lock);
 			break;
 		}
-		local_irq_enable();
+		spin_unlock_irq(&queue->input_pkt_queue.lock);
 
-		netif_receive_skb(skb);
+		__netif_receive_skb(skb);
 	} while (++work < quota && jiffies == start_time);
 
 	return work;
@@ -3012,6 +3168,22 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+/*
+ * net_rps_action sends any pending IPIs for rps.  This is only called from
+ * softirq and interrupts must be enabled.
+ */
+static void net_rps_action(cpumask_t *mask)
+{
+	int cpu;
+
+	/* Send pending IPIs to kick RPS processing on remote cpus. */
+	for_each_cpu_mask_nr(cpu, *mask) {
+		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
+		if (cpu_online(cpu))
+			__smp_call_function_single(cpu, &queue->csd, 0);
+	}
+	cpus_clear(*mask);
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3019,6 +3191,8 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
+	int select;
+	struct rps_remote_softirq_cpus *rcpus;
 
 	local_irq_disable();
 
@@ -3081,8 +3255,14 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
+	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
+	select = rcpus->select;
+	rcpus->select ^= 1;
+
 	local_irq_enable();
 
+	net_rps_action(&rcpus->mask[select]);
+
 #ifdef CONFIG_NET_DMA
 	/*
 	 * There may not be any more sk_buffs coming right now, so push
@@ -3327,10 +3507,10 @@ static int softnet_seq_show(struct seq_file *seq, void *v)
 {
 	struct netif_rx_stats *s = v;
 
-	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
 		   s->total, s->dropped, s->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision);
+		   s->cpu_collision, s->received_rps);
 	return 0;
 }
 
@@ -5067,6 +5247,23 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+	if (!dev->num_rx_queues) {
+		/*
+		 * Allocate a single RX queue if driver never called
+		 * alloc_netdev_mq
+		 */
+
+		dev->_rx = kzalloc(sizeof(struct netdev_rx_queue), GFP_KERNEL);
+		if (!dev->_rx) {
+			ret = -ENOMEM;
+			goto out;
+		}
+
+		dev->_rx->first = dev->_rx;
+		atomic_set(&dev->_rx->count, 1);
+		dev->num_rx_queues = 1;
+	}
+
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5424,9 +5621,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
+	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
+	int i;
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -5452,11 +5651,27 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
+	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+	if (!rx) {
+		printk(KERN_ERR "alloc_netdev: Unable to allocate "
+		       "rx queues.\n");
+		goto free_tx;
+	}
+
+	atomic_set(&rx->count, queue_count);
+
+	/*
+	 * Set a pointer to first element in the array which holds the
+	 * reference count.
+	 */
+	for (i = 0; i < queue_count; i++)
+		rx[i].first = rx;
+
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
 
 	if (dev_addr_init(dev))
-		goto free_tx;
+		goto free_rx;
 
 	dev_unicast_init(dev);
 
@@ -5466,6 +5681,9 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
+	dev->_rx = rx;
+	dev->num_rx_queues = queue_count;
+
 	dev->gso_max_size = GSO_MAX_SIZE;
 
 	netdev_init_queues(dev);
@@ -5480,9 +5698,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	strcpy(dev->name, name);
 	return dev;
 
+free_rx:
+	kfree(rx);
 free_tx:
 	kfree(tx);
-
 free_p:
 	kfree(p);
 	return NULL;
@@ -5985,6 +6204,10 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
+		queue->csd.func = trigger_softirq;
+		queue->csd.info = queue;
+		queue->csd.flags = 0;
+
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
 		queue->backlog.gro_list = NULL;
@@ -6023,7 +6246,7 @@ subsys_initcall(net_dev_init);
 
 static int __init initialize_hashrnd(void)
 {
-	get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+	get_random_bytes(&hashrnd, sizeof(hashrnd));
 	return 0;
 }
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 099c753c4213..7a46343d5ae3 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -466,6 +466,216 @@ static struct attribute_group wireless_group = {
 };
 #endif
 
+/*
+ * RX queue sysfs structures and functions.
+ */
+struct rx_queue_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, char *buf);
+	ssize_t (*store)(struct netdev_rx_queue *queue,
+	    struct rx_queue_attribute *attr, const char *buf, size_t len);
+};
+#define to_rx_queue_attr(_attr) container_of(_attr,		\
+    struct rx_queue_attribute, attr)
+
+#define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj)
+
+static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr,
+				  char *buf)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->show)
+		return -EIO;
+
+	return attribute->show(queue, attribute, buf);
+}
+
+static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr,
+				   const char *buf, size_t count)
+{
+	struct rx_queue_attribute *attribute = to_rx_queue_attr(attr);
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+
+	if (!attribute->store)
+		return -EIO;
+
+	return attribute->store(queue, attribute, buf, count);
+}
+
+static struct sysfs_ops rx_queue_sysfs_ops = {
+	.show = rx_queue_attr_show,
+	.store = rx_queue_attr_store,
+};
+
+static ssize_t show_rps_map(struct netdev_rx_queue *queue,
+			    struct rx_queue_attribute *attribute, char *buf)
+{
+	struct rps_map *map;
+	cpumask_var_t mask;
+	size_t len = 0;
+	int i;
+
+	if (!zalloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	rcu_read_lock();
+	map = rcu_dereference(queue->rps_map);
+	if (map)
+		for (i = 0; i < map->len; i++)
+			cpumask_set_cpu(map->cpus[i], mask);
+
+	len += cpumask_scnprintf(buf + len, PAGE_SIZE, mask);
+	if (PAGE_SIZE - len < 3) {
+		rcu_read_unlock();
+		free_cpumask_var(mask);
+		return -EINVAL;
+	}
+	rcu_read_unlock();
+
+	free_cpumask_var(mask);
+	len += sprintf(buf + len, "\n");
+	return len;
+}
+
+static void rps_map_release(struct rcu_head *rcu)
+{
+	struct rps_map *map = container_of(rcu, struct rps_map, rcu);
+
+	kfree(map);
+}
+
+ssize_t store_rps_map(struct netdev_rx_queue *queue,
+		      struct rx_queue_attribute *attribute,
+		      const char *buf, size_t len)
+{
+	struct rps_map *old_map, *map;
+	cpumask_var_t mask;
+	int err, cpu, i;
+	static DEFINE_SPINLOCK(rps_map_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+	if (err) {
+		free_cpumask_var(mask);
+		return err;
+	}
+
+	map = kzalloc(max_t(unsigned,
+	    RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
+	    GFP_KERNEL);
+	if (!map) {
+		free_cpumask_var(mask);
+		return -ENOMEM;
+	}
+
+	i = 0;
+	for_each_cpu_and(cpu, mask, cpu_online_mask)
+		map->cpus[i++] = cpu;
+
+	if (i)
+		map->len = i;
+	else {
+		kfree(map);
+		map = NULL;
+	}
+
+	spin_lock(&rps_map_lock);
+	old_map = queue->rps_map;
+	rcu_assign_pointer(queue->rps_map, map);
+	spin_unlock(&rps_map_lock);
+
+	if (old_map)
+		call_rcu(&old_map->rcu, rps_map_release);
+
+	free_cpumask_var(mask);
+	return len;
+}
+
+static struct rx_queue_attribute rps_cpus_attribute =
+	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
+
+static struct attribute *rx_queue_default_attrs[] = {
+	&rps_cpus_attribute.attr,
+	NULL
+};
+
+static void rx_queue_release(struct kobject *kobj)
+{
+	struct netdev_rx_queue *queue = to_rx_queue(kobj);
+	struct rps_map *map = queue->rps_map;
+	struct netdev_rx_queue *first = queue->first;
+
+	if (map)
+		call_rcu(&map->rcu, rps_map_release);
+
+	if (atomic_dec_and_test(&first->count))
+		kfree(first);
+}
+
+static struct kobj_type rx_queue_ktype = {
+	.sysfs_ops = &rx_queue_sysfs_ops,
+	.release = rx_queue_release,
+	.default_attrs = rx_queue_default_attrs,
+};
+
+static int rx_queue_add_kobject(struct net_device *net, int index)
+{
+	struct netdev_rx_queue *queue = net->_rx + index;
+	struct kobject *kobj = &queue->kobj;
+	int error = 0;
+
+	kobj->kset = net->queues_kset;
+	error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL,
+	    "rx-%u", index);
+	if (error) {
+		kobject_put(kobj);
+		return error;
+	}
+
+	kobject_uevent(kobj, KOBJ_ADD);
+
+	return error;
+}
+
+static int rx_queue_register_kobjects(struct net_device *net)
+{
+	int i;
+	int error = 0;
+
+	net->queues_kset = kset_create_and_add("queues",
+	    NULL, &net->dev.kobj);
+	if (!net->queues_kset)
+		return -ENOMEM;
+	for (i = 0; i < net->num_rx_queues; i++) {
+		error = rx_queue_add_kobject(net, i);
+		if (error)
+			break;
+	}
+
+	if (error)
+		while (--i >= 0)
+			kobject_put(&net->_rx[i].kobj);
+
+	return error;
+}
+
+static void rx_queue_remove_kobjects(struct net_device *net)
+{
+	int i;
+
+	for (i = 0; i < net->num_rx_queues; i++)
+		kobject_put(&net->_rx[i].kobj);
+	kset_unregister(net->queues_kset);
+}
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -529,6 +739,8 @@ void netdev_unregister_kobject(struct net_device * net)
 	if (!net_eq(dev_net(net), &init_net))
 		return;
 
+	rx_queue_remove_kobjects(net);
+
 	device_del(dev);
 }
 
@@ -537,6 +749,7 @@ int netdev_register_kobject(struct net_device *net)
 {
 	struct device *dev = &(net->dev);
 	const struct attribute_group **groups = net->sysfs_groups;
+	int error = 0;
 
 	dev->class = &net_class;
 	dev->platform_data = net;
@@ -563,7 +776,17 @@ int netdev_register_kobject(struct net_device *net)
 	if (!net_eq(dev_net(net), &init_net))
 		return 0;
 
-	return device_add(dev);
+	error = device_add(dev);
+	if (error)
+		return error;
+
+	error = rx_queue_register_kobjects(net);
+	if (error) {
+		device_del(dev);
+		return error;
+	}
+
+	return error;
 }
 
 int netdev_class_create_file(struct class_attribute *class_attr)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 93c4e060c91e..bdea0efdf8cb 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -534,6 +534,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
 	skb_dst_set(new, dst_clone(skb_dst(old)));
+	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
 #endif
@@ -581,6 +582,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb)
 	C(len);
 	C(data_len);
 	C(mac_len);
+	C(rxhash);
 	n->hdr_len = skb->nohdr ? skb_headroom(skb) : skb->hdr_len;
 	n->cloned = 1;
 	n->nohdr = 0;
-- 
cgit v1.2.3


From 10708f37ae729baba9b67bd134c3720709d4ae62 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 11 Mar 2010 09:57:29 +0000
Subject: net: core: add IFLA_STATS64 support

`ip -s link` shows interface counters truncated to 32 bits. This is
because interface statistics are transported to userspace only as
32-bit quantities. This commit adds a new IFLA_STATS64 attribute that
exports them as full 64-bit values.

References: http://lkml.indiana.edu/hypermail/linux/kernel/0307.3/0215.html
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h | 33 +++++++++++++++++++++++++++++++++
 net/core/rtnetlink.c    | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd7653..cfd420ba72df 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -37,6 +37,38 @@ struct rtnl_link_stats {
 	__u32	tx_compressed;
 };
 
+struct rtnl_link_stats64 {
+	__u64	rx_packets;		/* total packets received	*/
+	__u64	tx_packets;		/* total packets transmitted	*/
+	__u64	rx_bytes;		/* total bytes received 	*/
+	__u64	tx_bytes;		/* total bytes transmitted	*/
+	__u64	rx_errors;		/* bad packets received		*/
+	__u64	tx_errors;		/* packet transmit problems	*/
+	__u64	rx_dropped;		/* no space in linux buffers	*/
+	__u64	tx_dropped;		/* no space available in linux	*/
+	__u64	multicast;		/* multicast packets received	*/
+	__u64	collisions;
+
+	/* detailed rx_errors: */
+	__u64	rx_length_errors;
+	__u64	rx_over_errors;		/* receiver ring buff overflow	*/
+	__u64	rx_crc_errors;		/* recved pkt with crc error	*/
+	__u64	rx_frame_errors;	/* recv'd frame alignment error */
+	__u64	rx_fifo_errors;		/* recv'r fifo overrun		*/
+	__u64	rx_missed_errors;	/* receiver missed packet	*/
+
+	/* detailed tx_errors */
+	__u64	tx_aborted_errors;
+	__u64	tx_carrier_errors;
+	__u64	tx_fifo_errors;
+	__u64	tx_heartbeat_errors;
+	__u64	tx_window_errors;
+
+	/* for cslip etc */
+	__u64	rx_compressed;
+	__u64	tx_compressed;
+};
+
 /* The struct should be in sync with struct ifmap */
 struct rtnl_link_ifmap {
 	__u64	mem_start;
@@ -83,6 +115,7 @@ enum {
 	IFLA_VF_VLAN,
 	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
 	IFLA_VFINFO,
+	IFLA_STATS64,
 	__IFLA_MAX
 };
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 4568120d8533..e1121f0bca6a 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -600,7 +600,39 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 
 	a->rx_compressed = b->rx_compressed;
 	a->tx_compressed = b->tx_compressed;
-};
+}
+
+static void copy_rtnl_link_stats64(struct rtnl_link_stats64 *a,
+				   const struct net_device_stats *b)
+{
+	a->rx_packets = b->rx_packets;
+	a->tx_packets = b->tx_packets;
+	a->rx_bytes = b->rx_bytes;
+	a->tx_bytes = b->tx_bytes;
+	a->rx_errors = b->rx_errors;
+	a->tx_errors = b->tx_errors;
+	a->rx_dropped = b->rx_dropped;
+	a->tx_dropped = b->tx_dropped;
+
+	a->multicast = b->multicast;
+	a->collisions = b->collisions;
+
+	a->rx_length_errors = b->rx_length_errors;
+	a->rx_over_errors = b->rx_over_errors;
+	a->rx_crc_errors = b->rx_crc_errors;
+	a->rx_frame_errors = b->rx_frame_errors;
+	a->rx_fifo_errors = b->rx_fifo_errors;
+	a->rx_missed_errors = b->rx_missed_errors;
+
+	a->tx_aborted_errors = b->tx_aborted_errors;
+	a->tx_carrier_errors = b->tx_carrier_errors;
+	a->tx_fifo_errors = b->tx_fifo_errors;
+	a->tx_heartbeat_errors = b->tx_heartbeat_errors;
+	a->tx_window_errors = b->tx_window_errors;
+
+	a->rx_compressed = b->rx_compressed;
+	a->tx_compressed = b->tx_compressed;
+}
 
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
@@ -698,6 +730,14 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 	stats = dev_get_stats(dev);
 	copy_rtnl_link_stats(nla_data(attr), stats);
 
+	attr = nla_reserve(skb, IFLA_STATS64,
+			sizeof(struct rtnl_link_stats64));
+	if (attr == NULL)
+		goto nla_put_failure;
+
+	stats = dev_get_stats(dev);
+	copy_rtnl_link_stats64(nla_data(attr), stats);
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
 		struct ifla_vf_info ivi;
-- 
cgit v1.2.3


From abf35df21513c51d7761c41fa6d3b819cdf4103e Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Date: Tue, 9 Mar 2010 09:17:42 +0000
Subject: NET: Support clause 45 MDIO commands at the MDIO bus level

IEEE 802.3ae clause 45 specifies a somewhat modified MDIO protocol
for use by 10GIGE phys. The main change is a 21 bit address split into
a 5 bit device ID and a 16 bit register offset. The definition is designed
so that normal and extended devices can run on the same MDIO bus.

Extend mdio-bitbang to do the new protocol. At the MDIO bus level the
protocol is requested by or'ing MII_ADDR_C45 into the register offset.

Make phy_read/phy_write/etc pass a full 32 bit register offset.

This does not attempt to make the phy layer support C45 style PHYs, just
to provide the MDIO bus support.

Tested against a Broadcom 10GE phy with ID 0x206034, and several
Broadcom 10/100/1000 Phys in normal mode.

Signed-off-by: Jason Gunthorpe <jgunthorpe@obsidianresearch.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bitbang.c | 60 +++++++++++++++++++++++++++++++++++-------
 drivers/net/phy/mdio_bus.c     |  4 +--
 include/linux/phy.h            | 12 ++++++---
 3 files changed, 61 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/phy/mdio-bitbang.c b/drivers/net/phy/mdio-bitbang.c
index 2576055b350b..0ff06617a4ab 100644
--- a/drivers/net/phy/mdio-bitbang.c
+++ b/drivers/net/phy/mdio-bitbang.c
@@ -23,8 +23,13 @@
 #include <linux/types.h>
 #include <linux/delay.h>
 
-#define MDIO_READ 1
-#define MDIO_WRITE 0
+#define MDIO_READ 2
+#define MDIO_WRITE 1
+
+#define MDIO_C45 (1<<15)
+#define MDIO_C45_ADDR (MDIO_C45 | 0)
+#define MDIO_C45_READ (MDIO_C45 | 3)
+#define MDIO_C45_WRITE (MDIO_C45 | 1)
 
 #define MDIO_SETUP_TIME 10
 #define MDIO_HOLD_TIME 10
@@ -90,7 +95,7 @@ static u16 mdiobb_get_num(struct mdiobb_ctrl *ctrl, int bits)
 /* Utility to send the preamble, address, and
  * register (common to read and write).
  */
-static void mdiobb_cmd(struct mdiobb_ctrl *ctrl, int read, u8 phy, u8 reg)
+static void mdiobb_cmd(struct mdiobb_ctrl *ctrl, int op, u8 phy, u8 reg)
 {
 	const struct mdiobb_ops *ops = ctrl->ops;
 	int i;
@@ -109,23 +114,56 @@ static void mdiobb_cmd(struct mdiobb_ctrl *ctrl, int read, u8 phy, u8 reg)
 	for (i = 0; i < 32; i++)
 		mdiobb_send_bit(ctrl, 1);
 
-	/* send the start bit (01) and the read opcode (10) or write (10) */
+	/* send the start bit (01) and the read opcode (10) or write (01).
+	   Clause 45 operation uses 00 for the start and 11, 01 for
+	   read/write */
 	mdiobb_send_bit(ctrl, 0);
-	mdiobb_send_bit(ctrl, 1);
-	mdiobb_send_bit(ctrl, read);
-	mdiobb_send_bit(ctrl, !read);
+	if (op & MDIO_C45)
+		mdiobb_send_bit(ctrl, 0);
+	else
+		mdiobb_send_bit(ctrl, 1);
+	mdiobb_send_bit(ctrl, (op >> 1) & 1);
+	mdiobb_send_bit(ctrl, (op >> 0) & 1);
 
 	mdiobb_send_num(ctrl, phy, 5);
 	mdiobb_send_num(ctrl, reg, 5);
 }
 
+/* In clause 45 mode all commands are prefixed by MDIO_ADDR to specify the
+   lower 16 bits of the 21 bit address. This transfer is done identically to a
+   MDIO_WRITE except for a different code. To enable clause 45 mode, OR
+   MII_ADDR_C45 into the address. Theoretically clause 45 and normal devices
+   can exist on the same bus. Normal devices should ignore the MDIO_ADDR
+   phase. */
+static int mdiobb_cmd_addr(struct mdiobb_ctrl *ctrl, int phy, u32 addr)
+{
+	unsigned int dev_addr = (addr >> 16) & 0x1F;
+	unsigned int reg = addr & 0xFFFF;
+	mdiobb_cmd(ctrl, MDIO_C45_ADDR, phy, dev_addr);
+
+	/* send the turnaround (10) */
+	mdiobb_send_bit(ctrl, 1);
+	mdiobb_send_bit(ctrl, 0);
+
+	mdiobb_send_num(ctrl, reg, 16);
+
+	ctrl->ops->set_mdio_dir(ctrl, 0);
+	mdiobb_get_bit(ctrl);
+
+	return dev_addr;
+}
 
 static int mdiobb_read(struct mii_bus *bus, int phy, int reg)
 {
 	struct mdiobb_ctrl *ctrl = bus->priv;
 	int ret, i;
 
-	mdiobb_cmd(ctrl, MDIO_READ, phy, reg);
+	if (reg & MII_ADDR_C45) {
+		reg = mdiobb_cmd_addr(ctrl, phy, reg);
+		mdiobb_cmd(ctrl, MDIO_C45_READ, phy, reg);
+	} else
+		mdiobb_cmd(ctrl, MDIO_READ, phy, reg);
+
 	ctrl->ops->set_mdio_dir(ctrl, 0);
 
 	/* check the turnaround bit: the PHY should be driving it to zero */
@@ -148,7 +186,11 @@ static int mdiobb_write(struct mii_bus *bus, int phy, int reg, u16 val)
 {
 	struct mdiobb_ctrl *ctrl = bus->priv;
 
-	mdiobb_cmd(ctrl, MDIO_WRITE, phy, reg);
+	if (reg & MII_ADDR_C45) {
+		reg = mdiobb_cmd_addr(ctrl, phy, reg);
+		mdiobb_cmd(ctrl, MDIO_C45_WRITE, phy, reg);
+	} else
+		mdiobb_cmd(ctrl, MDIO_WRITE, phy, reg);
 
 	/* send the turnaround (10) */
 	mdiobb_send_bit(ctrl, 1);
diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index e17b70291bbc..6a6b8199a0d6 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -208,7 +208,7 @@ EXPORT_SYMBOL(mdiobus_scan);
  * because the bus read/write functions may wait for an interrupt
  * to conclude the operation.
  */
-int mdiobus_read(struct mii_bus *bus, int addr, u16 regnum)
+int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum)
 {
 	int retval;
 
@@ -233,7 +233,7 @@ EXPORT_SYMBOL(mdiobus_read);
  * because the bus read/write functions may wait for an interrupt
  * to conclude the operation.
  */
-int mdiobus_write(struct mii_bus *bus, int addr, u16 regnum, u16 val)
+int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val)
 {
 	int err;
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 14d7fdf6a90a..d9bce4b526b4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -81,6 +81,10 @@ typedef enum {
  */
 #define MII_BUS_ID_SIZE	(20 - 3)
 
+/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
+   IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. */
+#define MII_ADDR_C45 (1<<30)
+
 /*
  * The Bus class for PHYs.  Devices which provide access to
  * PHYs should register using this structure
@@ -127,8 +131,8 @@ int mdiobus_register(struct mii_bus *bus);
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
 struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
-int mdiobus_read(struct mii_bus *bus, int addr, u16 regnum);
-int mdiobus_write(struct mii_bus *bus, int addr, u16 regnum, u16 val);
+int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum);
+int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 
 
 #define PHY_INTERRUPT_DISABLED	0x0
@@ -422,7 +426,7 @@ struct phy_fixup {
  * because the bus read/write functions may wait for an interrupt
  * to conclude the operation.
  */
-static inline int phy_read(struct phy_device *phydev, u16 regnum)
+static inline int phy_read(struct phy_device *phydev, u32 regnum)
 {
 	return mdiobus_read(phydev->bus, phydev->addr, regnum);
 }
@@ -437,7 +441,7 @@ static inline int phy_read(struct phy_device *phydev, u16 regnum)
  * because the bus read/write functions may wait for an interrupt
  * to conclude the operation.
  */
-static inline int phy_write(struct phy_device *phydev, u16 regnum, u16 val)
+static inline int phy_write(struct phy_device *phydev, u32 regnum, u16 val)
 {
 	return mdiobus_write(phydev->bus, phydev->addr, regnum, val);
 }
-- 
cgit v1.2.3


From 2a0761a35b70f170a9c14ecbcb6a959fb6e7bdf3 Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Tue, 16 Mar 2010 15:54:12 +0000
Subject: ASoC: Implement WM835x microphone jack detection support

The WM8350 provides microphone presence and short circuit detection.
Integrate this with the ASoC jack reporting API.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
---
 include/linux/mfd/wm8350/audio.h |  2 ++
 sound/soc/codecs/wm8350.c        | 58 ++++++++++++++++++++++++++++++++++++++++
 sound/soc/codecs/wm8350.h        |  3 +++
 3 files changed, 63 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mfd/wm8350/audio.h b/include/linux/mfd/wm8350/audio.h
index d899dc0223ba..a95141eafce3 100644
--- a/include/linux/mfd/wm8350/audio.h
+++ b/include/linux/mfd/wm8350/audio.h
@@ -492,6 +492,8 @@
  */
 #define WM8350_JACK_L_LVL			0x0800
 #define WM8350_JACK_R_LVL                       0x0400
+#define WM8350_JACK_MICSCD_LVL			0x0200
+#define WM8350_JACK_MICSD_LVL			0x0100
 
 /*
  * WM8350 Platform setup
diff --git a/sound/soc/codecs/wm8350.c b/sound/soc/codecs/wm8350.c
index df2c6d9617fb..ff6c10155ee6 100644
--- a/sound/soc/codecs/wm8350.c
+++ b/sound/soc/codecs/wm8350.c
@@ -54,6 +54,7 @@ struct wm8350_output {
 struct wm8350_jack_data {
 	struct snd_soc_jack *jack;
 	int report;
+	int short_report;
 };
 
 struct wm8350_data {
@@ -62,6 +63,7 @@ struct wm8350_data {
 	struct wm8350_output out2;
 	struct wm8350_jack_data hpl;
 	struct wm8350_jack_data hpr;
+	struct wm8350_jack_data mic;
 	struct regulator_bulk_data supplies[ARRAY_SIZE(supply_names)];
 	int fll_freq_out;
 	int fll_freq_in;
@@ -1430,6 +1432,55 @@ int wm8350_hp_jack_detect(struct snd_soc_codec *codec, enum wm8350_jack which,
 }
 EXPORT_SYMBOL_GPL(wm8350_hp_jack_detect);
 
+static irqreturn_t wm8350_mic_handler(int irq, void *data)
+{
+	struct wm8350_data *priv = data;
+	struct wm8350 *wm8350 = priv->codec.control_data;
+	u16 reg;
+	int report = 0;
+
+	reg = wm8350_reg_read(wm8350, WM8350_JACK_PIN_STATUS);
+	if (reg & WM8350_JACK_MICSCD_LVL)
+		report |= priv->mic.short_report;
+	if (reg & WM8350_JACK_MICSD_LVL)
+		report |= priv->mic.report;
+
+	snd_soc_jack_report(priv->mic.jack, report,
+			    priv->mic.report | priv->mic.short_report);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * wm8350_mic_jack_detect - Enable microphone jack detection.
+ *
+ * @codec:         WM8350 codec
+ * @jack:          jack to report detection events on
+ * @detect_report: value to report when presence detected
+ * @short_report:  value to report when microphone short detected
+ *
+ * Enables the microphone jack detection of the WM8350.
+ */
+int wm8350_mic_jack_detect(struct snd_soc_codec *codec,
+			   struct snd_soc_jack *jack,
+			   int detect_report, int short_report)
+{
+	struct wm8350_data *priv = codec->private_data;
+	struct wm8350 *wm8350 = codec->control_data;
+
+	priv->mic.jack = jack;
+	priv->mic.report = detect_report;
+	priv->mic.short_report = short_report;
+
+	wm8350_set_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_TOCLK_ENA);
+	wm8350_set_bits(wm8350, WM8350_POWER_MGMT_1, WM8350_MIC_DET_ENA);
+
+	snd_soc_dapm_force_enable_pin(codec, "Mic Bias");
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(wm8350_mic_jack_detect);
+
 static struct snd_soc_codec *wm8350_codec;
 
 static int wm8350_probe(struct platform_device *pdev)
@@ -1493,6 +1544,10 @@ static int wm8350_probe(struct platform_device *pdev)
 	wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R,
 			    wm8350_hp_jack_handler, 0, "Right jack detect",
 			    priv);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICSCD,
+			    wm8350_mic_handler, 0, "Microphone short", priv);
+	wm8350_register_irq(wm8350, WM8350_IRQ_CODEC_MICD,
+			    wm8350_mic_handler, 0, "Microphone detect", priv);
 
 	ret = snd_soc_new_pcms(socdev, SNDRV_DEFAULT_IDX1, SNDRV_DEFAULT_STR1);
 	if (ret < 0) {
@@ -1521,11 +1576,14 @@ static int wm8350_remove(struct platform_device *pdev)
 			  WM8350_JDL_ENA | WM8350_JDR_ENA);
 	wm8350_clear_bits(wm8350, WM8350_POWER_MGMT_4, WM8350_TOCLK_ENA);
 
+	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICD, priv);
+	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_MICSCD, priv);
 	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_L, priv);
 	wm8350_free_irq(wm8350, WM8350_IRQ_CODEC_JCK_DET_R, priv);
 
 	priv->hpl.jack = NULL;
 	priv->hpr.jack = NULL;
+	priv->mic.jack = NULL;
 
 	/* cancel any work waiting to be queued. */
 	ret = cancel_delayed_work(&codec->delayed_work);
diff --git a/sound/soc/codecs/wm8350.h b/sound/soc/codecs/wm8350.h
index d088eb4b88bb..9ed0467c71db 100644
--- a/sound/soc/codecs/wm8350.h
+++ b/sound/soc/codecs/wm8350.h
@@ -25,5 +25,8 @@ enum wm8350_jack {
 
 int wm8350_hp_jack_detect(struct snd_soc_codec *codec, enum wm8350_jack which,
 			  struct snd_soc_jack *jack, int report);
+int wm8350_mic_jack_detect(struct snd_soc_codec *codec,
+			   struct snd_soc_jack *jack,
+			   int detect_report, int short_report);
 
 #endif
-- 
cgit v1.2.3


From 28b949885f80efb87d7cebdcf879c99db12c37bd Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sat, 28 Feb 2009 03:23:57 +0100
Subject: netfilter: xtables: merge xt_MARK into xt_mark

Two arguments for combining the two:
- xt_mark is pretty useless without xt_MARK
- the actual code is so small anyway that the kmod metadata and the module
  in its loaded state totally outweigh the combined actual code size.

i586-before:
-rw-r--r-- 1 jengelh users 3821 Feb 10 01:01 xt_MARK.ko
-rw-r--r-- 1 jengelh users 2592 Feb 10 00:04 xt_MARK.o
-rw-r--r-- 1 jengelh users 3274 Feb 10 01:01 xt_mark.ko
-rw-r--r-- 1 jengelh users 2108 Feb 10 00:05 xt_mark.o
   text    data     bss     dec     hex filename
    354     264       0     618     26a xt_MARK.o
    223     176       0     399     18f xt_mark.o
And the runtime size is like 14 KB.

i586-after:
-rw-r--r-- 1 jengelh users 3264 Feb 18 17:28 xt_mark.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/xt_MARK.h |  6 +----
 include/linux/netfilter/xt_mark.h |  4 +++
 net/netfilter/Kconfig             | 46 ++++++++++++++++++++------------
 net/netfilter/Makefile            |  5 ++--
 net/netfilter/xt_MARK.c           | 56 ---------------------------------------
 net/netfilter/xt_mark.c           | 35 ++++++++++++++++++++++--
 6 files changed, 70 insertions(+), 82 deletions(-)
 delete mode 100644 net/netfilter/xt_MARK.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_MARK.h b/include/linux/netfilter/xt_MARK.h
index bc9561bdef79..41c456deba22 100644
--- a/include/linux/netfilter/xt_MARK.h
+++ b/include/linux/netfilter/xt_MARK.h
@@ -1,10 +1,6 @@
 #ifndef _XT_MARK_H_target
 #define _XT_MARK_H_target
 
-#include <linux/types.h>
-
-struct xt_mark_tginfo2 {
-	__u32 mark, mask;
-};
+#include <linux/netfilter/xt_mark.h>
 
 #endif /*_XT_MARK_H_target */
diff --git a/include/linux/netfilter/xt_mark.h b/include/linux/netfilter/xt_mark.h
index 6607c8f38ea5..ecadc40d5cde 100644
--- a/include/linux/netfilter/xt_mark.h
+++ b/include/linux/netfilter/xt_mark.h
@@ -3,6 +3,10 @@
 
 #include <linux/types.h>
 
+struct xt_mark_tginfo2 {
+	__u32 mark, mask;
+};
+
 struct xt_mark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index abf4ce6d1834..236aa20ce5cc 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -314,6 +314,23 @@ config NETFILTER_XTABLES
 
 if NETFILTER_XTABLES
 
+comment "Xtables combined modules"
+
+config NETFILTER_XT_MARK
+	tristate 'nfmark target and match support'
+	default m if NETFILTER_ADVANCED=n
+	---help---
+	This option adds the "MARK" target and "mark" match.
+
+	Netfilter mark matching allows you to match packets based on the
+	"nfmark" value in the packet.
+	The target allows you to create rules in the "mangle" table which alter
+	the netfilter mark (nfmark) field associated with the packet.
+
+	Prior to routing, the nfmark can influence the routing method (see
+	"Use netfilter MARK value as routing key") and can also be used by
+	other subsystems to change their behavior.
+
 # alphabetically ordered list of targets
 
 comment "Xtables targets"
@@ -425,16 +442,12 @@ config NETFILTER_XT_TARGET_LED
 
 config NETFILTER_XT_TARGET_MARK
 	tristate '"MARK" target support'
-	default m if NETFILTER_ADVANCED=n
-	help
-	  This option adds a `MARK' target, which allows you to create rules
-	  in the `mangle' table which alter the netfilter mark (nfmark) field
-	  associated with the packet prior to routing. This can change
-	  the routing method (see `Use netfilter MARK value as routing
-	  key') and can also be used by other subsystems to change their
-	  behavior.
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
 
 config NETFILTER_XT_TARGET_NFLOG
 	tristate '"NFLOG" target support'
@@ -739,13 +752,12 @@ config NETFILTER_XT_MATCH_MAC
 
 config NETFILTER_XT_MATCH_MARK
 	tristate '"mark" match support'
-	default m if NETFILTER_ADVANCED=n
-	help
-	  Netfilter mark matching allows you to match packets based on the
-	  `nfmark' value in the packet.  This can be set by the MARK target
-	  (see below).
-
-	  To compile it as a module, choose M here.  If unsure, say N.
+	depends on NETFILTER_ADVANCED
+	select NETFILTER_XT_MARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_MARK (combined mark/MARK module).
 
 config NETFILTER_XT_MATCH_MULTIPORT
 	tristate '"multiport" Multiple port match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index f873644f02f6..19775cc30fe5 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -40,6 +40,9 @@ obj-$(CONFIG_NETFILTER_TPROXY) += nf_tproxy_core.o
 # generic X tables 
 obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
+# combos
+obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
@@ -48,7 +51,6 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o
@@ -76,7 +78,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_IPRANGE) += xt_iprange.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_MARK) += xt_mark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o
diff --git a/net/netfilter/xt_MARK.c b/net/netfilter/xt_MARK.c
deleted file mode 100644
index 225f8d11e173..000000000000
--- a/net/netfilter/xt_MARK.c
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- *	xt_MARK - Netfilter module to modify the NFMARK field of an skb
- *
- *	(C) 1999-2001 Marc Boucher <marc@mbsi.ca>
- *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *	Jan Engelhardt <jengelh@computergmbh.de>
- *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License version 2 as
- *	published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_MARK.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("Xtables: packet mark modification");
-MODULE_ALIAS("ipt_MARK");
-MODULE_ALIAS("ip6t_MARK");
-
-static unsigned int
-mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_mark_tginfo2 *info = par->targinfo;
-
-	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
-	return XT_CONTINUE;
-}
-
-static struct xt_target mark_tg_reg __read_mostly = {
-	.name           = "MARK",
-	.revision       = 2,
-	.family         = NFPROTO_UNSPEC,
-	.target         = mark_tg,
-	.targetsize     = sizeof(struct xt_mark_tginfo2),
-	.me             = THIS_MODULE,
-};
-
-static int __init mark_tg_init(void)
-{
-	return xt_register_target(&mark_tg_reg);
-}
-
-static void __exit mark_tg_exit(void)
-{
-	xt_unregister_target(&mark_tg_reg);
-}
-
-module_init(mark_tg_init);
-module_exit(mark_tg_exit);
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 1db07d8125f8..035c468a0040 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -18,9 +18,20 @@
 
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("Xtables: packet mark match");
+MODULE_DESCRIPTION("Xtables: packet mark operations");
 MODULE_ALIAS("ipt_mark");
 MODULE_ALIAS("ip6t_mark");
+MODULE_ALIAS("ipt_MARK");
+MODULE_ALIAS("ip6t_MARK");
+
+static unsigned int
+mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_mark_tginfo2 *info = par->targinfo;
+
+	skb->mark = (skb->mark & ~info->mask) ^ info->mark;
+	return XT_CONTINUE;
+}
 
 static bool
 mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
@@ -30,6 +41,15 @@ mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
+static struct xt_target mark_tg_reg __read_mostly = {
+	.name           = "MARK",
+	.revision       = 2,
+	.family         = NFPROTO_UNSPEC,
+	.target         = mark_tg,
+	.targetsize     = sizeof(struct xt_mark_tginfo2),
+	.me             = THIS_MODULE,
+};
+
 static struct xt_match mark_mt_reg __read_mostly = {
 	.name           = "mark",
 	.revision       = 1,
@@ -41,12 +61,23 @@ static struct xt_match mark_mt_reg __read_mostly = {
 
 static int __init mark_mt_init(void)
 {
-	return xt_register_match(&mark_mt_reg);
+	int ret;
+
+	ret = xt_register_target(&mark_tg_reg);
+	if (ret < 0)
+		return ret;
+	ret = xt_register_match(&mark_mt_reg);
+	if (ret < 0) {
+		xt_unregister_target(&mark_tg_reg);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit mark_mt_exit(void)
 {
 	xt_unregister_match(&mark_mt_reg);
+	xt_unregister_target(&mark_tg_reg);
 }
 
 module_init(mark_mt_init);
-- 
cgit v1.2.3


From b8f00ba27e4acc4a2224360ce3aa8a22354434c5 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 26 Feb 2010 14:20:32 +0100
Subject: netfilter: xtables: merge xt_CONNMARK into xt_connmark

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/xt_CONNMARK.h |  22 +------
 include/linux/netfilter/xt_connmark.h |  11 ++++
 net/netfilter/Kconfig                 |  39 +++++++-----
 net/netfilter/Makefile                |   3 +-
 net/netfilter/xt_CONNMARK.c           | 113 ----------------------------------
 net/netfilter/xt_connmark.c           |  84 ++++++++++++++++++++++++-
 6 files changed, 116 insertions(+), 156 deletions(-)
 delete mode 100644 net/netfilter/xt_CONNMARK.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_CONNMARK.h b/include/linux/netfilter/xt_CONNMARK.h
index 0a8545866752..2f2e48ec8023 100644
--- a/include/linux/netfilter/xt_CONNMARK.h
+++ b/include/linux/netfilter/xt_CONNMARK.h
@@ -1,26 +1,6 @@
 #ifndef _XT_CONNMARK_H_target
 #define _XT_CONNMARK_H_target
 
-#include <linux/types.h>
-
-/* Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- * by Henrik Nordstrom <hno@marasystems.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- */
-
-enum {
-	XT_CONNMARK_SET = 0,
-	XT_CONNMARK_SAVE,
-	XT_CONNMARK_RESTORE
-};
-
-struct xt_connmark_tginfo1 {
-	__u32 ctmark, ctmask, nfmask;
-	__u8 mode;
-};
+#include <linux/netfilter/xt_connmark.h>
 
 #endif /*_XT_CONNMARK_H_target*/
diff --git a/include/linux/netfilter/xt_connmark.h b/include/linux/netfilter/xt_connmark.h
index 619e47cde01a..efc17a8305fb 100644
--- a/include/linux/netfilter/xt_connmark.h
+++ b/include/linux/netfilter/xt_connmark.h
@@ -12,6 +12,17 @@
  * (at your option) any later version.
  */
 
+enum {
+	XT_CONNMARK_SET = 0,
+	XT_CONNMARK_SAVE,
+	XT_CONNMARK_RESTORE
+};
+
+struct xt_connmark_tginfo1 {
+	__u32 ctmark, ctmask, nfmask;
+	__u8 mode;
+};
+
 struct xt_connmark_mtinfo1 {
 	__u32 mark, mask;
 	__u8 invert;
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 236aa20ce5cc..8550dfde7804 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -331,6 +331,18 @@ config NETFILTER_XT_MARK
 	"Use netfilter MARK value as routing key") and can also be used by
 	other subsystems to change their behavior.
 
+config NETFILTER_XT_CONNMARK
+	tristate 'ctmark target and match support'
+	depends on NF_CONNTRACK
+	depends on NETFILTER_ADVANCED
+	select NF_CONNTRACK_MARK
+	---help---
+	This option adds the "CONNMARK" target and "connmark" match.
+
+	Netfilter allows you to store a mark value per connection (a.k.a.
+	ctmark), similarly to the packet mark (nfmark). Using this
+	target and match, you can set and match on this mark.
+
 # alphabetically ordered list of targets
 
 comment "Xtables targets"
@@ -351,15 +363,11 @@ config NETFILTER_XT_TARGET_CONNMARK
 	tristate  '"CONNMARK" target support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
-	select NF_CONNTRACK_MARK
-	help
-	  This option adds a `CONNMARK' target, which allows one to manipulate
-	  the connection mark value.  Similar to the MARK target, but
-	  affects the connection mark value rather than the packet mark value.
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_CONNMARK.  If unsure, say `N'.
+	select NETFILTER_XT_CONNMARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
 
 config NETFILTER_XT_TARGET_CONNSECMARK
 	tristate '"CONNSECMARK" target support'
@@ -621,14 +629,11 @@ config NETFILTER_XT_MATCH_CONNMARK
 	tristate  '"connmark" connection mark match support'
 	depends on NF_CONNTRACK
 	depends on NETFILTER_ADVANCED
-	select NF_CONNTRACK_MARK
-	help
-	  This option adds a `connmark' match, which allows you to match the
-	  connection mark value previously set for the session by `CONNMARK'. 
-
-	  If you want to compile it as a module, say M here and read
-	  <file:Documentation/kbuild/modules.txt>.  The module will be called
-	  ipt_connmark.  If unsure, say `N'.
+	select NETFILTER_XT_CONNMARK
+	---help---
+	This is a backwards-compat option for the user's convenience
+	(e.g. when running oldconfig). It selects
+	CONFIG_NETFILTER_XT_CONNMARK (combined connmark/CONNMARK module).
 
 config NETFILTER_XT_MATCH_CONNTRACK
 	tristate '"conntrack" connection tracking match support'
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 19775cc30fe5..cd31afe0692a 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -42,10 +42,10 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o
 
 # combos
 obj-$(CONFIG_NETFILTER_XT_MARK) += xt_mark.o
+obj-$(CONFIG_NETFILTER_XT_CONNMARK) += xt_connmark.o
 
 # targets
 obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o
-obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CONNSECMARK) += xt_CONNSECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_CT) += xt_CT.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o
@@ -66,7 +66,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNLIMIT) += xt_connlimit.o
-obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o
 obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o
diff --git a/net/netfilter/xt_CONNMARK.c b/net/netfilter/xt_CONNMARK.c
deleted file mode 100644
index 593457068ae1..000000000000
--- a/net/netfilter/xt_CONNMARK.c
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- *	xt_CONNMARK - Netfilter module to modify the connection mark values
- *
- *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
- *	by Henrik Nordstrom <hno@marasystems.com>
- *	Copyright © CC Computer Consultants GmbH, 2007 - 2008
- *	Jan Engelhardt <jengelh@computergmbh.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("Xtables: connection mark modification");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ipt_CONNMARK");
-MODULE_ALIAS("ip6t_CONNMARK");
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter/xt_CONNMARK.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-
-static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
-	const struct xt_connmark_tginfo1 *info = par->targinfo;
-	enum ip_conntrack_info ctinfo;
-	struct nf_conn *ct;
-	u_int32_t newmark;
-
-	ct = nf_ct_get(skb, &ctinfo);
-	if (ct == NULL)
-		return XT_CONTINUE;
-
-	switch (info->mode) {
-	case XT_CONNMARK_SET:
-		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
-		if (ct->mark != newmark) {
-			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, ct);
-		}
-		break;
-	case XT_CONNMARK_SAVE:
-		newmark = (ct->mark & ~info->ctmask) ^
-		          (skb->mark & info->nfmask);
-		if (ct->mark != newmark) {
-			ct->mark = newmark;
-			nf_conntrack_event_cache(IPCT_MARK, ct);
-		}
-		break;
-	case XT_CONNMARK_RESTORE:
-		newmark = (skb->mark & ~info->nfmask) ^
-		          (ct->mark & info->ctmask);
-		skb->mark = newmark;
-		break;
-	}
-
-	return XT_CONTINUE;
-}
-
-static bool connmark_tg_check(const struct xt_tgchk_param *par)
-{
-	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
-		printk(KERN_WARNING "cannot load conntrack support for "
-		       "proto=%u\n", par->family);
-		return false;
-	}
-	return true;
-}
-
-static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
-{
-	nf_ct_l3proto_module_put(par->family);
-}
-
-static struct xt_target connmark_tg_reg __read_mostly = {
-	.name           = "CONNMARK",
-	.revision       = 1,
-	.family         = NFPROTO_UNSPEC,
-	.checkentry     = connmark_tg_check,
-	.target         = connmark_tg,
-	.targetsize     = sizeof(struct xt_connmark_tginfo1),
-	.destroy        = connmark_tg_destroy,
-	.me             = THIS_MODULE,
-};
-
-static int __init connmark_tg_init(void)
-{
-	return xt_register_target(&connmark_tg_reg);
-}
-
-static void __exit connmark_tg_exit(void)
-{
-	xt_unregister_target(&connmark_tg_reg);
-}
-
-module_init(connmark_tg_init);
-module_exit(connmark_tg_exit);
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 122aa8b0147b..d184515604b6 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -1,5 +1,5 @@
 /*
- *	xt_connmark - Netfilter module to match connection mark values
+ *	xt_connmark - Netfilter module to operate on connection marks
  *
  *	Copyright (C) 2002,2004 MARA Systems AB <http://www.marasystems.com>
  *	by Henrik Nordstrom <hno@marasystems.com>
@@ -24,15 +24,71 @@
 #include <linux/module.h>
 #include <linux/skbuff.h>
 #include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_ecache.h>
 #include <linux/netfilter/x_tables.h>
 #include <linux/netfilter/xt_connmark.h>
 
 MODULE_AUTHOR("Henrik Nordstrom <hno@marasystems.com>");
-MODULE_DESCRIPTION("Xtables: connection mark match");
+MODULE_DESCRIPTION("Xtables: connection mark operations");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_CONNMARK");
+MODULE_ALIAS("ip6t_CONNMARK");
 MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
+static unsigned int
+connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_connmark_tginfo1 *info = par->targinfo;
+	enum ip_conntrack_info ctinfo;
+	struct nf_conn *ct;
+	u_int32_t newmark;
+
+	ct = nf_ct_get(skb, &ctinfo);
+	if (ct == NULL)
+		return XT_CONTINUE;
+
+	switch (info->mode) {
+	case XT_CONNMARK_SET:
+		newmark = (ct->mark & ~info->ctmask) ^ info->ctmark;
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		}
+		break;
+	case XT_CONNMARK_SAVE:
+		newmark = (ct->mark & ~info->ctmask) ^
+		          (skb->mark & info->nfmask);
+		if (ct->mark != newmark) {
+			ct->mark = newmark;
+			nf_conntrack_event_cache(IPCT_MARK, ct);
+		}
+		break;
+	case XT_CONNMARK_RESTORE:
+		newmark = (skb->mark & ~info->nfmask) ^
+		          (ct->mark & info->ctmask);
+		skb->mark = newmark;
+		break;
+	}
+
+	return XT_CONTINUE;
+}
+
+static bool connmark_tg_check(const struct xt_tgchk_param *par)
+{
+	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
+		printk(KERN_WARNING "cannot load conntrack support for "
+		       "proto=%u\n", par->family);
+		return false;
+	}
+	return true;
+}
+
+static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	nf_ct_l3proto_module_put(par->family);
+}
+
 static bool
 connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -62,6 +118,17 @@ static void connmark_mt_destroy(const struct xt_mtdtor_param *par)
 	nf_ct_l3proto_module_put(par->family);
 }
 
+static struct xt_target connmark_tg_reg __read_mostly = {
+	.name           = "CONNMARK",
+	.revision       = 1,
+	.family         = NFPROTO_UNSPEC,
+	.checkentry     = connmark_tg_check,
+	.target         = connmark_tg,
+	.targetsize     = sizeof(struct xt_connmark_tginfo1),
+	.destroy        = connmark_tg_destroy,
+	.me             = THIS_MODULE,
+};
+
 static struct xt_match connmark_mt_reg __read_mostly = {
 	.name           = "connmark",
 	.revision       = 1,
@@ -75,12 +142,23 @@ static struct xt_match connmark_mt_reg __read_mostly = {
 
 static int __init connmark_mt_init(void)
 {
-	return xt_register_match(&connmark_mt_reg);
+	int ret;
+
+	ret = xt_register_target(&connmark_tg_reg);
+	if (ret < 0)
+		return ret;
+	ret = xt_register_match(&connmark_mt_reg);
+	if (ret < 0) {
+		xt_unregister_target(&connmark_tg_reg);
+		return ret;
+	}
+	return 0;
 }
 
 static void __exit connmark_mt_exit(void)
 {
 	xt_unregister_match(&connmark_mt_reg);
+	xt_unregister_target(&connmark_tg_reg);
 }
 
 module_init(connmark_mt_init);
-- 
cgit v1.2.3


From 0079c5aee34880bcee7feee9960f0502c73dc5fa Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Tue, 16 Mar 2010 19:53:13 +0100
Subject: netfilter: xt_recent: add an entry reaper

One of the problems with the way xt_recent is implemented is that
there is no efficient way to remove expired entries. Of course,
one can write a rule '-m recent --remove', but you have to know
beforehand which entry to delete. This commit adds reaper
logic which checks the head of the LRU list when a rule
is invoked that has a '--seconds' value and XT_RECENT_REAP set. If an
entry ceases to accumulate time stamps, then it will eventually bubble
to the top of the LRU list where it is then reaped.

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_recent.h |  4 ++++
 net/netfilter/xt_recent.c           | 28 +++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
index d2c276609925..bba990ecb018 100644
--- a/include/linux/netfilter/xt_recent.h
+++ b/include/linux/netfilter/xt_recent.h
@@ -9,6 +9,7 @@ enum {
 	XT_RECENT_UPDATE   = 1 << 2,
 	XT_RECENT_REMOVE   = 1 << 3,
 	XT_RECENT_TTL      = 1 << 4,
+	XT_RECENT_REAP     = 1 << 5,
 
 	XT_RECENT_SOURCE   = 0,
 	XT_RECENT_DEST     = 1,
@@ -16,6 +17,9 @@ enum {
 	XT_RECENT_NAME_LEN = 200,
 };
 
+/* Only allowed with --rcheck and --update */
+#define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP)
+
 struct xt_recent_mtinfo {
 	__u32 seconds;
 	__u32 hit_count;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 2ff8aae84a38..b65eca9e13a3 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -143,6 +143,25 @@ static void recent_entry_remove(struct recent_table *t, struct recent_entry *e)
 	t->entries--;
 }
 
+/*
+ * Drop entries with timestamps older than 'time'.
+ */
+static void recent_entry_reap(struct recent_table *t, unsigned long time)
+{
+	struct recent_entry *e;
+
+	/*
+	 * The head of the LRU list is always the oldest entry.
+	 */
+	e = list_entry(t->lru_list.next, struct recent_entry, lru_list);
+
+	/*
+	 * The last time stamp is the most recent.
+	 */
+	if (time_after(time, e->stamps[e->index-1]))
+		recent_entry_remove(t, e);
+}
+
 static struct recent_entry *
 recent_entry_init(struct recent_table *t, const union nf_inet_addr *addr,
 		  u_int16_t family, u_int8_t ttl)
@@ -269,6 +288,10 @@ recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 				break;
 			}
 		}
+
+		/* info->seconds must be non-zero */
+		if (info->check_set & XT_RECENT_REAP)
+			recent_entry_reap(t, time);
 	}
 
 	if (info->check_set & XT_RECENT_SET ||
@@ -301,7 +324,10 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
 		return false;
 	if ((info->check_set & (XT_RECENT_SET | XT_RECENT_REMOVE)) &&
-	    (info->seconds || info->hit_count))
+	    (info->seconds || info->hit_count ||
+	    (info->check_set & XT_RECENT_MODIFIERS)))
+		return false;
+	if ((info->check_set & XT_RECENT_REAP) && !info->seconds)
 		return false;
 	if (info->hit_count > ip_pkt_list_tot) {
 		pr_info(KBUILD_MODNAME ": hitcount (%u) is larger than "
-- 
cgit v1.2.3


From 606a9a02633c02d0e09fc96706f041053dbc57ee Mon Sep 17 00:00:00 2001
From: Tim Gardner <tim.gardner@canonical.com>
Date: Wed, 17 Mar 2010 16:18:56 +0100
Subject: netfilter: xt_recent: check for unsupported user space flags

Signed-off-by: Tim Gardner <tim.gardner@canonical.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_recent.h | 3 +++
 net/netfilter/xt_recent.c           | 5 +++++
 2 files changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_recent.h b/include/linux/netfilter/xt_recent.h
index bba990ecb018..83318e01425e 100644
--- a/include/linux/netfilter/xt_recent.h
+++ b/include/linux/netfilter/xt_recent.h
@@ -20,6 +20,9 @@ enum {
 /* Only allowed with --rcheck and --update */
 #define XT_RECENT_MODIFIERS (XT_RECENT_TTL|XT_RECENT_REAP)
 
+#define XT_RECENT_VALID_FLAGS (XT_RECENT_CHECK|XT_RECENT_SET|XT_RECENT_UPDATE|\
+			       XT_RECENT_REMOVE|XT_RECENT_TTL|XT_RECENT_REAP)
+
 struct xt_recent_mtinfo {
 	__u32 seconds;
 	__u32 hit_count;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index b65eca9e13a3..d2e7c80cd3c3 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -319,6 +319,11 @@ static bool recent_mt_check(const struct xt_mtchk_param *par)
 		get_random_bytes(&hash_rnd, sizeof(hash_rnd));
 		hash_rnd_inited = true;
 	}
+	if (info->check_set & ~XT_RECENT_VALID_FLAGS) {
+		pr_info(KBUILD_MODNAME ": Unsupported user space flags "
+			"(%08x)\n", info->check_set);
+		return false;
+	}
 	if (hweight8(info->check_set &
 		     (XT_RECENT_SET | XT_RECENT_REMOVE |
 		      XT_RECENT_CHECK | XT_RECENT_UPDATE)) != 1)
-- 
cgit v1.2.3


From a8d89807373f95f3f7637e3987710a4500a7a857 Mon Sep 17 00:00:00 2001
From: Viral Mehta <Viral.Mehta@lntinfotech.com>
Date: Wed, 17 Mar 2010 19:31:17 +0530
Subject: kfifo: fix kerneldoc for kfifo_avail and kfifo_in_locked

The function arguments do not include "len" anywhere;
"len" is only used later, in kfifo_{in|out}.

So here it is appropriate to say that the function
copies @n bytes, not @len.

Signed-off-by: Viral Mehta <viral.mehta@lntinfotech.com>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/kfifo.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index bc0fc795bd35..7eb6ec41f486 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -202,7 +202,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo)
  * @n: the length of the data to be added.
  * @lock: pointer to the spinlock to use for locking.
  *
- * This function copies at most @len bytes from the @from buffer into
+ * This function copies at most @n bytes from the @from buffer into
  * the FIFO depending on the free space, and returns the number of
  * bytes copied.
  */
@@ -228,7 +228,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo,
  * @n: the size of the destination buffer.
  * @lock: pointer to the spinlock to use for locking.
  *
- * This function copies at most @len bytes from the FIFO into the
+ * This function copies at most @n bytes from the FIFO into the
  * @to buffer and returns the number of copied bytes.
  */
 static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo,
-- 
cgit v1.2.3


From 16599786ae5e9d5f936706d2202d8c7224cd51ed Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 10:30:44 +0100
Subject: netfilter: update documentation fields of x_tables.h

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 84c7c928e9eb..c68ff82366b6 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -197,6 +197,7 @@ struct xt_counters_info {
  * @family:	Actual NFPROTO_* through which the function is invoked
  * 		(helpful when match->family == NFPROTO_UNSPEC)
  * @hotdrop:	drop packet if we had inspection problems
+ * Network namespace obtainable using dev_net(in/out)
  */
 struct xt_match_param {
 	const struct net_device *in, *out;
@@ -213,12 +214,14 @@ struct xt_match_param {
  * struct xt_mtchk_param - parameters for match extensions'
  * checkentry functions
  *
+ * @net:	network namespace through which the check was invoked
  * @table:	table the rule is tried to be inserted into
  * @entryinfo:	the family-specific rule data
- * 		(struct ipt_ip, ip6t_ip, ebt_entry)
+ * 		(struct ipt_ip, ip6t_ip, arpt_arp or (note) ebt_entry)
  * @match:	struct xt_match through which this function was invoked
  * @matchinfo:	per-match data
  * @hook_mask:	via which hooks the new rule is reachable
+ * Other fields as above.
  */
 struct xt_mtchk_param {
 	struct net *net;
@@ -230,7 +233,10 @@ struct xt_mtchk_param {
 	u_int8_t family;
 };
 
-/* Match destructor parameters */
+/**
+ * struct xt_mtdtor_param - match destructor parameters
+ * Fields as above.
+ */
 struct xt_mtdtor_param {
 	struct net *net;
 	const struct xt_match *match;
-- 
cgit v1.2.3


From 4f948db1915ff05e4ce0fd98e6323db6a3ec0fc0 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 11:03:51 +0100
Subject: netfilter: xtables: remove almost-unused xt_match_param.data member

This member is taking up a "long" per match, yet is only used by one
module out of the roughly 90 modules, ip6t_hbh. ip6t_hbh can be
restructured a little to accommodate the lack of the .data member.
This variant uses checking the par->match address, which should avoid
having to add two extra functions, including calls, i.e.

(hbh_mt6: call hbhdst_mt6(skb, par, NEXTHDR_OPT),
dst_mt6: call hbhdst_mt6(skb, par, NEXTHDR_DEST))

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 3 ---
 net/ipv6/netfilter/ip6t_hbh.c      | 9 ++++++---
 2 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index c68ff82366b6..cf91473624e1 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -315,9 +315,6 @@ struct xt_match {
 	/* Set this to THIS_MODULE if you are a module, otherwise NULL */
 	struct module *me;
 
-	/* Free to use by each match */
-	unsigned long data;
-
 	const char *table;
 	unsigned int matchsize;
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index cbe8dec9744b..82593c8bdc3e 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -41,6 +41,8 @@ MODULE_ALIAS("ip6t_dst");
  *	5	-> RTALERT 2 x x
  */
 
+static struct xt_match hbh_mt6_reg[] __read_mostly;
+
 static bool
 hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 {
@@ -58,7 +60,9 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	unsigned int optlen;
 	int err;
 
-	err = ipv6_find_hdr(skb, &ptr, par->match->data, NULL);
+	err = ipv6_find_hdr(skb, &ptr,
+			    (par->match == &hbh_mt6_reg[0]) ?
+			    NEXTHDR_HOP : NEXTHDR_DEST, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
 			*par->hotdrop = true;
@@ -179,13 +183,13 @@ static bool hbh_mt6_check(const struct xt_mtchk_param *par)
 
 static struct xt_match hbh_mt6_reg[] __read_mostly = {
 	{
+		/* Note, hbh_mt6 relies on the order of hbh_mt6_reg */
 		.name		= "hbh",
 		.family		= NFPROTO_IPV6,
 		.match		= hbh_mt6,
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
-		.data		= NEXTHDR_HOP,
 	},
 	{
 		.name		= "dst",
@@ -194,7 +198,6 @@ static struct xt_match hbh_mt6_reg[] __read_mostly = {
 		.matchsize	= sizeof(struct ip6t_opts),
 		.checkentry	= hbh_mt6_check,
 		.me		= THIS_MODULE,
-		.data		= NEXTHDR_DEST,
 	},
 };
 
-- 
cgit v1.2.3


From f5c511c67aaec323c186543856cfddab31bed1d1 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Thu, 18 Mar 2010 14:02:10 +0100
Subject: netfilter: xtables: reduce holes in struct xt_target

This will save one full padding chunk (8 bytes on x86_64) per target.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index cf91473624e1..f8f555153373 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -331,6 +331,7 @@ struct xt_target {
 	struct list_head list;
 
 	const char name[XT_FUNCTION_MAXNAMELEN-1];
+	u_int8_t revision;
 
 	/* Returns verdict. Argument order changed since 2.6.9, as this
 	   must now handle non-linear skbs, using skb_copy_bits and
@@ -363,7 +364,6 @@ struct xt_target {
 	unsigned short proto;
 
 	unsigned short family;
-	u_int8_t revision;
 };
 
 /* Furniture shopping... */
-- 
cgit v1.2.3


From 6a1c7b7e2ea3ca5f6ae73154c4b72755d262c9e0 Mon Sep 17 00:00:00 2001
From: "Olaya, Margarita" <magi.olaya@ti.com>
Date: Wed, 17 Mar 2010 17:42:29 -0500
Subject: OMAP4: PMIC: Rename twl6030_codec as twl6040_codec

Correction for chips:
twl6030 is Phoenix Power chip
twl6040 is Phoenix Audio chip

Signed-off-by: Margarita Olaya Cabrera <magi.olaya@ti.com>
Acked-by: Liam Girdwood <lrg@slimlogic.co.uk>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
---
 drivers/mfd/twl-core.c  | 4 ++--
 include/linux/i2c/twl.h | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mfd/twl-core.c b/drivers/mfd/twl-core.c
index 562cd4935e17..720e099e506d 100644
--- a/drivers/mfd/twl-core.c
+++ b/drivers/mfd/twl-core.c
@@ -109,7 +109,7 @@
 #endif
 
 #if defined(CONFIG_TWL4030_CODEC) || defined(CONFIG_TWL4030_CODEC_MODULE) ||\
-	defined(CONFIG_SND_SOC_TWL6030) || defined(CONFIG_SND_SOC_TWL6030_MODULE)
+	defined(CONFIG_SND_SOC_TWL6040) || defined(CONFIG_SND_SOC_TWL6040_MODULE)
 #define twl_has_codec()	true
 #else
 #define twl_has_codec()	false
@@ -708,7 +708,7 @@ add_children(struct twl4030_platform_data *pdata, unsigned long features)
 	/* Phoenix*/
 	if (twl_has_codec() && pdata->codec && twl_class_is_6030()) {
 		sub_chip_id = twl_map[TWL_MODULE_AUDIO_VOICE].sid;
-		child = add_child(sub_chip_id, "twl6030_codec",
+		child = add_child(sub_chip_id, "twl6040_codec",
 				pdata->codec, sizeof(*pdata->codec),
 				false, 0, 0);
 		if (IS_ERR(child))
diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index fb6784e86d5f..ebd90ce58ca2 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -569,9 +569,9 @@ struct twl4030_codec_data {
 	struct twl4030_codec_audio_data		*audio;
 	struct twl4030_codec_vibra_data		*vibra;
 
-	/* twl6030 */
-	int audpwron_gpio;      /* audio power-on gpio */
-	int naudint_irq;        /* audio interrupt */
+	/* twl6040 */
+	int audpwron_gpio;	/* audio power-on gpio */
+	int naudint_irq;	/* audio interrupt */
 };
 
 struct twl4030_platform_data {
-- 
cgit v1.2.3


From 1e94d72feab025b8f7c55d07020602f82f3a97dd Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Thu, 18 Mar 2010 17:45:44 -0700
Subject: rps: Fixed build with CONFIG_SMP not enabled.

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 24 ++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index de1a52bcb9e0..726ecd1af535 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1341,7 +1341,9 @@ struct softnet_data {
 	struct sk_buff		*completion_queue;
 
 	/* Elements below can be accessed between CPUs for RPS */
+#ifdef CONFIG_SMP
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+#endif
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 17b168671501..1a7e1d1d5ad9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2174,6 +2174,7 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
+#ifdef CONFIG_SMP
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2293,6 +2294,7 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
+#endif /* CONFIG_SMP */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
@@ -2320,6 +2322,7 @@ enqueue:
 
 		/* Schedule NAPI for backlog device */
 		if (napi_schedule_prep(&queue->backlog)) {
+#ifdef CONFIG_SMP
 			if (cpu != smp_processor_id()) {
 				struct rps_remote_softirq_cpus *rcpus =
 				    &__get_cpu_var(rps_remote_softirq_cpus);
@@ -2328,6 +2331,9 @@ enqueue:
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 			} else
 				__napi_schedule(&queue->backlog);
+#else
+			__napi_schedule(&queue->backlog);
+#endif
 		}
 		goto enqueue;
 	}
@@ -2367,9 +2373,13 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
+#ifdef CONFIG_SMP
 	cpu = get_rps_cpu(skb->dev, skb);
 	if (cpu < 0)
 		cpu = smp_processor_id();
+#else
+	cpu = smp_processor_id();
+#endif
 
 	return enqueue_to_backlog(skb, cpu);
 }
@@ -2735,6 +2745,7 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+#ifdef CONFIG_SMP
 	int cpu;
 
 	cpu = get_rps_cpu(skb->dev, skb);
@@ -2743,6 +2754,9 @@ int netif_receive_skb(struct sk_buff *skb)
 		return __netif_receive_skb(skb);
 	else
 		return enqueue_to_backlog(skb, cpu);
+#else
+	return __netif_receive_skb(skb);
+#endif
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
@@ -3168,6 +3182,7 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+#ifdef CONFIG_SMP
 /*
  * net_rps_action sends any pending IPI's for rps.  This is only called from
  * softirq and interrupts must be enabled.
@@ -3184,6 +3199,7 @@ static void net_rps_action(cpumask_t *mask)
 	}
 	cpus_clear(*mask);
 }
+#endif
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3191,8 +3207,10 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
+#ifdef CONFIG_SMP
 	int select;
 	struct rps_remote_softirq_cpus *rcpus;
+#endif
 
 	local_irq_disable();
 
@@ -3255,6 +3273,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
+#ifdef CONFIG_SMP
 	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
 	select = rcpus->select;
 	rcpus->select ^= 1;
@@ -3262,6 +3281,9 @@ out:
 	local_irq_enable();
 
 	net_rps_action(&rcpus->mask[select]);
+#else
+	local_irq_enable();
+#endif
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -6204,9 +6226,11 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
+#ifdef CONFIG_SMP
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
+#endif
 
 		queue->backlog.poll = process_backlog;
 		queue->backlog.weight = weight_p;
-- 
cgit v1.2.3


From 93d9b7d7a85cfb4e1711d5226eba73586dd4919f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Mar 2010 10:28:56 +0000
Subject: net: rename notifier defines for netdev type change

Since netdevices other than bonding devices could, in general, also change
type, make the names of these event types "bonding-unrelated".

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |  6 ++++--
 include/linux/notifier.h        |  4 ++--
 net/ipv4/devinet.c              |  4 ++--
 net/ipv6/addrconf.c             | 16 ++++++++--------
 4 files changed, 16 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 430c02267d7e..7eeb18751d67 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1480,14 +1480,16 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			netdev_bonding_change(bond_dev, NETDEV_BONDING_OLDTYPE);
+			netdev_bonding_change(bond_dev,
+					      NETDEV_PRE_TYPE_CHANGE);
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
 			else
 				ether_setup(bond_dev);
 
-			netdev_bonding_change(bond_dev, NETDEV_BONDING_NEWTYPE);
+			netdev_bonding_change(bond_dev,
+					      NETDEV_POST_TYPE_CHANGE);
 		}
 	} else if (bond_dev->type != slave_dev->type) {
 		pr_err("%s ether type (%d) is different from other slaves (%d), can not enslave it.\n",
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index fee6c2f68075..f3635fc6e942 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -199,8 +199,8 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_FEAT_CHANGE	0x000B
 #define NETDEV_BONDING_FAILOVER 0x000C
 #define NETDEV_PRE_UP		0x000D
-#define NETDEV_BONDING_OLDTYPE  0x000E
-#define NETDEV_BONDING_NEWTYPE  0x000F
+#define NETDEV_PRE_TYPE_CHANGE	0x000E
+#define NETDEV_POST_TYPE_CHANGE	0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 51ca946e3392..c75320ef95c2 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -1095,10 +1095,10 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
 	case NETDEV_DOWN:
 		ip_mc_down(in_dev);
 		break;
-	case NETDEV_BONDING_OLDTYPE:
+	case NETDEV_PRE_TYPE_CHANGE:
 		ip_mc_unmap(in_dev);
 		break;
-	case NETDEV_BONDING_NEWTYPE:
+	case NETDEV_POST_TYPE_CHANGE:
 		ip_mc_remap(in_dev);
 		break;
 	case NETDEV_CHANGEMTU:
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3381b4317c27..8d41abc40db5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -137,8 +137,8 @@ static DEFINE_SPINLOCK(addrconf_verify_lock);
 static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
 static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
 
-static void addrconf_bonding_change(struct net_device *dev,
-				    unsigned long event);
+static void addrconf_type_change(struct net_device *dev,
+				 unsigned long event);
 static int addrconf_ifdown(struct net_device *dev, int how);
 
 static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags);
@@ -2584,9 +2584,9 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
 				return notifier_from_errno(err);
 		}
 		break;
-	case NETDEV_BONDING_OLDTYPE:
-	case NETDEV_BONDING_NEWTYPE:
-		addrconf_bonding_change(dev, event);
+	case NETDEV_PRE_TYPE_CHANGE:
+	case NETDEV_POST_TYPE_CHANGE:
+		addrconf_type_change(dev, event);
 		break;
 	}
 
@@ -2601,16 +2601,16 @@ static struct notifier_block ipv6_dev_notf = {
 	.priority = 0
 };
 
-static void addrconf_bonding_change(struct net_device *dev, unsigned long event)
+static void addrconf_type_change(struct net_device *dev, unsigned long event)
 {
 	struct inet6_dev *idev;
 	ASSERT_RTNL();
 
 	idev = __in6_dev_get(dev);
 
-	if (event == NETDEV_BONDING_NEWTYPE)
+	if (event == NETDEV_POST_TYPE_CHANGE)
 		ipv6_mc_remap(idev);
-	else if (event == NETDEV_BONDING_OLDTYPE)
+	else if (event == NETDEV_PRE_TYPE_CHANGE)
 		ipv6_mc_unmap(idev);
 }
 
-- 
cgit v1.2.3


From 3ca5b4042ecae5e73c59de62e4ac0db31c10e0f8 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Wed, 10 Mar 2010 10:29:35 +0000
Subject: bonding: check return value of notifier when changing type

This patch makes it possible for other subsystems (for example bridge,
vlan, etc.) to refuse the bonding type change.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 11 +++++++++--
 include/linux/netdevice.h       |  2 +-
 net/core/dev.c                  |  4 ++--
 3 files changed, 12 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 7eeb18751d67..cbe9e353d46a 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1480,8 +1480,15 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				 bond_dev->name,
 				 bond_dev->type, slave_dev->type);
 
-			netdev_bonding_change(bond_dev,
-					      NETDEV_PRE_TYPE_CHANGE);
+			res = netdev_bonding_change(bond_dev,
+						    NETDEV_PRE_TYPE_CHANGE);
+			res = notifier_to_errno(res);
+			if (res) {
+				pr_err("%s: refused to change device type\n",
+				       bond_dev->name);
+				res = -EBUSY;
+				goto err_undo_flags;
+			}
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 726ecd1af535..813bed723f58 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2005,7 +2005,7 @@ extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct
 extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
-extern void		netdev_bonding_change(struct net_device *dev,
+extern int		netdev_bonding_change(struct net_device *dev,
 					      unsigned long event);
 extern void		netdev_features_change(struct net_device *dev);
 /* Load a device via the kmod */
diff --git a/net/core/dev.c b/net/core/dev.c
index 1a7e1d1d5ad9..d1f027c41e73 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1084,9 +1084,9 @@ void netdev_state_change(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_state_change);
 
-void netdev_bonding_change(struct net_device *dev, unsigned long event)
+int netdev_bonding_change(struct net_device *dev, unsigned long event)
 {
-	call_netdevice_notifiers(event, dev);
+	return call_netdevice_notifiers(event, dev);
 }
 EXPORT_SYMBOL(netdev_bonding_change);
 
-- 
cgit v1.2.3


From 01e9651a21bc0e6731da733593e4aaf4cf46b5e5 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Wed, 10 Mar 2010 09:31:01 +0000
Subject: sh: add INTC out of memory error handling

Extend the INTC code to warn and return an error code
in the case of memory allocation failure.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 29 ++++++++++++++++++++++++++++-
 include/linux/sh_intc.h |  2 +-
 2 files changed, 29 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index c2750391fd34..d4aa4d1e8dce 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -789,13 +789,15 @@ static void intc_redirect_irq(unsigned int irq, struct irq_desc *desc)
 	generic_handle_irq((unsigned int)get_irq_data(irq));
 }
 
-void __init register_intc_controller(struct intc_desc *desc)
+int __init register_intc_controller(struct intc_desc *desc)
 {
 	unsigned int i, k, smp;
 	struct intc_hw_desc *hw = &desc->hw;
 	struct intc_desc_int *d;
 
 	d = kzalloc(sizeof(*d), GFP_NOWAIT);
+	if (!d)
+		goto err0;
 
 	INIT_LIST_HEAD(&d->list);
 	list_add(&d->list, &intc_list);
@@ -806,8 +808,13 @@ void __init register_intc_controller(struct intc_desc *desc)
 	d->nr_reg += hw->ack_regs ? hw->nr_ack_regs : 0;
 
 	d->reg = kzalloc(d->nr_reg * sizeof(*d->reg), GFP_NOWAIT);
+	if (!d->reg)
+		goto err1;
+
 #ifdef CONFIG_SMP
 	d->smp = kzalloc(d->nr_reg * sizeof(*d->smp), GFP_NOWAIT);
+	if (!d->smp)
+		goto err2;
 #endif
 	k = 0;
 
@@ -822,6 +829,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 	if (hw->prio_regs) {
 		d->prio = kzalloc(hw->nr_vectors * sizeof(*d->prio),
 				  GFP_NOWAIT);
+		if (!d->prio)
+			goto err3;
 
 		for (i = 0; i < hw->nr_prio_regs; i++) {
 			smp = IS_SMP(hw->prio_regs[i]);
@@ -833,6 +842,8 @@ void __init register_intc_controller(struct intc_desc *desc)
 	if (hw->sense_regs) {
 		d->sense = kzalloc(hw->nr_vectors * sizeof(*d->sense),
 				   GFP_NOWAIT);
+		if (!d->sense)
+			goto err4;
 
 		for (i = 0; i < hw->nr_sense_regs; i++)
 			k += save_reg(d, k, hw->sense_regs[i].reg, 0);
@@ -912,6 +923,22 @@ void __init register_intc_controller(struct intc_desc *desc)
 	/* enable bits matching force_enable after registering irqs */
 	if (desc->force_enable)
 		intc_enable_disable_enum(desc, d, desc->force_enable, 1);
+
+	return 0;
+ err4:
+	kfree(d->prio);
+ err3:
+#ifdef CONFIG_SMP
+	kfree(d->smp);
+ err2:
+#endif
+	kfree(d->reg);
+ err1:
+	kfree(d);
+ err0:
+	pr_err("unable to allocate INTC memory\n");
+
+	return -ENOMEM;
 }
 
 static int intc_suspend(struct sys_device *dev, pm_message_t state)
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 51d288d8ac88..df3777035936 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -92,7 +92,7 @@ struct intc_desc symbol __initdata = {					\
 			   prio_regs, sense_regs, ack_regs),		\
 }
 
-void __init register_intc_controller(struct intc_desc *desc);
+int __init register_intc_controller(struct intc_desc *desc);
 int intc_set_priority(unsigned int irq, unsigned int prio);
 
 int reserve_irq_vector(unsigned int irq);
-- 
cgit v1.2.3


From dec710b77c2cf04bf512acada3c14a16f11708d9 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 19 Mar 2010 16:48:01 +0900
Subject: sh: INTC ioremap support

Extend the INTC code with ioremap() support V2.

Support INTC controllers that are not accessible through
a 1:1 virt:phys window. Needed by SH-Mobile ARM INTCS.

The INTC code behaves as usual if the io window resource
is omitted. The slow phys->virt lookup only happens during
setup. The fast path code operates on virtual addresses.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 drivers/sh/intc.c       | 84 +++++++++++++++++++++++++++++++++++++++++++------
 include/linux/sh_intc.h |  4 +++
 2 files changed, 78 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index d4aa4d1e8dce..a700dfec8dc3 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -43,6 +43,12 @@ struct intc_handle_int {
 	unsigned long handle;
 };
 
+struct intc_window {
+	phys_addr_t phys;
+	void __iomem *virt;
+	unsigned long size;
+};
+
 struct intc_desc_int {
 	struct list_head list;
 	struct sys_device sysdev;
@@ -56,6 +62,8 @@ struct intc_desc_int {
 	unsigned int nr_prio;
 	struct intc_handle_int *sense;
 	unsigned int nr_sense;
+	struct intc_window *window;
+	unsigned int nr_windows;
 	struct irq_chip chip;
 };
 
@@ -420,11 +428,39 @@ static int intc_set_sense(unsigned int irq, unsigned int type)
 	return 0;
 }
 
+static unsigned long intc_phys_to_virt(struct intc_desc_int *d,
+				       unsigned long address)
+{
+	struct intc_window *window;
+	int k;
+
+	/* scan through physical windows and convert address */
+	for (k = 0; k < d->nr_windows; k++) {
+		window = d->window + k;
+
+		if (address < window->phys)
+			continue;
+
+		if (address >= (window->phys + window->size))
+			continue;
+
+		address -= window->phys;
+		address += (unsigned long)window->virt;
+
+		return address;
+	}
+
+	/* no windows defined, register must be 1:1 mapped virt:phys */
+	return address;
+}
+
 static unsigned int __init intc_get_reg(struct intc_desc_int *d,
-				 unsigned long address)
+					unsigned long address)
 {
 	unsigned int k;
 
+	address = intc_phys_to_virt(d, address);
+
 	for (k = 0; k < d->nr_reg; k++) {
 		if (d->reg[k] == address)
 			return k;
@@ -774,6 +810,8 @@ static unsigned int __init save_reg(struct intc_desc_int *d,
 				    unsigned int smp)
 {
 	if (value) {
+		value = intc_phys_to_virt(d, value);
+
 		d->reg[cnt] = value;
 #ifdef CONFIG_SMP
 		d->smp[cnt] = smp;
@@ -794,6 +832,7 @@ int __init register_intc_controller(struct intc_desc *desc)
 	unsigned int i, k, smp;
 	struct intc_hw_desc *hw = &desc->hw;
 	struct intc_desc_int *d;
+	struct resource *res;
 
 	d = kzalloc(sizeof(*d), GFP_NOWAIT);
 	if (!d)
@@ -802,6 +841,25 @@ int __init register_intc_controller(struct intc_desc *desc)
 	INIT_LIST_HEAD(&d->list);
 	list_add(&d->list, &intc_list);
 
+	if (desc->num_resources) {
+		d->nr_windows = desc->num_resources;
+		d->window = kzalloc(d->nr_windows * sizeof(*d->window),
+				    GFP_NOWAIT);
+		if (!d->window)
+			goto err1;
+
+		for (k = 0; k < d->nr_windows; k++) {
+			res = desc->resource + k;
+			WARN_ON(resource_type(res) != IORESOURCE_MEM);
+			d->window[k].phys = res->start;
+			d->window[k].size = resource_size(res);
+			d->window[k].virt = ioremap_nocache(res->start,
+							 resource_size(res));
+			if (!d->window[k].virt)
+				goto err2;
+		}
+	}
+
 	d->nr_reg = hw->mask_regs ? hw->nr_mask_regs * 2 : 0;
 	d->nr_reg += hw->prio_regs ? hw->nr_prio_regs * 2 : 0;
 	d->nr_reg += hw->sense_regs ? hw->nr_sense_regs : 0;
@@ -809,12 +867,12 @@ int __init register_intc_controller(struct intc_desc *desc)
 
 	d->reg = kzalloc(d->nr_reg * sizeof(*d->reg), GFP_NOWAIT);
 	if (!d->reg)
-		goto err1;
+		goto err2;
 
 #ifdef CONFIG_SMP
 	d->smp = kzalloc(d->nr_reg * sizeof(*d->smp), GFP_NOWAIT);
 	if (!d->smp)
-		goto err2;
+		goto err3;
 #endif
 	k = 0;
 
@@ -830,7 +888,7 @@ int __init register_intc_controller(struct intc_desc *desc)
 		d->prio = kzalloc(hw->nr_vectors * sizeof(*d->prio),
 				  GFP_NOWAIT);
 		if (!d->prio)
-			goto err3;
+			goto err4;
 
 		for (i = 0; i < hw->nr_prio_regs; i++) {
 			smp = IS_SMP(hw->prio_regs[i]);
@@ -843,7 +901,7 @@ int __init register_intc_controller(struct intc_desc *desc)
 		d->sense = kzalloc(hw->nr_vectors * sizeof(*d->sense),
 				   GFP_NOWAIT);
 		if (!d->sense)
-			goto err4;
+			goto err5;
 
 		for (i = 0; i < hw->nr_sense_regs; i++)
 			k += save_reg(d, k, hw->sense_regs[i].reg, 0);
@@ -925,17 +983,23 @@ int __init register_intc_controller(struct intc_desc *desc)
 		intc_enable_disable_enum(desc, d, desc->force_enable, 1);
 
 	return 0;
- err4:
+err5:
 	kfree(d->prio);
- err3:
+err4:
 #ifdef CONFIG_SMP
 	kfree(d->smp);
- err2:
+err3:
 #endif
 	kfree(d->reg);
- err1:
+err2:
+	for (k = 0; k < d->nr_windows; k++)
+		if (d->window[k].virt)
+			iounmap(d->window[k].virt);
+
+	kfree(d->window);
+err1:
 	kfree(d);
- err0:
+err0:
 	pr_err("unable to allocate INTC memory\n");
 
 	return -ENOMEM;
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index df3777035936..01d8168c5a1b 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -1,6 +1,8 @@
 #ifndef __SH_INTC_H
 #define __SH_INTC_H
 
+#include <linux/ioport.h>
+
 typedef unsigned char intc_enum;
 
 struct intc_vect {
@@ -71,6 +73,8 @@ struct intc_hw_desc {
 
 struct intc_desc {
 	char *name;
+	struct resource *resource;
+	unsigned int num_resources;
 	intc_enum force_enable;
 	intc_enum force_disable;
 	struct intc_hw_desc hw;
-- 
cgit v1.2.3


From 5c578aedcb21d79eeb4e9cf04ca5b276ac82614c Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Wed, 17 Mar 2010 20:31:11 +0000
Subject: IPv6: convert addrconf hash list to RCU

Convert from reader/writer lock to RCU and spinlock for addrconf
hash list.

Adds an additional helper macro, hlist_for_each_entry_continue_rcu,
to handle the continue case.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 13 ++++++++
 include/net/if_inet6.h  |  1 +
 net/ipv6/addrconf.c     | 85 +++++++++++++++++++++++++------------------------
 3 files changed, 58 insertions(+), 41 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 2c9b46cff3d7..004908b104d5 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -428,5 +428,18 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(pos->next))
 
+/**
+ * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue_rcu(tpos, pos, member)		\
+	for (pos = rcu_dereference((pos)->next);			\
+	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
+	     pos = rcu_dereference(pos->next))
+
+
 #endif	/* __KERNEL__ */
 #endif
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 498401541519..22a00b1d2c38 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -64,6 +64,7 @@ struct inet6_ifaddr {
 #endif
 
 	int			dead;
+	struct rcu_head		rcu;
 };
 
 struct ip6_sf_socklist {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 0488b9f8071d..7ffd5eeab967 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -127,7 +127,7 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
  *	Configured unicast address hash table
  */
 static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
-static DEFINE_RWLOCK(addrconf_hash_lock);
+static DEFINE_SPINLOCK(addrconf_hash_lock);
 
 static void addrconf_verify(unsigned long);
 
@@ -523,8 +523,13 @@ static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int old)
 }
 #endif
 
-/* Nobody refers to this ifaddr, destroy it */
+static void inet6_ifa_finish_destroy_rcu(struct rcu_head *head)
+{
+	struct inet6_ifaddr *ifp = container_of(head, struct inet6_ifaddr, rcu);
+	kfree(ifp);
+}
 
+/* Nobody refers to this ifaddr, destroy it */
 void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 {
 	WARN_ON(ifp->if_next != NULL);
@@ -545,7 +550,7 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)
 	}
 	dst_release(&ifp->rt->u.dst);
 
-	kfree(ifp);
+	call_rcu(&ifp->rcu, inet6_ifa_finish_destroy_rcu);
 }
 
 static void
@@ -616,7 +621,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 		goto out2;
 	}
 
-	write_lock(&addrconf_hash_lock);
+	spin_lock(&addrconf_hash_lock);
 
 	/* Ignore adding duplicate addresses on an interface */
 	if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) {
@@ -670,9 +675,9 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,
 	/* Add to big hash table */
 	hash = ipv6_addr_hash(addr);
 
-	hlist_add_head(&ifa->addr_lst, &inet6_addr_lst[hash]);
+	hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]);
 	in6_ifa_hold(ifa);
-	write_unlock(&addrconf_hash_lock);
+	spin_unlock(&addrconf_hash_lock);
 
 	write_lock(&idev->lock);
 	/* Add to inet6_dev unicast addr list. */
@@ -699,7 +704,7 @@ out2:
 
 	return ifa;
 out:
-	write_unlock(&addrconf_hash_lock);
+	spin_unlock(&addrconf_hash_lock);
 	goto out2;
 }
 
@@ -717,10 +722,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 
 	ifp->dead = 1;
 
-	write_lock_bh(&addrconf_hash_lock);
-	hlist_del_init(&ifp->addr_lst);
+	spin_lock_bh(&addrconf_hash_lock);
+	hlist_del_init_rcu(&ifp->addr_lst);
 	__in6_ifa_put(ifp);
-	write_unlock_bh(&addrconf_hash_lock);
+	spin_unlock_bh(&addrconf_hash_lock);
 
 	write_lock_bh(&idev->lock);
 #ifdef CONFIG_IPV6_PRIVACY
@@ -1274,8 +1279,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -1285,7 +1290,8 @@ int ipv6_chk_addr(struct net *net, struct in6_addr *addr,
 				break;
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
+
 	return ifp != NULL;
 }
 EXPORT_SYMBOL(ipv6_chk_addr);
@@ -1341,8 +1347,8 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 	struct hlist_node *node;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -1353,7 +1359,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 			}
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 
 	return ifp;
 }
@@ -2698,10 +2704,10 @@ static int addrconf_ifdown(struct net_device *dev, int how)
 		write_unlock_bh(&idev->lock);
 
 		/* clear hash table */
-		write_lock_bh(&addrconf_hash_lock);
-		hlist_del_init(&ifa->addr_lst);
+		spin_lock_bh(&addrconf_hash_lock);
+		hlist_del_init_rcu(&ifa->addr_lst);
 		__in6_ifa_put(ifa);
-		write_unlock_bh(&addrconf_hash_lock);
+		spin_unlock_bh(&addrconf_hash_lock);
 
 		__ipv6_ifa_notify(RTM_DELADDR, ifa);
 		atomic_notifier_call_chain(&inet6addr_chain, NETDEV_DOWN, ifa);
@@ -2946,11 +2952,10 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		struct hlist_node *n;
-		hlist_for_each_entry(ifa, n,
-				     &inet6_addr_lst[state->bucket], addr_lst) {
+		hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
+					 addr_lst)
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
-		}
 	}
 	return NULL;
 }
@@ -2962,10 +2967,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct hlist_node *n = &ifa->addr_lst;
 
-	hlist_for_each_entry_continue(ifa, n, addr_lst) {
+	hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
 		if (net_eq(dev_net(ifa->idev->dev), net))
 			return ifa;
-	}
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
 		hlist_for_each_entry(ifa, n,
@@ -2989,9 +2993,9 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(addrconf_hash_lock)
+	__acquires(rcu)
 {
-	read_lock_bh(&addrconf_hash_lock);
+	rcu_read_lock_bh();
 	return if6_get_idx(seq, *pos);
 }
 
@@ -3005,9 +3009,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
-	__releases(addrconf_hash_lock)
+	__releases(rcu)
 {
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 }
 
 static int if6_seq_show(struct seq_file *seq, void *v)
@@ -3081,8 +3085,8 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 	struct hlist_node *n;
 	u8 hash = ipv6_addr_hash(addr);
 
-	read_lock_bh(&addrconf_hash_lock);
-	hlist_for_each_entry(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+	rcu_read_lock_bh();
+	hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3091,7 +3095,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 			break;
 		}
 	}
-	read_unlock_bh(&addrconf_hash_lock);
+	rcu_read_unlock_bh();
 	return ret;
 }
 #endif
@@ -3107,7 +3111,8 @@ static void addrconf_verify(unsigned long foo)
 	unsigned long now, next;
 	int i;
 
-	spin_lock_bh(&addrconf_verify_lock);
+	rcu_read_lock_bh();
+	spin_lock(&addrconf_verify_lock);
 	now = jiffies;
 	next = now + ADDR_CHECK_FREQUENCY;
 
@@ -3116,8 +3121,8 @@ static void addrconf_verify(unsigned long foo)
 	for (i=0; i < IN6_ADDR_HSIZE; i++) {
 
 restart:
-		read_lock(&addrconf_hash_lock);
-		hlist_for_each_entry(ifp, node, &inet6_addr_lst[i], addr_lst) {
+		hlist_for_each_entry_rcu(ifp, node,
+					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 #ifdef CONFIG_IPV6_PRIVACY
 			unsigned long regen_advance;
@@ -3139,7 +3144,6 @@ restart:
 			    age >= ifp->valid_lft) {
 				spin_unlock(&ifp->lock);
 				in6_ifa_hold(ifp);
-				read_unlock(&addrconf_hash_lock);
 				ipv6_del_addr(ifp);
 				goto restart;
 			} else if (ifp->prefered_lft == INFINITY_LIFE_TIME) {
@@ -3161,7 +3165,6 @@ restart:
 
 				if (deprecate) {
 					in6_ifa_hold(ifp);
-					read_unlock(&addrconf_hash_lock);
 
 					ipv6_ifa_notify(0, ifp);
 					in6_ifa_put(ifp);
@@ -3179,7 +3182,7 @@ restart:
 						in6_ifa_hold(ifp);
 						in6_ifa_hold(ifpub);
 						spin_unlock(&ifp->lock);
-						read_unlock(&addrconf_hash_lock);
+
 						spin_lock(&ifpub->lock);
 						ifpub->regen_count = 0;
 						spin_unlock(&ifpub->lock);
@@ -3199,12 +3202,12 @@ restart:
 				spin_unlock(&ifp->lock);
 			}
 		}
-		read_unlock(&addrconf_hash_lock);
 	}
 
 	addr_chk_timer.expires = time_before(next, jiffies + HZ) ? jiffies + HZ : next;
 	add_timer(&addr_chk_timer);
-	spin_unlock_bh(&addrconf_verify_lock);
+	spin_unlock(&addrconf_verify_lock);
+	rcu_read_unlock_bh();
 }
 
 static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
@@ -4621,10 +4624,10 @@ void addrconf_cleanup(void)
 	/*
 	 *	Check hash table.
 	 */
-	write_lock_bh(&addrconf_hash_lock);
+	spin_lock_bh(&addrconf_hash_lock);
 	for (i = 0; i < IN6_ADDR_HSIZE; i++)
 		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
-	write_unlock_bh(&addrconf_hash_lock);
+	spin_unlock_bh(&addrconf_hash_lock);
 
 	del_timer(&addr_chk_timer);
 	rtnl_unlock();
-- 
cgit v1.2.3


From 755d0e77ac9c8d125388922dc33434ed5b2ebe80 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Fri, 19 Mar 2010 04:42:24 +0000
Subject: net: rtnetlink: ignore NETDEV_PRE_TYPE_CHANGE in rtnetlink_event()

Ignore the new NETDEV_PRE_TYPE_CHANGE event in rtnetlink_event() since
there have been no changes userspace needs to be notified of.

Also add a comment to the netdev notifier event definitions to remind
people to update the exclusion list when adding new event types.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/notifier.h | 5 ++++-
 net/core/rtnetlink.c     | 1 +
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index f3635fc6e942..9c5d3fad01f3 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -182,7 +182,10 @@ static inline int notifier_to_errno(int ret)
  *	VC switch chains (for loadable kernel svgalib VC switch helpers) etc...
  */
  
-/* netdevice notifier chain */
+/* netdevice notifier chain. Please remember to update the rtnetlink
+ * notification exclusion list in rtnetlink_event() when adding new
+ * types.
+ */
 #define NETDEV_UP	0x0001	/* For now you can't veto a device up/down */
 #define NETDEV_DOWN	0x0002
 #define NETDEV_REBOOT	0x0003	/* Tell a protocol stack a network interface
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index e1121f0bca6a..ffc6cf3495ac 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1513,6 +1513,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi
 	case NETDEV_POST_INIT:
 	case NETDEV_REGISTER:
 	case NETDEV_CHANGE:
+	case NETDEV_PRE_TYPE_CHANGE:
 	case NETDEV_GOING_DOWN:
 	case NETDEV_UNREGISTER:
 	case NETDEV_UNREGISTER_BATCH:
-- 
cgit v1.2.3


From 32a806c194ea112cfab00f558482dd97bee5e44e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Fri, 19 Mar 2010 04:00:23 +0000
Subject: bonding: flush unicast and multicast lists when changing type

After the type change, addresses in unicast and multicast lists wouldn't make
sense, not to mention possibly different lengths. So flush both lists here.

Note that "dev_addr_discard" will very soon be replaced by "dev_mc_flush"
(once the mc_list conversion is done).

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 4 ++++
 include/linux/netdevice.h       | 2 ++
 net/core/dev.c                  | 6 ++++--
 3 files changed, 10 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index cbe9e353d46a..c2aceaab0143 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1490,6 +1490,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 				goto err_undo_flags;
 			}
 
+			/* Flush unicast and multicast addresses */
+			dev_unicast_flush(bond_dev);
+			dev_addr_discard(bond_dev);
+
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
 			else
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 9fc6ee8e7508..c96c41e08e37 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1994,10 +1994,12 @@ extern int		dev_unicast_delete(struct net_device *dev, void *addr);
 extern int		dev_unicast_add(struct net_device *dev, void *addr);
 extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
 extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
+extern void		dev_unicast_flush(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
 extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
+extern void		dev_addr_discard(struct net_device *dev);
 extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
 extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
 extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
diff --git a/net/core/dev.c b/net/core/dev.c
index c0e260870c0a..fe2a754238a9 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4457,12 +4457,13 @@ void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
-static void dev_unicast_flush(struct net_device *dev)
+void dev_unicast_flush(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 	__hw_addr_flush(&dev->uc);
 	netif_addr_unlock_bh(dev);
 }
+EXPORT_SYMBOL(dev_unicast_flush);
 
 static void dev_unicast_init(struct net_device *dev)
 {
@@ -4484,7 +4485,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
 	}
 }
 
-static void dev_addr_discard(struct net_device *dev)
+void dev_addr_discard(struct net_device *dev)
 {
 	netif_addr_lock_bh(dev);
 
@@ -4493,6 +4494,7 @@ static void dev_addr_discard(struct net_device *dev)
 
 	netif_addr_unlock_bh(dev);
 }
+EXPORT_SYMBOL(dev_addr_discard);
 
 /**
  *	dev_get_flags - get flags reported to userspace
-- 
cgit v1.2.3


From 907cdda5205b012eec7513f66713749b293188c9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Fri, 19 Mar 2010 05:37:18 +0000
Subject: tcp: Add SNMP counter for DEFER_ACCEPT

It's currently hard to diagnose when ACK frames are dropped because an
application set TCP_DEFER_ACCEPT on its listening socket.

See http://bugzilla.kernel.org/show_bug.cgi?id=15507

This patch adds an SNMP value, named TCPDeferAcceptDrop

netstat -s | grep TCPDeferAcceptDrop
    TCPDeferAcceptDrop: 0

This counter is incremented every time we drop a pure ACK frame received
by a socket in SYN_RECV state because its SYNACK retrans count is lower
than defer_accept value.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h     | 1 +
 net/ipv4/proc.c          | 1 +
 net/ipv4/tcp_minisocks.c | 1 +
 3 files changed, 3 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index 4435d1084755..d2a9aa3c6c88 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -227,6 +227,7 @@ enum
 	LINUX_MIB_SACKSHIFTFALLBACK,
 	LINUX_MIB_TCPBACKLOGDROP,
 	LINUX_MIB_TCPMINTTLDROP, /* RFC 5082 */
+	LINUX_MIB_TCPDEFERACCEPTDROP,
 	__LINUX_MIB_MAX
 };
 
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index 4f1f337f4337..3dc9914c1dce 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -251,6 +251,7 @@ static const struct snmp_mib snmp4_net_list[] = {
 	SNMP_MIB_ITEM("TCPSackShiftFallback", LINUX_MIB_SACKSHIFTFALLBACK),
 	SNMP_MIB_ITEM("TCPBacklogDrop", LINUX_MIB_TCPBACKLOGDROP),
 	SNMP_MIB_ITEM("TCPMinTTLDrop", LINUX_MIB_TCPMINTTLDROP),
+	SNMP_MIB_ITEM("TCPDeferAcceptDrop", LINUX_MIB_TCPDEFERACCEPTDROP),
 	SNMP_MIB_SENTINEL
 };
 
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 4199bc6915c5..32f96278a24a 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -671,6 +671,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
 	    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
 		inet_rsk(req)->acked = 1;
+		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPDEFERACCEPTDROP);
 		return NULL;
 	}
 
-- 
cgit v1.2.3


From 4bab9d426e6dbd9ea09330919a33d35d5faab400 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 19 Mar 2010 04:46:38 +0000
Subject: dmaengine: shdma: Remove sh_dmae_slave_chan_id enum

This patch replaces the sh_dmae_slave_chan_id enum
with an unsigned int. The purpose of this change is
to make it possible to separate the slave id enums
from the dmaengine header.

The slave id enums vary with processor model, so in
the future it makes sense to put these in the processor
specific headers together with the pinmux enums.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/dmaengine.h | 6 +++---
 arch/sh/include/asm/siu.h       | 8 ++++----
 drivers/dma/shdma.c             | 8 ++++----
 drivers/serial/sh-sci.c         | 4 ++--
 include/linux/serial_sci.h      | 4 ++--
 5 files changed, 15 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/dmaengine.h b/arch/sh/include/asm/dmaengine.h
index bf2f30cf0a27..568e991442b0 100644
--- a/arch/sh/include/asm/dmaengine.h
+++ b/arch/sh/include/asm/dmaengine.h
@@ -17,7 +17,7 @@
 
 #define SH_DMAC_MAX_CHANNELS	6
 
-enum sh_dmae_slave_chan_id {
+enum {
 	SHDMA_SLAVE_SCIF0_TX,
 	SHDMA_SLAVE_SCIF0_RX,
 	SHDMA_SLAVE_SCIF1_TX,
@@ -38,7 +38,7 @@ enum sh_dmae_slave_chan_id {
 };
 
 struct sh_dmae_slave_config {
-	enum sh_dmae_slave_chan_id	slave_id;
+	unsigned int			slave_id;
 	dma_addr_t			addr;
 	u32				chcr;
 	char				mid_rid;
@@ -68,7 +68,7 @@ struct device;
 
 /* Used by slave DMA clients to request DMA to/from a specific peripheral */
 struct sh_dmae_slave {
-	enum sh_dmae_slave_chan_id	slave_id; /* Set by the platform */
+	unsigned int			slave_id; /* Set by the platform */
 	struct device			*dma_dev; /* Set by the platform */
 	struct sh_dmae_slave_config	*config;  /* Set by the driver */
 };
diff --git a/arch/sh/include/asm/siu.h b/arch/sh/include/asm/siu.h
index f1b1e6944a5f..e8d4142baf59 100644
--- a/arch/sh/include/asm/siu.h
+++ b/arch/sh/include/asm/siu.h
@@ -17,10 +17,10 @@ struct device;
 
 struct siu_platform {
 	struct device *dma_dev;
-	enum sh_dmae_slave_chan_id dma_slave_tx_a;
-	enum sh_dmae_slave_chan_id dma_slave_rx_a;
-	enum sh_dmae_slave_chan_id dma_slave_tx_b;
-	enum sh_dmae_slave_chan_id dma_slave_rx_b;
+	unsigned int dma_slave_tx_a;
+	unsigned int dma_slave_rx_a;
+	unsigned int dma_slave_tx_b;
+	unsigned int dma_slave_rx_b;
 };
 
 #endif /* ASM_SIU_H */
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 5d17e09cb625..e5588f4868ca 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -266,7 +266,7 @@ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
 }
 
 static struct sh_dmae_slave_config *sh_dmae_find_slave(
-	struct sh_dmae_chan *sh_chan, enum sh_dmae_slave_chan_id slave_id)
+	struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param)
 {
 	struct dma_device *dma_dev = sh_chan->common.device;
 	struct sh_dmae_device *shdev = container_of(dma_dev,
@@ -274,11 +274,11 @@ static struct sh_dmae_slave_config *sh_dmae_find_slave(
 	struct sh_dmae_pdata *pdata = shdev->pdata;
 	int i;
 
-	if ((unsigned)slave_id >= SHDMA_SLAVE_NUMBER)
+	if (param->slave_id >= SHDMA_SLAVE_NUMBER)
 		return NULL;
 
 	for (i = 0; i < pdata->slave_num; i++)
-		if (pdata->slave[i].slave_id == slave_id)
+		if (pdata->slave[i].slave_id == param->slave_id)
 			return pdata->slave + i;
 
 	return NULL;
@@ -299,7 +299,7 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
 	if (param) {
 		struct sh_dmae_slave_config *cfg;
 
-		cfg = sh_dmae_find_slave(sh_chan, param->slave_id);
+		cfg = sh_dmae_find_slave(sh_chan, param);
 		if (!cfg)
 			return -EINVAL;
 
diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c
index f7b9aff88f4a..2d9a06db83b5 100644
--- a/drivers/serial/sh-sci.c
+++ b/drivers/serial/sh-sci.c
@@ -90,8 +90,8 @@ struct sci_port {
 	struct dma_chan			*chan_rx;
 #ifdef CONFIG_SERIAL_SH_SCI_DMA
 	struct device			*dma_dev;
-	enum sh_dmae_slave_chan_id	slave_tx;
-	enum sh_dmae_slave_chan_id	slave_rx;
+	unsigned int			slave_tx;
+	unsigned int			slave_rx;
 	struct dma_async_tx_descriptor	*desc_tx;
 	struct dma_async_tx_descriptor	*desc_rx[2];
 	dma_cookie_t			cookie_tx;
diff --git a/include/linux/serial_sci.h b/include/linux/serial_sci.h
index 193d4bfe42ff..f5364a1de68b 100644
--- a/include/linux/serial_sci.h
+++ b/include/linux/serial_sci.h
@@ -33,8 +33,8 @@ struct plat_sci_port {
 	char		*clk;			/* clock string */
 	struct device	*dma_dev;
 #ifdef CONFIG_SERIAL_SH_SCI_DMA
-	enum sh_dmae_slave_chan_id dma_slave_tx;
-	enum sh_dmae_slave_chan_id dma_slave_rx;
+	unsigned int dma_slave_tx;
+	unsigned int dma_slave_rx;
 #endif
 };
 
-- 
cgit v1.2.3


From b2623a61cfd3c6badb8396dc85ab5a70f4a05f61 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Fri, 19 Mar 2010 04:47:10 +0000
Subject: dmaengine: shdma: Introduce include/linux/sh_dma.h

Create a common platform data header file for the
shdma dmaengine driver. This is done by moving
common structures from sh asm/dmaengine.h to
linux/sh_dma.h. DMA registers are also copied from
sh asm/dma-register.h to make the code architecture
independent.

The sh header file asm/dmaengine.h is still kept
with the slave id enum. This allows us to keep the
old processor specific code as is and slowly move
over to slave id enums in per-processor headers.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/dmaengine.h |  60 +-----------------------
 drivers/dma/shdma.c             |   3 +-
 drivers/dma/shdma.h             |   3 +-
 include/linux/sh_dma.h          | 101 ++++++++++++++++++++++++++++++++++++++++
 4 files changed, 104 insertions(+), 63 deletions(-)
 create mode 100644 include/linux/sh_dma.h

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/dmaengine.h b/arch/sh/include/asm/dmaengine.h
index abb8d660b6fb..2a02b611a9ad 100644
--- a/arch/sh/include/asm/dmaengine.h
+++ b/arch/sh/include/asm/dmaengine.h
@@ -10,12 +10,7 @@
 #ifndef ASM_DMAENGINE_H
 #define ASM_DMAENGINE_H
 
-#include <linux/dmaengine.h>
-#include <linux/list.h>
-
-#include <asm/dma-register.h>
-
-#define SH_DMAC_MAX_CHANNELS	6
+#include <linux/sh_dma.h>
 
 enum {
 	SHDMA_SLAVE_SCIF0_TX,
@@ -36,57 +31,4 @@ enum {
 	SHDMA_SLAVE_SIUB_RX,
 };
 
-struct sh_dmae_slave_config {
-	unsigned int			slave_id;
-	dma_addr_t			addr;
-	u32				chcr;
-	char				mid_rid;
-};
-
-struct sh_dmae_channel {
-	unsigned int	offset;
-	unsigned int	dmars;
-	unsigned int	dmars_bit;
-};
-
-struct sh_dmae_pdata {
-	struct sh_dmae_slave_config *slave;
-	int slave_num;
-	struct sh_dmae_channel *channel;
-	int channel_num;
-	unsigned int ts_low_shift;
-	unsigned int ts_low_mask;
-	unsigned int ts_high_shift;
-	unsigned int ts_high_mask;
-	unsigned int *ts_shift;
-	int ts_shift_num;
-	u16 dmaor_init;
-};
-
-struct device;
-
-/* Used by slave DMA clients to request DMA to/from a specific peripheral */
-struct sh_dmae_slave {
-	unsigned int			slave_id; /* Set by the platform */
-	struct device			*dma_dev; /* Set by the platform */
-	struct sh_dmae_slave_config	*config;  /* Set by the driver */
-};
-
-struct sh_dmae_regs {
-	u32 sar; /* SAR / source address */
-	u32 dar; /* DAR / destination address */
-	u32 tcr; /* TCR / transfer count */
-};
-
-struct sh_desc {
-	struct sh_dmae_regs hw;
-	struct list_head node;
-	struct dma_async_tx_descriptor async_tx;
-	enum dma_data_direction direction;
-	dma_cookie_t cookie;
-	size_t partial;
-	int chunks;
-	int mark;
-};
-
 #endif
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 83605389fb56..7a18b580f626 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -25,8 +25,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
-
-#include <asm/dmaengine.h>
+#include <linux/sh_dma.h>
 
 #include "shdma.h"
 
diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h
index d7ddf6f47154..4021275a0a43 100644
--- a/drivers/dma/shdma.h
+++ b/drivers/dma/shdma.h
@@ -17,8 +17,7 @@
 #include <linux/interrupt.h>
 #include <linux/list.h>
 
-#include <asm/dmaengine.h>
-
+#define SH_DMAC_MAX_CHANNELS 6
 #define SH_DMA_SLAVE_NUMBER 256
 #define SH_DMA_TCR_MAX 0x00FFFFFF	/* 16MB */
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
new file mode 100644
index 000000000000..cdaaff424211
--- /dev/null
+++ b/include/linux/sh_dma.h
@@ -0,0 +1,101 @@
+/*
+ * Header for the new SH dmaengine driver
+ *
+ * Copyright (C) 2010 Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef SH_DMA_H
+#define SH_DMA_H
+
+#include <linux/list.h>
+#include <linux/dmaengine.h>
+
+/* Used by slave DMA clients to request DMA to/from a specific peripheral */
+struct sh_dmae_slave {
+	unsigned int			slave_id; /* Set by the platform */
+	struct device			*dma_dev; /* Set by the platform */
+	struct sh_dmae_slave_config	*config;  /* Set by the driver */
+};
+
+struct sh_dmae_regs {
+	u32 sar; /* SAR / source address */
+	u32 dar; /* DAR / destination address */
+	u32 tcr; /* TCR / transfer count */
+};
+
+struct sh_desc {
+	struct sh_dmae_regs hw;
+	struct list_head node;
+	struct dma_async_tx_descriptor async_tx;
+	enum dma_data_direction direction;
+	dma_cookie_t cookie;
+	size_t partial;
+	int chunks;
+	int mark;
+};
+struct sh_dmae_slave_config {
+	unsigned int			slave_id;
+	dma_addr_t			addr;
+	u32				chcr;
+	char				mid_rid;
+};
+
+struct sh_dmae_channel {
+	unsigned int	offset;
+	unsigned int	dmars;
+	unsigned int	dmars_bit;
+};
+
+struct sh_dmae_pdata {
+	struct sh_dmae_slave_config *slave;
+	int slave_num;
+	struct sh_dmae_channel *channel;
+	int channel_num;
+	unsigned int ts_low_shift;
+	unsigned int ts_low_mask;
+	unsigned int ts_high_shift;
+	unsigned int ts_high_mask;
+	unsigned int *ts_shift;
+	int ts_shift_num;
+	u16 dmaor_init;
+};
+
+/* DMA register */
+#define SAR	0x00
+#define DAR	0x04
+#define TCR	0x08
+#define CHCR	0x0C
+#define DMAOR	0x40
+
+/* DMAOR definitions */
+#define DMAOR_AE	0x00000004
+#define DMAOR_NMIF	0x00000002
+#define DMAOR_DME	0x00000001
+
+/* Definitions for the SuperH DMAC */
+#define REQ_L	0x00000000
+#define REQ_E	0x00080000
+#define RACK_H	0x00000000
+#define RACK_L	0x00040000
+#define ACK_R	0x00000000
+#define ACK_W	0x00020000
+#define ACK_H	0x00000000
+#define ACK_L	0x00010000
+#define DM_INC	0x00004000
+#define DM_DEC	0x00008000
+#define DM_FIX	0x0000c000
+#define SM_INC	0x00001000
+#define SM_DEC	0x00002000
+#define SM_FIX	0x00003000
+#define RS_IN	0x00000200
+#define RS_OUT	0x00000300
+#define TS_BLK	0x00000040
+#define TM_BUR	0x00000020
+#define CHCR_DE	0x00000001
+#define CHCR_TE	0x00000002
+#define CHCR_IE	0x00000004
+
+#endif
-- 
cgit v1.2.3


From e1292ba164742e3a236e407148e00300b7196906 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Thu, 18 Mar 2010 20:19:27 -0700
Subject: ntp: Make time_adjust static

Now that no arches are accessing time_adjust directly,
make it static.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1268968769-19209-1-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 1 -
 kernel/time/ntp.c     | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index 7a082b32d8e1..e2de51eedf05 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -238,7 +238,6 @@ extern int tickadj;			/* amount of adjustment per tick */
  * phase-lock loop variables
  */
 extern int time_status;		/* clock synchronization status bits */
-extern long time_adjust;	/* The amount of adjtime left */
 
 extern void ntp_init(void);
 extern void ntp_clear(void);
diff --git a/kernel/time/ntp.c b/kernel/time/ntp.c
index 7c0f180d6e9d..c63116863a80 100644
--- a/kernel/time/ntp.c
+++ b/kernel/time/ntp.c
@@ -69,7 +69,7 @@ static s64			time_freq;
 /* time at last adjustment (secs):					*/
 static long			time_reftime;
 
-long				time_adjust;
+static long			time_adjust;
 
 /* constant (boot-param configurable) NTP tick adjustment (upscaled)	*/
 static s64			ntp_tick_adj;
-- 
cgit v1.2.3


From 3d0205bd1383aa3cac93c209b7c7d03b27930195 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Thu, 18 Mar 2010 20:19:28 -0700
Subject: ntp: Remove tickadj

There are zero users of tickadj. So remove it.

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1268968769-19209-2-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timex.h | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/timex.h b/include/linux/timex.h
index e2de51eedf05..32d852f8cbe4 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -232,7 +232,6 @@ struct timex {
  */
 extern unsigned long tick_usec;		/* USER_HZ period (usec) */
 extern unsigned long tick_nsec;		/* ACTHZ          period (nsec) */
-extern int tickadj;			/* amount of adjustment per tick */
 
 /*
  * phase-lock loop variables
@@ -270,9 +269,6 @@ extern void second_overflow(void);
 extern void update_ntp_one_tick(void);
 extern int do_adjtimex(struct timex *);
 
-/* Don't use! Compatibility define for existing users. */
-#define tickadj	(500/HZ ? : 1)
-
 int read_current_timer(unsigned long *timer_val);
 
 /* The clock frequency of the i8253/i8254 PIT */
-- 
cgit v1.2.3


From f6e0bb56d43d1f3b2ad54d51b65c07ef3bdead16 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 18 Mar 2010 18:29:32 -0700
Subject: include/linux/wireless.h: Add IW_HANDLER macro to initialize array
 entry

Copied the idea from orinoco

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 5b4c6c772a9b..0955b67616d2 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -346,6 +346,8 @@
 #define SIOCIWFIRST	0x8B00
 #define SIOCIWLAST	SIOCIWLASTPRIV		/* 0x8BFF */
 #define IW_IOCTL_IDX(cmd)	((cmd) - SIOCIWFIRST)
+#define IW_HANDLER(id, func)			\
+	[IW_IOCTL_IDX(id)] = func
 
 /* Odd : get (world access), even : set (root access) */
 #define IW_IS_SET(cmd)	(!((cmd) & 0x1))
-- 
cgit v1.2.3


From 2ae2332ed11687325096e68e326ec57f0294cff9 Mon Sep 17 00:00:00 2001
From: Joe Perches <joe@perches.com>
Date: Thu, 18 Mar 2010 18:29:33 -0700
Subject: wireless.h: Use SIOCIWFIRST not SIOCSIWCOMMIT for range check

These two #defines use the same value, but
SIOCIWFIRST makes more sense in this use.

Signed-off-by: Joe Perches <joe@perches.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/wireless.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/wireless.h b/include/linux/wireless.h
index 0955b67616d2..e6827eedf18b 100644
--- a/include/linux/wireless.h
+++ b/include/linux/wireless.h
@@ -650,7 +650,7 @@
  * 32 bit bitmasks. Note : 32 bits = 0x20 = 2^5. */
 #define IW_EVENT_CAPA_BASE(cmd)		((cmd >= SIOCIWFIRSTPRIV) ? \
 					 (cmd - SIOCIWFIRSTPRIV + 0x60) : \
-					 (cmd - SIOCSIWCOMMIT))
+					 (cmd - SIOCIWFIRST))
 #define IW_EVENT_CAPA_INDEX(cmd)	(IW_EVENT_CAPA_BASE(cmd) >> 5)
 #define IW_EVENT_CAPA_MASK(cmd)		(1 << (IW_EVENT_CAPA_BASE(cmd) & 0x1F))
 /* Event capability constants - event autogenerated by the kernel
-- 
cgit v1.2.3


From f722377bfa869c9c1abeca88266bb4cfd3a5d06d Mon Sep 17 00:00:00 2001
From: Thomas Weber <swirl@gmx.li>
Date: Tue, 23 Mar 2010 19:50:16 +0100
Subject: i2c: twl.h: Fix comment typos and tab

Fix typo in comments:
	- contollable => controllable
	- uniqe => unique

Replace tab with a space in defines.

Signed-off-by: Thomas Weber <weber@corscience.de>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/i2c/twl.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h
index fb6784e86d5f..a63b77d89d30 100644
--- a/include/linux/i2c/twl.h
+++ b/include/linux/i2c/twl.h
@@ -496,7 +496,7 @@ struct twl4030_madc_platform_data {
 	int		irq_line;
 };
 
-/* Boards have uniqe mappings of {row, col} --> keycode.
+/* Boards have unique mappings of {row, col} --> keycode.
  * Column and row are 8 bits each, but range only from 0..7.
  * a PERSISTENT_KEY is "always on" and never reported.
  */
@@ -664,15 +664,15 @@ static inline int twl4030charger_usb_en(int enable) { return 0; }
 #define TWL4030_REG_VUSB3V1	19
 
 /* TWL6030 SMPS/LDO's */
-/* EXTERNAL dc-to-dc buck convertor contollable via SR */
+/* EXTERNAL dc-to-dc buck convertor controllable via SR */
 #define TWL6030_REG_VDD1	30
 #define TWL6030_REG_VDD2	31
 #define TWL6030_REG_VDD3	32
 
 /* Non SR compliant dc-to-dc buck convertors */
-#define	TWL6030_REG_VMEM	33
+#define TWL6030_REG_VMEM	33
 #define TWL6030_REG_V2V1	34
-#define	TWL6030_REG_V1V29	35
+#define TWL6030_REG_V1V29	35
 #define TWL6030_REG_V1V8	36
 
 /* EXTERNAL LDOs */
-- 
cgit v1.2.3


From d6dc1a386358979e12366d1f35eeb68fc181e101 Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Tue, 23 Mar 2010 09:02:33 +0200
Subject: cfg80211: Add connection quality monitoring support to nl80211

Add support for basic configuration of connection quality monitoring to the
nl80211 interface, and basic support for notifying about triggered monitoring
events.

Via this interface a user-space connection manager may configure and receive
pre-warning events of deteriorating WLAN connection quality, and start
preparing for roaming in advance, before the connection is already lost.

An example usage of such a trigger is starting scanning for nearby APs in
an attempt to find one with better connection quality, and associate to it
before the connection characteristics of the existing connection become too bad
or the association is even lost, leading to a prolonged delay in connectivity.

The interface currently supports only RSSI, but it could be later extended
to include other parameters, such as signal-to-noise ratio, if the need for
that arises.

Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  46 +++++++++++++++++
 include/net/cfg80211.h  |  19 +++++++
 net/wireless/mlme.c     |  13 +++++
 net/wireless/nl80211.c  | 131 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/wireless/nl80211.h  |   6 +++
 5 files changed, 215 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 28ba20fda3e2..89947597b9ce 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -323,6 +323,12 @@
  *	the TX command and %NL80211_ATTR_FRAME includes the contents of the
  *	frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged
  *	the frame.
+ * @NL80211_CMD_SET_CQM: Connection quality monitor configuration. This command
+ *	is used to configure connection quality monitoring notification trigger
+ *	levels.
+ * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
+ *	command is used as an event to indicate the that a trigger level was
+ *	reached.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -419,6 +425,9 @@ enum nl80211_commands {
 	NL80211_CMD_SET_POWER_SAVE,
 	NL80211_CMD_GET_POWER_SAVE,
 
+	NL80211_CMD_SET_CQM,
+	NL80211_CMD_NOTIFY_CQM,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -691,6 +700,9 @@ enum nl80211_commands {
  * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was
  *	acknowledged by the recipient.
  *
+ * @NL80211_ATTR_CQM: connection quality monitor configuration in a
+ *	nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -842,6 +854,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_PS_STATE,
 
+	NL80211_ATTR_CQM,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -1583,4 +1597,36 @@ enum nl80211_ps_state {
 	NL80211_PS_ENABLED,
 };
 
+/**
+ * enum nl80211_attr_cqm - connection quality monitor attributes
+ * @__NL80211_ATTR_CQM_INVALID: invalid
+ * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm (zero to disable)
+ * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm
+ * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
+ * @__NL80211_ATTR_CQM_AFTER_LAST: internal
+ * @NL80211_ATTR_CQM_MAX: highest key attribute
+ */
+enum nl80211_attr_cqm {
+	__NL80211_ATTR_CQM_INVALID,
+	NL80211_ATTR_CQM_RSSI_THOLD,
+	NL80211_ATTR_CQM_RSSI_HYST,
+	NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+
+	/* keep last */
+	__NL80211_ATTR_CQM_AFTER_LAST,
+	NL80211_ATTR_CQM_MAX = __NL80211_ATTR_CQM_AFTER_LAST - 1
+};
+
+/**
+ * enum nl80211_cqm_rssi_threshold_event - RSSI threshold event
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW - The RSSI level is lower than the
+ *      configured threshold
+ * @NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH - The RSSI is higher than the
+ *      configured threshold
+ */
+enum nl80211_cqm_rssi_threshold_event {
+	NL80211_CQM_RSSI_THRESHOLD_EVENT_LOW,
+	NL80211_CQM_RSSI_THRESHOLD_EVENT_HIGH,
+};
+
 #endif /* __LINUX_NL80211_H */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 3d134a1fb96b..868cfd3b9724 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1007,6 +1007,7 @@ struct cfg80211_pmksa {
  *	RSN IE. It allows for faster roaming between WPA2 BSSIDs.
  * @del_pmksa: Delete a cached PMKID.
  * @flush_pmksa: Flush all cached PMKIDs.
+ * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold.
  *
  */
 struct cfg80211_ops {
@@ -1152,6 +1153,10 @@ struct cfg80211_ops {
 
 	int	(*set_power_mgmt)(struct wiphy *wiphy, struct net_device *dev,
 				  bool enabled, int timeout);
+
+	int	(*set_cqm_rssi_config)(struct wiphy *wiphy,
+				       struct net_device *dev,
+				       s32 rssi_thold, u32 rssi_hyst);
 };
 
 /*
@@ -2337,4 +2342,18 @@ bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf,
 void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
 			       const u8 *buf, size_t len, bool ack, gfp_t gfp);
 
+
+/**
+ * cfg80211_cqm_rssi_notify - connection quality monitoring rssi event
+ * @dev: network device
+ * @rssi_event: the triggered RSSI event
+ * @gfp: context flags
+ *
+ * This function is called when a configured connection quality monitoring
+ * rssi threshold reached event occurs.
+ */
+void cfg80211_cqm_rssi_notify(struct net_device *dev,
+			      enum nl80211_cqm_rssi_threshold_event rssi_event,
+			      gfp_t gfp);
+
 #endif /* __NET_CFG80211_H */
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 62bc8855e123..0855f0d32349 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -894,3 +894,16 @@ void cfg80211_action_tx_status(struct net_device *dev, u64 cookie,
 	nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp);
 }
 EXPORT_SYMBOL(cfg80211_action_tx_status);
+
+void cfg80211_cqm_rssi_notify(struct net_device *dev,
+			      enum nl80211_cqm_rssi_threshold_event rssi_event,
+			      gfp_t gfp)
+{
+	struct wireless_dev *wdev = dev->ieee80211_ptr;
+	struct wiphy *wiphy = wdev->wiphy;
+	struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy);
+
+	/* Indicate roaming trigger event to user space */
+	nl80211_send_cqm_rssi_notify(rdev, dev, rssi_event, gfp);
+}
+EXPORT_SYMBOL(cfg80211_cqm_rssi_notify);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index e447db04cf76..a7fc3d83f5f6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -149,6 +149,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 				 .len = IEEE80211_MAX_DATA_LEN },
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 };
 
 /* policy for the attributes */
@@ -4778,6 +4779,84 @@ unlock_rtnl:
 	return err;
 }
 
+static struct nla_policy
+nl80211_attr_cqm_policy[NL80211_ATTR_CQM_MAX + 1] __read_mostly = {
+	[NL80211_ATTR_CQM_RSSI_THOLD] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM_RSSI_HYST] = { .type = NLA_U32 },
+	[NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT] = { .type = NLA_U32 },
+};
+
+/*
+ * Hand an RSSI threshold/hysteresis pair to the driver's set_cqm_rssi_config
+ * op; -EOPNOTSUPP if the op is missing or the interface is not a station.
+ */
+static int nl80211_set_cqm_rssi(struct genl_info *info,
+				s32 threshold, u32 hysteresis)
+{
+	struct cfg80211_registered_device *rdev;
+	struct wireless_dev *wdev;
+	struct net_device *dev;
+	int err;
+
+	if (threshold > 0)
+		return -EINVAL;
+
+	rtnl_lock();
+
+	err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev);
+	if (err)	/* rdev/dev may still be unset: drop only the rtnl lock */
+		goto unlock_rtnl;
+
+	wdev = dev->ieee80211_ptr;
+	if (!rdev->ops->set_cqm_rssi_config ||
+	    wdev->iftype != NL80211_IFTYPE_STATION) {
+		err = -EOPNOTSUPP;
+		goto unlock_rdev;
+	}
+
+	err = rdev->ops->set_cqm_rssi_config(wdev->wiphy, dev,
+					     threshold, hysteresis);
+
+unlock_rdev:
+	cfg80211_unlock_rdev(rdev);
+	dev_put(dev);
+unlock_rtnl:
+	rtnl_unlock();
+
+	return err;
+}
+
+static int nl80211_set_cqm(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *attrs[NL80211_ATTR_CQM_MAX + 1];
+	struct nlattr *cqm;
+	int err;
+
+	cqm = info->attrs[NL80211_ATTR_CQM];
+	if (!cqm) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = nla_parse_nested(attrs, NL80211_ATTR_CQM_MAX, cqm,
+			       nl80211_attr_cqm_policy);
+	if (err)
+		goto out;
+
+	if (attrs[NL80211_ATTR_CQM_RSSI_THOLD] &&
+	    attrs[NL80211_ATTR_CQM_RSSI_HYST]) {
+		s32 threshold;
+		u32 hysteresis;
+		threshold = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_THOLD]);
+		hysteresis = nla_get_u32(attrs[NL80211_ATTR_CQM_RSSI_HYST]);
+		err = nl80211_set_cqm_rssi(info, threshold, hysteresis);
+	} else
+		err = -EINVAL;
+
+out:
+	return err;
+}
+
 static struct genl_ops nl80211_ops[] = {
 	{
 		.cmd = NL80211_CMD_GET_WIPHY,
@@ -5082,6 +5161,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		/* can be retrieved by unprivileged users */
 	},
+	{
+		.cmd = NL80211_CMD_SET_CQM,
+		.doit = nl80211_set_cqm,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
@@ -5832,6 +5917,52 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
 	nlmsg_free(msg);
 }
 
+void
+nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev,
+			     enum nl80211_cqm_rssi_threshold_event rssi_event,
+			     gfp_t gfp)
+{
+	struct sk_buff *msg;
+	struct nlattr *pinfoattr;
+	void *hdr;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, gfp);
+	if (!msg)
+		return;
+
+	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_NOTIFY_CQM);
+	if (!hdr) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	NLA_PUT_U32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx);
+	NLA_PUT_U32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex);
+
+	pinfoattr = nla_nest_start(msg, NL80211_ATTR_CQM);
+	if (!pinfoattr)
+		goto nla_put_failure;
+
+	NLA_PUT_U32(msg, NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT,
+		    rssi_event);
+
+	nla_nest_end(msg, pinfoattr);
+
+	if (genlmsg_end(msg, hdr) < 0) {
+		nlmsg_free(msg);
+		return;
+	}
+
+	genlmsg_multicast_netns(wiphy_net(&rdev->wiphy), msg, 0,
+				nl80211_mlme_mcgrp.id, gfp);
+	return;
+
+ nla_put_failure:
+	genlmsg_cancel(msg, hdr);
+	nlmsg_free(msg);
+}
+
 static int nl80211_netlink_notify(struct notifier_block * nb,
 				  unsigned long state,
 				  void *_notify)
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index 4ca511102c6c..2ad7fbc7d9f1 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -82,4 +82,10 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev,
 				   const u8 *buf, size_t len, bool ack,
 				   gfp_t gfp);
 
+void
+nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev,
+			     struct net_device *netdev,
+			     enum nl80211_cqm_rssi_threshold_event rssi_event,
+			     gfp_t gfp);
+
 #endif /* __NET_WIRELESS_NL80211_H */
-- 
cgit v1.2.3


From 4ac5541bdaea4d8e93710c58775cfa48ac82e09b Mon Sep 17 00:00:00 2001
From: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Date: Wed, 24 Mar 2010 10:11:14 +0200
Subject: cfg80211: Improve connection quality maintenance docs in nl80211.h

In nl80211.h, be a little more elaborate in the docs for the definitions
NL80211_ATTR_CQM_RSSI_THOLD and NL80211_ATTR_CQM_RSSI_HYST.

Reported-by: Luis R. Rodriguez <mcgrof@gmail.com>
Signed-off-by: Juuso Oikarinen <juuso.oikarinen@nokia.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 89947597b9ce..daf6a3432b92 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -1600,8 +1600,12 @@ enum nl80211_ps_state {
 /**
  * enum nl80211_attr_cqm - connection quality monitor attributes
  * @__NL80211_ATTR_CQM_INVALID: invalid
- * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm (zero to disable)
- * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm
+ * @NL80211_ATTR_CQM_RSSI_THOLD: RSSI threshold in dBm. This value specifies
+ *	the threshold for the RSSI level at which an event will be sent. Zero
+ *	to disable.
+ * @NL80211_ATTR_CQM_RSSI_HYST: RSSI hysteresis in dBm. This value specifies
+ *	the minimum amount the RSSI level must change after an event before a
+ *	new event may be issued (to reduce effects of RSSI oscillation).
  * @NL80211_ATTR_CQM_RSSI_THRESHOLD_EVENT: RSSI threshold event
  * @__NL80211_ATTR_CQM_AFTER_LAST: internal
  * @NL80211_ATTR_CQM_MAX: highest key attribute
-- 
cgit v1.2.3


From fd0ec0e6216baea854465bbdb177f2d1b2ccaf22 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 10 Jul 2009 19:27:47 +0200
Subject: netfilter: xtables: consolidate code into xt_request_find_match

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h |  2 ++
 net/bridge/netfilter/ebtables.c    |  5 +----
 net/ipv4/netfilter/ip_tables.c     | 18 ++++++++----------
 net/ipv6/netfilter/ip6_tables.c    | 18 ++++++++----------
 net/netfilter/x_tables.c           | 11 +++++++++++
 5 files changed, 30 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index f8f555153373..dd9d15a73a96 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -436,6 +436,8 @@ extern struct xt_table_info *xt_replace_table(struct xt_table *table,
 
 extern struct xt_match *xt_find_match(u8 af, const char *name, u8 revision);
 extern struct xt_target *xt_find_target(u8 af, const char *name, u8 revision);
+extern struct xt_match *xt_request_find_match(u8 af, const char *name,
+					      u8 revision);
 extern struct xt_target *xt_request_find_target(u8 af, const char *name,
 						u8 revision);
 extern int xt_find_revision(u8 af, const char *name, u8 revision,
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 6d3b256d2f61..c41f3fad0587 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -361,12 +361,9 @@ ebt_check_match(struct ebt_entry_match *m, struct xt_mtchk_param *par,
 	    left - sizeof(struct ebt_entry_match) < m->match_size)
 		return -EINVAL;
 
-	match = try_then_request_module(xt_find_match(NFPROTO_BRIDGE,
-		m->u.name, 0), "ebt_%s", m->u.name);
+	match = xt_request_find_match(NFPROTO_BRIDGE, m->u.name, 0);
 	if (IS_ERR(match))
 		return PTR_ERR(match);
-	if (match == NULL)
-		return -ENOENT;
 	m->u.match = match;
 
 	par->match     = match;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e24ec48ee8cd..09f6567a85b7 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -629,12 +629,11 @@ find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par)
 	struct xt_match *match;
 	int ret;
 
-	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						      m->u.user.revision),
-					"ipt_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 
@@ -1472,13 +1471,12 @@ compat_find_calc_match(struct ipt_entry_match *m,
 {
 	struct xt_match *match;
 
-	match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
-						      m->u.user.revision),
-					"ipt_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("compat_check_calc_match: `%s' not found\n",
 			 m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 	*size += xt_compat_match_offset(match);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 842bef374dcc..41e2429c0163 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -660,12 +660,11 @@ find_check_match(struct ip6t_entry_match *m, struct xt_mtchk_param *par)
 	struct xt_match *match;
 	int ret;
 
-	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
-						      m->u.user.revision),
-					"ip6t_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("find_check_match: `%s' not found\n", m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 
@@ -1506,13 +1505,12 @@ compat_find_calc_match(struct ip6t_entry_match *m,
 {
 	struct xt_match *match;
 
-	match = try_then_request_module(xt_find_match(AF_INET6, m->u.user.name,
-						      m->u.user.revision),
-					"ip6t_%s", m->u.user.name);
-	if (IS_ERR(match) || !match) {
+	match = xt_request_find_match(NFPROTO_IPV6, m->u.user.name,
+				      m->u.user.revision);
+	if (IS_ERR(match)) {
 		duprintf("compat_check_calc_match: `%s' not found\n",
 			 m->u.user.name);
-		return match ? PTR_ERR(match) : -ENOENT;
+		return PTR_ERR(match);
 	}
 	m->u.kernel.match = match;
 	*size += xt_compat_match_offset(match);
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index bf2806afd920..ee7fe215b3e1 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -214,6 +214,17 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
 }
 EXPORT_SYMBOL(xt_find_match);
 
+struct xt_match *
+xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
+{
+	struct xt_match *match;
+
+	match = try_then_request_module(xt_find_match(nfproto, name, revision),
+					"%st_%s", xt_prefix[nfproto], name);
+	return (match != NULL) ? match : ERR_PTR(-ENOENT);
+}
+EXPORT_SYMBOL_GPL(xt_request_find_match);
+
 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
 struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
 {
-- 
cgit v1.2.3


From b0f38452ff73da7e9e0ddc68cd5c6b93c897ca0d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 17:16:42 +0100
Subject: netfilter: xtables: change xt_match.checkentry return type

Restore function signatures from bool to int so that we can report
memory allocation failures or similar using -ENOMEM rather than
always having to pass -EINVAL back.

This semantic patch may not be too precise (checking for functions
that use xt_mtchk_param rather than functions referenced by
xt_match.checkentry), but reviewed, it produced the intended result.

// <smpl>
@@
type bool;
identifier check, par;
@@
-bool check
+int check
 (struct xt_mtchk_param *par) { ... }
// </smpl>

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 +-
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 2 +-
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 2 +-
 net/ipv6/netfilter/ip6_tables.c      | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 2 +-
 net/ipv6/netfilter/ip6t_rt.c         | 2 +-
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 2 +-
 net/netfilter/xt_conntrack.c         | 2 +-
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 2 +-
 net/netfilter/xt_esp.c               | 2 +-
 net/netfilter/xt_hashlimit.c         | 4 ++--
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_multiport.c         | 8 ++++----
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 4 ++--
 net/netfilter/xt_time.c              | 2 +-
 45 files changed, 50 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index dd9d15a73a96..33c1a62a0997 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -303,7 +303,7 @@ struct xt_match {
 		      const struct xt_match_param *);
 
 	/* Called when user tries to insert an entry of this type. */
-	bool (*checkentry)(const struct xt_mtchk_param *);
+	int (*checkentry)(const struct xt_mtchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_mtdtor_param *);
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 5d1176758ca5..7b6f4c4cccb7 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -36,7 +36,7 @@ ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_802_3_mt_check(const struct xt_mtchk_param *par)
+static int ebt_802_3_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 60ad6308bc1f..8a75d399b510 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -172,7 +172,7 @@ ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_among_mt_check(const struct xt_mtchk_param *par)
+static int ebt_among_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const struct ebt_entry_match *em =
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index e727697c5847..fc62055adb17 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -100,7 +100,7 @@ ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_arp_mt_check(const struct xt_mtchk_param *par)
+static int ebt_arp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index 5de6df6f86b8..d1a555dc8878 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -77,7 +77,7 @@ ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 63e3888d20cf..fa4ecf50fdc9 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -80,7 +80,7 @@ ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_ip6_mt_check(const struct xt_mtchk_param *par)
+static int ebt_ip6_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_entry *e = par->entryinfo;
 	struct ebt_ip6_info *info = par->matchinfo;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 5b7330b62541..abfb0ecd7c17 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -65,7 +65,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / EBT_LIMIT_SCALE;
 }
 
-static bool ebt_limit_mt_check(const struct xt_mtchk_param *par)
+static int ebt_limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct ebt_limit_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index 8de8c396d913..1e5b0b316fbe 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -22,7 +22,7 @@ ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((skb->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool ebt_mark_mt_check(const struct xt_mtchk_param *par)
+static int ebt_mark_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index e2a07e6cbef3..9b3c64516605 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -20,7 +20,7 @@ ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (skb->pkt_type == info->pkt_type) ^ info->invert;
 }
 
-static bool ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
+static int ebt_pkttype_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 92a93d363765..521186fa6994 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -153,7 +153,7 @@ ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_stp_mt_check(const struct xt_mtchk_param *par)
+static int ebt_stp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const uint8_t bridge_ula[6] = {0x01, 0x80, 0xc2, 0x00, 0x00, 0x00};
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index 5c44f51063c3..04a9575389d8 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -79,7 +79,7 @@ ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ebt_vlan_mt_check(const struct xt_mtchk_param *par)
+static int ebt_vlan_mt_check(const struct xt_mtchk_param *par)
 {
 	struct ebt_vlan_info *info = par->matchinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 09f6567a85b7..771ffa7b9aff 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2176,7 +2176,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
 				    !!(icmpinfo->invflags&IPT_ICMP_INV));
 }
 
-static bool icmp_checkentry(const struct xt_mtchk_param *par)
+static int icmp_checkentry(const struct xt_mtchk_param *par)
 {
 	const struct ipt_icmp *icmpinfo = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index ea4f58a46c6e..81197f456d7f 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -70,7 +70,7 @@ addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
+static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
 {
 	struct ipt_addrtype_info_v1 *info = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 4f27e170c630..667ded16e120 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -55,7 +55,7 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(ahinfo->invflags & IPT_AH_INV_SPI));
 }
 
-static bool ah_mt_check(const struct xt_mtchk_param *par)
+static int ah_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ah *ahinfo = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index e661108c73f1..d1e234fe7f1a 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -85,7 +85,7 @@ static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool ecn_mt_check(const struct xt_mtchk_param *par)
+static int ecn_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 	const struct ipt_ip *ip = par->entryinfo;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 41e2429c0163..595b45d52ff3 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2209,7 +2209,7 @@ icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 /* Called when user tries to insert an entry of this type. */
-static bool icmp6_checkentry(const struct xt_mtchk_param *par)
+static int icmp6_checkentry(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_icmp *icmpinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 4429bfd39e11..3d570446deef 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -87,7 +87,7 @@ static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		!(ahinfo->hdrres && ah->reserved);
 }
 
-static bool ah_mt6_check(const struct xt_mtchk_param *par)
+static int ah_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ah *ahinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 5c0da913b4ab..c2dba2701fa3 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -102,7 +102,7 @@ frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 		  (ntohs(fh->frag_off) & IP6_MF));
 }
 
-static bool frag_mt6_check(const struct xt_mtchk_param *par)
+static int frag_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_frag *fraginfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index f4b73889d00a..1b294317707b 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -164,7 +164,7 @@ hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hbh_mt6_check(const struct xt_mtchk_param *par)
+static int hbh_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_opts *optsinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 91490ad9302c..90e1e04b7932 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -118,7 +118,7 @@ ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	}
 }
 
-static bool ipv6header_mt6_check(const struct xt_mtchk_param *par)
+static int ipv6header_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 0181eb81d24b..d9408045994c 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -62,7 +62,7 @@ static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 			  !!(mhinfo->invflags & IP6T_MH_INV_TYPE));
 }
 
-static bool mh_mt6_check(const struct xt_mtchk_param *par)
+static int mh_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_mh *mhinfo = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index c58d65336577..76397f35eafd 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -183,7 +183,7 @@ static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool rt_mt6_check(const struct xt_mtchk_param *par)
+static int rt_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_rt *rtinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 4c273e871301..1f2c35ef1427 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -132,7 +132,7 @@ xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	       !!(info->flags & XT_CLUSTER_F_INV);
 }
 
-static bool xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
+static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_cluster_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index edb7bbd9ae54..136ef4ccdacb 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -93,7 +93,7 @@ connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 		return what >= sinfo->count.from;
 }
 
-static bool connbytes_mt_check(const struct xt_mtchk_param *par)
+static int connbytes_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index d5b26dab9e26..a9fec38ab029 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -216,7 +216,7 @@ connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool connlimit_mt_check(const struct xt_mtchk_param *par)
+static int connlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_connlimit_info *info = par->matchinfo;
 	unsigned int i;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index 7a51ba63f545..df7eaff874f1 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -103,7 +103,7 @@ connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ((ct->mark & info->mask) == info->mark) ^ info->invert;
 }
 
-static bool connmark_mt_check(const struct xt_mtchk_param *par)
+static int connmark_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 387172b6b0d8..500e0338a187 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -206,7 +206,7 @@ conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
 	return conntrack_mt(skb, par, info->state_mask, info->status_mask);
 }
 
-static bool conntrack_mt_check(const struct xt_mtchk_param *par)
+static int conntrack_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 8f6014f7c881..da8c301d24ea 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -123,7 +123,7 @@ dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
-static bool dccp_mt_check(const struct xt_mtchk_param *par)
+static int dccp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 6ecedc13db0c..295da4ce822c 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -42,7 +42,7 @@ dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (dscp == info->dscp) ^ !!info->invert;
 }
 
-static bool dscp_mt_check(const struct xt_mtchk_param *par)
+static int dscp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 1a446d626769..9f5da9795674 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -60,7 +60,7 @@ static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			 !!(espinfo->invflags & XT_ESP_INV_SPI));
 }
 
-static bool esp_mt_check(const struct xt_mtchk_param *par)
+static int esp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_esp *espinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 8f3e0c02ca54..d13800c95930 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -671,7 +671,7 @@ hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_info *r = par->matchinfo;
@@ -707,7 +707,7 @@ static bool hashlimit_mt_check_v0(const struct xt_mtchk_param *par)
 	return true;
 }
 
-static bool hashlimit_mt_check(const struct xt_mtchk_param *par)
+static int hashlimit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct net *net = par->net;
 	struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index 482aff2ccf7c..6e177b279f90 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -54,7 +54,7 @@ helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool helper_mt_check(const struct xt_mtchk_param *par)
+static int helper_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_helper_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index b3dfca63fa52..138a324df8df 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -97,7 +97,7 @@ user2credits(u_int32_t user)
 	return (user * HZ * CREDITS_PER_JIFFY) / XT_LIMIT_SCALE;
 }
 
-static bool limit_mt_check(const struct xt_mtchk_param *par)
+static int limit_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv;
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 4fa90c86fdb5..b446738eab1a 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -152,7 +152,7 @@ check(u_int16_t proto,
 		&& count <= XT_MULTI_PORTS;
 }
 
-static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
+static int multiport_mt_check_v0(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
 	const struct xt_multiport *multiinfo = par->matchinfo;
@@ -161,7 +161,7 @@ static bool multiport_mt_check_v0(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt_check(const struct xt_mtchk_param *par)
+static int multiport_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct ipt_ip *ip = par->entryinfo;
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
@@ -170,7 +170,7 @@ static bool multiport_mt_check(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
+static int multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ip6 *ip = par->entryinfo;
 	const struct xt_multiport *multiinfo = par->matchinfo;
@@ -179,7 +179,7 @@ static bool multiport_mt6_check_v0(const struct xt_mtchk_param *par)
 		     multiinfo->count);
 }
 
-static bool multiport_mt6_check(const struct xt_mtchk_param *par)
+static int multiport_mt6_check(const struct xt_mtchk_param *par)
 {
 	const struct ip6t_ip6 *ip = par->entryinfo;
 	const struct xt_multiport_v1 *multiinfo = par->matchinfo;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 3d42a278408f..850e412c83ef 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -83,7 +83,7 @@ match_outdev:
 	return (!!ret ^ !(info->invert & XT_PHYSDEV_OP_OUT));
 }
 
-static bool physdev_mt_check(const struct xt_mtchk_param *par)
+static int physdev_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_physdev_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index de3aded6afb8..c9965b640b16 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -128,7 +128,7 @@ policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool policy_mt_check(const struct xt_mtchk_param *par)
+static int policy_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 390b7d09fe51..2861fac5f2e1 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -43,7 +43,7 @@ quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool quota_mt_check(const struct xt_mtchk_param *par)
+static int quota_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_quota_info *q = par->matchinfo;
 
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 4fc6a917f6de..3b5e3d613b18 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -74,7 +74,7 @@ xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
+static int xt_rateest_mt_checkentry(const struct xt_mtchk_param *par)
 {
 	struct xt_rateest_match_info *info = par->matchinfo;
 	struct xt_rateest *est1, *est2;
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 85309448c5e7..52042c8bf7f2 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -305,7 +305,7 @@ out:
 	return ret;
 }
 
-static bool recent_mt_check(const struct xt_mtchk_param *par)
+static int recent_mt_check(const struct xt_mtchk_param *par)
 {
 	struct recent_net *recent_net = recent_pernet(par->net);
 	const struct xt_recent_mtinfo *info = par->matchinfo;
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 977b182dea59..5037a7a0059c 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -144,7 +144,7 @@ sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
-static bool sctp_mt_check(const struct xt_mtchk_param *par)
+static int sctp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 94893be80276..8b15b1317f1f 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -37,7 +37,7 @@ state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return (sinfo->statemask & statebit);
 }
 
-static bool state_mt_check(const struct xt_mtchk_param *par)
+static int state_mt_check(const struct xt_mtchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 51ac1bbb4f52..a577ab008f57 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -52,7 +52,7 @@ statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return ret;
 }
 
-static bool statistic_mt_check(const struct xt_mtchk_param *par)
+static int statistic_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_statistic_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index b4d774111311..7d1412154e27 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -40,7 +40,7 @@ string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 
 #define STRING_TEXT_PRIV(m) ((struct xt_string_info *)(m))
 
-static bool string_mt_check(const struct xt_mtchk_param *par)
+static int string_mt_check(const struct xt_mtchk_param *par)
 {
 	struct xt_string_info *conf = par->matchinfo;
 	struct ts_config *ts_conf;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index b53887f83c44..00728410099f 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -120,7 +120,7 @@ static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool tcp_mt_check(const struct xt_mtchk_param *par)
+static int tcp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_tcp *tcpinfo = par->matchinfo;
 
@@ -155,7 +155,7 @@ static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 			      !!(udpinfo->invflags & XT_UDP_INV_DSTPT));
 }
 
-static bool udp_mt_check(const struct xt_mtchk_param *par)
+static int udp_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_udp *udpinfo = par->matchinfo;
 
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index 45ed05b5161f..db74f4fd57df 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -217,7 +217,7 @@ time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return true;
 }
 
-static bool time_mt_check(const struct xt_mtchk_param *par)
+static int time_mt_check(const struct xt_mtchk_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 
-- 
cgit v1.2.3


From 135367b8f6a18507af6b9a6910a14b5699415309 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Fri, 19 Mar 2010 17:16:42 +0100
Subject: netfilter: xtables: change xt_target.checkentry return type

Restore function signatures from bool to int so that we can report
memory allocation failures or similar using -ENOMEM rather than
always having to pass -EINVAL back.

// <smpl>
@@
type bool;
identifier check, par;
@@
-bool check
+int check
 (struct xt_tgchk_param *par) { ... }
// </smpl>

Minus the change the semantic patch above makes to xt_ct_find_proto.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h  | 4 ++--
 net/bridge/netfilter/ebt_arpreply.c | 2 +-
 net/bridge/netfilter/ebt_dnat.c     | 2 +-
 net/bridge/netfilter/ebt_log.c      | 2 +-
 net/bridge/netfilter/ebt_mark.c     | 2 +-
 net/bridge/netfilter/ebt_nflog.c    | 2 +-
 net/bridge/netfilter/ebt_redirect.c | 2 +-
 net/bridge/netfilter/ebt_snat.c     | 2 +-
 net/bridge/netfilter/ebt_ulog.c     | 2 +-
 net/ipv4/netfilter/arpt_mangle.c    | 2 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c  | 2 +-
 net/ipv4/netfilter/ipt_ECN.c        | 2 +-
 net/ipv4/netfilter/ipt_LOG.c        | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c | 2 +-
 net/ipv4/netfilter/ipt_NETMAP.c     | 2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c   | 2 +-
 net/ipv4/netfilter/ipt_REJECT.c     | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c       | 2 +-
 net/ipv4/netfilter/nf_nat_rule.c    | 4 ++--
 net/ipv6/netfilter/ip6t_LOG.c       | 2 +-
 net/ipv6/netfilter/ip6t_REJECT.c    | 2 +-
 net/netfilter/xt_CONNSECMARK.c      | 2 +-
 net/netfilter/xt_CT.c               | 2 +-
 net/netfilter/xt_DSCP.c             | 2 +-
 net/netfilter/xt_HL.c               | 4 ++--
 net/netfilter/xt_LED.c              | 2 +-
 net/netfilter/xt_NFLOG.c            | 2 +-
 net/netfilter/xt_NFQUEUE.c          | 2 +-
 net/netfilter/xt_RATEEST.c          | 2 +-
 net/netfilter/xt_SECMARK.c          | 2 +-
 net/netfilter/xt_TCPMSS.c           | 4 ++--
 net/netfilter/xt_TPROXY.c           | 2 +-
 net/netfilter/xt_connmark.c         | 2 +-
 33 files changed, 37 insertions(+), 37 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 33c1a62a0997..1a65d45ee4f5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -342,8 +342,8 @@ struct xt_target {
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
            called. */
-	/* Should return true or false. */
-	bool (*checkentry)(const struct xt_tgchk_param *);
+	/* Should return true or false, or an error code (-Exxxx). */
+	int (*checkentry)(const struct xt_tgchk_param *);
 
 	/* Called when entry of this type deleted. */
 	void (*destroy)(const struct xt_tgdtor_param *);
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index f392e9d93f53..2491564e9e08 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -57,7 +57,7 @@ ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
+static int ebt_arpreply_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const struct ebt_entry *e = par->entryinfo;
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 2bb40d728a35..5fddebea45c2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -26,7 +26,7 @@ ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_dnat_tg_check(const struct xt_tgchk_param *par)
+static int ebt_dnat_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 	unsigned int hook_mask;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index e873924ddb5d..a0aeac6176ee 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -24,7 +24,7 @@
 
 static DEFINE_SPINLOCK(ebt_log_lock);
 
-static bool ebt_log_tg_check(const struct xt_tgchk_param *par)
+static int ebt_log_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_log_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 2b5ce533d6b9..dd94dafa6155 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -36,7 +36,7 @@ ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_mark_tg_check(const struct xt_tgchk_param *par)
+static int ebt_mark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int tmp;
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 40dbd248b9ae..1f2b7bbdde73 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -35,7 +35,7 @@ ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool ebt_nflog_tg_check(const struct xt_tgchk_param *par)
+static int ebt_nflog_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_nflog_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index 9be8fbcd370b..73c4d3ac6f2e 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -32,7 +32,7 @@ ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return info->target;
 }
 
-static bool ebt_redirect_tg_check(const struct xt_tgchk_param *par)
+static int ebt_redirect_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_redirect_info *info = par->targinfo;
 	unsigned int hook_mask;
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 9c7b520765a2..94bcecd90d74 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -42,7 +42,7 @@ out:
 	return info->target | ~EBT_VERDICT_BITS;
 }
 
-static bool ebt_snat_tg_check(const struct xt_tgchk_param *par)
+static int ebt_snat_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 	int tmp;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 789ea36f1db1..f554bc2515d6 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -249,7 +249,7 @@ ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return EBT_CONTINUE;
 }
 
-static bool ebt_ulog_tg_check(const struct xt_tgchk_param *par)
+static int ebt_ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ebt_ulog_info *uloginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b0d5b1d0a769..4b51a027f307 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -54,7 +54,7 @@ target(struct sk_buff *skb, const struct xt_target_param *par)
 	return mangle->target;
 }
 
-static bool checkentry(const struct xt_tgchk_param *par)
+static int checkentry(const struct xt_tgchk_param *par)
 {
 	const struct arpt_mangle *mangle = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index fcaa0dc8e075..290a7b9b393e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -347,7 +347,7 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool clusterip_tg_check(const struct xt_tgchk_param *par)
+static int clusterip_tg_check(const struct xt_tgchk_param *par)
 {
 	struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 01988752547e..9d96500a4157 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -93,7 +93,7 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool ecn_tg_check(const struct xt_tgchk_param *par)
+static int ecn_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ECN_info *einfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index b3bf623fa222..c9ee5c40d1bb 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -439,7 +439,7 @@ log_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool log_tg_check(const struct xt_tgchk_param *par)
+static int log_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_log_info *loginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5063ddac7c04..5a182f6de5d5 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -28,7 +28,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
 
 /* FIXME: Multiple targets. --RR */
-static bool masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 51ab01a0a95d..cbfe5f7e082a 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -22,7 +22,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
 MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
 
-static bool netmap_tg_check(const struct xt_tgchk_param *par)
+static int netmap_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 74f1f55fd61a..f8daec20fb04 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -26,7 +26,7 @@ MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
 MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
 
 /* FIXME: Take multiple ranges --RR */
-static bool redirect_tg_check(const struct xt_tgchk_param *par)
+static int redirect_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index ff32252bad59..cf76f1bc3f10 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -174,7 +174,7 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool reject_tg_check(const struct xt_tgchk_param *par)
+static int reject_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_reject_info *rejinfo = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index d926201560dd..7f73bbe2193c 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -307,7 +307,7 @@ static void ipt_logfn(u_int8_t pf,
 	ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
 }
 
-static bool ulog_tg_check(const struct xt_tgchk_param *par)
+static int ulog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ulog_info *loginfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index 7d6345e416c7..117226708738 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -74,7 +74,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
 	return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
 }
 
-static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_snat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
@@ -86,7 +86,7 @@ static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
+static int ipt_dnat_checkentry(const struct xt_tgchk_param *par)
 {
 	const struct nf_nat_multi_range_compat *mr = par->targinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 5a79883220e0..bcc3fc19374a 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -451,7 +451,7 @@ log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 
-static bool log_tg6_check(const struct xt_tgchk_param *par)
+static int log_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_log_info *loginfo = par->targinfo;
 
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index 45efb9f38fcb..8d5141ece671 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -213,7 +213,7 @@ reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool reject_tg6_check(const struct xt_tgchk_param *par)
+static int reject_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_reject_info *rejinfo = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index 6812865488d6..3f9d0f4f852d 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -84,7 +84,7 @@ connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool connsecmark_tg_check(const struct xt_tgchk_param *par)
+static int connsecmark_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_connsecmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index 6509e03f1e62..c1553bf06cf6 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -53,7 +53,7 @@ static u8 xt_ct_find_proto(const struct xt_tgchk_param *par)
 		return 0;
 }
 
-static bool xt_ct_tg_check(const struct xt_tgchk_param *par)
+static int xt_ct_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conntrack_tuple t;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index bbf08a91c600..1fa7b67bf225 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -60,7 +60,7 @@ dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool dscp_tg_check(const struct xt_tgchk_param *par)
+static int dscp_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_DSCP_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 7004ed2ffa44..15ba16108182 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -101,7 +101,7 @@ hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool ttl_tg_check(const struct xt_tgchk_param *par)
+static int ttl_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_TTL_info *info = par->targinfo;
 
@@ -114,7 +114,7 @@ static bool ttl_tg_check(const struct xt_tgchk_param *par)
 	return true;
 }
 
-static bool hl_tg6_check(const struct xt_tgchk_param *par)
+static int hl_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct ip6t_HL_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index f511bea9464a..1a3e3dd5a774 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -80,7 +80,7 @@ static void led_timeout_callback(unsigned long data)
 	led_trigger_event(&ledinternal->netfilter_led_trigger, LED_OFF);
 }
 
-static bool led_tg_check(const struct xt_tgchk_param *par)
+static int led_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_led_info *ledinfo = par->targinfo;
 	struct xt_led_info_internal *ledinternal;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index a57c5cf018ec..13e6c0002c8a 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -37,7 +37,7 @@ nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool nflog_tg_check(const struct xt_tgchk_param *par)
+static int nflog_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_nflog_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index 7cc0de63aa0f..d435579a64ca 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -81,7 +81,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_QUEUE_NR(queue);
 }
 
-static bool nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
+static int nfqueue_tg_v1_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 maxid;
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index 87ae97e5516f..9743e50be8ef 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -85,7 +85,7 @@ xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
+static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par)
 {
 	struct xt_rateest_target_info *info = par->targinfo;
 	struct xt_rateest *est;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index 4855fd9d7c6f..48f8e4f7ea8a 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -80,7 +80,7 @@ static bool checkentry_selinux(struct xt_secmark_target_info *info)
 	return true;
 }
 
-static bool secmark_tg_check(const struct xt_tgchk_param *par)
+static int secmark_tg_check(const struct xt_tgchk_param *par)
 {
 	struct xt_secmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index 45161d9a9f23..70288dc31583 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -234,7 +234,7 @@ static inline bool find_syn_match(const struct xt_entry_match *m)
 	return false;
 }
 
-static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
+static int tcpmss_tg4_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ipt_entry *e = par->entryinfo;
@@ -256,7 +256,7 @@ static bool tcpmss_tg4_check(const struct xt_tgchk_param *par)
 }
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
-static bool tcpmss_tg6_check(const struct xt_tgchk_param *par)
+static int tcpmss_tg6_check(const struct xt_tgchk_param *par)
 {
 	const struct xt_tcpmss_info *info = par->targinfo;
 	const struct ip6t_entry *e = par->entryinfo;
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index e9244fdc123a..189df9af4de6 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -59,7 +59,7 @@ tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return NF_DROP;
 }
 
-static bool tproxy_tg_check(const struct xt_tgchk_param *par)
+static int tproxy_tg_check(const struct xt_tgchk_param *par)
 {
 	const struct ipt_ip *i = par->entryinfo;
 
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index df7eaff874f1..0e69427f8cda 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -74,7 +74,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 	return XT_CONTINUE;
 }
 
-static bool connmark_tg_check(const struct xt_tgchk_param *par)
+static int connmark_tg_check(const struct xt_tgchk_param *par)
 {
 	if (nf_ct_l3proto_try_module_get(par->family) < 0) {
 		pr_info("cannot load conntrack support for proto=%u\n",
-- 
cgit v1.2.3


From df3345457a7a174dfb5872a070af80d456985038 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 24 Mar 2010 19:13:54 +0000
Subject: rps: add CONFIG_RPS

RPS currently depends on SMP and SYSFS.

Adding a CONFIG_RPS option makes sense in case this requirement changes in
the future. This patch saves about 1500 bytes of kernel text when SMP is
on but SYSFS is off.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  4 ++++
 net/Kconfig               |  5 +++++
 net/core/dev.c            | 29 +++++++++++++++++++----------
 3 files changed, 28 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c96c41e08e37..53c272f2a734 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -531,6 +531,7 @@ struct netdev_queue {
 	unsigned long		tx_dropped;
 } ____cacheline_aligned_in_smp;
 
+#ifdef CONFIG_RPS
 /*
  * This structure holds an RPS map which can be of variable length.  The
  * map is an array of CPUs.
@@ -549,6 +550,7 @@ struct netdev_rx_queue {
 	struct netdev_rx_queue *first;
 	atomic_t count;
 } ____cacheline_aligned_in_smp;
+#endif
 
 /*
  * This structure defines the management hooks for network devices.
@@ -897,12 +899,14 @@ struct net_device {
 
 	unsigned char		broadcast[MAX_ADDR_LEN];	/* hw bcast add	*/
 
+#ifdef CONFIG_RPS
 	struct kset		*queues_kset;
 
 	struct netdev_rx_queue	*_rx;
 
 	/* Number of RX queues allocated at alloc_netdev_mq() time  */
 	unsigned int		num_rx_queues;
+#endif
 
 	struct netdev_queue	rx_queue;
 
diff --git a/net/Kconfig b/net/Kconfig
index 041c35edb763..68514644ce91 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -203,6 +203,11 @@ source "net/ieee802154/Kconfig"
 source "net/sched/Kconfig"
 source "net/dcb/Kconfig"
 
+config RPS
+	boolean
+	depends on SMP && SYSFS
+	default y
+
 menu "Network testing"
 
 config NET_PKTGEN
diff --git a/net/core/dev.c b/net/core/dev.c
index 5e3dc28cbf5a..bcb3ed26af1c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2177,7 +2177,7 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
@@ -2325,7 +2325,7 @@ enqueue:
 
 		/* Schedule NAPI for backlog device */
 		if (napi_schedule_prep(&queue->backlog)) {
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 			if (cpu != smp_processor_id()) {
 				struct rps_remote_softirq_cpus *rcpus =
 				    &__get_cpu_var(rps_remote_softirq_cpus);
@@ -2376,7 +2376,7 @@ int netif_rx(struct sk_buff *skb)
 	if (!skb->tstamp.tv64)
 		net_timestamp(skb);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	cpu = get_rps_cpu(skb->dev, skb);
 	if (cpu < 0)
 		cpu = smp_processor_id();
@@ -2750,7 +2750,7 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	int cpu;
 
 	cpu = get_rps_cpu(skb->dev, skb);
@@ -3189,7 +3189,7 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 /*
  * net_rps_action sends any pending IPI's for rps.  This is only called from
  * softirq and interrupts must be enabled.
@@ -3214,7 +3214,7 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	int select;
 	struct rps_remote_softirq_cpus *rcpus;
 #endif
@@ -3280,7 +3280,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
 	select = rcpus->select;
 	rcpus->select ^= 1;
@@ -5277,6 +5277,7 @@ int register_netdevice(struct net_device *dev)
 
 	dev->iflink = -1;
 
+#ifdef CONFIG_RPS
 	if (!dev->num_rx_queues) {
 		/*
 		 * Allocate a single RX queue if driver never called
@@ -5293,7 +5294,7 @@ int register_netdevice(struct net_device *dev)
 		atomic_set(&dev->_rx->count, 1);
 		dev->num_rx_queues = 1;
 	}
-
+#endif
 	/* Init, if this function is available */
 	if (dev->netdev_ops->ndo_init) {
 		ret = dev->netdev_ops->ndo_init(dev);
@@ -5653,11 +5654,13 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		void (*setup)(struct net_device *), unsigned int queue_count)
 {
 	struct netdev_queue *tx;
-	struct netdev_rx_queue *rx;
 	struct net_device *dev;
 	size_t alloc_size;
 	struct net_device *p;
+#ifdef CONFIG_RPS
+	struct netdev_rx_queue *rx;
 	int i;
+#endif
 
 	BUG_ON(strlen(name) >= sizeof(dev->name));
 
@@ -5683,6 +5686,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 		goto free_p;
 	}
 
+#ifdef CONFIG_RPS
 	rx = kcalloc(queue_count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
 	if (!rx) {
 		printk(KERN_ERR "alloc_netdev: Unable to allocate "
@@ -5698,6 +5702,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	 */
 	for (i = 0; i < queue_count; i++)
 		rx[i].first = rx;
+#endif
 
 	dev = PTR_ALIGN(p, NETDEV_ALIGN);
 	dev->padded = (char *)dev - (char *)p;
@@ -5713,8 +5718,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	dev->num_tx_queues = queue_count;
 	dev->real_num_tx_queues = queue_count;
 
+#ifdef CONFIG_RPS
 	dev->_rx = rx;
 	dev->num_rx_queues = queue_count;
+#endif
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 
@@ -5731,8 +5738,10 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	return dev;
 
 free_rx:
+#ifdef CONFIG_RPS
 	kfree(rx);
 free_tx:
+#endif
 	kfree(tx);
 free_p:
 	kfree(p);
@@ -6236,7 +6245,7 @@ static int __init net_dev_init(void)
 		queue->completion_queue = NULL;
 		INIT_LIST_HEAD(&queue->poll_list);
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
-- 
cgit v1.2.3


From faa4602e47690fb11221e00f9b9697c8dc0d4b19 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 25 Mar 2010 14:51:50 +0100
Subject: x86, perf, bts, mm: Delete the never used BTS-ptrace code

Support for the PMU's BTS features has been upstreamed in
v2.6.32, but we still have the old and disabled ptrace-BTS,
as Linus noticed not so long ago.

It's buggy: TIF_DEBUGCTLMSR is trampling all over that MSR without
regard for other uses (perf) and doesn't provide the flexibility
needed for perf either.

Its users are ptrace-block-step and ptrace-bts. ptrace-bts was never
used, and ptrace-block-step can be implemented using a much simpler
approach.

So axe all 3000 lines of it. That includes the *locked_memory*()
APIs in mm/mlock.c as well.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Roland McGrath <roland@redhat.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Markus Metzger <markus.t.metzger@intel.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
LKML-Reference: <20100325135413.938004390@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/Kconfig.cpu               |   20 -
 arch/x86/Kconfig.debug             |    9 -
 arch/x86/include/asm/ds.h          |  302 --------
 arch/x86/include/asm/processor.h   |   33 +-
 arch/x86/include/asm/ptrace-abi.h  |   57 +-
 arch/x86/include/asm/ptrace.h      |    6 -
 arch/x86/include/asm/thread_info.h |    6 +-
 arch/x86/kernel/Makefile           |    2 -
 arch/x86/kernel/cpu/intel.c        |    2 -
 arch/x86/kernel/ds.c               | 1437 ------------------------------------
 arch/x86/kernel/ds_selftest.c      |  408 ----------
 arch/x86/kernel/ds_selftest.h      |   15 -
 arch/x86/kernel/dumpstack.c        |    5 -
 arch/x86/kernel/kprobes.c          |    6 +-
 arch/x86/kernel/process.c          |    9 -
 arch/x86/kernel/process_32.c       |    8 -
 arch/x86/kernel/process_64.c       |    8 -
 arch/x86/kernel/ptrace.c           |  382 ----------
 arch/x86/kernel/step.c             |   36 +-
 arch/x86/kernel/traps.c            |    5 -
 include/linux/ftrace.h             |   12 -
 include/linux/mm.h                 |    4 -
 include/linux/ptrace.h             |   12 -
 include/linux/sched.h              |    9 -
 kernel/fork.c                      |    3 -
 kernel/ptrace.c                    |    1 -
 kernel/sched.c                     |   43 --
 kernel/trace/Kconfig               |   11 -
 kernel/trace/Makefile              |    1 -
 kernel/trace/trace.h               |    4 -
 kernel/trace/trace_entries.h       |   12 -
 kernel/trace/trace_hw_branches.c   |  312 --------
 kernel/trace/trace_selftest.c      |   57 --
 mm/mlock.c                         |   41 -
 34 files changed, 9 insertions(+), 3269 deletions(-)
 delete mode 100644 arch/x86/include/asm/ds.h
 delete mode 100644 arch/x86/kernel/ds.c
 delete mode 100644 arch/x86/kernel/ds_selftest.c
 delete mode 100644 arch/x86/kernel/ds_selftest.h
 delete mode 100644 kernel/trace/trace_hw_branches.c

(limited to 'include/linux')

diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index a19829374e6a..918fbb1855cc 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -502,23 +502,3 @@ config CPU_SUP_UMC_32
 	  CPU might render the kernel unbootable.
 
 	  If unsure, say N.
-
-config X86_DS
-	def_bool X86_PTRACE_BTS
-	depends on X86_DEBUGCTLMSR
-	select HAVE_HW_BRANCH_TRACER
-
-config X86_PTRACE_BTS
-	bool "Branch Trace Store"
-	default y
-	depends on X86_DEBUGCTLMSR
-	depends on BROKEN
-	---help---
-	  This adds a ptrace interface to the hardware's branch trace store.
-
-	  Debuggers may use it to collect an execution trace of the debugged
-	  application in order to answer the question 'how did I get here?'.
-	  Debuggers may trace user mode as well as kernel mode.
-
-	  Say Y unless there is no application development on this machine
-	  and you want to save a small amount of code size.
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index bc01e3ebfeb2..bd58c8abbfbd 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -174,15 +174,6 @@ config IOMMU_LEAK
 	  Add a simple leak tracer to the IOMMU code. This is useful when you
 	  are debugging a buggy device driver that leaks IOMMU mappings.
 
-config X86_DS_SELFTEST
-    bool "DS selftest"
-    default y
-    depends on DEBUG_KERNEL
-    depends on X86_DS
-	---help---
-	  Perform Debug Store selftests at boot time.
-	  If in doubt, say "N".
-
 config HAVE_MMIOTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h
deleted file mode 100644
index 70dac199b093..000000000000
--- a/arch/x86/include/asm/ds.h
+++ /dev/null
@@ -1,302 +0,0 @@
-/*
- * Debug Store (DS) support
- *
- * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for branch trace store (BTS) and
- * precise-event based sampling (PEBS).
- *
- * It manages:
- * - DS and BTS hardware configuration
- * - buffer overflow handling (to be done)
- * - buffer access
- *
- * It does not do:
- * - security checking (is the caller allowed to trace the task)
- * - buffer allocation (memory accounting)
- *
- *
- * Copyright (C) 2007-2009 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
- */
-
-#ifndef _ASM_X86_DS_H
-#define _ASM_X86_DS_H
-
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/err.h>
-
-
-#ifdef CONFIG_X86_DS
-
-struct task_struct;
-struct ds_context;
-struct ds_tracer;
-struct bts_tracer;
-struct pebs_tracer;
-
-typedef void (*bts_ovfl_callback_t)(struct bts_tracer *);
-typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *);
-
-
-/*
- * A list of features plus corresponding macros to talk about them in
- * the ds_request function's flags parameter.
- *
- * We use the enum to index an array of corresponding control bits;
- * we use the macro to index a flags bit-vector.
- */
-enum ds_feature {
-	dsf_bts = 0,
-	dsf_bts_kernel,
-#define BTS_KERNEL (1 << dsf_bts_kernel)
-	/* trace kernel-mode branches */
-
-	dsf_bts_user,
-#define BTS_USER (1 << dsf_bts_user)
-	/* trace user-mode branches */
-
-	dsf_bts_overflow,
-	dsf_bts_max,
-	dsf_pebs = dsf_bts_max,
-
-	dsf_pebs_max,
-	dsf_ctl_max = dsf_pebs_max,
-	dsf_bts_timestamps = dsf_ctl_max,
-#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps)
-	/* add timestamps into BTS trace */
-
-#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS)
-};
-
-
-/*
- * Request BTS or PEBS
- *
- * Due to alignement constraints, the actual buffer may be slightly
- * smaller than the requested or provided buffer.
- *
- * Returns a pointer to a tracer structure on success, or
- * ERR_PTR(errcode) on failure.
- *
- * The interrupt threshold is independent from the overflow callback
- * to allow users to use their own overflow interrupt handling mechanism.
- *
- * The function might sleep.
- *
- * task: the task to request recording for
- * cpu:  the cpu to request recording for
- * base: the base pointer for the (non-pageable) buffer;
- * size: the size of the provided buffer in bytes
- * ovfl: pointer to a function to be called on buffer overflow;
- *       NULL if cyclic buffer requested
- * th: the interrupt threshold in records from the end of the buffer;
- *     -1 if no interrupt threshold is requested.
- * flags: a bit-mask of the above flags
- */
-extern struct bts_tracer *ds_request_bts_task(struct task_struct *task,
-					      void *base, size_t size,
-					      bts_ovfl_callback_t ovfl,
-					      size_t th, unsigned int flags);
-extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
-					     bts_ovfl_callback_t ovfl,
-					     size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
-						void *base, size_t size,
-						pebs_ovfl_callback_t ovfl,
-						size_t th, unsigned int flags);
-extern struct pebs_tracer *ds_request_pebs_cpu(int cpu,
-					       void *base, size_t size,
-					       pebs_ovfl_callback_t ovfl,
-					       size_t th, unsigned int flags);
-
-/*
- * Release BTS or PEBS resources
- * Suspend and resume BTS or PEBS tracing
- *
- * Must be called with irq's enabled.
- *
- * tracer: the tracer handle returned from ds_request_~()
- */
-extern void ds_release_bts(struct bts_tracer *tracer);
-extern void ds_suspend_bts(struct bts_tracer *tracer);
-extern void ds_resume_bts(struct bts_tracer *tracer);
-extern void ds_release_pebs(struct pebs_tracer *tracer);
-extern void ds_suspend_pebs(struct pebs_tracer *tracer);
-extern void ds_resume_pebs(struct pebs_tracer *tracer);
-
-/*
- * Release BTS or PEBS resources
- * Suspend and resume BTS or PEBS tracing
- *
- * Cpu tracers must call this on the traced cpu.
- * Task tracers must call ds_release_~_noirq() for themselves.
- *
- * May be called with irq's disabled.
- *
- * Returns 0 if successful;
- * -EPERM if the cpu tracer does not trace the current cpu.
- * -EPERM if the task tracer does not trace itself.
- *
- * tracer: the tracer handle returned from ds_request_~()
- */
-extern int ds_release_bts_noirq(struct bts_tracer *tracer);
-extern int ds_suspend_bts_noirq(struct bts_tracer *tracer);
-extern int ds_resume_bts_noirq(struct bts_tracer *tracer);
-extern int ds_release_pebs_noirq(struct pebs_tracer *tracer);
-extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer);
-extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer);
-
-
-/*
- * The raw DS buffer state as it is used for BTS and PEBS recording.
- *
- * This is the low-level, arch-dependent interface for working
- * directly on the raw trace data.
- */
-struct ds_trace {
-	/* the number of bts/pebs records */
-	size_t n;
-	/* the size of a bts/pebs record in bytes */
-	size_t size;
-	/* pointers into the raw buffer:
-	   - to the first entry */
-	void *begin;
-	/* - one beyond the last entry */
-	void *end;
-	/* - one beyond the newest entry */
-	void *top;
-	/* - the interrupt threshold */
-	void *ith;
-	/* flags given on ds_request() */
-	unsigned int flags;
-};
-
-/*
- * An arch-independent view on branch trace data.
- */
-enum bts_qualifier {
-	bts_invalid,
-#define BTS_INVALID bts_invalid
-
-	bts_branch,
-#define BTS_BRANCH bts_branch
-
-	bts_task_arrives,
-#define BTS_TASK_ARRIVES bts_task_arrives
-
-	bts_task_departs,
-#define BTS_TASK_DEPARTS bts_task_departs
-
-	bts_qual_bit_size = 4,
-	bts_qual_max = (1 << bts_qual_bit_size),
-};
-
-struct bts_struct {
-	__u64 qualifier;
-	union {
-		/* BTS_BRANCH */
-		struct {
-			__u64 from;
-			__u64 to;
-		} lbr;
-		/* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */
-		struct {
-			__u64 clock;
-			pid_t pid;
-		} event;
-	} variant;
-};
-
-
-/*
- * The BTS state.
- *
- * This gives access to the raw DS state and adds functions to provide
- * an arch-independent view of the BTS data.
- */
-struct bts_trace {
-	struct ds_trace ds;
-
-	int (*read)(struct bts_tracer *tracer, const void *at,
-		    struct bts_struct *out);
-	int (*write)(struct bts_tracer *tracer, const struct bts_struct *in);
-};
-
-
-/*
- * The PEBS state.
- *
- * This gives access to the raw DS state and the PEBS-specific counter
- * reset value.
- */
-struct pebs_trace {
-	struct ds_trace ds;
-
-	/* the number of valid counters in the below array */
-	unsigned int counters;
-
-#define MAX_PEBS_COUNTERS 4
-	/* the counter reset value */
-	unsigned long long counter_reset[MAX_PEBS_COUNTERS];
-};
-
-
-/*
- * Read the BTS or PEBS trace.
- *
- * Returns a view on the trace collected for the parameter tracer.
- *
- * The view remains valid as long as the traced task is not running or
- * the tracer is suspended.
- * Writes into the trace buffer are not reflected.
- *
- * tracer: the tracer handle returned from ds_request_~()
- */
-extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer);
-extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer);
-
-
-/*
- * Reset the write pointer of the BTS/PEBS buffer.
- *
- * Returns 0 on success; -Eerrno on error
- *
- * tracer: the tracer handle returned from ds_request_~()
- */
-extern int ds_reset_bts(struct bts_tracer *tracer);
-extern int ds_reset_pebs(struct pebs_tracer *tracer);
-
-/*
- * Set the PEBS counter reset value.
- *
- * Returns 0 on success; -Eerrno on error
- *
- * tracer: the tracer handle returned from ds_request_pebs()
- * counter: the index of the counter
- * value: the new counter reset value
- */
-extern int ds_set_pebs_reset(struct pebs_tracer *tracer,
-			     unsigned int counter, u64 value);
-
-/*
- * Initialization
- */
-struct cpuinfo_x86;
-extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *);
-
-/*
- * Context switch work
- */
-extern void ds_switch_to(struct task_struct *prev, struct task_struct *next);
-
-#else /* CONFIG_X86_DS */
-
-struct cpuinfo_x86;
-static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {}
-static inline void ds_switch_to(struct task_struct *prev,
-				struct task_struct *next) {}
-
-#endif /* CONFIG_X86_DS */
-#endif /* _ASM_X86_DS_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index b753ea59703a..5bec21a66dc5 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -21,7 +21,6 @@ struct mm_struct;
 #include <asm/msr.h>
 #include <asm/desc_defs.h>
 #include <asm/nops.h>
-#include <asm/ds.h>
 
 #include <linux/personality.h>
 #include <linux/cpumask.h>
@@ -29,6 +28,7 @@ struct mm_struct;
 #include <linux/threads.h>
 #include <linux/math64.h>
 #include <linux/init.h>
+#include <linux/err.h>
 
 #define HBP_NUM 4
 /*
@@ -473,10 +473,6 @@ struct thread_struct {
 	unsigned long		iopl;
 	/* Max allowed port in the bitmap, in bytes: */
 	unsigned		io_bitmap_max;
-/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set.  */
-	unsigned long	debugctlmsr;
-	/* Debug Store context; see asm/ds.h */
-	struct ds_context	*ds_ctx;
 };
 
 static inline unsigned long native_get_debugreg(int regno)
@@ -814,21 +810,6 @@ static inline unsigned long get_debugctlmsr(void)
     return debugctlmsr;
 }
 
-static inline unsigned long get_debugctlmsr_on_cpu(int cpu)
-{
-	u64 debugctlmsr = 0;
-	u32 val1, val2;
-
-#ifndef CONFIG_X86_DEBUGCTLMSR
-	if (boot_cpu_data.x86 < 6)
-		return 0;
-#endif
-	rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2);
-	debugctlmsr = val1 | ((u64)val2 << 32);
-
-	return debugctlmsr;
-}
-
 static inline void update_debugctlmsr(unsigned long debugctlmsr)
 {
 #ifndef CONFIG_X86_DEBUGCTLMSR
@@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr)
 	wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr);
 }
 
-static inline void update_debugctlmsr_on_cpu(int cpu,
-					     unsigned long debugctlmsr)
-{
-#ifndef CONFIG_X86_DEBUGCTLMSR
-	if (boot_cpu_data.x86 < 6)
-		return;
-#endif
-	wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR,
-		     (u32)((u64)debugctlmsr),
-		     (u32)((u64)debugctlmsr >> 32));
-}
-
 /*
  * from system description table in BIOS. Mostly for MCA use, but
  * others may find it useful:
diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h
index 86723035a515..52b098a6eebb 100644
--- a/arch/x86/include/asm/ptrace-abi.h
+++ b/arch/x86/include/asm/ptrace-abi.h
@@ -82,61 +82,6 @@
 
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
-
-/* configuration/status structure used in PTRACE_BTS_CONFIG and
-   PTRACE_BTS_STATUS commands.
-*/
-struct ptrace_bts_config {
-	/* requested or actual size of BTS buffer in bytes */
-	__u32 size;
-	/* bitmask of below flags */
-	__u32 flags;
-	/* buffer overflow signal */
-	__u32 signal;
-	/* actual size of bts_struct in bytes */
-	__u32 bts_size;
-};
-#endif /* __ASSEMBLY__ */
-
-#define PTRACE_BTS_O_TRACE	0x1 /* branch trace */
-#define PTRACE_BTS_O_SCHED	0x2 /* scheduling events w/ jiffies */
-#define PTRACE_BTS_O_SIGNAL     0x4 /* send SIG<signal> on buffer overflow
-				       instead of wrapping around */
-#define PTRACE_BTS_O_ALLOC	0x8 /* (re)allocate buffer */
-
-#define PTRACE_BTS_CONFIG	40
-/* Configure branch trace recording.
-   ADDR points to a struct ptrace_bts_config.
-   DATA gives the size of that buffer.
-   A new buffer is allocated, if requested in the flags.
-   An overflow signal may only be requested for new buffers.
-   Returns the number of bytes read.
-*/
-#define PTRACE_BTS_STATUS	41
-/* Return the current configuration in a struct ptrace_bts_config
-   pointed to by ADDR; DATA gives the size of that buffer.
-   Returns the number of bytes written.
-*/
-#define PTRACE_BTS_SIZE		42
-/* Return the number of available BTS records for draining.
-   DATA and ADDR are ignored.
-*/
-#define PTRACE_BTS_GET		43
-/* Get a single BTS record.
-   DATA defines the index into the BTS array, where 0 is the newest
-   entry, and higher indices refer to older entries.
-   ADDR is pointing to struct bts_struct (see asm/ds.h).
-*/
-#define PTRACE_BTS_CLEAR	44
-/* Clear the BTS buffer.
-   DATA and ADDR are ignored.
-*/
-#define PTRACE_BTS_DRAIN	45
-/* Read all available BTS records and clear the buffer.
-   ADDR points to an array of struct bts_struct.
-   DATA gives the size of that buffer.
-   BTS records are read from oldest to newest.
-   Returns number of BTS records drained.
-*/
+#endif
 
 #endif /* _ASM_X86_PTRACE_ABI_H */
diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
index 69a686a7dff0..78cd1ea94500 100644
--- a/arch/x86/include/asm/ptrace.h
+++ b/arch/x86/include/asm/ptrace.h
@@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx,
 extern int do_set_thread_area(struct task_struct *p, int idx,
 			      struct user_desc __user *info, int can_allocate);
 
-#ifdef CONFIG_X86_PTRACE_BTS
-extern void ptrace_bts_untrace(struct task_struct *tsk);
-
-#define arch_ptrace_untrace(tsk)	ptrace_bts_untrace(tsk)
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 #endif /* __KERNEL__ */
 
 #endif /* !__ASSEMBLY__ */
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index e0d28901e969..dc85e12d1405 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -92,8 +92,6 @@ struct thread_info {
 #define TIF_IO_BITMAP		22	/* uses I/O bitmap */
 #define TIF_FREEZE		23	/* is freezing for suspend */
 #define TIF_FORCED_TF		24	/* true if TF in eflags artificially */
-#define TIF_DEBUGCTLMSR		25	/* uses thread_struct.debugctlmsr */
-#define TIF_DS_AREA_MSR		26      /* uses thread_struct.ds_area_msr */
 #define TIF_LAZY_MMU_UPDATES	27	/* task is updating the mmu lazily */
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 
@@ -115,8 +113,6 @@ struct thread_info {
 #define _TIF_IO_BITMAP		(1 << TIF_IO_BITMAP)
 #define _TIF_FREEZE		(1 << TIF_FREEZE)
 #define _TIF_FORCED_TF		(1 << TIF_FORCED_TF)
-#define _TIF_DEBUGCTLMSR	(1 << TIF_DEBUGCTLMSR)
-#define _TIF_DS_AREA_MSR	(1 << TIF_DS_AREA_MSR)
 #define _TIF_LAZY_MMU_UPDATES	(1 << TIF_LAZY_MMU_UPDATES)
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 
@@ -147,7 +143,7 @@ struct thread_info {
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW							\
-	(_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC)
+	(_TIF_IO_BITMAP|_TIF_NOTSC)
 
 #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
 #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 4c58352209e0..e77b22083721 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
 obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
-obj-$(CONFIG_X86_DS)		+= ds.o
-obj-$(CONFIG_X86_DS_SELFTEST)		+= ds_selftest.o
 obj-$(CONFIG_X86_32)		+= tls.o
 obj-$(CONFIG_IA32_EMULATION)	+= tls.o
 obj-y				+= step.o
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 7e1cca13af35..d72377c41c76 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -12,7 +12,6 @@
 #include <asm/processor.h>
 #include <asm/pgtable.h>
 #include <asm/msr.h>
-#include <asm/ds.h>
 #include <asm/bugs.h>
 #include <asm/cpu.h>
 
@@ -367,7 +366,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
 			set_cpu_cap(c, X86_FEATURE_BTS);
 		if (!(l1 & (1<<12)))
 			set_cpu_cap(c, X86_FEATURE_PEBS);
-		ds_init_intel(c);
 	}
 
 	if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
deleted file mode 100644
index 1c47390dd0e5..000000000000
--- a/arch/x86/kernel/ds.c
+++ /dev/null
@@ -1,1437 +0,0 @@
-/*
- * Debug Store support
- *
- * This provides a low-level interface to the hardware's Debug Store
- * feature that is used for branch trace store (BTS) and
- * precise-event based sampling (PEBS).
- *
- * It manages:
- * - DS and BTS hardware configuration
- * - buffer overflow handling (to be done)
- * - buffer access
- *
- * It does not do:
- * - security checking (is the caller allowed to trace the task)
- * - buffer allocation (memory accounting)
- *
- *
- * Copyright (C) 2007-2009 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2007-2009
- */
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/mm.h>
-#include <linux/trace_clock.h>
-
-#include <asm/ds.h>
-
-#include "ds_selftest.h"
-
-/*
- * The configuration for a particular DS hardware implementation:
- */
-struct ds_configuration {
-	/* The name of the configuration: */
-	const char		*name;
-
-	/* The size of pointer-typed fields in DS, BTS, and PEBS: */
-	unsigned char		sizeof_ptr_field;
-
-	/* The size of a BTS/PEBS record in bytes: */
-	unsigned char		sizeof_rec[2];
-
-	/* The number of pebs counter reset values in the DS structure. */
-	unsigned char		nr_counter_reset;
-
-	/* Control bit-masks indexed by enum ds_feature: */
-	unsigned long		ctl[dsf_ctl_max];
-};
-static struct ds_configuration ds_cfg __read_mostly;
-
-
-/* Maximal size of a DS configuration: */
-#define MAX_SIZEOF_DS		0x80
-
-/* Maximal size of a BTS record: */
-#define MAX_SIZEOF_BTS		(3 * 8)
-
-/* BTS and PEBS buffer alignment: */
-#define DS_ALIGNMENT		(1 << 3)
-
-/* Number of buffer pointers in DS: */
-#define NUM_DS_PTR_FIELDS	8
-
-/* Size of a pebs reset value in DS: */
-#define PEBS_RESET_FIELD_SIZE	8
-
-/* Mask of control bits in the DS MSR register: */
-#define BTS_CONTROL				  \
-	( ds_cfg.ctl[dsf_bts]			| \
-	  ds_cfg.ctl[dsf_bts_kernel]		| \
-	  ds_cfg.ctl[dsf_bts_user]		| \
-	  ds_cfg.ctl[dsf_bts_overflow] )
-
-/*
- * A BTS or PEBS tracer.
- *
- * This holds the configuration of the tracer and serves as a handle
- * to identify tracers.
- */
-struct ds_tracer {
-	/* The DS context (partially) owned by this tracer. */
-	struct ds_context	*context;
-	/* The buffer provided on ds_request() and its size in bytes. */
-	void			*buffer;
-	size_t			size;
-};
-
-struct bts_tracer {
-	/* The common DS part: */
-	struct ds_tracer	ds;
-
-	/* The trace including the DS configuration: */
-	struct bts_trace	trace;
-
-	/* Buffer overflow notification function: */
-	bts_ovfl_callback_t	ovfl;
-
-	/* Active flags affecting trace collection. */
-	unsigned int		flags;
-};
-
-struct pebs_tracer {
-	/* The common DS part: */
-	struct ds_tracer	ds;
-
-	/* The trace including the DS configuration: */
-	struct pebs_trace	trace;
-
-	/* Buffer overflow notification function: */
-	pebs_ovfl_callback_t	ovfl;
-};
-
-/*
- * Debug Store (DS) save area configuration (see Intel64 and IA32
- * Architectures Software Developer's Manual, section 18.5)
- *
- * The DS configuration consists of the following fields; different
- * architetures vary in the size of those fields.
- *
- * - double-word aligned base linear address of the BTS buffer
- * - write pointer into the BTS buffer
- * - end linear address of the BTS buffer (one byte beyond the end of
- *   the buffer)
- * - interrupt pointer into BTS buffer
- *   (interrupt occurs when write pointer passes interrupt pointer)
- * - double-word aligned base linear address of the PEBS buffer
- * - write pointer into the PEBS buffer
- * - end linear address of the PEBS buffer (one byte beyond the end of
- *   the buffer)
- * - interrupt pointer into PEBS buffer
- *   (interrupt occurs when write pointer passes interrupt pointer)
- * - value to which counter is reset following counter overflow
- *
- * Later architectures use 64bit pointers throughout, whereas earlier
- * architectures use 32bit pointers in 32bit mode.
- *
- *
- * We compute the base address for the first 8 fields based on:
- * - the field size stored in the DS configuration
- * - the relative field position
- * - an offset giving the start of the respective region
- *
- * This offset is further used to index various arrays holding
- * information for BTS and PEBS at the respective index.
- *
- * On later 32bit processors, we only access the lower 32bit of the
- * 64bit pointer fields. The upper halves will be zeroed out.
- */
-
-enum ds_field {
-	ds_buffer_base = 0,
-	ds_index,
-	ds_absolute_maximum,
-	ds_interrupt_threshold,
-};
-
-enum ds_qualifier {
-	ds_bts = 0,
-	ds_pebs
-};
-
-static inline unsigned long
-ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field)
-{
-	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
-	return *(unsigned long *)base;
-}
-
-static inline void
-ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field,
-       unsigned long value)
-{
-	base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual)));
-	(*(unsigned long *)base) = value;
-}
-
-
-/*
- * Locking is done only for allocating BTS or PEBS resources.
- */
-static DEFINE_SPINLOCK(ds_lock);
-
-/*
- * We either support (system-wide) per-cpu or per-thread allocation.
- * We distinguish the two based on the task_struct pointer, where a
- * NULL pointer indicates per-cpu allocation for the current cpu.
- *
- * Allocations are use-counted. As soon as resources are allocated,
- * further allocations must be of the same type (per-cpu or
- * per-thread). We model this by counting allocations (i.e. the number
- * of tracers of a certain type) for one type negatively:
- *   =0  no tracers
- *   >0  number of per-thread tracers
- *   <0  number of per-cpu tracers
- *
- * Tracers essentially gives the number of ds contexts for a certain
- * type of allocation.
- */
-static atomic_t tracers = ATOMIC_INIT(0);
-
-static inline int get_tracer(struct task_struct *task)
-{
-	int error;
-
-	spin_lock_irq(&ds_lock);
-
-	if (task) {
-		error = -EPERM;
-		if (atomic_read(&tracers) < 0)
-			goto out;
-		atomic_inc(&tracers);
-	} else {
-		error = -EPERM;
-		if (atomic_read(&tracers) > 0)
-			goto out;
-		atomic_dec(&tracers);
-	}
-
-	error = 0;
-out:
-	spin_unlock_irq(&ds_lock);
-	return error;
-}
-
-static inline void put_tracer(struct task_struct *task)
-{
-	if (task)
-		atomic_dec(&tracers);
-	else
-		atomic_inc(&tracers);
-}
-
-/*
- * The DS context is either attached to a thread or to a cpu:
- * - in the former case, the thread_struct contains a pointer to the
- *   attached context.
- * - in the latter case, we use a static array of per-cpu context
- *   pointers.
- *
- * Contexts are use-counted. They are allocated on first access and
- * deallocated when the last user puts the context.
- */
-struct ds_context {
-	/* The DS configuration; goes into MSR_IA32_DS_AREA: */
-	unsigned char		ds[MAX_SIZEOF_DS];
-
-	/* The owner of the BTS and PEBS configuration, respectively: */
-	struct bts_tracer	*bts_master;
-	struct pebs_tracer	*pebs_master;
-
-	/* Use count: */
-	unsigned long		count;
-
-	/* Pointer to the context pointer field: */
-	struct ds_context	**this;
-
-	/* The traced task; NULL for cpu tracing: */
-	struct task_struct	*task;
-
-	/* The traced cpu; only valid if task is NULL: */
-	int			cpu;
-};
-
-static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context);
-
-
-static struct ds_context *ds_get_context(struct task_struct *task, int cpu)
-{
-	struct ds_context **p_context =
-		(task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu));
-	struct ds_context *context = NULL;
-	struct ds_context *new_context = NULL;
-
-	/* Chances are small that we already have a context. */
-	new_context = kzalloc(sizeof(*new_context), GFP_KERNEL);
-	if (!new_context)
-		return NULL;
-
-	spin_lock_irq(&ds_lock);
-
-	context = *p_context;
-	if (likely(!context)) {
-		context = new_context;
-
-		context->this = p_context;
-		context->task = task;
-		context->cpu = cpu;
-		context->count = 0;
-
-		*p_context = context;
-	}
-
-	context->count++;
-
-	spin_unlock_irq(&ds_lock);
-
-	if (context != new_context)
-		kfree(new_context);
-
-	return context;
-}
-
-static void ds_put_context(struct ds_context *context)
-{
-	struct task_struct *task;
-	unsigned long irq;
-
-	if (!context)
-		return;
-
-	spin_lock_irqsave(&ds_lock, irq);
-
-	if (--context->count) {
-		spin_unlock_irqrestore(&ds_lock, irq);
-		return;
-	}
-
-	*(context->this) = NULL;
-
-	task = context->task;
-
-	if (task)
-		clear_tsk_thread_flag(task, TIF_DS_AREA_MSR);
-
-	/*
-	 * We leave the (now dangling) pointer to the DS configuration in
-	 * the DS_AREA msr. This is as good or as bad as replacing it with
-	 * NULL - the hardware would crash if we enabled tracing.
-	 *
-	 * This saves us some problems with having to write an msr on a
-	 * different cpu while preventing others from doing the same for the
-	 * next context for that same cpu.
-	 */
-
-	spin_unlock_irqrestore(&ds_lock, irq);
-
-	/* The context might still be in use for context switching. */
-	if (task && (task != current))
-		wait_task_context_switch(task);
-
-	kfree(context);
-}
-
-static void ds_install_ds_area(struct ds_context *context)
-{
-	unsigned long ds;
-
-	ds = (unsigned long)context->ds;
-
-	/*
-	 * There is a race between the bts master and the pebs master.
-	 *
-	 * The thread/cpu access is synchronized via get/put_cpu() for
-	 * task tracing and via wrmsr_on_cpu for cpu tracing.
-	 *
-	 * If bts and pebs are collected for the same task or same cpu,
-	 * the same confiuration is written twice.
-	 */
-	if (context->task) {
-		get_cpu();
-		if (context->task == current)
-			wrmsrl(MSR_IA32_DS_AREA, ds);
-		set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR);
-		put_cpu();
-	} else
-		wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA,
-			     (u32)((u64)ds), (u32)((u64)ds >> 32));
-}
-
-/*
- * Call the tracer's callback on a buffer overflow.
- *
- * context: the ds context
- * qual: the buffer type
- */
-static void ds_overflow(struct ds_context *context, enum ds_qualifier qual)
-{
-	switch (qual) {
-	case ds_bts:
-		if (context->bts_master &&
-		    context->bts_master->ovfl)
-			context->bts_master->ovfl(context->bts_master);
-		break;
-	case ds_pebs:
-		if (context->pebs_master &&
-		    context->pebs_master->ovfl)
-			context->pebs_master->ovfl(context->pebs_master);
-		break;
-	}
-}
-
-
-/*
- * Write raw data into the BTS or PEBS buffer.
- *
- * The remainder of any partially written record is zeroed out.
- *
- * context: the DS context
- * qual:    the buffer type
- * record:  the data to write
- * size:    the size of the data
- */
-static int ds_write(struct ds_context *context, enum ds_qualifier qual,
-		    const void *record, size_t size)
-{
-	int bytes_written = 0;
-
-	if (!record)
-		return -EINVAL;
-
-	while (size) {
-		unsigned long base, index, end, write_end, int_th;
-		unsigned long write_size, adj_write_size;
-
-		/*
-		 * Write as much as possible without producing an
-		 * overflow interrupt.
-		 *
-		 * Interrupt_threshold must either be
-		 * - bigger than absolute_maximum or
-		 * - point to a record between buffer_base and absolute_maximum
-		 *
-		 * Index points to a valid record.
-		 */
-		base   = ds_get(context->ds, qual, ds_buffer_base);
-		index  = ds_get(context->ds, qual, ds_index);
-		end    = ds_get(context->ds, qual, ds_absolute_maximum);
-		int_th = ds_get(context->ds, qual, ds_interrupt_threshold);
-
-		write_end = min(end, int_th);
-
-		/*
-		 * If we are already beyond the interrupt threshold,
-		 * we fill the entire buffer.
-		 */
-		if (write_end <= index)
-			write_end = end;
-
-		if (write_end <= index)
-			break;
-
-		write_size = min((unsigned long) size, write_end - index);
-		memcpy((void *)index, record, write_size);
-
-		record = (const char *)record + write_size;
-		size -= write_size;
-		bytes_written += write_size;
-
-		adj_write_size = write_size / ds_cfg.sizeof_rec[qual];
-		adj_write_size *= ds_cfg.sizeof_rec[qual];
-
-		/* Zero out trailing bytes. */
-		memset((char *)index + write_size, 0,
-		       adj_write_size - write_size);
-		index += adj_write_size;
-
-		if (index >= end)
-			index = base;
-		ds_set(context->ds, qual, ds_index, index);
-
-		if (index >= int_th)
-			ds_overflow(context, qual);
-	}
-
-	return bytes_written;
-}
-
-
-/*
- * Branch Trace Store (BTS) uses the following format. Different
- * architectures vary in the size of those fields.
- * - source linear address
- * - destination linear address
- * - flags
- *
- * Later architectures use 64bit pointers throughout, whereas earlier
- * architectures use 32bit pointers in 32bit mode.
- *
- * We compute the base address for the fields based on:
- * - the field size stored in the DS configuration
- * - the relative field position
- *
- * In order to store additional information in the BTS buffer, we use
- * a special source address to indicate that the record requires
- * special interpretation.
- *
- * Netburst indicated via a bit in the flags field whether the branch
- * was predicted; this is ignored.
- *
- * We use two levels of abstraction:
- * - the raw data level defined here
- * - an arch-independent level defined in ds.h
- */
-
-enum bts_field {
-	bts_from,
-	bts_to,
-	bts_flags,
-
-	bts_qual		= bts_from,
-	bts_clock		= bts_to,
-	bts_pid			= bts_flags,
-
-	bts_qual_mask		= (bts_qual_max - 1),
-	bts_escape		= ((unsigned long)-1 & ~bts_qual_mask)
-};
-
-static inline unsigned long bts_get(const char *base, unsigned long field)
-{
-	base += (ds_cfg.sizeof_ptr_field * field);
-	return *(unsigned long *)base;
-}
-
-static inline void bts_set(char *base, unsigned long field, unsigned long val)
-{
-	base += (ds_cfg.sizeof_ptr_field * field);
-	(*(unsigned long *)base) = val;
-}
-
-
-/*
- * The raw BTS data is architecture dependent.
- *
- * For higher-level users, we give an arch-independent view.
- * - ds.h defines struct bts_struct
- * - bts_read translates one raw bts record into a bts_struct
- * - bts_write translates one bts_struct into the raw format and
- *   writes it into the top of the parameter tracer's buffer.
- *
- * return: bytes read/written on success; -Eerrno, otherwise
- */
-static int
-bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	if (at < tracer->trace.ds.begin)
-		return -EINVAL;
-
-	if (tracer->trace.ds.end < (at + tracer->trace.ds.size))
-		return -EINVAL;
-
-	memset(out, 0, sizeof(*out));
-	if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) {
-		out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask);
-		out->variant.event.clock = bts_get(at, bts_clock);
-		out->variant.event.pid = bts_get(at, bts_pid);
-	} else {
-		out->qualifier = bts_branch;
-		out->variant.lbr.from = bts_get(at, bts_from);
-		out->variant.lbr.to   = bts_get(at, bts_to);
-
-		if (!out->variant.lbr.from && !out->variant.lbr.to)
-			out->qualifier = bts_invalid;
-	}
-
-	return ds_cfg.sizeof_rec[ds_bts];
-}
-
-static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in)
-{
-	unsigned char raw[MAX_SIZEOF_BTS];
-
-	if (!tracer)
-		return -EINVAL;
-
-	if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts])
-		return -EOVERFLOW;
-
-	switch (in->qualifier) {
-	case bts_invalid:
-		bts_set(raw, bts_from, 0);
-		bts_set(raw, bts_to, 0);
-		bts_set(raw, bts_flags, 0);
-		break;
-	case bts_branch:
-		bts_set(raw, bts_from, in->variant.lbr.from);
-		bts_set(raw, bts_to,   in->variant.lbr.to);
-		bts_set(raw, bts_flags, 0);
-		break;
-	case bts_task_arrives:
-	case bts_task_departs:
-		bts_set(raw, bts_qual, (bts_escape | in->qualifier));
-		bts_set(raw, bts_clock, in->variant.event.clock);
-		bts_set(raw, bts_pid, in->variant.event.pid);
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return ds_write(tracer->ds.context, ds_bts, raw,
-			ds_cfg.sizeof_rec[ds_bts]);
-}
-
-
-static void ds_write_config(struct ds_context *context,
-			    struct ds_trace *cfg, enum ds_qualifier qual)
-{
-	unsigned char *ds = context->ds;
-
-	ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin);
-	ds_set(ds, qual, ds_index, (unsigned long)cfg->top);
-	ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end);
-	ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith);
-}
-
-static void ds_read_config(struct ds_context *context,
-			   struct ds_trace *cfg, enum ds_qualifier qual)
-{
-	unsigned char *ds = context->ds;
-
-	cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base);
-	cfg->top = (void *)ds_get(ds, qual, ds_index);
-	cfg->end = (void *)ds_get(ds, qual, ds_absolute_maximum);
-	cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold);
-}
-
-static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual,
-			     void *base, size_t size, size_t ith,
-			     unsigned int flags) {
-	unsigned long buffer, adj;
-
-	/*
-	 * Adjust the buffer address and size to meet alignment
-	 * constraints:
-	 * - buffer is double-word aligned
-	 * - size is multiple of record size
-	 *
-	 * We checked the size at the very beginning; we have enough
-	 * space to do the adjustment.
-	 */
-	buffer = (unsigned long)base;
-
-	adj = ALIGN(buffer, DS_ALIGNMENT) - buffer;
-	buffer += adj;
-	size   -= adj;
-
-	trace->n = size / ds_cfg.sizeof_rec[qual];
-	trace->size = ds_cfg.sizeof_rec[qual];
-
-	size = (trace->n * trace->size);
-
-	trace->begin = (void *)buffer;
-	trace->top = trace->begin;
-	trace->end = (void *)(buffer + size);
-	/*
-	 * The value for 'no threshold' is -1, which will set the
-	 * threshold outside of the buffer, just like we want it.
-	 */
-	ith *= ds_cfg.sizeof_rec[qual];
-	trace->ith = (void *)(buffer + size - ith);
-
-	trace->flags = flags;
-}
-
-
-static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace,
-		      enum ds_qualifier qual, struct task_struct *task,
-		      int cpu, void *base, size_t size, size_t th)
-{
-	struct ds_context *context;
-	int error;
-	size_t req_size;
-
-	error = -EOPNOTSUPP;
-	if (!ds_cfg.sizeof_rec[qual])
-		goto out;
-
-	error = -EINVAL;
-	if (!base)
-		goto out;
-
-	req_size = ds_cfg.sizeof_rec[qual];
-	/* We might need space for alignment adjustments. */
-	if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT))
-		req_size += DS_ALIGNMENT;
-
-	error = -EINVAL;
-	if (size < req_size)
-		goto out;
-
-	if (th != (size_t)-1) {
-		th *= ds_cfg.sizeof_rec[qual];
-
-		error = -EINVAL;
-		if (size <= th)
-			goto out;
-	}
-
-	tracer->buffer = base;
-	tracer->size = size;
-
-	error = -ENOMEM;
-	context = ds_get_context(task, cpu);
-	if (!context)
-		goto out;
-	tracer->context = context;
-
-	/*
-	 * Defer any tracer-specific initialization work for the context until
-	 * context ownership has been clarified.
-	 */
-
-	error = 0;
- out:
-	return error;
-}
-
-static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu,
-					 void *base, size_t size,
-					 bts_ovfl_callback_t ovfl, size_t th,
-					 unsigned int flags)
-{
-	struct bts_tracer *tracer;
-	int error;
-
-	/* Buffer overflow notification is not yet implemented. */
-	error = -EOPNOTSUPP;
-	if (ovfl)
-		goto out;
-
-	error = get_tracer(task);
-	if (error < 0)
-		goto out;
-
-	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
-	if (!tracer)
-		goto out_put_tracer;
-	tracer->ovfl = ovfl;
-
-	/* Do some more error checking and acquire a tracing context. */
-	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_bts, task, cpu, base, size, th);
-	if (error < 0)
-		goto out_tracer;
-
-	/* Claim the bts part of the tracing context we acquired above. */
-	spin_lock_irq(&ds_lock);
-
-	error = -EPERM;
-	if (tracer->ds.context->bts_master)
-		goto out_unlock;
-	tracer->ds.context->bts_master = tracer;
-
-	spin_unlock_irq(&ds_lock);
-
-	/*
-	 * Now that we own the bts part of the context, let's complete the
-	 * initialization for that part.
-	 */
-	ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags);
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
-	ds_install_ds_area(tracer->ds.context);
-
-	tracer->trace.read  = bts_read;
-	tracer->trace.write = bts_write;
-
-	/* Start tracing. */
-	ds_resume_bts(tracer);
-
-	return tracer;
-
- out_unlock:
-	spin_unlock_irq(&ds_lock);
-	ds_put_context(tracer->ds.context);
- out_tracer:
-	kfree(tracer);
- out_put_tracer:
-	put_tracer(task);
- out:
-	return ERR_PTR(error);
-}
-
-struct bts_tracer *ds_request_bts_task(struct task_struct *task,
-				       void *base, size_t size,
-				       bts_ovfl_callback_t ovfl,
-				       size_t th, unsigned int flags)
-{
-	return ds_request_bts(task, 0, base, size, ovfl, th, flags);
-}
-
-struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size,
-				      bts_ovfl_callback_t ovfl,
-				      size_t th, unsigned int flags)
-{
-	return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags);
-}
-
-static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu,
-					   void *base, size_t size,
-					   pebs_ovfl_callback_t ovfl, size_t th,
-					   unsigned int flags)
-{
-	struct pebs_tracer *tracer;
-	int error;
-
-	/* Buffer overflow notification is not yet implemented. */
-	error = -EOPNOTSUPP;
-	if (ovfl)
-		goto out;
-
-	error = get_tracer(task);
-	if (error < 0)
-		goto out;
-
-	error = -ENOMEM;
-	tracer = kzalloc(sizeof(*tracer), GFP_KERNEL);
-	if (!tracer)
-		goto out_put_tracer;
-	tracer->ovfl = ovfl;
-
-	/* Do some more error checking and acquire a tracing context. */
-	error = ds_request(&tracer->ds, &tracer->trace.ds,
-			   ds_pebs, task, cpu, base, size, th);
-	if (error < 0)
-		goto out_tracer;
-
-	/* Claim the pebs part of the tracing context we acquired above. */
-	spin_lock_irq(&ds_lock);
-
-	error = -EPERM;
-	if (tracer->ds.context->pebs_master)
-		goto out_unlock;
-	tracer->ds.context->pebs_master = tracer;
-
-	spin_unlock_irq(&ds_lock);
-
-	/*
-	 * Now that we own the pebs part of the context, let's complete the
-	 * initialization for that part.
-	 */
-	ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags);
-	ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-	ds_install_ds_area(tracer->ds.context);
-
-	/* Start tracing. */
-	ds_resume_pebs(tracer);
-
-	return tracer;
-
- out_unlock:
-	spin_unlock_irq(&ds_lock);
-	ds_put_context(tracer->ds.context);
- out_tracer:
-	kfree(tracer);
- out_put_tracer:
-	put_tracer(task);
- out:
-	return ERR_PTR(error);
-}
-
-struct pebs_tracer *ds_request_pebs_task(struct task_struct *task,
-					 void *base, size_t size,
-					 pebs_ovfl_callback_t ovfl,
-					 size_t th, unsigned int flags)
-{
-	return ds_request_pebs(task, 0, base, size, ovfl, th, flags);
-}
-
-struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size,
-					pebs_ovfl_callback_t ovfl,
-					size_t th, unsigned int flags)
-{
-	return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags);
-}
-
-static void ds_free_bts(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-
-	task = tracer->ds.context->task;
-
-	WARN_ON_ONCE(tracer->ds.context->bts_master != tracer);
-	tracer->ds.context->bts_master = NULL;
-
-	/* Make sure tracing stopped and the tracer is not in use. */
-	if (task && (task != current))
-		wait_task_context_switch(task);
-
-	ds_put_context(tracer->ds.context);
-	put_tracer(task);
-
-	kfree(tracer);
-}
-
-void ds_release_bts(struct bts_tracer *tracer)
-{
-	might_sleep();
-
-	if (!tracer)
-		return;
-
-	ds_suspend_bts(tracer);
-	ds_free_bts(tracer);
-}
-
-int ds_release_bts_noirq(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long irq;
-	int error;
-
-	if (!tracer)
-		return 0;
-
-	task = tracer->ds.context->task;
-
-	local_irq_save(irq);
-
-	error = -EPERM;
-	if (!task &&
-	    (tracer->ds.context->cpu != smp_processor_id()))
-		goto out;
-
-	error = -EPERM;
-	if (task && (task != current))
-		goto out;
-
-	ds_suspend_bts_noirq(tracer);
-	ds_free_bts(tracer);
-
-	error = 0;
- out:
-	local_irq_restore(irq);
-	return error;
-}
-
-static void update_task_debugctlmsr(struct task_struct *task,
-				    unsigned long debugctlmsr)
-{
-	task->thread.debugctlmsr = debugctlmsr;
-
-	get_cpu();
-	if (task == current)
-		update_debugctlmsr(debugctlmsr);
-	put_cpu();
-}
-
-void ds_suspend_bts(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long debugctlmsr;
-	int cpu;
-
-	if (!tracer)
-		return;
-
-	tracer->flags = 0;
-
-	task = tracer->ds.context->task;
-	cpu  = tracer->ds.context->cpu;
-
-	WARN_ON(!task && irqs_disabled());
-
-	debugctlmsr = (task ?
-		       task->thread.debugctlmsr :
-		       get_debugctlmsr_on_cpu(cpu));
-	debugctlmsr &= ~BTS_CONTROL;
-
-	if (task)
-		update_task_debugctlmsr(task, debugctlmsr);
-	else
-		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
-}
-
-int ds_suspend_bts_noirq(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long debugctlmsr, irq;
-	int cpu, error = 0;
-
-	if (!tracer)
-		return 0;
-
-	tracer->flags = 0;
-
-	task = tracer->ds.context->task;
-	cpu  = tracer->ds.context->cpu;
-
-	local_irq_save(irq);
-
-	error = -EPERM;
-	if (!task && (cpu != smp_processor_id()))
-		goto out;
-
-	debugctlmsr = (task ?
-		       task->thread.debugctlmsr :
-		       get_debugctlmsr());
-	debugctlmsr &= ~BTS_CONTROL;
-
-	if (task)
-		update_task_debugctlmsr(task, debugctlmsr);
-	else
-		update_debugctlmsr(debugctlmsr);
-
-	error = 0;
- out:
-	local_irq_restore(irq);
-	return error;
-}
-
-static unsigned long ds_bts_control(struct bts_tracer *tracer)
-{
-	unsigned long control;
-
-	control = ds_cfg.ctl[dsf_bts];
-	if (!(tracer->trace.ds.flags & BTS_KERNEL))
-		control |= ds_cfg.ctl[dsf_bts_kernel];
-	if (!(tracer->trace.ds.flags & BTS_USER))
-		control |= ds_cfg.ctl[dsf_bts_user];
-
-	return control;
-}
-
-void ds_resume_bts(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long debugctlmsr;
-	int cpu;
-
-	if (!tracer)
-		return;
-
-	tracer->flags = tracer->trace.ds.flags;
-
-	task = tracer->ds.context->task;
-	cpu  = tracer->ds.context->cpu;
-
-	WARN_ON(!task && irqs_disabled());
-
-	debugctlmsr = (task ?
-		       task->thread.debugctlmsr :
-		       get_debugctlmsr_on_cpu(cpu));
-	debugctlmsr |= ds_bts_control(tracer);
-
-	if (task)
-		update_task_debugctlmsr(task, debugctlmsr);
-	else
-		update_debugctlmsr_on_cpu(cpu, debugctlmsr);
-}
-
-int ds_resume_bts_noirq(struct bts_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long debugctlmsr, irq;
-	int cpu, error = 0;
-
-	if (!tracer)
-		return 0;
-
-	tracer->flags = tracer->trace.ds.flags;
-
-	task = tracer->ds.context->task;
-	cpu  = tracer->ds.context->cpu;
-
-	local_irq_save(irq);
-
-	error = -EPERM;
-	if (!task && (cpu != smp_processor_id()))
-		goto out;
-
-	debugctlmsr = (task ?
-		       task->thread.debugctlmsr :
-		       get_debugctlmsr());
-	debugctlmsr |= ds_bts_control(tracer);
-
-	if (task)
-		update_task_debugctlmsr(task, debugctlmsr);
-	else
-		update_debugctlmsr(debugctlmsr);
-
-	error = 0;
- out:
-	local_irq_restore(irq);
-	return error;
-}
-
-static void ds_free_pebs(struct pebs_tracer *tracer)
-{
-	struct task_struct *task;
-
-	task = tracer->ds.context->task;
-
-	WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer);
-	tracer->ds.context->pebs_master = NULL;
-
-	ds_put_context(tracer->ds.context);
-	put_tracer(task);
-
-	kfree(tracer);
-}
-
-void ds_release_pebs(struct pebs_tracer *tracer)
-{
-	might_sleep();
-
-	if (!tracer)
-		return;
-
-	ds_suspend_pebs(tracer);
-	ds_free_pebs(tracer);
-}
-
-int ds_release_pebs_noirq(struct pebs_tracer *tracer)
-{
-	struct task_struct *task;
-	unsigned long irq;
-	int error;
-
-	if (!tracer)
-		return 0;
-
-	task = tracer->ds.context->task;
-
-	local_irq_save(irq);
-
-	error = -EPERM;
-	if (!task &&
-	    (tracer->ds.context->cpu != smp_processor_id()))
-		goto out;
-
-	error = -EPERM;
-	if (task && (task != current))
-		goto out;
-
-	ds_suspend_pebs_noirq(tracer);
-	ds_free_pebs(tracer);
-
-	error = 0;
- out:
-	local_irq_restore(irq);
-	return error;
-}
-
-void ds_suspend_pebs(struct pebs_tracer *tracer)
-{
-
-}
-
-int ds_suspend_pebs_noirq(struct pebs_tracer *tracer)
-{
-	return 0;
-}
-
-void ds_resume_pebs(struct pebs_tracer *tracer)
-{
-
-}
-
-int ds_resume_pebs_noirq(struct pebs_tracer *tracer)
-{
-	return 0;
-}
-
-const struct bts_trace *ds_read_bts(struct bts_tracer *tracer)
-{
-	if (!tracer)
-		return NULL;
-
-	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts);
-	return &tracer->trace;
-}
-
-const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return NULL;
-
-	ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs);
-
-	tracer->trace.counters = ds_cfg.nr_counter_reset;
-	memcpy(tracer->trace.counter_reset,
-	       tracer->ds.context->ds +
-	       (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field),
-	       ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE);
-
-	return &tracer->trace;
-}
-
-int ds_reset_bts(struct bts_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	tracer->trace.ds.top = tracer->trace.ds.begin;
-
-	ds_set(tracer->ds.context->ds, ds_bts, ds_index,
-	       (unsigned long)tracer->trace.ds.top);
-
-	return 0;
-}
-
-int ds_reset_pebs(struct pebs_tracer *tracer)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	tracer->trace.ds.top = tracer->trace.ds.begin;
-
-	ds_set(tracer->ds.context->ds, ds_pebs, ds_index,
-	       (unsigned long)tracer->trace.ds.top);
-
-	return 0;
-}
-
-int ds_set_pebs_reset(struct pebs_tracer *tracer,
-		      unsigned int counter, u64 value)
-{
-	if (!tracer)
-		return -EINVAL;
-
-	if (ds_cfg.nr_counter_reset < counter)
-		return -EINVAL;
-
-	*(u64 *)(tracer->ds.context->ds +
-		 (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) +
-		 (counter * PEBS_RESET_FIELD_SIZE)) = value;
-
-	return 0;
-}
-
-static const struct ds_configuration ds_cfg_netburst = {
-	.name = "Netburst",
-	.ctl[dsf_bts]		= (1 << 2) | (1 << 3),
-	.ctl[dsf_bts_kernel]	= (1 << 5),
-	.ctl[dsf_bts_user]	= (1 << 6),
-	.nr_counter_reset	= 1,
-};
-static const struct ds_configuration ds_cfg_pentium_m = {
-	.name = "Pentium M",
-	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-	.nr_counter_reset	= 1,
-};
-static const struct ds_configuration ds_cfg_core2_atom = {
-	.name = "Core 2/Atom",
-	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-	.ctl[dsf_bts_kernel]	= (1 << 9),
-	.ctl[dsf_bts_user]	= (1 << 10),
-	.nr_counter_reset	= 1,
-};
-static const struct ds_configuration ds_cfg_core_i7 = {
-	.name = "Core i7",
-	.ctl[dsf_bts]		= (1 << 6) | (1 << 7),
-	.ctl[dsf_bts_kernel]	= (1 << 9),
-	.ctl[dsf_bts_user]	= (1 << 10),
-	.nr_counter_reset	= 4,
-};
-
-static void
-ds_configure(const struct ds_configuration *cfg,
-	     struct cpuinfo_x86 *cpu)
-{
-	unsigned long nr_pebs_fields = 0;
-
-	printk(KERN_INFO "[ds] using %s configuration\n", cfg->name);
-
-#ifdef __i386__
-	nr_pebs_fields = 10;
-#else
-	nr_pebs_fields = 18;
-#endif
-
-	/*
-	 * Starting with version 2, architectural performance
-	 * monitoring supports a format specifier.
-	 */
-	if ((cpuid_eax(0xa) & 0xff) > 1) {
-		unsigned long perf_capabilities, format;
-
-		rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
-
-		format = (perf_capabilities >> 8) & 0xf;
-
-		switch (format) {
-		case 0:
-			nr_pebs_fields = 18;
-			break;
-		case 1:
-			nr_pebs_fields = 22;
-			break;
-		default:
-			printk(KERN_INFO
-			       "[ds] unknown PEBS format: %lu\n", format);
-			nr_pebs_fields = 0;
-			break;
-		}
-	}
-
-	memset(&ds_cfg, 0, sizeof(ds_cfg));
-	ds_cfg = *cfg;
-
-	ds_cfg.sizeof_ptr_field =
-		(cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4);
-
-	ds_cfg.sizeof_rec[ds_bts]  = ds_cfg.sizeof_ptr_field * 3;
-	ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields;
-
-	if (!cpu_has(cpu, X86_FEATURE_BTS)) {
-		ds_cfg.sizeof_rec[ds_bts] = 0;
-		printk(KERN_INFO "[ds] bts not available\n");
-	}
-	if (!cpu_has(cpu, X86_FEATURE_PEBS)) {
-		ds_cfg.sizeof_rec[ds_pebs] = 0;
-		printk(KERN_INFO "[ds] pebs not available\n");
-	}
-
-	printk(KERN_INFO "[ds] sizes: address: %u bit, ",
-	       8 * ds_cfg.sizeof_ptr_field);
-	printk("bts/pebs record: %u/%u bytes\n",
-	       ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]);
-
-	WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset);
-}
-
-void __cpuinit ds_init_intel(struct cpuinfo_x86 *c)
-{
-	/* Only configure the first cpu. Others are identical. */
-	if (ds_cfg.name)
-		return;
-
-	switch (c->x86) {
-	case 0x6:
-		switch (c->x86_model) {
-		case 0x9:
-		case 0xd: /* Pentium M */
-			ds_configure(&ds_cfg_pentium_m, c);
-			break;
-		case 0xf:
-		case 0x17: /* Core2 */
-		case 0x1c: /* Atom */
-			ds_configure(&ds_cfg_core2_atom, c);
-			break;
-		case 0x1a: /* Core i7 */
-			ds_configure(&ds_cfg_core_i7, c);
-			break;
-		default:
-			/* Sorry, don't know about them. */
-			break;
-		}
-		break;
-	case 0xf:
-		switch (c->x86_model) {
-		case 0x0:
-		case 0x1:
-		case 0x2: /* Netburst */
-			ds_configure(&ds_cfg_netburst, c);
-			break;
-		default:
-			/* Sorry, don't know about them. */
-			break;
-		}
-		break;
-	default:
-		/* Sorry, don't know about them. */
-		break;
-	}
-}
-
-static inline void ds_take_timestamp(struct ds_context *context,
-				     enum bts_qualifier qualifier,
-				     struct task_struct *task)
-{
-	struct bts_tracer *tracer = context->bts_master;
-	struct bts_struct ts;
-
-	/* Prevent compilers from reading the tracer pointer twice. */
-	barrier();
-
-	if (!tracer || !(tracer->flags & BTS_TIMESTAMPS))
-		return;
-
-	memset(&ts, 0, sizeof(ts));
-	ts.qualifier		= qualifier;
-	ts.variant.event.clock	= trace_clock_global();
-	ts.variant.event.pid	= task->pid;
-
-	bts_write(tracer, &ts);
-}
-
-/*
- * Change the DS configuration from tracing prev to tracing next.
- */
-void ds_switch_to(struct task_struct *prev, struct task_struct *next)
-{
-	struct ds_context *prev_ctx	= prev->thread.ds_ctx;
-	struct ds_context *next_ctx	= next->thread.ds_ctx;
-	unsigned long debugctlmsr	= next->thread.debugctlmsr;
-
-	/* Make sure all data is read before we start. */
-	barrier();
-
-	if (prev_ctx) {
-		update_debugctlmsr(0);
-
-		ds_take_timestamp(prev_ctx, bts_task_departs, prev);
-	}
-
-	if (next_ctx) {
-		ds_take_timestamp(next_ctx, bts_task_arrives, next);
-
-		wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds);
-	}
-
-	update_debugctlmsr(debugctlmsr);
-}
-
-static __init int ds_selftest(void)
-{
-	if (ds_cfg.sizeof_rec[ds_bts]) {
-		int error;
-
-		error = ds_selftest_bts();
-		if (error) {
-			WARN(1, "[ds] selftest failed. disabling bts.\n");
-			ds_cfg.sizeof_rec[ds_bts] = 0;
-		}
-	}
-
-	if (ds_cfg.sizeof_rec[ds_pebs]) {
-		int error;
-
-		error = ds_selftest_pebs();
-		if (error) {
-			WARN(1, "[ds] selftest failed. disabling pebs.\n");
-			ds_cfg.sizeof_rec[ds_pebs] = 0;
-		}
-	}
-
-	return 0;
-}
-device_initcall(ds_selftest);
diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c
deleted file mode 100644
index 6bc7c199ab99..000000000000
--- a/arch/x86/kernel/ds_selftest.c
+++ /dev/null
@@ -1,408 +0,0 @@
-/*
- * Debug Store support - selftest
- *
- *
- * Copyright (C) 2009 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2009
- */
-
-#include "ds_selftest.h"
-
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/smp.h>
-#include <linux/cpu.h>
-
-#include <asm/ds.h>
-
-
-#define BUFFER_SIZE		521	/* Intentionally chose an odd size. */
-#define SMALL_BUFFER_SIZE	24	/* A single bts entry. */
-
-struct ds_selftest_bts_conf {
-	struct bts_tracer *tracer;
-	int error;
-	int (*suspend)(struct bts_tracer *);
-	int (*resume)(struct bts_tracer *);
-};
-
-static int ds_selftest_bts_consistency(const struct bts_trace *trace)
-{
-	int error = 0;
-
-	if (!trace) {
-		printk(KERN_CONT "failed to access trace...");
-		/* Bail out. Other tests are pointless. */
-		return -1;
-	}
-
-	if (!trace->read) {
-		printk(KERN_CONT "bts read not available...");
-		error = -1;
-	}
-
-	/* Do some sanity checks on the trace configuration. */
-	if (!trace->ds.n) {
-		printk(KERN_CONT "empty bts buffer...");
-		error = -1;
-	}
-	if (!trace->ds.size) {
-		printk(KERN_CONT "bad bts trace setup...");
-		error = -1;
-	}
-	if (trace->ds.end !=
-	    (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) {
-		printk(KERN_CONT "bad bts buffer setup...");
-		error = -1;
-	}
-	/*
-	 * We allow top in [begin; end], since its not clear when the
-	 * overflow adjustment happens: after the increment or before the
-	 * write.
-	 */
-	if ((trace->ds.top < trace->ds.begin) ||
-	    (trace->ds.end < trace->ds.top)) {
-		printk(KERN_CONT "bts top out of bounds...");
-		error = -1;
-	}
-
-	return error;
-}
-
-static int ds_selftest_bts_read(struct bts_tracer *tracer,
-				const struct bts_trace *trace,
-				const void *from, const void *to)
-{
-	const unsigned char *at;
-
-	/*
-	 * Check a few things which do not belong to this test.
-	 * They should be covered by other tests.
-	 */
-	if (!trace)
-		return -1;
-
-	if (!trace->read)
-		return -1;
-
-	if (to < from)
-		return -1;
-
-	if (from < trace->ds.begin)
-		return -1;
-
-	if (trace->ds.end < to)
-		return -1;
-
-	if (!trace->ds.size)
-		return -1;
-
-	/* Now to the test itself. */
-	for (at = from; (void *)at < to; at += trace->ds.size) {
-		struct bts_struct bts;
-		unsigned long index;
-		int error;
-
-		if (((void *)at - trace->ds.begin) % trace->ds.size) {
-			printk(KERN_CONT
-			       "read from non-integer index...");
-			return -1;
-		}
-		index = ((void *)at - trace->ds.begin) / trace->ds.size;
-
-		memset(&bts, 0, sizeof(bts));
-		error = trace->read(tracer, at, &bts);
-		if (error < 0) {
-			printk(KERN_CONT
-			       "error reading bts trace at [%lu] (0x%p)...",
-			       index, at);
-			return error;
-		}
-
-		switch (bts.qualifier) {
-		case BTS_BRANCH:
-			break;
-		default:
-			printk(KERN_CONT
-			       "unexpected bts entry %llu at [%lu] (0x%p)...",
-			       bts.qualifier, index, at);
-			return -1;
-		}
-	}
-
-	return 0;
-}
-
-static void ds_selftest_bts_cpu(void *arg)
-{
-	struct ds_selftest_bts_conf *conf = arg;
-	const struct bts_trace *trace;
-	void *top;
-
-	if (IS_ERR(conf->tracer)) {
-		conf->error = PTR_ERR(conf->tracer);
-		conf->tracer = NULL;
-
-		printk(KERN_CONT
-		       "initialization failed (err: %d)...", conf->error);
-		return;
-	}
-
-	/* We should meanwhile have enough trace. */
-	conf->error = conf->suspend(conf->tracer);
-	if (conf->error < 0)
-		return;
-
-	/* Let's see if we can access the trace. */
-	trace = ds_read_bts(conf->tracer);
-
-	conf->error = ds_selftest_bts_consistency(trace);
-	if (conf->error < 0)
-		return;
-
-	/* If everything went well, we should have a few trace entries. */
-	if (trace->ds.top == trace->ds.begin) {
-		/*
-		 * It is possible but highly unlikely that we got a
-		 * buffer overflow and end up at exactly the same
-		 * position we started from.
-		 * Let's issue a warning, but continue.
-		 */
-		printk(KERN_CONT "no trace/overflow...");
-	}
-
-	/* Let's try to read the trace we collected. */
-	conf->error =
-		ds_selftest_bts_read(conf->tracer, trace,
-				     trace->ds.begin, trace->ds.top);
-	if (conf->error < 0)
-		return;
-
-	/*
-	 * Let's read the trace again.
-	 * Since we suspended tracing, we should get the same result.
-	 */
-	top = trace->ds.top;
-
-	trace = ds_read_bts(conf->tracer);
-	conf->error = ds_selftest_bts_consistency(trace);
-	if (conf->error < 0)
-		return;
-
-	if (top != trace->ds.top) {
-		printk(KERN_CONT "suspend not working...");
-		conf->error = -1;
-		return;
-	}
-
-	/* Let's collect some more trace - see if resume is working. */
-	conf->error = conf->resume(conf->tracer);
-	if (conf->error < 0)
-		return;
-
-	conf->error = conf->suspend(conf->tracer);
-	if (conf->error < 0)
-		return;
-
-	trace = ds_read_bts(conf->tracer);
-
-	conf->error = ds_selftest_bts_consistency(trace);
-	if (conf->error < 0)
-		return;
-
-	if (trace->ds.top == top) {
-		/*
-		 * It is possible but highly unlikely that we got a
-		 * buffer overflow and end up at exactly the same
-		 * position we started from.
-		 * Let's issue a warning and check the full trace.
-		 */
-		printk(KERN_CONT
-		       "no resume progress/overflow...");
-
-		conf->error =
-			ds_selftest_bts_read(conf->tracer, trace,
-					     trace->ds.begin, trace->ds.end);
-	} else if (trace->ds.top < top) {
-		/*
-		 * We had a buffer overflow - the entire buffer should
-		 * contain trace records.
-		 */
-		conf->error =
-			ds_selftest_bts_read(conf->tracer, trace,
-					     trace->ds.begin, trace->ds.end);
-	} else {
-		/*
-		 * It is quite likely that the buffer did not overflow.
-		 * Let's just check the delta trace.
-		 */
-		conf->error =
-			ds_selftest_bts_read(conf->tracer, trace, top,
-					     trace->ds.top);
-	}
-	if (conf->error < 0)
-		return;
-
-	conf->error = 0;
-}
-
-static int ds_suspend_bts_wrap(struct bts_tracer *tracer)
-{
-	ds_suspend_bts(tracer);
-	return 0;
-}
-
-static int ds_resume_bts_wrap(struct bts_tracer *tracer)
-{
-	ds_resume_bts(tracer);
-	return 0;
-}
-
-static void ds_release_bts_noirq_wrap(void *tracer)
-{
-	(void)ds_release_bts_noirq(tracer);
-}
-
-static int ds_selftest_bts_bad_release_noirq(int cpu,
-					     struct bts_tracer *tracer)
-{
-	int error = -EPERM;
-
-	/* Try to release the tracer on the wrong cpu. */
-	get_cpu();
-	if (cpu != smp_processor_id()) {
-		error = ds_release_bts_noirq(tracer);
-		if (error != -EPERM)
-			printk(KERN_CONT "release on wrong cpu...");
-	}
-	put_cpu();
-
-	return error ? 0 : -1;
-}
-
-static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer)
-{
-	struct bts_tracer *tracer;
-	int error;
-
-	/* Try to request cpu tracing while task tracing is active. */
-	tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL,
-				    (size_t)-1, BTS_KERNEL);
-	error = PTR_ERR(tracer);
-	if (!IS_ERR(tracer)) {
-		ds_release_bts(tracer);
-		error = 0;
-	}
-
-	if (error != -EPERM)
-		printk(KERN_CONT "cpu/task tracing overlap...");
-
-	return error ? 0 : -1;
-}
-
-static int ds_selftest_bts_bad_request_task(void *buffer)
-{
-	struct bts_tracer *tracer;
-	int error;
-
-	/* Try to request cpu tracing while task tracing is active. */
-	tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL,
-				    (size_t)-1, BTS_KERNEL);
-	error = PTR_ERR(tracer);
-	if (!IS_ERR(tracer)) {
-		error = 0;
-		ds_release_bts(tracer);
-	}
-
-	if (error != -EPERM)
-		printk(KERN_CONT "task/cpu tracing overlap...");
-
-	return error ? 0 : -1;
-}
-
-int ds_selftest_bts(void)
-{
-	struct ds_selftest_bts_conf conf;
-	unsigned char buffer[BUFFER_SIZE], *small_buffer;
-	unsigned long irq;
-	int cpu;
-
-	printk(KERN_INFO "[ds] bts selftest...");
-	conf.error = 0;
-
-	small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		conf.suspend = ds_suspend_bts_wrap;
-		conf.resume = ds_resume_bts_wrap;
-		conf.tracer =
-			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
-					   NULL, (size_t)-1, BTS_KERNEL);
-		ds_selftest_bts_cpu(&conf);
-		if (conf.error >= 0)
-			conf.error = ds_selftest_bts_bad_request_task(buffer);
-		ds_release_bts(conf.tracer);
-		if (conf.error < 0)
-			goto out;
-
-		conf.suspend = ds_suspend_bts_noirq;
-		conf.resume = ds_resume_bts_noirq;
-		conf.tracer =
-			ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE,
-					   NULL, (size_t)-1, BTS_KERNEL);
-		smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1);
-		if (conf.error >= 0) {
-			conf.error =
-				ds_selftest_bts_bad_release_noirq(cpu,
-								  conf.tracer);
-			/* We must not release the tracer twice. */
-			if (conf.error < 0)
-				conf.tracer = NULL;
-		}
-		if (conf.error >= 0)
-			conf.error = ds_selftest_bts_bad_request_task(buffer);
-		smp_call_function_single(cpu, ds_release_bts_noirq_wrap,
-					 conf.tracer, 1);
-		if (conf.error < 0)
-			goto out;
-	}
-
-	conf.suspend = ds_suspend_bts_wrap;
-	conf.resume = ds_resume_bts_wrap;
-	conf.tracer =
-		ds_request_bts_task(current, buffer, BUFFER_SIZE,
-				    NULL, (size_t)-1, BTS_KERNEL);
-	ds_selftest_bts_cpu(&conf);
-	if (conf.error >= 0)
-		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
-	ds_release_bts(conf.tracer);
-	if (conf.error < 0)
-		goto out;
-
-	conf.suspend = ds_suspend_bts_noirq;
-	conf.resume = ds_resume_bts_noirq;
-	conf.tracer =
-		ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE,
-				   NULL, (size_t)-1, BTS_KERNEL);
-	local_irq_save(irq);
-	ds_selftest_bts_cpu(&conf);
-	if (conf.error >= 0)
-		conf.error = ds_selftest_bts_bad_request_cpu(0, buffer);
-	ds_release_bts_noirq(conf.tracer);
-	local_irq_restore(irq);
-	if (conf.error < 0)
-		goto out;
-
-	conf.error = 0;
- out:
-	put_online_cpus();
-	printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed"));
-
-	return conf.error;
-}
-
-int ds_selftest_pebs(void)
-{
-	return 0;
-}
diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h
deleted file mode 100644
index 2ba8745c6663..000000000000
--- a/arch/x86/kernel/ds_selftest.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Debug Store support - selftest
- *
- *
- * Copyright (C) 2009 Intel Corporation.
- * Markus Metzger <markus.t.metzger@intel.com>, 2009
- */
-
-#ifdef CONFIG_X86_DS_SELFTEST
-extern int ds_selftest_bts(void);
-extern int ds_selftest_pebs(void);
-#else
-static inline int ds_selftest_bts(void) { return 0; }
-static inline int ds_selftest_pebs(void) { return 0; }
-#endif
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6d817554780a..c89a386930b7 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void)
 	int cpu;
 	unsigned long flags;
 
-	/* notify the hw-branch tracer so it may disable tracing and
-	   add the last trace to the trace buffer -
-	   the earlier this happens, the more useful the trace. */
-	trace_hw_branch_oops();
-
 	oops_enter();
 
 	/* racy, but better than risking deadlock. */
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index b43bbaebe2c0..7a880ad3a208 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -422,14 +422,12 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
 
 static void __kprobes clear_btf(void)
 {
-	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		update_debugctlmsr(0);
+	/* XXX */
 }
 
 static void __kprobes restore_btf(void)
 {
-	if (test_thread_flag(TIF_DEBUGCTLMSR))
-		update_debugctlmsr(current->thread.debugctlmsr);
+	/* XXX */
 }
 
 void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ad9540676fcc..1a60beb32ede 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -20,7 +20,6 @@
 #include <asm/idle.h>
 #include <asm/uaccess.h>
 #include <asm/i387.h>
-#include <asm/ds.h>
 #include <asm/debugreg.h>
 
 unsigned long idle_halt;
@@ -50,8 +49,6 @@ void free_thread_xstate(struct task_struct *tsk)
 		kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
 		tsk->thread.xstate = NULL;
 	}
-
-	WARN(tsk->thread.ds_ctx, "leaking DS context\n");
 }
 
 void free_thread_info(struct thread_info *ti)
@@ -198,12 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
 	prev = &prev_p->thread;
 	next = &next_p->thread;
 
-	if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) ||
-	    test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR))
-		ds_switch_to(prev_p, next_p);
-	else if (next->debugctlmsr != prev->debugctlmsr)
-		update_debugctlmsr(next->debugctlmsr);
-
 	if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
 	    test_tsk_thread_flag(next_p, TIF_NOTSC)) {
 		/* prev and next are different */
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index f6c62667e30c..75090c589b7a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -55,7 +55,6 @@
 #include <asm/cpu.h>
 #include <asm/idle.h>
 #include <asm/syscalls.h>
-#include <asm/ds.h>
 #include <asm/debugreg.h>
 
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
@@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		kfree(p->thread.io_bitmap_ptr);
 		p->thread.io_bitmap_max = 0;
 	}
-
-	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
-	p->thread.ds_ctx = NULL;
-
-	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
-	p->thread.debugctlmsr = 0;
-
 	return err;
 }
 
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index dc9690b4c4cc..cc4258f2beb5 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -49,7 +49,6 @@
 #include <asm/ia32.h>
 #include <asm/idle.h>
 #include <asm/syscalls.h>
-#include <asm/ds.h>
 #include <asm/debugreg.h>
 
 asmlinkage extern void ret_from_fork(void);
@@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 		if (err)
 			goto out;
 	}
-
-	clear_tsk_thread_flag(p, TIF_DS_AREA_MSR);
-	p->thread.ds_ctx = NULL;
-
-	clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR);
-	p->thread.debugctlmsr = 0;
-
 	err = 0;
 out:
 	if (err && p->thread.io_bitmap_ptr) {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index a503b1fd04e5..f2fd3b80e565 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -2,9 +2,6 @@
 /*
  * Pentium III FXSR, SSE support
  *	Gareth Hughes <gareth@valinux.com>, May 2000
- *
- * BTS tracing
- *	Markus Metzger <markus.t.metzger@intel.com>, Dec 2007
  */
 
 #include <linux/kernel.h>
@@ -21,7 +18,6 @@
 #include <linux/audit.h>
 #include <linux/seccomp.h>
 #include <linux/signal.h>
-#include <linux/workqueue.h>
 #include <linux/perf_event.h>
 #include <linux/hw_breakpoint.h>
 
@@ -35,7 +31,6 @@
 #include <asm/desc.h>
 #include <asm/prctl.h>
 #include <asm/proto.h>
-#include <asm/ds.h>
 #include <asm/hw_breakpoint.h>
 
 #include "tls.h"
@@ -788,342 +783,6 @@ static int ioperm_get(struct task_struct *target,
 				   0, IO_BITMAP_BYTES);
 }
 
-#ifdef CONFIG_X86_PTRACE_BTS
-/*
- * A branch trace store context.
- *
- * Contexts may only be installed by ptrace_bts_config() and only for
- * ptraced tasks.
- *
- * Contexts are destroyed when the tracee is detached from the tracer.
- * The actual destruction work requires interrupts enabled, so the
- * work is deferred and will be scheduled during __ptrace_unlink().
- *
- * Contexts hold an additional task_struct reference on the traced
- * task, as well as a reference on the tracer's mm.
- *
- * Ptrace already holds a task_struct for the duration of ptrace operations,
- * but since destruction is deferred, it may be executed after both
- * tracer and tracee exited.
- */
-struct bts_context {
-	/* The branch trace handle. */
-	struct bts_tracer	*tracer;
-
-	/* The buffer used to store the branch trace and its size. */
-	void			*buffer;
-	unsigned int		size;
-
-	/* The mm that paid for the above buffer. */
-	struct mm_struct	*mm;
-
-	/* The task this context belongs to. */
-	struct task_struct	*task;
-
-	/* The signal to send on a bts buffer overflow. */
-	unsigned int		bts_ovfl_signal;
-
-	/* The work struct to destroy a context. */
-	struct work_struct	work;
-};
-
-static int alloc_bts_buffer(struct bts_context *context, unsigned int size)
-{
-	void *buffer = NULL;
-	int err = -ENOMEM;
-
-	err = account_locked_memory(current->mm, current->signal->rlim, size);
-	if (err < 0)
-		return err;
-
-	buffer = kzalloc(size, GFP_KERNEL);
-	if (!buffer)
-		goto out_refund;
-
-	context->buffer = buffer;
-	context->size = size;
-	context->mm = get_task_mm(current);
-
-	return 0;
-
- out_refund:
-	refund_locked_memory(current->mm, size);
-	return err;
-}
-
-static inline void free_bts_buffer(struct bts_context *context)
-{
-	if (!context->buffer)
-		return;
-
-	kfree(context->buffer);
-	context->buffer = NULL;
-
-	refund_locked_memory(context->mm, context->size);
-	context->size = 0;
-
-	mmput(context->mm);
-	context->mm = NULL;
-}
-
-static void free_bts_context_work(struct work_struct *w)
-{
-	struct bts_context *context;
-
-	context = container_of(w, struct bts_context, work);
-
-	ds_release_bts(context->tracer);
-	put_task_struct(context->task);
-	free_bts_buffer(context);
-	kfree(context);
-}
-
-static inline void free_bts_context(struct bts_context *context)
-{
-	INIT_WORK(&context->work, free_bts_context_work);
-	schedule_work(&context->work);
-}
-
-static inline struct bts_context *alloc_bts_context(struct task_struct *task)
-{
-	struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (context) {
-		context->task = task;
-		task->bts = context;
-
-		get_task_struct(task);
-	}
-
-	return context;
-}
-
-static int ptrace_bts_read_record(struct task_struct *child, size_t index,
-				  struct bts_struct __user *out)
-{
-	struct bts_context *context;
-	const struct bts_trace *trace;
-	struct bts_struct bts;
-	const unsigned char *at;
-	int error;
-
-	context = child->bts;
-	if (!context)
-		return -ESRCH;
-
-	trace = ds_read_bts(context->tracer);
-	if (!trace)
-		return -ESRCH;
-
-	at = trace->ds.top - ((index + 1) * trace->ds.size);
-	if ((void *)at < trace->ds.begin)
-		at += (trace->ds.n * trace->ds.size);
-
-	if (!trace->read)
-		return -EOPNOTSUPP;
-
-	error = trace->read(context->tracer, at, &bts);
-	if (error < 0)
-		return error;
-
-	if (copy_to_user(out, &bts, sizeof(bts)))
-		return -EFAULT;
-
-	return sizeof(bts);
-}
-
-static int ptrace_bts_drain(struct task_struct *child,
-			    long size,
-			    struct bts_struct __user *out)
-{
-	struct bts_context *context;
-	const struct bts_trace *trace;
-	const unsigned char *at;
-	int error, drained = 0;
-
-	context = child->bts;
-	if (!context)
-		return -ESRCH;
-
-	trace = ds_read_bts(context->tracer);
-	if (!trace)
-		return -ESRCH;
-
-	if (!trace->read)
-		return -EOPNOTSUPP;
-
-	if (size < (trace->ds.top - trace->ds.begin))
-		return -EIO;
-
-	for (at = trace->ds.begin; (void *)at < trace->ds.top;
-	     out++, drained++, at += trace->ds.size) {
-		struct bts_struct bts;
-
-		error = trace->read(context->tracer, at, &bts);
-		if (error < 0)
-			return error;
-
-		if (copy_to_user(out, &bts, sizeof(bts)))
-			return -EFAULT;
-	}
-
-	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
-
-	error = ds_reset_bts(context->tracer);
-	if (error < 0)
-		return error;
-
-	return drained;
-}
-
-static int ptrace_bts_config(struct task_struct *child,
-			     long cfg_size,
-			     const struct ptrace_bts_config __user *ucfg)
-{
-	struct bts_context *context;
-	struct ptrace_bts_config cfg;
-	unsigned int flags = 0;
-
-	if (cfg_size < sizeof(cfg))
-		return -EIO;
-
-	if (copy_from_user(&cfg, ucfg, sizeof(cfg)))
-		return -EFAULT;
-
-	context = child->bts;
-	if (!context)
-		context = alloc_bts_context(child);
-	if (!context)
-		return -ENOMEM;
-
-	if (cfg.flags & PTRACE_BTS_O_SIGNAL) {
-		if (!cfg.signal)
-			return -EINVAL;
-
-		return -EOPNOTSUPP;
-		context->bts_ovfl_signal = cfg.signal;
-	}
-
-	ds_release_bts(context->tracer);
-	context->tracer = NULL;
-
-	if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) {
-		int err;
-
-		free_bts_buffer(context);
-		if (!cfg.size)
-			return 0;
-
-		err = alloc_bts_buffer(context, cfg.size);
-		if (err < 0)
-			return err;
-	}
-
-	if (cfg.flags & PTRACE_BTS_O_TRACE)
-		flags |= BTS_USER;
-
-	if (cfg.flags & PTRACE_BTS_O_SCHED)
-		flags |= BTS_TIMESTAMPS;
-
-	context->tracer =
-		ds_request_bts_task(child, context->buffer, context->size,
-				    NULL, (size_t)-1, flags);
-	if (unlikely(IS_ERR(context->tracer))) {
-		int error = PTR_ERR(context->tracer);
-
-		free_bts_buffer(context);
-		context->tracer = NULL;
-		return error;
-	}
-
-	return sizeof(cfg);
-}
-
-static int ptrace_bts_status(struct task_struct *child,
-			     long cfg_size,
-			     struct ptrace_bts_config __user *ucfg)
-{
-	struct bts_context *context;
-	const struct bts_trace *trace;
-	struct ptrace_bts_config cfg;
-
-	context = child->bts;
-	if (!context)
-		return -ESRCH;
-
-	if (cfg_size < sizeof(cfg))
-		return -EIO;
-
-	trace = ds_read_bts(context->tracer);
-	if (!trace)
-		return -ESRCH;
-
-	memset(&cfg, 0, sizeof(cfg));
-	cfg.size	= trace->ds.end - trace->ds.begin;
-	cfg.signal	= context->bts_ovfl_signal;
-	cfg.bts_size	= sizeof(struct bts_struct);
-
-	if (cfg.signal)
-		cfg.flags |= PTRACE_BTS_O_SIGNAL;
-
-	if (trace->ds.flags & BTS_USER)
-		cfg.flags |= PTRACE_BTS_O_TRACE;
-
-	if (trace->ds.flags & BTS_TIMESTAMPS)
-		cfg.flags |= PTRACE_BTS_O_SCHED;
-
-	if (copy_to_user(ucfg, &cfg, sizeof(cfg)))
-		return -EFAULT;
-
-	return sizeof(cfg);
-}
-
-static int ptrace_bts_clear(struct task_struct *child)
-{
-	struct bts_context *context;
-	const struct bts_trace *trace;
-
-	context = child->bts;
-	if (!context)
-		return -ESRCH;
-
-	trace = ds_read_bts(context->tracer);
-	if (!trace)
-		return -ESRCH;
-
-	memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size);
-
-	return ds_reset_bts(context->tracer);
-}
-
-static int ptrace_bts_size(struct task_struct *child)
-{
-	struct bts_context *context;
-	const struct bts_trace *trace;
-
-	context = child->bts;
-	if (!context)
-		return -ESRCH;
-
-	trace = ds_read_bts(context->tracer);
-	if (!trace)
-		return -ESRCH;
-
-	return (trace->ds.top - trace->ds.begin) / trace->ds.size;
-}
-
-/*
- * Called from __ptrace_unlink() after the child has been moved back
- * to its original parent.
- */
-void ptrace_bts_untrace(struct task_struct *child)
-{
-	if (unlikely(child->bts)) {
-		free_bts_context(child->bts);
-		child->bts = NULL;
-	}
-}
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 /*
  * Called by kernel/ptrace.c when detaching..
  *
@@ -1251,39 +910,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 		break;
 #endif
 
-	/*
-	 * These bits need more cooking - not enabled yet:
-	 */
-#ifdef CONFIG_X86_PTRACE_BTS
-	case PTRACE_BTS_CONFIG:
-		ret = ptrace_bts_config
-			(child, data, (struct ptrace_bts_config __user *)addr);
-		break;
-
-	case PTRACE_BTS_STATUS:
-		ret = ptrace_bts_status
-			(child, data, (struct ptrace_bts_config __user *)addr);
-		break;
-
-	case PTRACE_BTS_SIZE:
-		ret = ptrace_bts_size(child);
-		break;
-
-	case PTRACE_BTS_GET:
-		ret = ptrace_bts_read_record
-			(child, data, (struct bts_struct __user *) addr);
-		break;
-
-	case PTRACE_BTS_CLEAR:
-		ret = ptrace_bts_clear(child);
-		break;
-
-	case PTRACE_BTS_DRAIN:
-		ret = ptrace_bts_drain
-			(child, data, (struct bts_struct __user *) addr);
-		break;
-#endif /* CONFIG_X86_PTRACE_BTS */
-
 	default:
 		ret = ptrace_request(child, request, addr, data);
 		break;
@@ -1543,14 +1169,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 	case PTRACE_GET_THREAD_AREA:
 	case PTRACE_SET_THREAD_AREA:
-#ifdef CONFIG_X86_PTRACE_BTS
-	case PTRACE_BTS_CONFIG:
-	case PTRACE_BTS_STATUS:
-	case PTRACE_BTS_SIZE:
-	case PTRACE_BTS_GET:
-	case PTRACE_BTS_CLEAR:
-	case PTRACE_BTS_DRAIN:
-#endif /* CONFIG_X86_PTRACE_BTS */
 		return arch_ptrace(child, request, addr, data);
 
 	default:
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index 3149032ff107..7beba0769a8c 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -157,22 +157,6 @@ static int enable_single_step(struct task_struct *child)
 	return 1;
 }
 
-/*
- * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running.
- */
-static void write_debugctlmsr(struct task_struct *child, unsigned long val)
-{
-	if (child->thread.debugctlmsr == val)
-		return;
-
-	child->thread.debugctlmsr = val;
-
-	if (child != current)
-		return;
-
-	update_debugctlmsr(val);
-}
-
 /*
  * Enable single or block step.
  */
@@ -185,17 +169,9 @@ static void enable_step(struct task_struct *child, bool block)
 	 * So noone should try to use debugger block stepping in a program
 	 * that uses user-mode single stepping itself.
 	 */
-	if (enable_single_step(child) && block) {
-		set_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-		write_debugctlmsr(child,
-				  child->thread.debugctlmsr | DEBUGCTLMSR_BTF);
-	} else {
-		write_debugctlmsr(child,
-				  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
-
-		if (!child->thread.debugctlmsr)
-			clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
-	}
+	if (!enable_single_step(child))
+		return;
+	/* XXX */
 }
 
 void user_enable_single_step(struct task_struct *child)
@@ -213,11 +189,7 @@ void user_disable_single_step(struct task_struct *child)
 	/*
 	 * Make sure block stepping (BTF) is disabled.
 	 */
-	write_debugctlmsr(child,
-			  child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF);
-
-	if (!child->thread.debugctlmsr)
-		clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR);
+	/* XXX */
 
 	/* Always clear TIF_SINGLESTEP... */
 	clear_tsk_thread_flag(child, TIF_SINGLESTEP);
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 1168e4454188..e3da5d726a37 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -543,11 +543,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code)
 
 	/* DR6 may or may not be cleared by the CPU */
 	set_debugreg(0, 6);
-	/*
-	 * The processor cleared BTF, so don't mark that we need it set.
-	 */
-	clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR);
-	tsk->thread.debugctlmsr = 0;
 
 	/* Store the virtualized DR6 value */
 	tsk->thread.debugreg6 = dr6;
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 01e6adea07ec..cc12b3c556b3 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -504,18 +504,6 @@ extern int ftrace_dump_on_oops;
 #define INIT_TRACE_RECURSION
 #endif
 
-#ifdef CONFIG_HW_BRANCH_TRACER
-
-void trace_hw_branch(u64 from, u64 to);
-void trace_hw_branch_oops(void);
-
-#else /* CONFIG_HW_BRANCH_TRACER */
-
-static inline void trace_hw_branch(u64 from, u64 to) {}
-static inline void trace_hw_branch_oops(void) {}
-
-#endif /* CONFIG_HW_BRANCH_TRACER */
-
 #ifdef CONFIG_FTRACE_SYSCALLS
 
 unsigned long arch_syscall_addr(int nr);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index e70f21beb4b4..c8442b655111 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -19,7 +19,6 @@ struct anon_vma;
 struct file_ra_state;
 struct user_struct;
 struct writeback_control;
-struct rlimit;
 
 #ifndef CONFIG_DISCONTIGMEM          /* Don't use mapnrs, do it properly */
 extern unsigned long max_mapnr;
@@ -1449,9 +1448,6 @@ int vmemmap_populate_basepages(struct page *start_page,
 int vmemmap_populate(struct page *start_page, unsigned long pages, int node);
 void vmemmap_populate_print_last(void);
 
-extern int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
-				 size_t size);
-extern void refund_locked_memory(struct mm_struct *mm, size_t size);
 
 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index e1fb60729979..4272521e29e9 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -345,18 +345,6 @@ static inline void user_single_step_siginfo(struct task_struct *tsk,
 #define arch_ptrace_stop(code, info)		do { } while (0)
 #endif
 
-#ifndef arch_ptrace_untrace
-/*
- * Do machine-specific work before untracing child.
- *
- * This is called for a normal detach as well as from ptrace_exit()
- * when the tracing task dies.
- *
- * Called with write_lock(&tasklist_lock) held.
- */
-#define arch_ptrace_untrace(task)		do { } while (0)
-#endif
-
 extern int task_current_syscall(struct task_struct *target, long *callno,
 				unsigned long args[6], unsigned int maxargs,
 				unsigned long *sp, unsigned long *pc);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf7..e0447c64af6a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -99,7 +99,6 @@ struct futex_pi_state;
 struct robust_list_head;
 struct bio_list;
 struct fs_struct;
-struct bts_context;
 struct perf_event_context;
 
 /*
@@ -1272,12 +1271,6 @@ struct task_struct {
 	struct list_head ptraced;
 	struct list_head ptrace_entry;
 
-	/*
-	 * This is the tracer handle for the ptrace BTS extension.
-	 * This field actually belongs to the ptracer task.
-	 */
-	struct bts_context *bts;
-
 	/* PID/PID hash table linkage. */
 	struct pid_link pids[PIDTYPE_MAX];
 	struct list_head thread_group;
@@ -2123,10 +2116,8 @@ extern void set_task_comm(struct task_struct *tsk, char *from);
 extern char *get_task_comm(char *to, struct task_struct *tsk);
 
 #ifdef CONFIG_SMP
-extern void wait_task_context_switch(struct task_struct *p);
 extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
 #else
-static inline void wait_task_context_switch(struct task_struct *p) {}
 static inline unsigned long wait_task_inactive(struct task_struct *p,
 					       long match_state)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 4799c5f0e6d0..d67f1dbfbe03 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1108,9 +1108,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.do_batch = 0;
 	p->memcg_batch.memcg = NULL;
 #endif
-
-	p->bts = NULL;
-
 	p->stack_start = stack_start;
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 42ad8ae729a0..9fb51237b18c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -76,7 +76,6 @@ void __ptrace_unlink(struct task_struct *child)
 	child->parent = child->real_parent;
 	list_del_init(&child->ptrace_entry);
 
-	arch_ptrace_untrace(child);
 	if (task_is_traced(child))
 		ptrace_untrace(child);
 }
diff --git a/kernel/sched.c b/kernel/sched.c
index 9ab3cd7858d3..117b7cad31b3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2076,49 +2076,6 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 	return 1;
 }
 
-/*
- * wait_task_context_switch -	wait for a thread to complete at least one
- *				context switch.
- *
- * @p must not be current.
- */
-void wait_task_context_switch(struct task_struct *p)
-{
-	unsigned long nvcsw, nivcsw, flags;
-	int running;
-	struct rq *rq;
-
-	nvcsw	= p->nvcsw;
-	nivcsw	= p->nivcsw;
-	for (;;) {
-		/*
-		 * The runqueue is assigned before the actual context
-		 * switch. We need to take the runqueue lock.
-		 *
-		 * We could check initially without the lock but it is
-		 * very likely that we need to take the lock in every
-		 * iteration.
-		 */
-		rq = task_rq_lock(p, &flags);
-		running = task_running(rq, p);
-		task_rq_unlock(rq, &flags);
-
-		if (likely(!running))
-			break;
-		/*
-		 * The switch count is incremented before the actual
-		 * context switch. We thus wait for two switches to be
-		 * sure at least one completed.
-		 */
-		if ((p->nvcsw - nvcsw) > 1)
-			break;
-		if ((p->nivcsw - nivcsw) > 1)
-			break;
-
-		cpu_relax();
-	}
-}
-
 /*
  * wait_task_inactive - wait for a thread to unschedule.
  *
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 13e13d428cd3..8b1797c4545b 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -44,9 +44,6 @@ config HAVE_FTRACE_MCOUNT_RECORD
 	help
 	  See Documentation/trace/ftrace-design.txt
 
-config HAVE_HW_BRANCH_TRACER
-	bool
-
 config HAVE_SYSCALL_TRACEPOINTS
 	bool
 	help
@@ -374,14 +371,6 @@ config STACK_TRACER
 
 	  Say N if unsure.
 
-config HW_BRANCH_TRACER
-	depends on HAVE_HW_BRANCH_TRACER
-	bool "Trace hw branches"
-	select GENERIC_TRACER
-	help
-	  This tracer records all branches on the system in a circular
-	  buffer, giving access to the last N branches for each cpu.
-
 config KMEMTRACE
 	bool "Trace SLAB allocations"
 	select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 78edc6490038..ffb1a5b0550e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -41,7 +41,6 @@ obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
 obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
 obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_HW_BRANCH_TRACER) += trace_hw_branches.o
 obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
 obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
 obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 2825ef2c0b15..bec2c973ff0c 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -34,7 +34,6 @@ enum trace_type {
 	TRACE_GRAPH_RET,
 	TRACE_GRAPH_ENT,
 	TRACE_USER_STACK,
-	TRACE_HW_BRANCHES,
 	TRACE_KMEM_ALLOC,
 	TRACE_KMEM_FREE,
 	TRACE_BLK,
@@ -229,7 +228,6 @@ extern void __ftrace_bad_type(void);
 			  TRACE_GRAPH_ENT);		\
 		IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry,	\
 			  TRACE_GRAPH_RET);		\
-		IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
 		IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry,	\
 			  TRACE_KMEM_ALLOC);	\
 		IF_ASSIGN(var, ent, struct kmemtrace_free_entry,	\
@@ -467,8 +465,6 @@ extern int trace_selftest_startup_sysprof(struct tracer *trace,
 					       struct trace_array *tr);
 extern int trace_selftest_startup_branch(struct tracer *trace,
 					 struct trace_array *tr);
-extern int trace_selftest_startup_hw_branches(struct tracer *trace,
-					      struct trace_array *tr);
 extern int trace_selftest_startup_ksym(struct tracer *trace,
 					 struct trace_array *tr);
 #endif /* CONFIG_FTRACE_STARTUP_TEST */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index c16a08f399df..dc008c1240da 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -318,18 +318,6 @@ FTRACE_ENTRY(branch, trace_branch,
 		 __entry->func, __entry->file, __entry->correct)
 );
 
-FTRACE_ENTRY(hw_branch, hw_branch_entry,
-
-	TRACE_HW_BRANCHES,
-
-	F_STRUCT(
-		__field(	u64,	from	)
-		__field(	u64,	to	)
-	),
-
-	F_printk("from: %llx to: %llx", __entry->from, __entry->to)
-);
-
 FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
 
 	TRACE_KMEM_ALLOC,
diff --git a/kernel/trace/trace_hw_branches.c b/kernel/trace/trace_hw_branches.c
deleted file mode 100644
index 7b97000745f5..000000000000
--- a/kernel/trace/trace_hw_branches.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * h/w branch tracer for x86 based on BTS
- *
- * Copyright (C) 2008-2009 Intel Corporation.
- * Markus Metzger <markus.t.metzger@gmail.com>, 2008-2009
- */
-#include <linux/kallsyms.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/module.h>
-#include <linux/cpu.h>
-#include <linux/smp.h>
-#include <linux/fs.h>
-
-#include <asm/ds.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-
-#define BTS_BUFFER_SIZE (1 << 13)
-
-static DEFINE_PER_CPU(struct bts_tracer *, hwb_tracer);
-static DEFINE_PER_CPU(unsigned char[BTS_BUFFER_SIZE], hwb_buffer);
-
-#define this_tracer per_cpu(hwb_tracer, smp_processor_id())
-
-static int trace_hw_branches_enabled __read_mostly;
-static int trace_hw_branches_suspended __read_mostly;
-static struct trace_array *hw_branch_trace __read_mostly;
-
-
-static void bts_trace_init_cpu(int cpu)
-{
-	per_cpu(hwb_tracer, cpu) =
-		ds_request_bts_cpu(cpu, per_cpu(hwb_buffer, cpu),
-				   BTS_BUFFER_SIZE, NULL, (size_t)-1,
-				   BTS_KERNEL);
-
-	if (IS_ERR(per_cpu(hwb_tracer, cpu)))
-		per_cpu(hwb_tracer, cpu) = NULL;
-}
-
-static int bts_trace_init(struct trace_array *tr)
-{
-	int cpu;
-
-	hw_branch_trace = tr;
-	trace_hw_branches_enabled = 0;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		bts_trace_init_cpu(cpu);
-
-		if (likely(per_cpu(hwb_tracer, cpu)))
-			trace_hw_branches_enabled = 1;
-	}
-	trace_hw_branches_suspended = 0;
-	put_online_cpus();
-
-	/* If we could not enable tracing on a single cpu, we fail. */
-	return trace_hw_branches_enabled ? 0 : -EOPNOTSUPP;
-}
-
-static void bts_trace_reset(struct trace_array *tr)
-{
-	int cpu;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu) {
-		if (likely(per_cpu(hwb_tracer, cpu))) {
-			ds_release_bts(per_cpu(hwb_tracer, cpu));
-			per_cpu(hwb_tracer, cpu) = NULL;
-		}
-	}
-	trace_hw_branches_enabled = 0;
-	trace_hw_branches_suspended = 0;
-	put_online_cpus();
-}
-
-static void bts_trace_start(struct trace_array *tr)
-{
-	int cpu;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu)
-		if (likely(per_cpu(hwb_tracer, cpu)))
-			ds_resume_bts(per_cpu(hwb_tracer, cpu));
-	trace_hw_branches_suspended = 0;
-	put_online_cpus();
-}
-
-static void bts_trace_stop(struct trace_array *tr)
-{
-	int cpu;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu)
-		if (likely(per_cpu(hwb_tracer, cpu)))
-			ds_suspend_bts(per_cpu(hwb_tracer, cpu));
-	trace_hw_branches_suspended = 1;
-	put_online_cpus();
-}
-
-static int __cpuinit bts_hotcpu_handler(struct notifier_block *nfb,
-				     unsigned long action, void *hcpu)
-{
-	int cpu = (long)hcpu;
-
-	switch (action) {
-	case CPU_ONLINE:
-	case CPU_DOWN_FAILED:
-		/* The notification is sent with interrupts enabled. */
-		if (trace_hw_branches_enabled) {
-			bts_trace_init_cpu(cpu);
-
-			if (trace_hw_branches_suspended &&
-			    likely(per_cpu(hwb_tracer, cpu)))
-				ds_suspend_bts(per_cpu(hwb_tracer, cpu));
-		}
-		break;
-
-	case CPU_DOWN_PREPARE:
-		/* The notification is sent with interrupts enabled. */
-		if (likely(per_cpu(hwb_tracer, cpu))) {
-			ds_release_bts(per_cpu(hwb_tracer, cpu));
-			per_cpu(hwb_tracer, cpu) = NULL;
-		}
-	}
-
-	return NOTIFY_DONE;
-}
-
-static struct notifier_block bts_hotcpu_notifier __cpuinitdata = {
-	.notifier_call = bts_hotcpu_handler
-};
-
-static void bts_trace_print_header(struct seq_file *m)
-{
-	seq_puts(m, "# CPU#        TO  <-  FROM\n");
-}
-
-static enum print_line_t bts_trace_print_line(struct trace_iterator *iter)
-{
-	unsigned long symflags = TRACE_ITER_SYM_OFFSET;
-	struct trace_entry *entry = iter->ent;
-	struct trace_seq *seq = &iter->seq;
-	struct hw_branch_entry *it;
-
-	trace_assign_type(it, entry);
-
-	if (entry->type == TRACE_HW_BRANCHES) {
-		if (trace_seq_printf(seq, "%4d  ", iter->cpu) &&
-		    seq_print_ip_sym(seq, it->to, symflags) &&
-		    trace_seq_printf(seq, "\t  <-  ") &&
-		    seq_print_ip_sym(seq, it->from, symflags) &&
-		    trace_seq_printf(seq, "\n"))
-			return TRACE_TYPE_HANDLED;
-		return TRACE_TYPE_PARTIAL_LINE;
-	}
-	return TRACE_TYPE_UNHANDLED;
-}
-
-void trace_hw_branch(u64 from, u64 to)
-{
-	struct ftrace_event_call *call = &event_hw_branch;
-	struct trace_array *tr = hw_branch_trace;
-	struct ring_buffer_event *event;
-	struct ring_buffer *buf;
-	struct hw_branch_entry *entry;
-	unsigned long irq1;
-	int cpu;
-
-	if (unlikely(!tr))
-		return;
-
-	if (unlikely(!trace_hw_branches_enabled))
-		return;
-
-	local_irq_save(irq1);
-	cpu = raw_smp_processor_id();
-	if (atomic_inc_return(&tr->data[cpu]->disabled) != 1)
-		goto out;
-
-	buf = tr->buffer;
-	event = trace_buffer_lock_reserve(buf, TRACE_HW_BRANCHES,
-					  sizeof(*entry), 0, 0);
-	if (!event)
-		goto out;
-	entry	= ring_buffer_event_data(event);
-	tracing_generic_entry_update(&entry->ent, 0, from);
-	entry->ent.type = TRACE_HW_BRANCHES;
-	entry->from = from;
-	entry->to   = to;
-	if (!filter_check_discard(call, entry, buf, event))
-		trace_buffer_unlock_commit(buf, event, 0, 0);
-
- out:
-	atomic_dec(&tr->data[cpu]->disabled);
-	local_irq_restore(irq1);
-}
-
-static void trace_bts_at(const struct bts_trace *trace, void *at)
-{
-	struct bts_struct bts;
-	int err = 0;
-
-	WARN_ON_ONCE(!trace->read);
-	if (!trace->read)
-		return;
-
-	err = trace->read(this_tracer, at, &bts);
-	if (err < 0)
-		return;
-
-	switch (bts.qualifier) {
-	case BTS_BRANCH:
-		trace_hw_branch(bts.variant.lbr.from, bts.variant.lbr.to);
-		break;
-	}
-}
-
-/*
- * Collect the trace on the current cpu and write it into the ftrace buffer.
- *
- * pre: tracing must be suspended on the current cpu
- */
-static void trace_bts_cpu(void *arg)
-{
-	struct trace_array *tr = (struct trace_array *)arg;
-	const struct bts_trace *trace;
-	unsigned char *at;
-
-	if (unlikely(!tr))
-		return;
-
-	if (unlikely(atomic_read(&tr->data[raw_smp_processor_id()]->disabled)))
-		return;
-
-	if (unlikely(!this_tracer))
-		return;
-
-	trace = ds_read_bts(this_tracer);
-	if (!trace)
-		return;
-
-	for (at = trace->ds.top; (void *)at < trace->ds.end;
-	     at += trace->ds.size)
-		trace_bts_at(trace, at);
-
-	for (at = trace->ds.begin; (void *)at < trace->ds.top;
-	     at += trace->ds.size)
-		trace_bts_at(trace, at);
-}
-
-static void trace_bts_prepare(struct trace_iterator *iter)
-{
-	int cpu;
-
-	get_online_cpus();
-	for_each_online_cpu(cpu)
-		if (likely(per_cpu(hwb_tracer, cpu)))
-			ds_suspend_bts(per_cpu(hwb_tracer, cpu));
-	/*
-	 * We need to collect the trace on the respective cpu since ftrace
-	 * implicitly adds the record for the current cpu.
-	 * Once that is more flexible, we could collect the data from any cpu.
-	 */
-	on_each_cpu(trace_bts_cpu, iter->tr, 1);
-
-	for_each_online_cpu(cpu)
-		if (likely(per_cpu(hwb_tracer, cpu)))
-			ds_resume_bts(per_cpu(hwb_tracer, cpu));
-	put_online_cpus();
-}
-
-static void trace_bts_close(struct trace_iterator *iter)
-{
-	tracing_reset_online_cpus(iter->tr);
-}
-
-void trace_hw_branch_oops(void)
-{
-	if (this_tracer) {
-		ds_suspend_bts_noirq(this_tracer);
-		trace_bts_cpu(hw_branch_trace);
-		ds_resume_bts_noirq(this_tracer);
-	}
-}
-
-struct tracer bts_tracer __read_mostly =
-{
-	.name		= "hw-branch-tracer",
-	.init		= bts_trace_init,
-	.reset		= bts_trace_reset,
-	.print_header	= bts_trace_print_header,
-	.print_line	= bts_trace_print_line,
-	.start		= bts_trace_start,
-	.stop		= bts_trace_stop,
-	.open		= trace_bts_prepare,
-	.close		= trace_bts_close,
-#ifdef CONFIG_FTRACE_SELFTEST
-	.selftest	= trace_selftest_startup_hw_branches,
-#endif /* CONFIG_FTRACE_SELFTEST */
-};
-
-__init static int init_bts_trace(void)
-{
-	register_hotcpu_notifier(&bts_hotcpu_notifier);
-	return register_tracer(&bts_tracer);
-}
-device_initcall(init_bts_trace);
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..a7084e7c0427 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -16,7 +16,6 @@ static inline int trace_valid_entry(struct trace_entry *entry)
 	case TRACE_BRANCH:
 	case TRACE_GRAPH_ENT:
 	case TRACE_GRAPH_RET:
-	case TRACE_HW_BRANCHES:
 	case TRACE_KSYM:
 		return 1;
 	}
@@ -754,62 +753,6 @@ trace_selftest_startup_branch(struct tracer *trace, struct trace_array *tr)
 }
 #endif /* CONFIG_BRANCH_TRACER */
 
-#ifdef CONFIG_HW_BRANCH_TRACER
-int
-trace_selftest_startup_hw_branches(struct tracer *trace,
-				   struct trace_array *tr)
-{
-	struct trace_iterator *iter;
-	struct tracer tracer;
-	unsigned long count;
-	int ret;
-
-	if (!trace->open) {
-		printk(KERN_CONT "missing open function...");
-		return -1;
-	}
-
-	ret = tracer_init(trace, tr);
-	if (ret) {
-		warn_failed_init_tracer(trace, ret);
-		return ret;
-	}
-
-	/*
-	 * The hw-branch tracer needs to collect the trace from the various
-	 * cpu trace buffers - before tracing is stopped.
-	 */
-	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
-	if (!iter)
-		return -ENOMEM;
-
-	memcpy(&tracer, trace, sizeof(tracer));
-
-	iter->trace = &tracer;
-	iter->tr = tr;
-	iter->pos = -1;
-	mutex_init(&iter->mutex);
-
-	trace->open(iter);
-
-	mutex_destroy(&iter->mutex);
-	kfree(iter);
-
-	tracing_stop();
-
-	ret = trace_test_buffer(tr, &count);
-	trace->reset(tr);
-	tracing_start();
-
-	if (!ret && !count) {
-		printk(KERN_CONT "no entries found..");
-		ret = -1;
-	}
-
-	return ret;
-}
-#endif /* CONFIG_HW_BRANCH_TRACER */
-
 #ifdef CONFIG_KSYM_TRACER
 static int ksym_selftest_dummy;
 
diff --git a/mm/mlock.c b/mm/mlock.c
index 8f4e2dfceec1..3f82720e0515 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -607,44 +607,3 @@ void user_shm_unlock(size_t size, struct user_struct *user)
 	spin_unlock(&shmlock_user_lock);
 	free_uid(user);
 }
-
-int account_locked_memory(struct mm_struct *mm, struct rlimit *rlim,
-			  size_t size)
-{
-	unsigned long lim, vm, pgsz;
-	int error = -ENOMEM;
-
-	pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-	down_write(&mm->mmap_sem);
-
-	lim = ACCESS_ONCE(rlim[RLIMIT_AS].rlim_cur) >> PAGE_SHIFT;
-	vm   = mm->total_vm + pgsz;
-	if (lim < vm)
-		goto out;
-
-	lim = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur) >> PAGE_SHIFT;
-	vm   = mm->locked_vm + pgsz;
-	if (lim < vm)
-		goto out;
-
-	mm->total_vm  += pgsz;
-	mm->locked_vm += pgsz;
-
-	error = 0;
- out:
-	up_write(&mm->mmap_sem);
-	return error;
-}
-
-void refund_locked_memory(struct mm_struct *mm, size_t size)
-{
-	unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT;
-
-	down_write(&mm->mmap_sem);
-
-	mm->total_vm  -= pgsz;
-	mm->locked_vm -= pgsz;
-
-	up_write(&mm->mmap_sem);
-}
-- 
cgit v1.2.3


From 70596b612c04694806a31dd389bd796c035085fa Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:19 +0000
Subject: net-caif: add CAIF protocol definitions

Add CAIF definitions to existing header files.
Files: if_arp.h, if_ether.h, socket.h.
Types: ARPHRD_CAIF, ETH_P_CAIF, AF_CAIF, PF_CAIF, SOL_CAIF, N_CAIF

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_arp.h   | 1 +
 include/linux/if_ether.h | 1 +
 include/linux/socket.h   | 5 ++++-
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h
index e80b7f88f7c6..6d722f41ee7c 100644
--- a/include/linux/if_arp.h
+++ b/include/linux/if_arp.h
@@ -90,6 +90,7 @@
 
 #define ARPHRD_PHONET	820		/* PhoNet media type		*/
 #define ARPHRD_PHONET_PIPE 821		/* PhoNet pipe header		*/
+#define ARPHRD_CAIF	822		/* CAIF media type		*/
 
 #define ARPHRD_VOID	  0xFFFF	/* Void type, nothing is known */
 #define ARPHRD_NONE	  0xFFFE	/* zero header length */
diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h
index 299b4121f914..bed7a4682b90 100644
--- a/include/linux/if_ether.h
+++ b/include/linux/if_ether.h
@@ -109,6 +109,7 @@
 #define ETH_P_TRAILER	0x001C		/* Trailer switch tagging	*/
 #define ETH_P_PHONET	0x00F5		/* Nokia Phonet frames          */
 #define ETH_P_IEEE802154 0x00F6		/* IEEE802.15.4 frame		*/
+#define ETH_P_CAIF	0x00F7		/* ST-Ericsson CAIF protocol	*/
 
 /*
  *	This is an Ethernet frame header.
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 7b3aae2052a6..960659bd8f78 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -189,7 +189,8 @@ struct ucred {
 #define AF_ISDN		34	/* mISDN sockets 		*/
 #define AF_PHONET	35	/* Phonet sockets		*/
 #define AF_IEEE802154	36	/* IEEE802154 sockets		*/
-#define AF_MAX		37	/* For now.. */
+#define AF_CAIF		37	/* CAIF sockets			*/
+#define AF_MAX		38	/* For now.. */
 
 /* Protocol families, same as address families. */
 #define PF_UNSPEC	AF_UNSPEC
@@ -229,6 +230,7 @@ struct ucred {
 #define PF_ISDN		AF_ISDN
 #define PF_PHONET	AF_PHONET
 #define PF_IEEE802154	AF_IEEE802154
+#define PF_CAIF		AF_CAIF
 #define PF_MAX		AF_MAX
 
 /* Maximum queue length specifiable by listen.  */
@@ -300,6 +302,7 @@ struct ucred {
 #define SOL_PNPIPE	275
 #define SOL_RDS		276
 #define SOL_IUCV	277
+#define SOL_CAIF	278
 
 /* IPX options */
 #define IPX_TYPE	1
-- 
cgit v1.2.3


From f671c54207d8a47129f35a84569fdfda614d2439 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:20 +0000
Subject: net-caif: add CAIF socket and configuration headers

Add CAIF types for Socket Address, Socket Options,
and configuration parameters for the GPRS IP network interface.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/caif/caif_socket.h | 164 +++++++++++++++++++++++++++++++++++++++
 include/linux/caif/if_caif.h     |  34 ++++++++
 2 files changed, 198 insertions(+)
 create mode 100644 include/linux/caif/caif_socket.h
 create mode 100644 include/linux/caif/if_caif.h

(limited to 'include/linux')

diff --git a/include/linux/caif/caif_socket.h b/include/linux/caif/caif_socket.h
new file mode 100644
index 000000000000..8e5c8444a3f4
--- /dev/null
+++ b/include/linux/caif/caif_socket.h
@@ -0,0 +1,164 @@
+/* linux/caif_socket.h
+ * CAIF Definitions for CAIF socket and network layer
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	 Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef _LINUX_CAIF_SOCKET_H
+#define _LINUX_CAIF_SOCKET_H
+
+#include <linux/types.h>
+
+#ifdef __KERNEL__
+#include <linux/socket.h>
+#else
+#include <sys/socket.h>
+#endif
+
+
+/**
+ * enum caif_link_selector -    Physical Link Selection.
+ * @CAIF_LINK_HIGH_BANDW:	Physical interface for high-bandwidth
+ *				traffic.
+ * @CAIF_LINK_LOW_LATENCY:	Physical interface for low-latency
+ *				traffic.
+ *
+ * CAIF Link Layers can register their link properties.
+ * This enum is used for choosing between CAIF Link Layers when
+ * setting up CAIF Channels when multiple CAIF Link Layers exists.
+ */
+enum caif_link_selector {
+	CAIF_LINK_HIGH_BANDW,
+	CAIF_LINK_LOW_LATENCY
+};
+
+/**
+ * enum caif_channel_priority - CAIF channel priorities.
+ *
+ * @CAIF_PRIO_MIN:	Min priority for a channel.
+ * @CAIF_PRIO_LOW:	Low-priority channel.
+ * @CAIF_PRIO_NORMAL:	Normal/default priority level.
+ * @CAIF_PRIO_HIGH:	High priority level
+ * @CAIF_PRIO_MAX:	Max priority for channel
+ *
+ * Priority can be set on CAIF Channels in order to
+ * prioritize between traffic on different CAIF Channels.
+ * These priority levels are recommended, but the priority value
+ * is not restricted to the values defined in this enum, any value
+ * between CAIF_PRIO_MIN and CAIF_PRIO_MAX could be used.
+ */
+enum caif_channel_priority {
+	CAIF_PRIO_MIN	 = 0x01,
+	CAIF_PRIO_LOW	 = 0x04,
+	CAIF_PRIO_NORMAL = 0x0f,
+	CAIF_PRIO_HIGH	 = 0x14,
+	CAIF_PRIO_MAX	 = 0x1F
+};
+
+/**
+ * enum caif_protocol_type  -	CAIF Channel type.
+ * @CAIFPROTO_AT:		Classic AT channel.
+ * @CAIFPROTO_DATAGRAM:		Datagram channel.
+ * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
+ * @CAIFPROTO_UTIL:		Utility (Psock) channel.
+ * @CAIFPROTO_RFM:		Remote File Manager
+ *
+ * This enum defines the CAIF Channel type to be used. This defines
+ * the service to connect to on the modem.
+ */
+enum caif_protocol_type {
+	CAIFPROTO_AT,
+	CAIFPROTO_DATAGRAM,
+	CAIFPROTO_DATAGRAM_LOOP,
+	CAIFPROTO_UTIL,
+	CAIFPROTO_RFM,
+	_CAIFPROTO_MAX
+};
+#define	CAIFPROTO_MAX _CAIFPROTO_MAX
+
+/**
+ * enum caif_at_type - AT Service Endpoint
+ * @CAIF_ATTYPE_PLAIN:	     Connects to a plain vanilla AT channel.
+ */
+enum caif_at_type {
+	CAIF_ATTYPE_PLAIN = 2
+};
+
+/**
+ * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
+ * @u:			     Union of address data 'switched' by family.
+ * :
+ * @u.at:                    Applies when family = CAIFPROTO_AT.
+ *
+ * @u.at.type:               Type of AT link to set up (enum caif_at_type).
+ *
+ * @u.util:                  Applies when family = CAIFPROTO_UTIL
+ *
+ * @u.util.service:          Utility service name.
+ *
+ * @u.dgm:                   Applies when family = CAIFPROTO_DATAGRAM
+ *
+ * @u.dgm.connection_id:     Datagram connection id.
+ *
+ * @u.dgm.nsapi:             NSAPI of the PDP-Context.
+ *
+ * @u.rfm:                   Applies when family = CAIFPROTO_RFM
+ *
+ * @u.rfm.connection_id:     Connection ID for RFM.
+ *
+ * @u.rfm.volume:            Volume to mount.
+ *
+ * Description:
+ * This structure holds the connect parameters used for setting up a
+ * CAIF Channel. It defines the service to connect to on the modem.
+ */
+struct sockaddr_caif {
+	sa_family_t  family;
+	union {
+		struct {
+			__u8  type;		/* type: enum caif_at_type */
+		} at;				/* CAIFPROTO_AT */
+		struct {
+			char	  service[16];
+		} util;				/* CAIFPROTO_UTIL */
+		union {
+			__u32 connection_id;
+			__u8  nsapi;
+		} dgm;				/* CAIFPROTO_DATAGRAM(_LOOP)*/
+		struct {
+			__u32 connection_id;
+			char	  volume[16];
+		} rfm;				/* CAIFPROTO_RFM */
+	} u;
+};
+
+/**
+ * enum caif_socket_opts - CAIF option values for getsockopt and setsockopt.
+ *
+ * @CAIFSO_LINK_SELECT:		Selector used if multiple CAIF Link layers are
+ *				available. Either a high bandwidth
+ *				link can be selected (CAIF_LINK_HIGH_BANDW) or
+ *				a low latency link (CAIF_LINK_LOW_LATENCY).
+ *                              This option is of type __u32.
+ *				Alternatively SO_BINDTODEVICE can be used.
+ *
+ * @CAIFSO_REQ_PARAM:		Used to set the request parameters for a
+ *				utility channel. (maximum 256 bytes). This
+ *				option must be set before connecting.
+ *
+ * @CAIFSO_RSP_PARAM:		Gets the response parameters for a utility
+ *				channel. (maximum 256 bytes). This option
+ *				is valid after a successful connect.
+ *
+ *
+ * This enum defines the CAIF Socket options to be used on a socket
+ *
+ */
+enum caif_socket_opts {
+	CAIFSO_LINK_SELECT	= 127,
+	CAIFSO_REQ_PARAM	= 128,
+	CAIFSO_RSP_PARAM	= 129,
+};
+
+#endif /* _LINUX_CAIF_SOCKET_H */
diff --git a/include/linux/caif/if_caif.h b/include/linux/caif/if_caif.h
new file mode 100644
index 000000000000..5e7eed4edf51
--- /dev/null
+++ b/include/linux/caif/if_caif.h
@@ -0,0 +1,34 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland/ sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#ifndef IF_CAIF_H_
+#define IF_CAIF_H_
+#include <linux/sockios.h>
+#include <linux/types.h>
+#include <linux/socket.h>
+
+/**
+ * enum ifla_caif - CAIF NetlinkRT parameters.
+ * @IFLA_CAIF_IPV4_CONNID:  Connection ID for IPv4 PDP Context.
+ *			    The type of attribute is NLA_U32.
+ * @IFLA_CAIF_IPV6_CONNID:  Connection ID for IPv6 PDP Context.
+ *			    The type of attribute is NLA_U32.
+ * @IFLA_CAIF_LOOPBACK:	    If different from zero, device is doing loopback
+ *			    The type of attribute is NLA_U8.
+ *
+ * When using RT Netlink to create, destroy or configure a CAIF IP interface,
+ * enum ifla_caif is used to specify the configuration attributes.
+ */
+enum ifla_caif {
+	__IFLA_CAIF_UNSPEC,
+	IFLA_CAIF_IPV4_CONNID,
+	IFLA_CAIF_IPV6_CONNID,
+	IFLA_CAIF_LOOPBACK,
+	__IFLA_CAIF_MAX
+};
+#define	IFLA_CAIF_MAX (__IFLA_CAIF_MAX-1)
+
+#endif /*IF_CAIF_H_*/
-- 
cgit v1.2.3


From 9b27105b4a44c54bf91ecd7d0315034ae75684f7 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Tue, 30 Mar 2010 13:56:30 +0000
Subject: net-caif-driver: add CAIF serial driver (ldisc)

Add CAIF Serial driver. This driver is implemented as a line discipline.

caif_serial uses the following module parameters:
ser_use_stx - specifies if STart of frame eXtension is in use.
ser_loop    - sets the interface in loopback mode.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig            |   2 +
 drivers/net/Makefile           |   1 +
 drivers/net/caif/Kconfig       |  17 ++
 drivers/net/caif/Makefile      |  12 ++
 drivers/net/caif/caif_serial.c | 441 +++++++++++++++++++++++++++++++++++++++++
 include/linux/tty.h            |   4 +-
 6 files changed, 475 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/caif/Kconfig
 create mode 100644 drivers/net/caif/Makefile
 create mode 100644 drivers/net/caif/caif_serial.c

(limited to 'include/linux')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 4f1f9741c71d..da6657c11667 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -2837,6 +2837,8 @@ source "drivers/ieee802154/Kconfig"
 
 source "drivers/s390/net/Kconfig"
 
+source "drivers/net/caif/Kconfig"
+
 config XEN_NETDEV_FRONTEND
 	tristate "Xen network device frontend driver"
 	depends on XEN
diff --git a/drivers/net/Makefile b/drivers/net/Makefile
index 478886234c28..ba06c1c0db8b 100644
--- a/drivers/net/Makefile
+++ b/drivers/net/Makefile
@@ -290,5 +290,6 @@ obj-$(CONFIG_VIRTIO_NET) += virtio_net.o
 obj-$(CONFIG_SFC) += sfc/
 
 obj-$(CONFIG_WIMAX) += wimax/
+obj-$(CONFIG_CAIF) += caif/
 
 obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
new file mode 100644
index 000000000000..0b28e0107697
--- /dev/null
+++ b/drivers/net/caif/Kconfig
@@ -0,0 +1,17 @@
+#
+# CAIF physical drivers
+#
+
+if CAIF
+
+comment "CAIF transport drivers"
+
+config CAIF_TTY
+	tristate "CAIF TTY transport driver"
+	default n
+	---help---
+	The CAIF TTY transport driver is a Line Discipline (ldisc)
+	identified as N_CAIF. When this ldisc is opened from user space
+	it will redirect the TTY's traffic into the CAIF stack.
+
+endif # CAIF
diff --git a/drivers/net/caif/Makefile b/drivers/net/caif/Makefile
new file mode 100644
index 000000000000..52b6d1f826f8
--- /dev/null
+++ b/drivers/net/caif/Makefile
@@ -0,0 +1,12 @@
+ifeq ($(CONFIG_CAIF_DEBUG),y)
+CAIF_DBG_FLAGS := -DDEBUG
+endif
+
+KBUILD_EXTRA_SYMBOLS=net/caif/Module.symvers
+
+ccflags-y := $(CAIF_FLAGS) $(CAIF_DBG_FLAGS)
+clean-dirs:= .tmp_versions
+clean-files:= Module.symvers modules.order *.cmd *~
+
+# Serial interface
+obj-$(CONFIG_CAIF_TTY) += caif_serial.o
diff --git a/drivers/net/caif/caif_serial.c b/drivers/net/caif/caif_serial.c
new file mode 100644
index 000000000000..3502f607117f
--- /dev/null
+++ b/drivers/net/caif/caif_serial.c
@@ -0,0 +1,441 @@
+/*
+ * Copyright (C) ST-Ericsson AB 2010
+ * Author:	Sjur Brendeland / sjur.brandeland@stericsson.com
+ * License terms: GNU General Public License (GPL) version 2
+ */
+
+#include <linux/init.h>
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/types.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/tty.h>
+#include <linux/file.h>
+#include <linux/if_arp.h>
+#include <net/caif/caif_device.h>
+#include <net/caif/cfcnfg.h>
+#include <linux/err.h>
+#include <linux/debugfs.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Sjur Brendeland<sjur.brandeland@stericsson.com>");
+MODULE_DESCRIPTION("CAIF serial device TTY line discipline");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_LDISC(N_CAIF);
+
+#define SEND_QUEUE_LOW 10
+#define SEND_QUEUE_HIGH 100
+#define CAIF_SENDING	        1 /* Bit 1 = 0x02*/
+#define CAIF_FLOW_OFF_SENT	4 /* Bit 4 = 0x10 */
+#define MAX_WRITE_CHUNK	     4096
+#define ON 1
+#define OFF 0
+#define CAIF_MAX_MTU 4096
+
+/*This list is protected by the rtnl lock. */
+static LIST_HEAD(ser_list);
+
+static int ser_loop;
+module_param(ser_loop, bool, S_IRUGO);
+MODULE_PARM_DESC(ser_loop, "Run in simulated loopback mode.");
+
+static int ser_use_stx = 1;
+module_param(ser_use_stx, bool, S_IRUGO);
+MODULE_PARM_DESC(ser_use_stx, "STX enabled or not.");
+
+static int ser_use_fcs = 1;
+
+module_param(ser_use_fcs, bool, S_IRUGO);
+MODULE_PARM_DESC(ser_use_fcs, "FCS enabled or not.");
+
+static int ser_write_chunk = MAX_WRITE_CHUNK;
+module_param(ser_write_chunk, int, S_IRUGO);
+
+MODULE_PARM_DESC(ser_write_chunk, "Maximum size of data written to UART.");
+
+static struct dentry *debugfsdir;
+
+static int caif_net_open(struct net_device *dev);
+static int caif_net_close(struct net_device *dev);
+
+struct ser_device {
+	struct caif_dev_common common;
+	struct list_head node;
+	struct net_device *dev;
+	struct sk_buff_head head;
+	struct tty_struct *tty;
+	bool tx_started;
+	unsigned long state;
+	char *tty_name;
+#ifdef CONFIG_DEBUG_FS
+	struct dentry *debugfs_tty_dir;
+	struct debugfs_blob_wrapper tx_blob;
+	struct debugfs_blob_wrapper rx_blob;
+	u8 rx_data[128];
+	u8 tx_data[128];
+	u8 tty_status;
+
+#endif
+};
+
+static void caifdev_setup(struct net_device *dev);
+static void ldisc_tx_wakeup(struct tty_struct *tty);
+#ifdef CONFIG_DEBUG_FS
+static inline void update_tty_status(struct ser_device *ser)
+{
+	ser->tty_status =
+		ser->tty->stopped << 5 |
+		ser->tty->hw_stopped << 4 |
+		ser->tty->flow_stopped << 3 |
+		ser->tty->packet << 2 |
+		ser->tty->low_latency << 1 |
+		ser->tty->warned;
+}
+static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
+{
+	ser->debugfs_tty_dir =
+			debugfs_create_dir(tty->name, debugfsdir);
+	if (!IS_ERR(ser->debugfs_tty_dir)) {
+		debugfs_create_blob("last_tx_msg", S_IRUSR,
+				ser->debugfs_tty_dir,
+				&ser->tx_blob);
+
+		debugfs_create_blob("last_rx_msg", S_IRUSR,
+				ser->debugfs_tty_dir,
+				&ser->rx_blob);
+
+		debugfs_create_x32("ser_state", S_IRUSR,
+				ser->debugfs_tty_dir,
+				(u32 *)&ser->state);
+
+		debugfs_create_x8("tty_status", S_IRUSR,
+				ser->debugfs_tty_dir,
+				&ser->tty_status);
+
+	}
+	ser->tx_blob.data = ser->tx_data;
+	ser->tx_blob.size = 0;
+	ser->rx_blob.data = ser->rx_data;
+	ser->rx_blob.size = 0;
+}
+
+static inline void debugfs_deinit(struct ser_device *ser)
+{
+	debugfs_remove_recursive(ser->debugfs_tty_dir);
+}
+
+static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size)
+{
+	if (size > sizeof(ser->rx_data))
+		size = sizeof(ser->rx_data);
+	memcpy(ser->rx_data, data, size);
+	ser->rx_blob.data = ser->rx_data;
+	ser->rx_blob.size = size;
+}
+
+static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size)
+{
+	if (size > sizeof(ser->tx_data))
+		size = sizeof(ser->tx_data);
+	memcpy(ser->tx_data, data, size);
+	ser->tx_blob.data = ser->tx_data;
+	ser->tx_blob.size = size;
+}
+#else
+static inline void debugfs_init(struct ser_device *ser, struct tty_struct *tty)
+{
+}
+
+static inline void debugfs_deinit(struct ser_device *ser)
+{
+}
+
+static inline void update_tty_status(struct ser_device *ser)
+{
+}
+
+static inline void debugfs_rx(struct ser_device *ser, const u8 *data, int size)
+{
+}
+
+static inline void debugfs_tx(struct ser_device *ser, const u8 *data, int size)
+{
+}
+
+#endif
+
+/*
+ * Line discipline receive hook: copy @count bytes from @data into a new
+ * skb and push it up the network stack as an ETH_P_CAIF frame.
+ *
+ * NOTE: @flags may contain information about break or overrun.
+ * This is not yet handled.
+ */
+static void ldisc_receive(struct tty_struct *tty, const u8 *data,
+			char *flags, int count)
+{
+	struct ser_device *ser = tty->disc_data;
+	struct sk_buff *skb;
+	u8 *p;
+
+	BUG_ON(ser->dev == NULL);	/* checked before ser->dev is used */
+
+	/*
+	 * Workaround for garbage at start of transmission,
+	 * only enable if STX handling is not enabled.
+	 */
+	if (!ser->common.use_stx && !ser->tx_started) {
+		dev_info(&ser->dev->dev,
+			"Bytes received before initial transmission - "
+			"bytes discarded.\n");
+		return;
+	}
+
+	/* Get a suitable caif packet and copy in data. */
+	skb = netdev_alloc_skb(ser->dev, count+1);
+	if (skb == NULL) {
+		++ser->dev->stats.rx_dropped;	/* no memory: drop, don't BUG */
+		return;
+	}
+	p = skb_put(skb, count);
+	memcpy(p, data, count);
+	skb->protocol = htons(ETH_P_CAIF);
+	skb_reset_mac_header(skb);
+	skb->dev = ser->dev;
+	debugfs_rx(ser, data, count);
+	/* Push received packet up the stack. */
+	if (!netif_rx_ni(skb)) {
+		ser->dev->stats.rx_packets++;
+		ser->dev->stats.rx_bytes += count;
+	} else
+		++ser->dev->stats.rx_dropped;
+	update_tty_status(ser);
+}
+
+/* Drain the send queue to the tty, or loop it back when ser_loop is set. */
+/* Returns 0, or the negative result of the tty write on error. */
+static int handle_tx(struct ser_device *ser)
+{
+	struct tty_struct *tty = ser->tty;
+	struct sk_buff *skb;
+	int tty_wr, len, room;
+
+	ser->tx_started = true;
+	/* Enter critical section; a second caller returns immediately */
+	if (test_and_set_bit(CAIF_SENDING, &ser->state))
+		return 0;
+
+	/* skb_peek is safe because handle_tx is called after skb_queue_tail */
+	while ((skb = skb_peek(&ser->head)) != NULL) {
+		/* Make sure you don't write too much */
+		len = skb->len;
+		room = tty_write_room(tty);
+		if (!room)
+			break;
+		if (room > ser_write_chunk)
+			room = ser_write_chunk;
+		if (len > room)
+			len = room;
+
+		/* Write to tty or loopback */
+		if (!ser_loop) {
+			tty_wr = tty->ops->write(tty, skb->data, len);
+			update_tty_status(ser);
+		} else {
+			tty_wr = len;
+			ldisc_receive(tty, skb->data, NULL, len);
+		}
+		/* Error on TTY ?! Bail out before touching the tx counters. */
+		if (tty_wr < 0)
+			goto error;
+
+		ser->dev->stats.tx_packets++;
+		ser->dev->stats.tx_bytes += tty_wr;
+		/* Reduce buffer written, and discard if empty */
+		skb_pull(skb, tty_wr);
+		if (skb->len == 0) {
+			struct sk_buff *tmp = skb_dequeue(&ser->head);
+			BUG_ON(tmp != skb);
+			if (in_interrupt())
+				dev_kfree_skb_irq(skb);
+			else
+				kfree_skb(skb);
+		}
+	}
+	/* Send flow on once the queue is at or below the low water mark */
+	if (ser->head.qlen <= SEND_QUEUE_LOW &&
+		test_and_clear_bit(CAIF_FLOW_OFF_SENT, &ser->state) &&
+		ser->common.flowctrl != NULL)
+				ser->common.flowctrl(ser->dev, ON);
+	clear_bit(CAIF_SENDING, &ser->state);
+	return 0;
+error:
+	clear_bit(CAIF_SENDING, &ser->state);
+	return tty_wr;
+}
+
+static int caif_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ser_device *ser;
+	BUG_ON(dev == NULL);
+	ser = netdev_priv(dev);
+
+	/* Send flow off once, on high water mark */
+	if (ser->head.qlen > SEND_QUEUE_HIGH &&
+		!test_and_set_bit(CAIF_FLOW_OFF_SENT, &ser->state) &&
+		ser->common.flowctrl != NULL)
+
+		ser->common.flowctrl(ser->dev, OFF);
+
+	skb_queue_tail(&ser->head, skb);
+	return handle_tx(ser);
+}
+
+
+static void ldisc_tx_wakeup(struct tty_struct *tty)
+{
+	struct ser_device *ser;
+	ser = tty->disc_data;
+	BUG_ON(ser == NULL);
+	BUG_ON(ser->tty != tty);
+	handle_tx(ser);
+}
+
+
+/* Line discipline open: create and register a "cf<tty>" net device. */
+static int ldisc_open(struct tty_struct *tty)
+{
+	struct ser_device *ser;
+	struct net_device *dev;
+	char name[64];
+
+	snprintf(name, sizeof(name), "cf%s", tty->name);
+	dev = alloc_netdev(sizeof(*ser), name, caifdev_setup);
+	if (!dev)
+		return -ENOMEM;
+	ser = netdev_priv(dev);
+	ser->tty = tty;
+	ser->dev = dev;
+	debugfs_init(ser, tty);
+	tty->receive_room = N_TTY_BUF_SIZE;
+	tty->disc_data = ser;
+	set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+	rtnl_lock();
+	if (register_netdevice(dev)) {
+		rtnl_unlock();
+		free_netdev(dev);
+		return -ENODEV;
+	}
+	list_add(&ser->node, &ser_list);	/* ser_list is rtnl-protected */
+	rtnl_unlock();
+	netif_stop_queue(dev);
+	update_tty_status(ser);
+	return 0;
+}
+
+static void ldisc_close(struct tty_struct *tty)
+{
+	struct ser_device *ser = tty->disc_data;
+	/* Remove may be called inside or outside of rtnl_lock */
+	int islocked = rtnl_is_locked();
+	if (!islocked)
+		rtnl_lock();
+	/* device is freed automagically by net-sysfs */
+	dev_close(ser->dev);
+	unregister_netdevice(ser->dev);
+	list_del(&ser->node);
+	debugfs_deinit(ser);
+	if (!islocked)
+		rtnl_unlock();
+}
+
+/* The line discipline structure. */
+static struct tty_ldisc_ops caif_ldisc = {
+	.owner =	THIS_MODULE,
+	.magic =	TTY_LDISC_MAGIC,
+	.name =		"n_caif",
+	.open =		ldisc_open,
+	.close =	ldisc_close,
+	.receive_buf =	ldisc_receive,
+	.write_wakeup =	ldisc_tx_wakeup
+};
+
+static int register_ldisc(void)
+{
+	int result;
+	result = tty_register_ldisc(N_CAIF, &caif_ldisc);
+	if (result < 0) {
+		pr_err("cannot register CAIF ldisc=%d err=%d\n", N_CAIF,
+			result);
+		return result;
+	}
+	return result;
+}
+static const struct net_device_ops netdev_ops = {
+	.ndo_open = caif_net_open,
+	.ndo_stop = caif_net_close,
+	.ndo_start_xmit = caif_xmit
+};
+
+static void caifdev_setup(struct net_device *dev)
+{
+	struct ser_device *serdev = netdev_priv(dev);
+	dev->features = 0;
+	dev->netdev_ops = &netdev_ops;
+	dev->type = ARPHRD_CAIF;
+	dev->flags = IFF_POINTOPOINT | IFF_NOARP;
+	dev->mtu = CAIF_MAX_MTU;
+	dev->hard_header_len = CAIF_NEEDED_HEADROOM;
+	dev->tx_queue_len = 0;
+	dev->destructor = free_netdev;
+	skb_queue_head_init(&serdev->head);
+	serdev->common.link_select = CAIF_LINK_LOW_LATENCY;
+	serdev->common.use_frag = true;
+	serdev->common.use_stx = ser_use_stx;
+	serdev->common.use_fcs = ser_use_fcs;
+	serdev->dev = dev;
+}
+
+
+/* ndo_open handler: wake the transmit queue; always returns 0. */
+static int caif_net_open(struct net_device *dev)
+{
+	netif_wake_queue(dev);
+
+	return 0;
+}
+
+static int caif_net_close(struct net_device *dev)
+{
+	netif_stop_queue(dev);
+	return 0;
+}
+
+static int __init caif_ser_init(void)
+{
+	int ret;
+	ret = register_ldisc();
+	debugfsdir = debugfs_create_dir("caif_serial", NULL);
+	return ret;
+}
+
+/* Module exit: unregister remaining devices under rtnl, then the ldisc. */
+static void __exit caif_ser_exit(void)
+{
+	struct ser_device *ser, *tmp;
+	rtnl_lock();	/* ser_list is protected by the rtnl lock */
+	list_for_each_entry_safe(ser, tmp, &ser_list, node) {
+		dev_close(ser->dev);
+		unregister_netdevice(ser->dev);
+		list_del(&ser->node);
+	}
+	rtnl_unlock();
+	tty_unregister_ldisc(N_CAIF);
+	debugfs_remove_recursive(debugfsdir);
+}
+
+module_init(caif_ser_init);
+module_exit(caif_ser_exit);
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 568369a86306..71c7e9c96b23 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -23,7 +23,7 @@
  */
 #define NR_UNIX98_PTY_DEFAULT	4096      /* Default maximum for Unix98 ptys */
 #define NR_UNIX98_PTY_MAX	(1 << MINORBITS) /* Absolute limit */
-#define NR_LDISCS		20
+#define NR_LDISCS		21
 
 /* line disciplines */
 #define N_TTY		0
@@ -46,8 +46,8 @@
 #define N_GIGASET_M101	16	/* Siemens Gigaset M101 serial DECT adapter */
 #define N_SLCAN		17	/* Serial / USB serial CAN Adaptors */
 #define N_PPS		18	/* Pulse per Second */
-
 #define N_V253		19	/* Codec control over voice modem */
+#define N_CAIF		20      /* CAIF protocol for talking to modems */
 
 /*
  * This character is the same as _POSIX_VDISABLE: it cannot be used as
-- 
cgit v1.2.3


From b00fabb4020d17bda4bea59507e09fadf573088d Mon Sep 17 00:00:00 2001
From: stephen hemminger <shemminger@vyatta.com>
Date: Mon, 29 Mar 2010 14:47:27 +0000
Subject: netdev: ethtool RXHASH flag

This adds an ethtool flag and a device feature flag to allow control
of receive hashing offload.

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Acked-by: Jeff Garzik <jgarzik@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h   | 1 +
 include/linux/netdevice.h | 1 +
 net/core/ethtool.c        | 7 ++++++-
 3 files changed, 8 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b33f316bb92e..b391969a0dd9 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -310,6 +310,7 @@ struct ethtool_perm_addr {
 enum ethtool_flags {
 	ETH_FLAG_LRO		= (1 << 15),	/* LRO is enabled */
 	ETH_FLAG_NTUPLE		= (1 << 27),	/* N-tuple filters enabled */
+	ETH_FLAG_RXHASH		= (1 << 28),
 };
 
 /* The following structures are for supporting RX network flow
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 53c272f2a734..b5670ab5107c 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -785,6 +785,7 @@ struct net_device {
 #define NETIF_F_SCTP_CSUM	(1 << 25) /* SCTP checksum offload */
 #define NETIF_F_FCOE_MTU	(1 << 26) /* Supports max FCoE MTU, 2158 bytes*/
 #define NETIF_F_NTUPLE		(1 << 27) /* N-tuple filters supported */
+#define NETIF_F_RXHASH		(1 << 28) /* Receive hashing offload */
 
 	/* Segmentation offload features */
 #define NETIF_F_GSO_SHIFT	16
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index f4cb6b6299d9..73c81edde8d9 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -121,7 +121,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
  * NETIF_F_xxx values in include/linux/netdevice.h
  */
 static const u32 flags_dup_features =
-	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE);
+	(ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH);
 
 u32 ethtool_op_get_flags(struct net_device *dev)
 {
@@ -152,6 +152,11 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
 		features &= ~NETIF_F_NTUPLE;
 	}
 
+	if (data & ETH_FLAG_RXHASH)
+		features |= NETIF_F_RXHASH;
+	else
+		features &= ~NETIF_F_RXHASH;
+
 	dev->features = features;
 	return 0;
 }
-- 
cgit v1.2.3


From e446630c960946b5c1762e4eadb618becef599e7 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@misterjones.org>
Date: Mon, 29 Mar 2010 08:57:56 +0000
Subject: Add hotplug support to mcp251x driver

The chip model can now be selected directly by matching the modalias name
(instead of filling in the .model field in platform_data), which also
allows the module to be auto-loaded. The previous behaviour is of course
still supported.

Convert the two in-tree users to this feature (icontrol & zeus).
Tested on a Zeus platform (mcp2515).

Signed-off-by: Marc Zyngier <maz@misterjones.org>
Acked-by: Christian Pellegrin <chripell@fsfe.org>
Cc: Edwin Peer <epeer@tmtservices.co.za>
Acked-by: Wolfgang Grandegger <wg@grandegger.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/mach-pxa/icontrol.c         |  9 ++++-----
 arch/arm/mach-pxa/zeus.c             |  4 +---
 drivers/net/can/mcp251x.c            | 14 ++++++++++++++
 include/linux/can/platform/mcp251x.h |  4 ++--
 4 files changed, 21 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c
index 771137fc1a82..5ccb0ceff6c4 100644
--- a/arch/arm/mach-pxa/icontrol.c
+++ b/arch/arm/mach-pxa/icontrol.c
@@ -73,7 +73,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = {
 
 static struct mcp251x_platform_data mcp251x_info = {
 	.oscillator_frequency = 16E6,
-	.model                = CAN_MCP251X_MCP2515,
 	.board_specific_setup = NULL,
 	.power_enable         = NULL,
 	.transceiver_enable   = NULL
@@ -81,7 +80,7 @@ static struct mcp251x_platform_data mcp251x_info = {
 
 static struct spi_board_info mcp251x_board_info[] = {
 	{
-		.modalias        = "mcp251x",
+		.modalias        = "mcp2515",
 		.max_speed_hz    = 6500000,
 		.bus_num         = 3,
 		.chip_select     = 0,
@@ -90,7 +89,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.irq             = gpio_to_irq(ICONTROL_MCP251x_nIRQ1)
 	},
 	{
-		.modalias        = "mcp251x",
+		.modalias        = "mcp2515",
 		.max_speed_hz    = 6500000,
 		.bus_num         = 3,
 		.chip_select     = 1,
@@ -99,7 +98,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.irq             = gpio_to_irq(ICONTROL_MCP251x_nIRQ2)
 	},
 	{
-		.modalias        = "mcp251x",
+		.modalias        = "mcp2515",
 		.max_speed_hz    = 6500000,
 		.bus_num         = 4,
 		.chip_select     = 0,
@@ -108,7 +107,7 @@ static struct spi_board_info mcp251x_board_info[] = {
 		.irq             = gpio_to_irq(ICONTROL_MCP251x_nIRQ3)
 	},
 	{
-		.modalias        = "mcp251x",
+		.modalias        = "mcp2515",
 		.max_speed_hz    = 6500000,
 		.bus_num         = 4,
 		.chip_select     = 1,
diff --git a/arch/arm/mach-pxa/zeus.c b/arch/arm/mach-pxa/zeus.c
index 39896d883584..dbd256966379 100644
--- a/arch/arm/mach-pxa/zeus.c
+++ b/arch/arm/mach-pxa/zeus.c
@@ -414,15 +414,13 @@ static int zeus_mcp2515_transceiver_enable(int enable)
 
 static struct mcp251x_platform_data zeus_mcp2515_pdata = {
 	.oscillator_frequency	= 16*1000*1000,
-	.model			= CAN_MCP251X_MCP2515,
 	.board_specific_setup	= zeus_mcp2515_setup,
-	.transceiver_enable	= zeus_mcp2515_transceiver_enable,
 	.power_enable		= zeus_mcp2515_transceiver_enable,
 };
 
 static struct spi_board_info zeus_spi_board_info[] = {
 	[0] = {
-		.modalias	= "mcp251x",
+		.modalias	= "mcp2515",
 		.platform_data	= &zeus_mcp2515_pdata,
 		.irq		= gpio_to_irq(ZEUS_CAN_GPIO),
 		.max_speed_hz	= 1*1000*1000,
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index f8cc168ec76c..f521579f5adb 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -922,12 +922,16 @@ static int __devinit mcp251x_can_probe(struct spi_device *spi)
 	struct net_device *net;
 	struct mcp251x_priv *priv;
 	struct mcp251x_platform_data *pdata = spi->dev.platform_data;
+	int model = spi_get_device_id(spi)->driver_data;
 	int ret = -ENODEV;
 
 	if (!pdata)
 		/* Platform data is required for osc freq */
 		goto error_out;
 
+	if (model)
+		pdata->model = model;
+
 	/* Allocate can/net device */
 	net = alloc_candev(sizeof(struct mcp251x_priv), TX_ECHO_SKB_MAX);
 	if (!net) {
@@ -1117,6 +1121,15 @@ static int mcp251x_can_resume(struct spi_device *spi)
 #define mcp251x_can_resume NULL
 #endif
 
+static struct spi_device_id mcp251x_id_table[] = {
+	{ "mcp251x", 	0 /* Use pdata.model */ },
+	{ "mcp2510",	CAN_MCP251X_MCP2510 },
+	{ "mcp2515",	CAN_MCP251X_MCP2515 },
+	{ },
+};
+
+MODULE_DEVICE_TABLE(spi, mcp251x_id_table);
+
 static struct spi_driver mcp251x_can_driver = {
 	.driver = {
 		.name = DEVICE_NAME,
@@ -1124,6 +1137,7 @@ static struct spi_driver mcp251x_can_driver = {
 		.owner = THIS_MODULE,
 	},
 
+	.id_table = mcp251x_id_table,
 	.probe = mcp251x_can_probe,
 	.remove = __devexit_p(mcp251x_can_remove),
 	.suspend = mcp251x_can_suspend,
diff --git a/include/linux/can/platform/mcp251x.h b/include/linux/can/platform/mcp251x.h
index 1448177d86d5..dba28268e651 100644
--- a/include/linux/can/platform/mcp251x.h
+++ b/include/linux/can/platform/mcp251x.h
@@ -26,8 +26,8 @@
 struct mcp251x_platform_data {
 	unsigned long oscillator_frequency;
 	int model;
-#define CAN_MCP251X_MCP2510 0
-#define CAN_MCP251X_MCP2515 1
+#define CAN_MCP251X_MCP2510 0x2510
+#define CAN_MCP251X_MCP2515 0x2515
 	int (*board_specific_setup)(struct spi_device *spi);
 	int (*transceiver_enable)(int enable);
 	int (*power_enable) (int enable);
-- 
cgit v1.2.3


From ae832d1e03ac9bf09fb8a07fb37908ab40c7cd0e Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Wed, 24 Mar 2010 10:57:43 +0800
Subject: tracing: Remove side effect from module tracepoints that caused a GPF

Remove the @refcnt argument, because it has side-effects, and arguments with
side-effects are not skipped by the jump over disabled instrumentation and are
executed even when the tracepoint is disabled.

This was also causing a GPF as found by Randy Dunlap:

Subject: 2.6.33 GP fault only when built with tracing
LKML-Reference: <4BA2B69D.3000309@oracle.com>

Note, the current 2.6.34-rc has a fix for the actual cause of the GPF,
but this fixes one of its triggers.

Tested-by: Randy Dunlap <randy.dunlap@oracle.com>
Acked-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
LKML-Reference: <4BA97FA7.6040406@cn.fujitsu.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/module.h        |  6 ++----
 include/trace/events/module.h | 14 +++++++-------
 kernel/module.c               |  3 +--
 3 files changed, 10 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/module.h b/include/linux/module.h
index 5e869ffd34aa..393ec39b580a 100644
--- a/include/linux/module.h
+++ b/include/linux/module.h
@@ -460,8 +460,7 @@ static inline void __module_get(struct module *module)
 	if (module) {
 		preempt_disable();
 		__this_cpu_inc(module->refptr->count);
-		trace_module_get(module, _THIS_IP_,
-				 __this_cpu_read(module->refptr->count));
+		trace_module_get(module, _THIS_IP_);
 		preempt_enable();
 	}
 }
@@ -475,8 +474,7 @@ static inline int try_module_get(struct module *module)
 
 		if (likely(module_is_live(module))) {
 			__this_cpu_inc(module->refptr->count);
-			trace_module_get(module, _THIS_IP_,
-				__this_cpu_read(module->refptr->count));
+			trace_module_get(module, _THIS_IP_);
 		}
 		else
 			ret = 0;
diff --git a/include/trace/events/module.h b/include/trace/events/module.h
index 4b0f48ba16a6..a585f8135bd9 100644
--- a/include/trace/events/module.h
+++ b/include/trace/events/module.h
@@ -53,9 +53,9 @@ TRACE_EVENT(module_free,
 
 DECLARE_EVENT_CLASS(module_refcnt,
 
-	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
+	TP_PROTO(struct module *mod, unsigned long ip),
 
-	TP_ARGS(mod, ip, refcnt),
+	TP_ARGS(mod, ip),
 
 	TP_STRUCT__entry(
 		__field(	unsigned long,	ip		)
@@ -65,7 +65,7 @@ DECLARE_EVENT_CLASS(module_refcnt,
 
 	TP_fast_assign(
 		__entry->ip	= ip;
-		__entry->refcnt	= refcnt;
+		__entry->refcnt	= __this_cpu_read(mod->refptr->count);
 		__assign_str(name, mod->name);
 	),
 
@@ -75,16 +75,16 @@ DECLARE_EVENT_CLASS(module_refcnt,
 
 DEFINE_EVENT(module_refcnt, module_get,
 
-	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
+	TP_PROTO(struct module *mod, unsigned long ip),
 
-	TP_ARGS(mod, ip, refcnt)
+	TP_ARGS(mod, ip)
 );
 
 DEFINE_EVENT(module_refcnt, module_put,
 
-	TP_PROTO(struct module *mod, unsigned long ip, int refcnt),
+	TP_PROTO(struct module *mod, unsigned long ip),
 
-	TP_ARGS(mod, ip, refcnt)
+	TP_ARGS(mod, ip)
 );
 
 TRACE_EVENT(module_request,
diff --git a/kernel/module.c b/kernel/module.c
index c968d3606dca..21591ad921f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -800,8 +800,7 @@ void module_put(struct module *module)
 		preempt_disable();
 		__this_cpu_dec(module->refptr->count);
 
-		trace_module_put(module, _RET_IP_,
-				 __this_cpu_read(module->refptr->count));
+		trace_module_put(module, _RET_IP_);
 		/* Maybe they're waiting for us to drop reference? */
 		if (unlikely(!module_is_live(module)))
 			wake_up_process(module->waiter);
-- 
cgit v1.2.3


From 66a8cb95ed04025664d1db4e952155ee1dccd048 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 31 Mar 2010 13:21:56 -0400
Subject: ring-buffer: Add place holder recording of dropped events

Currently, when the ring buffer drops events, it does not record
the fact that it did so. It does inform the writer that the event
was dropped by returning a NULL event, but it does not put in any
place holder where the event was dropped.

This is not a trivial thing to add because the ring buffer mostly
runs in overwrite (flight recorder) mode. That is, when the ring
buffer is full, new data will overwrite old data.

In a producer/consumer mode, where new data is simply dropped when
the ring buffer is full, it is trivial to add the placeholder
for dropped events. When there's more room to write new data, then
a special event can be added to notify the reader about the dropped
events.

But in overwrite mode, any new write can overwrite events. A
placeholder cannot be inserted into the ring buffer since there may
never be room. A reader could also come in at any time and miss the
placeholder.

Luckily, the way the ring buffer works, the read side can find out
if events were lost or not, and how many events. Every time a write
takes place, if it overwrites the head page (the next read) it
updates an "overrun" variable that keeps track of the number of
lost events. When a reader swaps out a page from the ring buffer,
it can record this number, perform the swap, and then check to
see if the number changed, and take the diff if it has, which would be
the number of events dropped. This can be stored by the reader
and returned to callers of the reader.

Since the reader page swap will fail if the writer moved the head
page since the time the reader set up the swap, this gives room
to record the overruns without worrying about races. If the reader
sets up the pages, records the overrun, then performs the swap,
and the swap succeeds, then the overrun variable has not been
updated since the setup before the swap.

For binary readers of the ring buffer, a flag is set in the header
of each sub page (sub buffer) of the ring buffer. This flag is embedded
in the size field of the data on the sub buffer, in the 31st bit (the size
can be 32 or 64 bits depending on the architecture), but only 27
bits need to be used for the actual size (less actually).

We could add a new field in the sub buffer header to also record the
number of events dropped since the last read, but this will change the
format of the binary ring buffer a bit too much. Perhaps this change can
be made if the information on the number of events dropped is considered
important enough.

Note, the notification of dropped events is only used by consuming reads
or peeking at the ring buffer. Iterating over the ring buffer does not
keep this information because the necessary data is only available when
a page swap is made, and the iterator does not swap out pages.

Cc: Robert Richter <robert.richter@amd.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: "Luis Claudio R. Goncalves" <lclaudio@uudg.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 drivers/oprofile/cpu_buffer.c        |  4 +-
 include/linux/ring_buffer.h          |  6 ++-
 kernel/trace/ring_buffer.c           | 72 +++++++++++++++++++++++++++++++++---
 kernel/trace/ring_buffer_benchmark.c |  2 +-
 kernel/trace/trace.c                 |  4 +-
 kernel/trace/trace_functions_graph.c |  5 ++-
 kernel/trace/trace_selftest.c        |  2 +-
 7 files changed, 79 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 166b67ea622f..7581dbe456da 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -186,14 +186,14 @@ int op_cpu_buffer_write_commit(struct op_entry *entry)
 struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
 {
 	struct ring_buffer_event *e;
-	e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+	e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL, NULL);
 	if (e)
 		goto event;
 	if (ring_buffer_swap_cpu(op_ring_buffer_read,
 				 op_ring_buffer_write,
 				 cpu))
 		return NULL;
-	e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+	e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL, NULL);
 	if (e)
 		goto event;
 	return NULL;
diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index 5fcc31ed5771..c8297761e414 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -120,9 +120,11 @@ int ring_buffer_write(struct ring_buffer *buffer,
 		      unsigned long length, void *data);
 
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts);
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events);
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts);
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events);
 
 struct ring_buffer_iter *
 ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index d1187ef20caf..8295650444c5 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -318,6 +318,9 @@ EXPORT_SYMBOL_GPL(ring_buffer_event_data);
 #define TS_MASK		((1ULL << TS_SHIFT) - 1)
 #define TS_DELTA_TEST	(~TS_MASK)
 
+/* Flag when events were overwritten */
+#define RB_MISSED_EVENTS	(1 << 31)
+
 struct buffer_data_page {
 	u64		 time_stamp;	/* page time stamp */
 	local_t		 commit;	/* write committed index */
@@ -416,6 +419,12 @@ int ring_buffer_print_page_header(struct trace_seq *s)
 			       (unsigned int)sizeof(field.commit),
 			       (unsigned int)is_signed_type(long));
 
+	ret = trace_seq_printf(s, "\tfield: int overwrite;\t"
+			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
+			       (unsigned int)offsetof(typeof(field), commit),
+			       1,
+			       (unsigned int)is_signed_type(long));
+
 	ret = trace_seq_printf(s, "\tfield: char data;\t"
 			       "offset:%u;\tsize:%u;\tsigned:%u;\n",
 			       (unsigned int)offsetof(typeof(field), data),
@@ -439,6 +448,8 @@ struct ring_buffer_per_cpu {
 	struct buffer_page		*tail_page;	/* write to tail */
 	struct buffer_page		*commit_page;	/* committed pages */
 	struct buffer_page		*reader_page;
+	unsigned long			lost_events;
+	unsigned long			last_overrun;
 	local_t				commit_overrun;
 	local_t				overrun;
 	local_t				entries;
@@ -2835,6 +2846,7 @@ static struct buffer_page *
 rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 {
 	struct buffer_page *reader = NULL;
+	unsigned long overwrite;
 	unsigned long flags;
 	int nr_loops = 0;
 	int ret;
@@ -2895,6 +2907,18 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	/* The reader page will be pointing to the new head */
 	rb_set_list_to_head(cpu_buffer, &cpu_buffer->reader_page->list);
 
+	/*
+	 * We want to make sure we read the overruns after we set up our
+	 * pointers to the next object. The writer side does a
+	 * cmpxchg to cross pages which acts as the mb on the writer
+	 * side. Note, the reader will constantly fail the swap
+	 * while the writer is updating the pointers, so this
+	 * guarantees that the overwrite recorded here is the one we
+	 * want to compare with the last_overrun.
+	 */
+	smp_mb();
+	overwrite = local_read(&(cpu_buffer->overrun));
+
 	/*
 	 * Here's the tricky part.
 	 *
@@ -2926,6 +2950,11 @@ rb_get_reader_page(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->reader_page = reader;
 	rb_reset_reader_page(cpu_buffer);
 
+	if (overwrite != cpu_buffer->last_overrun) {
+		cpu_buffer->lost_events = overwrite - cpu_buffer->last_overrun;
+		cpu_buffer->last_overrun = overwrite;
+	}
+
 	goto again;
 
  out:
@@ -3002,8 +3031,14 @@ static void rb_advance_iter(struct ring_buffer_iter *iter)
 		rb_advance_iter(iter);
 }
 
+static int rb_lost_events(struct ring_buffer_per_cpu *cpu_buffer)
+{
+	return cpu_buffer->lost_events;
+}
+
 static struct ring_buffer_event *
-rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
+rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts,
+	       unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct buffer_page *reader;
@@ -3055,6 +3090,8 @@ rb_buffer_peek(struct ring_buffer_per_cpu *cpu_buffer, u64 *ts)
 			ring_buffer_normalize_time_stamp(cpu_buffer->buffer,
 							 cpu_buffer->cpu, ts);
 		}
+		if (lost_events)
+			*lost_events = rb_lost_events(cpu_buffer);
 		return event;
 
 	default:
@@ -3165,12 +3202,14 @@ static inline int rb_ok_to_lock(void)
  * @buffer: The ring buffer to read
  * @cpu: The cpu to peak at
  * @ts: The timestamp counter of this event.
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * This will return the event that will be read next, but does
  * not consume the data.
  */
 struct ring_buffer_event *
-ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
+		 unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
@@ -3185,7 +3224,7 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts)
 	local_irq_save(flags);
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
-	event = rb_buffer_peek(cpu_buffer, ts);
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
 	if (dolock)
@@ -3227,13 +3266,17 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 /**
  * ring_buffer_consume - return an event and consume it
  * @buffer: The ring buffer to get the next event from
+ * @cpu: the cpu to read the buffer from
+ * @ts: a variable to store the timestamp (may be NULL)
+ * @lost_events: a variable to store if events were lost (may be NULL)
  *
  * Returns the next event in the ring buffer, and that event is consumed.
  * Meaning, that sequential reads will keep returning a different event,
  * and eventually empty the ring buffer if the producer is slower.
  */
 struct ring_buffer_event *
-ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
+ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
+		    unsigned long *lost_events)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
@@ -3254,9 +3297,11 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts)
 	if (dolock)
 		spin_lock(&cpu_buffer->reader_lock);
 
-	event = rb_buffer_peek(cpu_buffer, ts);
-	if (event)
+	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
+	if (event) {
+		cpu_buffer->lost_events = 0;
 		rb_advance_reader(cpu_buffer);
+	}
 
 	if (dolock)
 		spin_unlock(&cpu_buffer->reader_lock);
@@ -3405,6 +3450,9 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
 	cpu_buffer->write_stamp = 0;
 	cpu_buffer->read_stamp = 0;
 
+	cpu_buffer->lost_events = 0;
+	cpu_buffer->last_overrun = 0;
+
 	rb_head_page_activate(cpu_buffer);
 }
 
@@ -3684,6 +3732,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	unsigned int commit;
 	unsigned int read;
 	u64 save_timestamp;
+	int missed_events = 0;
 	int ret = -1;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -3716,6 +3765,10 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	read = reader->read;
 	commit = rb_page_commit(reader);
 
+	/* Check if any events were dropped */
+	if (cpu_buffer->lost_events)
+		missed_events = 1;
+
 	/*
 	 * If this page has been partially read or
 	 * if len is not big enough to read the rest of the page or
@@ -3779,6 +3832,13 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	}
 	ret = read;
 
+	cpu_buffer->lost_events = 0;
+	/*
+	 * Set a flag in the commit field if we lost events
+	 */
+	if (missed_events)
+		local_add(RB_MISSED_EVENTS, &bpage->commit);
+
  out_unlock:
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
diff --git a/kernel/trace/ring_buffer_benchmark.c b/kernel/trace/ring_buffer_benchmark.c
index df74c7982255..dc56556b55a2 100644
--- a/kernel/trace/ring_buffer_benchmark.c
+++ b/kernel/trace/ring_buffer_benchmark.c
@@ -81,7 +81,7 @@ static enum event_status read_event(int cpu)
 	int *entry;
 	u64 ts;
 
-	event = ring_buffer_consume(buffer, cpu, &ts);
+	event = ring_buffer_consume(buffer, cpu, &ts, NULL);
 	if (!event)
 		return EVENT_DROPPED;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 3ec2ee6f6560..fabb0033a9be 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1556,7 +1556,7 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	if (buf_iter)
 		event = ring_buffer_iter_peek(buf_iter, ts);
 	else
-		event = ring_buffer_peek(iter->tr->buffer, cpu, ts);
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts, NULL);
 
 	ftrace_enable_cpu();
 
@@ -1635,7 +1635,7 @@ static void trace_consume(struct trace_iterator *iter)
 {
 	/* Don't allow ftrace to trace into the ring buffers */
 	ftrace_disable_cpu();
-	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts);
+	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, NULL);
 	ftrace_enable_cpu();
 }
 
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e6989d9b44da..a7f75fb10aa4 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -489,9 +489,10 @@ get_return_for_leaf(struct trace_iterator *iter,
 			 * We need to consume the current entry to see
 			 * the next one.
 			 */
-			ring_buffer_consume(iter->tr->buffer, iter->cpu, NULL);
+			ring_buffer_consume(iter->tr->buffer, iter->cpu,
+					    NULL, NULL);
 			event = ring_buffer_peek(iter->tr->buffer, iter->cpu,
-						 NULL);
+						 NULL, NULL);
 		}
 
 		if (!event)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 280fea470d67..e50180874c63 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -29,7 +29,7 @@ static int trace_test_buffer_cpu(struct trace_array *tr, int cpu)
 	struct trace_entry *entry;
 	unsigned int loops = 0;
 
-	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL))) {
+	while ((event = ring_buffer_consume(tr->buffer, cpu, NULL, NULL))) {
 		entry = ring_buffer_event_data(event);
 
 		/*
-- 
cgit v1.2.3


From bc21b478425ac73f66a5ec0b375a5e0d12d609ce Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 31 Mar 2010 19:49:26 -0400
Subject: tracing: Show the lost events in the trace_pipe output

Now that the ring buffer can keep track of where events are lost,
use this information in the output of trace_pipe:

       hackbench-3588  [001]  1326.701660: lock_acquire: ffffffff816591e0 read rcu_read_lock
       hackbench-3588  [001]  1326.701661: lock_acquire: ffff88003f4091f0 &(&dentry->d_lock)->rlock
       hackbench-3588  [001]  1326.701664: lock_release: ffff88003f4091f0 &(&dentry->d_lock)->rlock
CPU:1 [LOST 673 EVENTS]
       hackbench-3588  [001]  1326.702711: kmem_cache_free: call_site=ffffffff81102b85 ptr=ffff880026d96738
       hackbench-3588  [001]  1326.702712: lock_release: ffff88003e1480a8 &mm->mmap_sem
       hackbench-3588  [001]  1326.702713: lock_acquire: ffff88003e1480a8 &mm->mmap_sem

Even works with the function graph tracer:

 2) ! 170.098 us  |                                            }
 2)   4.036 us    |                                            rcu_irq_exit();
 2)   3.657 us    |                                            idle_cpu();
 2) ! 190.301 us  |                                          }
CPU:2 [LOST 2196 EVENTS]
 2)   0.853 us    |                            } /* cancel_dirty_page */
 2)               |                            remove_from_page_cache() {
 2)   1.578 us    |                              _raw_spin_lock_irq();
 2)               |                              __remove_from_page_cache() {

Note, it does not work with the iterator "trace" file, since determining
how many events were lost requires consuming the page from the ring
buffer, which the iterator does not do.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h |  1 +
 kernel/trace/trace.c         | 30 ++++++++++++++++++++++--------
 2 files changed, 23 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c0f4b364c711..39e71b0a3bfd 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -58,6 +58,7 @@ struct trace_iterator {
 	/* The below is zeroed out in pipe_read */
 	struct trace_seq	seq;
 	struct trace_entry	*ent;
+	unsigned long		lost_events;
 	int			leftover;
 	int			cpu;
 	u64			ts;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index fabb0033a9be..0498bebcbfd1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1545,7 +1545,8 @@ static void trace_iterator_increment(struct trace_iterator *iter)
 }
 
 static struct trace_entry *
-peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
+peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
+		unsigned long *lost_events)
 {
 	struct ring_buffer_event *event;
 	struct ring_buffer_iter *buf_iter = iter->buffer_iter[cpu];
@@ -1556,7 +1557,8 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 	if (buf_iter)
 		event = ring_buffer_iter_peek(buf_iter, ts);
 	else
-		event = ring_buffer_peek(iter->tr->buffer, cpu, ts, NULL);
+		event = ring_buffer_peek(iter->tr->buffer, cpu, ts,
+					 lost_events);
 
 	ftrace_enable_cpu();
 
@@ -1564,10 +1566,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts)
 }
 
 static struct trace_entry *
-__find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
+__find_next_entry(struct trace_iterator *iter, int *ent_cpu,
+		  unsigned long *missing_events, u64 *ent_ts)
 {
 	struct ring_buffer *buffer = iter->tr->buffer;
 	struct trace_entry *ent, *next = NULL;
+	unsigned long lost_events, next_lost = 0;
 	int cpu_file = iter->cpu_file;
 	u64 next_ts = 0, ts;
 	int next_cpu = -1;
@@ -1580,7 +1584,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 	if (cpu_file > TRACE_PIPE_ALL_CPU) {
 		if (ring_buffer_empty_cpu(buffer, cpu_file))
 			return NULL;
-		ent = peek_next_entry(iter, cpu_file, ent_ts);
+		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
 		if (ent_cpu)
 			*ent_cpu = cpu_file;
 
@@ -1592,7 +1596,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 		if (ring_buffer_empty_cpu(buffer, cpu))
 			continue;
 
-		ent = peek_next_entry(iter, cpu, &ts);
+		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
 
 		/*
 		 * Pick the entry with the smallest timestamp:
@@ -1601,6 +1605,7 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 			next = ent;
 			next_cpu = cpu;
 			next_ts = ts;
+			next_lost = lost_events;
 		}
 	}
 
@@ -1610,6 +1615,9 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 	if (ent_ts)
 		*ent_ts = next_ts;
 
+	if (missing_events)
+		*missing_events = next_lost;
+
 	return next;
 }
 
@@ -1617,13 +1625,14 @@ __find_next_entry(struct trace_iterator *iter, int *ent_cpu, u64 *ent_ts)
 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
 					  int *ent_cpu, u64 *ent_ts)
 {
-	return __find_next_entry(iter, ent_cpu, ent_ts);
+	return __find_next_entry(iter, ent_cpu, NULL, ent_ts);
 }
 
 /* Find the next real entry, and increment the iterator to the next entry */
 static void *find_next_entry_inc(struct trace_iterator *iter)
 {
-	iter->ent = __find_next_entry(iter, &iter->cpu, &iter->ts);
+	iter->ent = __find_next_entry(iter, &iter->cpu,
+				      &iter->lost_events, &iter->ts);
 
 	if (iter->ent)
 		trace_iterator_increment(iter);
@@ -1635,7 +1644,8 @@ static void trace_consume(struct trace_iterator *iter)
 {
 	/* Don't allow ftrace to trace into the ring buffers */
 	ftrace_disable_cpu();
-	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts, NULL);
+	ring_buffer_consume(iter->tr->buffer, iter->cpu, &iter->ts,
+			    &iter->lost_events);
 	ftrace_enable_cpu();
 }
 
@@ -2030,6 +2040,10 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 {
 	enum print_line_t ret;
 
+	if (iter->lost_events)
+		trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
+				 iter->cpu, iter->lost_events);
+
 	if (iter->trace && iter->trace->print_line) {
 		ret = iter->trace->print_line(iter);
 		if (ret != TRACE_TYPE_UNHANDLED)
-- 
cgit v1.2.3


From 897f0b3c3ff40b443c84e271bef19bd6ae885195 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 15 Mar 2010 10:10:03 +0100
Subject: sched: Kill the broken and deadlockable
 cpuset_lock/cpuset_cpus_allowed_locked code

This patch just states the fact that the cpusets/cpuhotplug interaction is
broken and removes the deadlockable code which only pretends to work.

- cpuset_lock() doesn't really work. It is needed for
  cpuset_cpus_allowed_locked() but we can't take this lock in
  try_to_wake_up()->select_fallback_rq() path.

- cpuset_lock() is deadlockable. Suppose that a task T bound to CPU takes
  callback_mutex. If cpu_down(CPU) happens before T drops callback_mutex
  and stop_machine() preempts T, then migration_call(CPU_DEAD) tries to
  take cpuset_lock() and hangs forever because CPU is already dead and
  thus T can't be scheduled.

- cpuset_cpus_allowed_locked() is deadlockable too. It takes task_lock()
  which is not irq-safe, but try_to_wake_up() can be called from irq.

Kill them, and change select_fallback_rq() to use cpu_possible_mask, like
we currently do without CONFIG_CPUSETS.

Also, with or without this patch, with or without CONFIG_CPUSETS, the
callers of select_fallback_rq() can race with each other or with
set_cpus_allowed() paths.

The subsequent patches try to fix these problems.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091003.GA9123@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpuset.h | 13 -------------
 kernel/cpuset.c        | 27 +--------------------------
 kernel/sched.c         | 10 +++-------
 3 files changed, 4 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index a5740fc4d04b..eeaaee746bee 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,8 +21,6 @@ extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
-				       struct cpumask *mask);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -69,9 +67,6 @@ struct seq_file;
 extern void cpuset_task_status_allowed(struct seq_file *m,
 					struct task_struct *task);
 
-extern void cpuset_lock(void);
-extern void cpuset_unlock(void);
-
 extern int cpuset_mem_spread_node(void);
 
 static inline int cpuset_do_page_mem_spread(void)
@@ -105,11 +100,6 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
 {
 	cpumask_copy(mask, cpu_possible_mask);
 }
-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
-					      struct cpumask *mask)
-{
-	cpumask_copy(mask, cpu_possible_mask);
-}
 
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
@@ -157,9 +147,6 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
 {
 }
 
-static inline void cpuset_lock(void) {}
-static inline void cpuset_unlock(void) {}
-
 static inline int cpuset_mem_spread_node(void)
 {
 	return 0;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index d10946748ec2..9a747f56d58c 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2182,19 +2182,10 @@ void __init cpuset_init_smp(void)
 void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
 	mutex_lock(&callback_mutex);
-	cpuset_cpus_allowed_locked(tsk, pmask);
-	mutex_unlock(&callback_mutex);
-}
-
-/**
- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
- * Must be called with callback_mutex held.
- **/
-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
-{
 	task_lock(tsk);
 	guarantee_online_cpus(task_cs(tsk), pmask);
 	task_unlock(tsk);
+	mutex_unlock(&callback_mutex);
 }
 
 void cpuset_init_current_mems_allowed(void)
@@ -2382,22 +2373,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
 	return 0;
 }
 
-/**
- * cpuset_lock - lock out any changes to cpuset structures
- *
- * The out of memory (oom) code needs to mutex_lock cpusets
- * from being changed while it scans the tasklist looking for a
- * task in an overlapping cpuset.  Expose callback_mutex via this
- * cpuset_lock() routine, so the oom code can lock it, before
- * locking the task list.  The tasklist_lock is a spinlock, so
- * must be taken inside callback_mutex.
- */
-
-void cpuset_lock(void)
-{
-	mutex_lock(&callback_mutex);
-}
-
 /**
  * cpuset_unlock - release lock on cpuset changes
  *
diff --git a/kernel/sched.c b/kernel/sched.c
index 52b7efd27416..c0b3ebc16317 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2296,11 +2296,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 		return dest_cpu;
 
 	/* No more Mr. Nice Guy. */
-	if (dest_cpu >= nr_cpu_ids) {
-		rcu_read_lock();
-		cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
-		rcu_read_unlock();
-		dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
+	if (unlikely(dest_cpu >= nr_cpu_ids)) {
+		cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+		dest_cpu = cpumask_any(cpu_active_mask);
 
 		/*
 		 * Don't tell them about moving exiting tasks or
@@ -5866,7 +5864,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
-		cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
 		migrate_live_tasks(cpu);
 		rq = cpu_rq(cpu);
 		kthread_stop(rq->migration_thread);
@@ -5879,7 +5876,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		rq->idle->sched_class = &idle_sched_class;
 		migrate_dead_tasks(cpu);
 		raw_spin_unlock_irq(&rq->lock);
-		cpuset_unlock();
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
 		calc_global_load_remove(rq);
-- 
cgit v1.2.3


From 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 15 Mar 2010 10:10:23 +0100
Subject: sched: _cpu_down(): Don't play with current->cpus_allowed

_cpu_down() changes the current task's affinity and then recovers it at
the end. The problems are well known: we can't restore old_allowed if it
was bound to the now-dead-cpu, and we can race with the userspace which
can change cpu-affinity during unplug.

_cpu_down() should not play with current->cpus_allowed at all. Instead,
take_cpu_down() can migrate the caller of _cpu_down() after __cpu_disable()
removes the dying cpu from cpu_online_mask.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Acked-by: Rafael J. Wysocki <rjw@sisk.pl>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091023.GA9148@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 +
 kernel/cpu.c          | 18 ++++++------------
 kernel/sched.c        |  2 +-
 3 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 43c945152732..8bea40725c76 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1843,6 +1843,7 @@ extern void sched_clock_idle_sleep_event(void);
 extern void sched_clock_idle_wakeup_event(u64 delta_ns);
 
 #ifdef CONFIG_HOTPLUG_CPU
+extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
 extern void idle_task_exit(void);
 #else
 static inline void idle_task_exit(void) {}
diff --git a/kernel/cpu.c b/kernel/cpu.c
index f8cced2692b3..8d340faac380 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
 }
 
 struct take_cpu_down_param {
+	struct task_struct *caller;
 	unsigned long mod;
 	void *hcpu;
 };
@@ -171,6 +172,7 @@ struct take_cpu_down_param {
 static int __ref take_cpu_down(void *_param)
 {
 	struct take_cpu_down_param *param = _param;
+	unsigned int cpu = (unsigned long)param->hcpu;
 	int err;
 
 	/* Ensure this CPU doesn't handle any more interrupts. */
@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
 	raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
 				param->hcpu);
 
+	if (task_cpu(param->caller) == cpu)
+		move_task_off_dead_cpu(cpu, param->caller);
 	/* Force idle task to run as soon as we yield: it should
 	   immediately notice cpu is offline and die quickly. */
 	sched_idle_next();
@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
 static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 {
 	int err, nr_calls = 0;
-	cpumask_var_t old_allowed;
 	void *hcpu = (void *)(long)cpu;
 	unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
 	struct take_cpu_down_param tcd_param = {
+		.caller = current,
 		.mod = mod,
 		.hcpu = hcpu,
 	};
@@ -205,9 +209,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 	if (!cpu_online(cpu))
 		return -EINVAL;
 
-	if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
-		return -ENOMEM;
-
 	cpu_hotplug_begin();
 	set_cpu_active(cpu, false);
 	err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
@@ -224,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 		goto out_release;
 	}
 
-	/* Ensure that we are not runnable on dying cpu */
-	cpumask_copy(old_allowed, &current->cpus_allowed);
-	set_cpus_allowed_ptr(current, cpu_active_mask);
-
 	err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
 	if (err) {
 		set_cpu_active(cpu, true);
@@ -236,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 
-		goto out_allowed;
+		goto out_release;
 	}
 	BUG_ON(cpu_online(cpu));
 
@@ -254,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
 
 	check_for_tasks(cpu);
 
-out_allowed:
-	set_cpus_allowed_ptr(current, old_allowed);
 out_release:
 	cpu_hotplug_done();
 	if (!err) {
@@ -263,7 +258,6 @@ out_release:
 					    hcpu) == NOTIFY_BAD)
 			BUG();
 	}
-	free_cpumask_var(old_allowed);
 	return err;
 }
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 165b532dd8c2..11119deffa48 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5442,7 +5442,7 @@ static int migration_thread(void *data)
 /*
  * Figure out where task on dead CPU should go, use force if necessary.
  */
-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
 {
 	struct rq *rq = cpu_rq(dead_cpu);
 	int needs_cpu, uninitialized_var(dest_cpu);
-- 
cgit v1.2.3


From 9084bb8246ea935b98320554229e2f371f7f52fa Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@redhat.com>
Date: Mon, 15 Mar 2010 10:10:27 +0100
Subject: sched: Make select_fallback_rq() cpuset friendly

Introduce cpuset_cpus_allowed_fallback() helper to fix the cpuset problems
with select_fallback_rq(). It can be called from any context and can't use
any cpuset locks including task_lock(). It is called when the task doesn't
have online cpus in ->cpus_allowed but ttwu/etc must be able to find a
suitable cpu.

I am not proud of this patch. Everything which needs such a fat comment
can't be good even if correct. But I'd prefer to not change the locking
rules in the code I hardly understand, and in any case I believe this
simple change makes the code much more correct compared to the deadlocks
we currently have.

Signed-off-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100315091027.GA9155@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/cpuset.h |  7 +++++++
 kernel/cpuset.c        | 42 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c         |  4 +---
 3 files changed, 50 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index eeaaee746bee..a73454aec333 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,6 +21,7 @@ extern int number_of_cpusets;	/* How many cpusets are defined in system? */
 extern int cpuset_init(void);
 extern void cpuset_init_smp(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
 #define cpuset_current_mems_allowed (current->mems_allowed)
 void cpuset_init_current_mems_allowed(void);
@@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
 	cpumask_copy(mask, cpu_possible_mask);
 }
 
+static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
+{
+	cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+	return cpumask_any(cpu_active_mask);
+}
+
 static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
 {
 	return node_possible_map;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9a747f56d58c..9a50c5f6e727 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2188,6 +2188,48 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 	mutex_unlock(&callback_mutex);
 }
 
+int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+{
+	const struct cpuset *cs;
+	int cpu;
+
+	rcu_read_lock();
+	cs = task_cs(tsk);
+	if (cs)
+		cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
+	rcu_read_unlock();
+
+	/*
+	 * We own tsk->cpus_allowed, nobody can change it under us.
+	 *
+	 * But we used cs && cs->cpus_allowed lockless and thus can
+	 * race with cgroup_attach_task() or update_cpumask() and get
+	 * the wrong tsk->cpus_allowed. However, both cases imply the
+	 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
+	 * which takes task_rq_lock().
+	 *
+	 * If we are called after it dropped the lock we must see all
+	 * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
+	 * set any mask even if it is not right from task_cs() pov,
+	 * the pending set_cpus_allowed_ptr() will fix things.
+	 */
+
+	cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
+	if (cpu >= nr_cpu_ids) {
+		/*
+		 * Either tsk->cpus_allowed is wrong (see above) or it
+		 * is actually empty. The latter case is only possible
+		 * if we are racing with remove_tasks_in_empty_cpuset().
+		 * Like above we can temporary set any mask and rely on
+		 * set_cpus_allowed_ptr() as synchronization point.
+		 */
+		cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
+		cpu = cpumask_any(cpu_active_mask);
+	}
+
+	return cpu;
+}
+
 void cpuset_init_current_mems_allowed(void)
 {
 	nodes_setall(current->mems_allowed);
diff --git a/kernel/sched.c b/kernel/sched.c
index 11119deffa48..9a38c7a24ed7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2300,9 +2300,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
 
 	/* No more Mr. Nice Guy. */
 	if (unlikely(dest_cpu >= nr_cpu_ids)) {
-		cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
-		dest_cpu = cpumask_any(cpu_active_mask);
-
+		dest_cpu = cpuset_cpus_allowed_fallback(p);
 		/*
 		 * Don't tell them about moving exiting tasks or
 		 * kernel threads (both mm NULL), since they never
-- 
cgit v1.2.3


From 0017d735092844118bef006696a750a0e4ef6ebd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 24 Mar 2010 18:34:10 +0100
Subject: sched: Fix TASK_WAKING vs fork deadlock

Oleg noticed a few races with the TASK_WAKING usage on fork.

 - since TASK_WAKING is basically a spinlock, it should be IRQ safe
 - since we set TASK_WAKING (*) without holding rq->lock, there
   could still be a rq->lock holder, thereby not actually
   providing full serialization.

(*) in fact we clear PF_STARTING, which in effect enables TASK_WAKING.

Cure the second issue by not setting TASK_WAKING in sched_fork(), but
only temporarily in wake_up_new_task() while calling select_task_rq().

Cure the first by holding rq->lock around the select_task_rq() call,
this will disable IRQs, this however requires that we push down the
rq->lock release into select_task_rq_fair()'s cgroup stuff.

Because select_task_rq_fair() still needs to drop the rq->lock we
cannot fully get rid of TASK_WAKING.

Reported-by: Oleg Nesterov <oleg@redhat.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   |  3 ++-
 kernel/sched.c          | 65 ++++++++++++++++++-------------------------------
 kernel/sched_fair.c     |  8 ++++--
 kernel/sched_idletask.c |  3 ++-
 kernel/sched_rt.c       |  5 ++--
 5 files changed, 36 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8bea40725c76..fb6c18843ee8 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1046,7 +1046,8 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 #ifdef CONFIG_SMP
-	int  (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+	int  (*select_task_rq)(struct rq *rq, struct task_struct *p,
+			       int sd_flag, int flags);
 
 	void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
 	void (*post_schedule) (struct rq *this_rq);
diff --git a/kernel/sched.c b/kernel/sched.c
index 9a38c7a24ed7..dcd17736dae1 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -916,14 +916,10 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
 /*
  * Check whether the task is waking, we use this to synchronize against
  * ttwu() so that task_cpu() reports a stable number.
- *
- * We need to make an exception for PF_STARTING tasks because the fork
- * path might require task_rq_lock() to work, eg. it can call
- * set_cpus_allowed_ptr() from the cpuset clone_ns code.
  */
 static inline int task_is_waking(struct task_struct *p)
 {
-	return unlikely((p->state == TASK_WAKING) && !(p->flags & PF_STARTING));
+	return unlikely(p->state == TASK_WAKING);
 }
 
 /*
@@ -2320,9 +2316,9 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
  * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
  */
 static inline
-int select_task_rq(struct task_struct *p, int sd_flags, int wake_flags)
+int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
 {
-	int cpu = p->sched_class->select_task_rq(p, sd_flags, wake_flags);
+	int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
 
 	/*
 	 * In order not to call set_task_cpu() on a blocking task we need
@@ -2393,17 +2389,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	if (p->sched_class->task_waking)
 		p->sched_class->task_waking(rq, p);
 
-	__task_rq_unlock(rq);
-
-	cpu = select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
-	if (cpu != orig_cpu) {
-		/*
-		 * Since we migrate the task without holding any rq->lock,
-		 * we need to be careful with task_rq_lock(), since that
-		 * might end up locking an invalid rq.
-		 */
+	cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
+	if (cpu != orig_cpu)
 		set_task_cpu(p, cpu);
-	}
+	__task_rq_unlock(rq);
 
 	rq = cpu_rq(cpu);
 	raw_spin_lock(&rq->lock);
@@ -2530,11 +2519,11 @@ void sched_fork(struct task_struct *p, int clone_flags)
 
 	__sched_fork(p);
 	/*
-	 * We mark the process as waking here. This guarantees that
+	 * We mark the process as running here. This guarantees that
 	 * nobody will actually run it, and a signal or other external
 	 * event cannot wake it up and insert it on the runqueue either.
 	 */
-	p->state = TASK_WAKING;
+	p->state = TASK_RUNNING;
 
 	/*
 	 * Revert to default priority/policy on fork if requested.
@@ -2601,28 +2590,25 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
 	int cpu __maybe_unused = get_cpu();
 
 #ifdef CONFIG_SMP
+	rq = task_rq_lock(p, &flags);
+	p->state = TASK_WAKING;
+
 	/*
 	 * Fork balancing, do it here and not earlier because:
 	 *  - cpus_allowed can change in the fork path
 	 *  - any previously selected cpu might disappear through hotplug
 	 *
-	 * We still have TASK_WAKING but PF_STARTING is gone now, meaning
-	 * ->cpus_allowed is stable, we have preemption disabled, meaning
-	 * cpu_online_mask is stable.
+	 * We set TASK_WAKING so that select_task_rq() can drop rq->lock
+	 * without people poking at ->cpus_allowed.
 	 */
-	cpu = select_task_rq(p, SD_BALANCE_FORK, 0);
+	cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
 	set_task_cpu(p, cpu);
-#endif
-
-	/*
-	 * Since the task is not on the rq and we still have TASK_WAKING set
-	 * nobody else will migrate this task.
-	 */
-	rq = cpu_rq(cpu);
-	raw_spin_lock_irqsave(&rq->lock, flags);
 
-	BUG_ON(p->state != TASK_WAKING);
 	p->state = TASK_RUNNING;
+	task_rq_unlock(rq, &flags);
+#endif
+
+	rq = task_rq_lock(p, &flags);
 	activate_task(rq, p, 0);
 	trace_sched_wakeup_new(rq, p, 1);
 	check_preempt_curr(rq, p, WF_FORK);
@@ -3068,19 +3054,15 @@ void sched_exec(void)
 {
 	struct task_struct *p = current;
 	struct migration_req req;
-	int dest_cpu, this_cpu;
 	unsigned long flags;
 	struct rq *rq;
-
-	this_cpu = get_cpu();
-	dest_cpu = p->sched_class->select_task_rq(p, SD_BALANCE_EXEC, 0);
-	if (dest_cpu == this_cpu) {
-		put_cpu();
-		return;
-	}
+	int dest_cpu;
 
 	rq = task_rq_lock(p, &flags);
-	put_cpu();
+	dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+	if (dest_cpu == smp_processor_id())
+		goto unlock;
+
 	/*
 	 * select_task_rq() can race against ->cpus_allowed
 	 */
@@ -3098,6 +3080,7 @@ void sched_exec(void)
 
 		return;
 	}
+unlock:
 	task_rq_unlock(rq, &flags);
 }
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 49ad99378f82..8a5e7632d09b 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1423,7 +1423,8 @@ select_idle_sibling(struct task_struct *p, struct sched_domain *sd, int target)
  *
  * preempt must be disabled.
  */
-static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+static int
+select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
 {
 	struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
 	int cpu = smp_processor_id();
@@ -1521,8 +1522,11 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
 				  cpumask_weight(sched_domain_span(sd))))
 			tmp = affine_sd;
 
-		if (tmp)
+		if (tmp) {
+			raw_spin_unlock(&rq->lock);
 			update_shares(tmp);
+			raw_spin_lock(&rq->lock);
+		}
 	}
 #endif
 
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index a8a6d8a50947..5af709f503b0 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -6,7 +6,8 @@
  */
 
 #ifdef CONFIG_SMP
-static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
+static int
+select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 {
 	return task_cpu(p); /* IDLE tasks as never migrated */
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 012d69bb67c7..fde895f8044d 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -948,10 +948,9 @@ static void yield_task_rt(struct rq *rq)
 #ifdef CONFIG_SMP
 static int find_lowest_rq(struct task_struct *task);
 
-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+static int
+select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
 {
-	struct rq *rq = task_rq(p);
-
 	if (sd_flag != SD_BALANCE_WAKE)
 		return smp_processor_id();
 
-- 
cgit v1.2.3


From 371fd7e7a56a5c136d31aa980011bd2f131c3ef5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Wed, 24 Mar 2010 16:38:48 +0100
Subject: sched: Add enqueue/dequeue flags

In order to reduce the dependency on TASK_WAKING rework the enqueue
interface to support a proper flags field.

Replace the int wakeup, bool head arguments with an int flags argument
and create the following flags:

  ENQUEUE_WAKEUP - the enqueue is a wakeup of a sleeping task,
  ENQUEUE_WAKING - the enqueue has relative vruntime due to
                   having sched_class::task_waking() called,
  ENQUEUE_HEAD - the waking task should be placed on the head
                 of the priority queue (where appropriate).

For symmetry also convert sched_class::dequeue() to a flags scheme.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h   | 11 ++++++++---
 kernel/sched.c          | 32 +++++++++++++++++---------------
 kernel/sched_fair.c     | 25 ++++++++-----------------
 kernel/sched_idletask.c |  2 +-
 kernel/sched_rt.c       |  8 ++++----
 5 files changed, 38 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index fb6c18843ee8..e3e900f318d7 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1032,12 +1032,17 @@ struct sched_domain;
 #define WF_SYNC		0x01		/* waker goes to sleep after wakup */
 #define WF_FORK		0x02		/* child wakeup after fork */
 
+#define ENQUEUE_WAKEUP		1
+#define ENQUEUE_WAKING		2
+#define ENQUEUE_HEAD		4
+
+#define DEQUEUE_SLEEP		1
+
 struct sched_class {
 	const struct sched_class *next;
 
-	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
-			      bool head);
-	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+	void (*enqueue_task) (struct rq *rq, struct task_struct *p, int flags);
+	void (*dequeue_task) (struct rq *rq, struct task_struct *p, int flags);
 	void (*yield_task) (struct rq *rq);
 
 	void (*check_preempt_curr) (struct rq *rq, struct task_struct *p, int flags);
diff --git a/kernel/sched.c b/kernel/sched.c
index 14c8d2a1b38a..4a57e96dd6c7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1877,44 +1877,43 @@ static void update_avg(u64 *avg, u64 sample)
 	*avg += diff >> 3;
 }
 
-static void
-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+static void enqueue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_rq_clock(rq);
 	sched_info_queued(p);
-	p->sched_class->enqueue_task(rq, p, wakeup, head);
+	p->sched_class->enqueue_task(rq, p, flags);
 	p->se.on_rq = 1;
 }
 
-static void dequeue_task(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	update_rq_clock(rq);
 	sched_info_dequeued(p);
-	p->sched_class->dequeue_task(rq, p, sleep);
+	p->sched_class->dequeue_task(rq, p, flags);
 	p->se.on_rq = 0;
 }
 
 /*
  * activate_task - move a task to the runqueue.
  */
-static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+static void activate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible--;
 
-	enqueue_task(rq, p, wakeup, false);
+	enqueue_task(rq, p, flags);
 	inc_nr_running(rq);
 }
 
 /*
  * deactivate_task - remove a task from the runqueue.
  */
-static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
+static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 {
 	if (task_contributes_to_load(p))
 		rq->nr_uninterruptible++;
 
-	dequeue_task(rq, p, sleep);
+	dequeue_task(rq, p, flags);
 	dec_nr_running(rq);
 }
 
@@ -2353,6 +2352,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 {
 	int cpu, orig_cpu, this_cpu, success = 0;
 	unsigned long flags;
+	unsigned long en_flags = ENQUEUE_WAKEUP;
 	struct rq *rq;
 
 	this_cpu = get_cpu();
@@ -2386,8 +2386,10 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	}
 	p->state = TASK_WAKING;
 
-	if (p->sched_class->task_waking)
+	if (p->sched_class->task_waking) {
 		p->sched_class->task_waking(rq, p);
+		en_flags |= ENQUEUE_WAKING;
+	}
 
 	cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
 	if (cpu != orig_cpu)
@@ -2432,7 +2434,7 @@ out_activate:
 		schedstat_inc(p, se.statistics.nr_wakeups_local);
 	else
 		schedstat_inc(p, se.statistics.nr_wakeups_remote);
-	activate_task(rq, p, 1);
+	activate_task(rq, p, en_flags);
 	success = 1;
 
 out_running:
@@ -3623,7 +3625,7 @@ need_resched_nonpreemptible:
 		if (unlikely(signal_pending_state(prev->state, prev)))
 			prev->state = TASK_RUNNING;
 		else
-			deactivate_task(rq, prev, 1);
+			deactivate_task(rq, prev, DEQUEUE_SLEEP);
 		switch_count = &prev->nvcsw;
 	}
 
@@ -4193,7 +4195,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
-		enqueue_task(rq, p, 0, oldprio < prio);
+		enqueue_task(rq, p, oldprio < prio ? ENQUEUE_HEAD : 0);
 
 		check_class_changed(rq, p, prev_class, oldprio, running);
 	}
@@ -4236,7 +4238,7 @@ void set_user_nice(struct task_struct *p, long nice)
 	delta = p->prio - old_prio;
 
 	if (on_rq) {
-		enqueue_task(rq, p, 0, false);
+		enqueue_task(rq, p, 0);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
@@ -8180,7 +8182,7 @@ void sched_move_task(struct task_struct *tsk)
 	if (unlikely(running))
 		tsk->sched_class->set_curr_task(rq);
 	if (on_rq)
-		enqueue_task(rq, tsk, 0, false);
+		enqueue_task(rq, tsk, 0);
 
 	task_rq_unlock(rq, &flags);
 }
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8a5e7632d09b..88d3053ac7c2 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -757,9 +757,6 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
-#define ENQUEUE_WAKEUP	1
-#define ENQUEUE_MIGRATE 2
-
 static void
 enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
@@ -767,7 +764,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 	 * Update the normalized vruntime before updating min_vruntime
 	 * through callig update_curr().
 	 */
-	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_WAKING))
 		se->vruntime += cfs_rq->min_vruntime;
 
 	/*
@@ -803,7 +800,7 @@ static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
 }
 
 static void
-dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
+dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
 	 * Update run-time statistics of the 'current'.
@@ -811,7 +808,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 	update_curr(cfs_rq);
 
 	update_stats_dequeue(cfs_rq, se);
-	if (sleep) {
+	if (flags & DEQUEUE_SLEEP) {
 #ifdef CONFIG_SCHEDSTATS
 		if (entity_is_task(se)) {
 			struct task_struct *tsk = task_of(se);
@@ -836,7 +833,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 	 * update can refer to the ->curr item and we need to reflect this
 	 * movement in our normalized position.
 	 */
-	if (!sleep)
+	if (!(flags & DEQUEUE_SLEEP))
 		se->vruntime -= cfs_rq->min_vruntime;
 }
 
@@ -1045,16 +1042,10 @@ static inline void hrtick_update(struct rq *rq)
  * then put the task into the rbtree:
  */
 static void
-enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
-	int flags = 0;
-
-	if (wakeup)
-		flags |= ENQUEUE_WAKEUP;
-	if (p->state == TASK_WAKING)
-		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
@@ -1072,18 +1063,18 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
  * decreased. We remove the task from the rbtree and
  * update the fair scheduling stats:
  */
-static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
 		cfs_rq = cfs_rq_of(se);
-		dequeue_entity(cfs_rq, se, sleep);
+		dequeue_entity(cfs_rq, se, flags);
 		/* Don't dequeue parent if it has other entities besides us */
 		if (cfs_rq->load.weight)
 			break;
-		sleep = 1;
+		flags |= DEQUEUE_SLEEP;
 	}
 
 	hrtick_update(rq);
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 5af709f503b0..bea2b8f12024 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -33,7 +33,7 @@ static struct task_struct *pick_next_task_idle(struct rq *rq)
  * message if some code attempts to do it:
  */
 static void
-dequeue_task_idle(struct rq *rq, struct task_struct *p, int sleep)
+dequeue_task_idle(struct rq *rq, struct task_struct *p, int flags)
 {
 	raw_spin_unlock_irq(&rq->lock);
 	printk(KERN_ERR "bad: scheduling from the idle thread!\n");
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index fde895f8044d..8afb953e31c6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -888,20 +888,20 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
  * Adding/removing a task to/from a priority array:
  */
 static void
-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+enqueue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
-	if (wakeup)
+	if (flags & ENQUEUE_WAKEUP)
 		rt_se->timeout = 0;
 
-	enqueue_rt_entity(rt_se, head);
+	enqueue_rt_entity(rt_se, flags & ENQUEUE_HEAD);
 
 	if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
 		enqueue_pushable_task(rq, p);
 }
 
-static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
+static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int flags)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 
-- 
cgit v1.2.3


From 8626d3b4328061f5b82b11ae1d6918a0c3602f42 Mon Sep 17 00:00:00 2001
From: David Woodhouse <dwmw2@infradead.org>
Date: Fri, 2 Apr 2010 01:05:27 +0000
Subject: phylib: Support phy module autoloading

We don't use the normal hotplug mechanism because it doesn't work. It will
load the module some time after the device appears, but that's not good
enough for us -- we need the driver loaded _immediately_ because otherwise
the NIC driver may just abort and then the phy 'device' goes away.

[bwh: s/phy/mdio/ in module alias, kerneldoc for struct mdio_device_id]

Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
Acked-by: Andy Fleming <afleming@freescale.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c    | 12 ++++++++++++
 include/linux/mod_devicetable.h | 26 ++++++++++++++++++++++++++
 include/linux/phy.h             |  1 +
 scripts/mod/file2alias.c        | 26 ++++++++++++++++++++++++++
 4 files changed, 65 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index db1794546c56..1a99bb244106 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -149,6 +149,7 @@ EXPORT_SYMBOL(phy_scan_fixups);
 struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 {
 	struct phy_device *dev;
+
 	/* We allocate the device, and initialize the
 	 * default values */
 	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
@@ -179,6 +180,17 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id)
 	mutex_init(&dev->lock);
 	INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine);
 
+	/* Request the appropriate module unconditionally; don't
+	   bother trying to do so only if it isn't already loaded,
+	   because that gets complicated. A hotplug event would have
+	   done an unconditional modprobe anyway.
+	   We don't do normal hotplug because it won't work for MDIO
+	   -- because it relies on the device staying around for long
+	   enough for the driver to get loaded. With MDIO, the NIC
+	   driver will get bored and give up as soon as it finds that
+	   there's no driver _already_ loaded. */
+	request_module(MDIO_MODULE_PREFIX MDIO_ID_FMT, MDIO_ID_ARGS(phy_id));
+
 	return dev;
 }
 EXPORT_SYMBOL(phy_device_create);
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f58e9d836f32..55f1f9c9506c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -474,4 +474,30 @@ struct platform_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+#define MDIO_MODULE_PREFIX	"mdio:"
+
+#define MDIO_ID_FMT "%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d%d"
+#define MDIO_ID_ARGS(_id) /* one single-bit value per bit, MSB first */ \
+	((_id)>>31) & 1, ((_id)>>30) & 1, ((_id)>>29) & 1, ((_id)>>28) & 1, \
+	((_id)>>27) & 1, ((_id)>>26) & 1, ((_id)>>25) & 1, ((_id)>>24) & 1, \
+	((_id)>>23) & 1, ((_id)>>22) & 1, ((_id)>>21) & 1, ((_id)>>20) & 1, \
+	((_id)>>19) & 1, ((_id)>>18) & 1, ((_id)>>17) & 1, ((_id)>>16) & 1, \
+	((_id)>>15) & 1, ((_id)>>14) & 1, ((_id)>>13) & 1, ((_id)>>12) & 1, \
+	((_id)>>11) & 1, ((_id)>>10) & 1, ((_id)>>9) & 1, ((_id)>>8) & 1, \
+	((_id)>>7) & 1, ((_id)>>6) & 1, ((_id)>>5) & 1, ((_id)>>4) & 1, \
+	((_id)>>3) & 1, ((_id)>>2) & 1, ((_id)>>1) & 1, (_id) & 1
+
+/**
+ * struct mdio_device_id - identifies PHY devices on an MDIO/MII bus
+ * @phy_id: The result of
+ *     (mdio_read(&MII_PHYSID1) << 16 | mdio_read(&MII_PHYSID2)) & @phy_id_mask
+ *     for this PHY type
+ * @phy_id_mask: Defines the significant bits of @phy_id.  A value of 0
+ *     is used to terminate an array of struct mdio_device_id.
+ */
+struct mdio_device_id {
+	__u32 phy_id;
+	__u32 phy_id_mask;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d9bce4b526b4..987e111f7b11 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -24,6 +24,7 @@
 #include <linux/mii.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
+#include <linux/mod_devicetable.h>
 
 #include <asm/atomic.h>
 
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 220213e603db..36a60a853173 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -796,6 +796,28 @@ static int do_platform_entry(const char *filename,
 	return 1;
 }
 
+static int do_mdio_entry(const char *filename,
+			 struct mdio_device_id *id, char *alias)
+{
+	int i;
+
+	alias += sprintf(alias, MDIO_MODULE_PREFIX);
+
+	for (i = 0; i < 32; i++) {
+		if (!((id->phy_id_mask >> (31-i)) & 1))
+			*(alias++) = '?';
+		else if ((id->phy_id >> (31-i)) & 1)
+			*(alias++) = '1';
+		else
+			*(alias++) = '0';
+	}
+
+	/* Terminate the string */
+	*alias = 0;
+
+	return 1;
+}
+
 /* Ignore any prefix, eg. some architectures prepend _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
@@ -943,6 +965,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct platform_device_id), "platform",
 			 do_platform_entry, mod);
+	else if (sym_is(symname, "__mod_mdio_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct mdio_device_id), "mdio",
+			 do_mdio_entry, mod);
 	free(zeros);
 }
 
-- 
cgit v1.2.3


From a748ee2426817a95b1f03012d8f339c45c722ae1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 1 Apr 2010 21:22:09 +0000
Subject: net: move address list functions to a separate file

Also rename the unicast functions slightly so that their names are consistent with the multicast ones.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c |   2 +-
 drivers/net/macvlan.c           |  10 +-
 drivers/scsi/fcoe/fcoe.c        |  14 +-
 include/linux/netdevice.h       |  15 +-
 net/8021q/vlan.c                |   4 +-
 net/8021q/vlan_dev.c            |  14 +-
 net/core/Makefile               |   3 +-
 net/core/dev.c                  | 430 +-----------------------------------
 net/core/dev_addr_lists.c       | 478 ++++++++++++++++++++++++++++++++++++++++
 net/dsa/slave.c                 |  14 +-
 net/packet/af_packet.c          |   4 +-
 11 files changed, 524 insertions(+), 464 deletions(-)
 create mode 100644 net/core/dev_addr_lists.c

(limited to 'include/linux')

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f8ab60b914c4..d6ae63b2cf00 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -1491,7 +1491,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 			}
 
 			/* Flush unicast and multicast addresses */
-			dev_unicast_flush(bond_dev);
+			dev_uc_flush(bond_dev);
 			dev_addr_discard(bond_dev);
 
 			if (slave_dev->type != ARPHRD_ETHER)
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 445e73c343ba..9a939d828b47 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -282,7 +282,7 @@ static int macvlan_open(struct net_device *dev)
 	if (macvlan_addr_busy(vlan->port, dev->dev_addr))
 		goto out;
 
-	err = dev_unicast_add(lowerdev, dev->dev_addr);
+	err = dev_uc_add(lowerdev, dev->dev_addr);
 	if (err < 0)
 		goto out;
 	if (dev->flags & IFF_ALLMULTI) {
@@ -294,7 +294,7 @@ static int macvlan_open(struct net_device *dev)
 	return 0;
 
 del_unicast:
-	dev_unicast_delete(lowerdev, dev->dev_addr);
+	dev_uc_del(lowerdev, dev->dev_addr);
 out:
 	return err;
 }
@@ -308,7 +308,7 @@ static int macvlan_stop(struct net_device *dev)
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(lowerdev, -1);
 
-	dev_unicast_delete(lowerdev, dev->dev_addr);
+	dev_uc_del(lowerdev, dev->dev_addr);
 
 	macvlan_hash_del(vlan);
 	return 0;
@@ -332,11 +332,11 @@ static int macvlan_set_mac_address(struct net_device *dev, void *p)
 		if (macvlan_addr_busy(vlan->port, addr->sa_data))
 			return -EBUSY;
 
-		err = dev_unicast_add(lowerdev, addr->sa_data);
+		err = dev_uc_add(lowerdev, addr->sa_data);
 		if (err)
 			return err;
 
-		dev_unicast_delete(lowerdev, dev->dev_addr);
+		dev_uc_del(lowerdev, dev->dev_addr);
 
 		macvlan_hash_change_addr(vlan, addr->sa_data);
 	}
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index 2f47ae7cce91..de33e38a4059 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -308,9 +308,9 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
 	 * for multiple unicast MACs.
 	 */
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_add(netdev, flogi_maddr);
+	dev_uc_add(netdev, flogi_maddr);
 	if (fip->spma)
-		dev_unicast_add(netdev, fip->ctl_src_addr);
+		dev_uc_add(netdev, fip->ctl_src_addr);
 	dev_mc_add(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 
 	/*
@@ -394,9 +394,9 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 
 	/* Delete secondary MAC addresses */
 	memcpy(flogi_maddr, (u8[6]) FC_FCOE_FLOGI_MAC, ETH_ALEN);
-	dev_unicast_delete(netdev, flogi_maddr);
+	dev_uc_del(netdev, flogi_maddr);
 	if (fip->spma)
-		dev_unicast_delete(netdev, fip->ctl_src_addr);
+		dev_uc_del(netdev, fip->ctl_src_addr);
 	dev_mc_delete(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
 
 	/* Tell the LLD we are done w/ FCoE */
@@ -490,9 +490,9 @@ static void fcoe_update_src_mac(struct fc_lport *lport, u8 *addr)
 
 	rtnl_lock();
 	if (!is_zero_ether_addr(port->data_src_addr))
-		dev_unicast_delete(fcoe->netdev, port->data_src_addr);
+		dev_uc_del(fcoe->netdev, port->data_src_addr);
 	if (!is_zero_ether_addr(addr))
-		dev_unicast_add(fcoe->netdev, addr);
+		dev_uc_add(fcoe->netdev, addr);
 	memcpy(port->data_src_addr, addr, ETH_ALEN);
 	rtnl_unlock();
 }
@@ -819,7 +819,7 @@ static void fcoe_if_destroy(struct fc_lport *lport)
 
 	rtnl_lock();
 	if (!is_zero_ether_addr(port->data_src_addr))
-		dev_unicast_delete(netdev, port->data_src_addr);
+		dev_uc_del(netdev, port->data_src_addr);
 	rtnl_unlock();
 
 	/* receives may not be stopped until after this */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index b5670ab5107c..60f0c83192fe 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1991,15 +1991,20 @@ extern int dev_addr_add_multiple(struct net_device *to_dev,
 extern int dev_addr_del_multiple(struct net_device *to_dev,
 				 struct net_device *from_dev,
 				 unsigned char addr_type);
+extern void dev_addr_flush(struct net_device *dev);
+extern int dev_addr_init(struct net_device *dev);
+
+/* Functions used for unicast addresses handling */
+extern int dev_uc_add(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_uc_sync(struct net_device *to, struct net_device *from);
+extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
+extern void dev_uc_flush(struct net_device *dev);
+extern void dev_uc_init(struct net_device *dev);
 
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int		dev_unicast_delete(struct net_device *dev, void *addr);
-extern int		dev_unicast_add(struct net_device *dev, void *addr);
-extern int		dev_unicast_sync(struct net_device *to, struct net_device *from);
-extern void		dev_unicast_unsync(struct net_device *to, struct net_device *from);
-extern void		dev_unicast_flush(struct net_device *dev);
 extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
 extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
 extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c
index c39a5f41169c..bd33f02013ec 100644
--- a/net/8021q/vlan.c
+++ b/net/8021q/vlan.c
@@ -356,13 +356,13 @@ static void vlan_sync_address(struct net_device *dev,
 	 * the new address */
 	if (compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    !compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_delete(dev, vlandev->dev_addr);
+		dev_uc_del(dev, vlandev->dev_addr);
 
 	/* vlan address was equal to the old address and is different from
 	 * the new address */
 	if (!compare_ether_addr(vlandev->dev_addr, vlan->real_dev_addr) &&
 	    compare_ether_addr(vlandev->dev_addr, dev->dev_addr))
-		dev_unicast_add(dev, vlandev->dev_addr);
+		dev_uc_add(dev, vlandev->dev_addr);
 
 	memcpy(vlan->real_dev_addr, dev->dev_addr, ETH_ALEN);
 }
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 9e83272fc5b0..7f4d247237e4 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -461,7 +461,7 @@ static int vlan_dev_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, dev->dev_addr);
+		err = dev_uc_add(real_dev, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -490,7 +490,7 @@ clear_allmulti:
 		dev_set_allmulti(real_dev, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 out:
 	netif_carrier_off(dev);
 	return err;
@@ -505,14 +505,14 @@ static int vlan_dev_stop(struct net_device *dev)
 		vlan_gvrp_request_leave(dev);
 
 	dev_mc_unsync(real_dev, dev);
-	dev_unicast_unsync(real_dev, dev);
+	dev_uc_unsync(real_dev, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(real_dev, -1);
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(real_dev, -1);
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 	netif_carrier_off(dev);
 	return 0;
@@ -531,13 +531,13 @@ static int vlan_dev_set_mac_address(struct net_device *dev, void *p)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, real_dev->dev_addr)) {
-		err = dev_unicast_add(real_dev, addr->sa_data);
+		err = dev_uc_add(real_dev, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, real_dev->dev_addr))
-		dev_unicast_delete(real_dev, dev->dev_addr);
+		dev_uc_del(real_dev, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
@@ -654,7 +654,7 @@ static void vlan_dev_change_rx_flags(struct net_device *dev, int change)
 static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 {
 	dev_mc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
-	dev_unicast_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
+	dev_uc_sync(vlan_dev_info(vlan_dev)->real_dev, vlan_dev);
 }
 
 /*
diff --git a/net/core/Makefile b/net/core/Makefile
index 08791ac3e05a..0a899f1aadb9 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -8,7 +8,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
 obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
-			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
+			dev_addr_lists.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index c6b52068d5ec..949c62dba719 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3968,314 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
-/* hw addresses list handling functions */
-
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	int alloc_size;
-
-	if (addr_len > MAX_ADDR_LEN)
-		return -EINVAL;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    ha->type == addr_type) {
-			ha->refcount++;
-			return 0;
-		}
-	}
-
-
-	alloc_size = sizeof(*ha);
-	if (alloc_size < L1_CACHE_BYTES)
-		alloc_size = L1_CACHE_BYTES;
-	ha = kmalloc(alloc_size, GFP_ATOMIC);
-	if (!ha)
-		return -ENOMEM;
-	memcpy(ha->addr, addr, addr_len);
-	ha->type = addr_type;
-	ha->refcount = 1;
-	ha->synced = false;
-	list_add_tail_rcu(&ha->list, &list->list);
-	list->count++;
-	return 0;
-}
-
-static void ha_rcu_free(struct rcu_head *head)
-{
-	struct netdev_hw_addr *ha;
-
-	ha = container_of(head, struct netdev_hw_addr, rcu_head);
-	kfree(ha);
-}
-
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-
-	list_for_each_entry(ha, &list->list, list) {
-		if (!memcmp(ha->addr, addr, addr_len) &&
-		    (ha->type == addr_type || !addr_type)) {
-			if (--ha->refcount)
-				return 0;
-			list_del_rcu(&ha->list);
-			call_rcu(&ha->rcu_head, ha_rcu_free);
-			list->count--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len,
-				  unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha, *ha2;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
-		if (err)
-			goto unroll;
-	}
-	return 0;
-
-unroll:
-	list_for_each_entry(ha2, &from_list->list, list) {
-		if (ha2 == ha)
-			break;
-		type = addr_type ? addr_type : ha2->type;
-		__hw_addr_del(to_list, ha2->addr, addr_len, type);
-	}
-	return err;
-}
-
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len,
-				   unsigned char addr_type)
-{
-	struct netdev_hw_addr *ha;
-	unsigned char type;
-
-	list_for_each_entry(ha, &from_list->list, list) {
-		type = addr_type ? addr_type : ha->type;
-		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
-	}
-}
-
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len)
-{
-	int err = 0;
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (!ha->synced) {
-			err = __hw_addr_add(to_list, ha->addr,
-					    addr_len, ha->type);
-			if (err)
-				break;
-			ha->synced = true;
-			ha->refcount++;
-		} else if (ha->refcount == 1) {
-			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
-			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
-		}
-	}
-	return err;
-}
-
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
-		if (ha->synced) {
-			__hw_addr_del(to_list, ha->addr,
-				      addr_len, ha->type);
-			ha->synced = false;
-			__hw_addr_del(from_list, ha->addr,
-				      addr_len, ha->type);
-		}
-	}
-}
-
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
-{
-	struct netdev_hw_addr *ha, *tmp;
-
-	list_for_each_entry_safe(ha, tmp, &list->list, list) {
-		list_del_rcu(&ha->list);
-		call_rcu(&ha->rcu_head, ha_rcu_free);
-	}
-	list->count = 0;
-}
-
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
-{
-	INIT_LIST_HEAD(&list->list);
-	list->count = 0;
-}
-
-/* Device addresses handling functions */
-
-static void dev_addr_flush(struct net_device *dev)
-{
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_flush(&dev->dev_addrs);
-	dev->dev_addr = NULL;
-}
-
-static int dev_addr_init(struct net_device *dev)
-{
-	unsigned char addr[MAX_ADDR_LEN];
-	struct netdev_hw_addr *ha;
-	int err;
-
-	/* rtnl_mutex must be held here */
-
-	__hw_addr_init(&dev->dev_addrs);
-	memset(addr, 0, sizeof(addr));
-	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
-			    NETDEV_HW_ADDR_T_LAN);
-	if (!err) {
-		/*
-		 * Get the first (previously created) address from the list
-		 * and set dev_addr pointer to this location.
-		 */
-		ha = list_first_entry(&dev->dev_addrs.list,
-				      struct netdev_hw_addr, list);
-		dev->dev_addr = ha->addr;
-	}
-	return err;
-}
-
-/**
- *	dev_addr_add	- Add a device address
- *	@dev: device
- *	@addr: address to add
- *	@addr_type: address type
- *
- *	Add a device address to the device or increase the reference count if
- *	it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add);
-
-/**
- *	dev_addr_del	- Release a device address.
- *	@dev: device
- *	@addr: address to delete
- *	@addr_type: address type
- *
- *	Release reference to a device address and remove it from the device
- *	if the reference count drops to zero.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del(struct net_device *dev, unsigned char *addr,
-		 unsigned char addr_type)
-{
-	int err;
-	struct netdev_hw_addr *ha;
-
-	ASSERT_RTNL();
-
-	/*
-	 * We can not remove the first address from the list because
-	 * dev->dev_addr points to that.
-	 */
-	ha = list_first_entry(&dev->dev_addrs.list,
-			      struct netdev_hw_addr, list);
-	if (ha->addr == dev->dev_addr && ha->refcount == 1)
-		return -ENOENT;
-
-	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len,
-			    addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_del);
-
-/**
- *	dev_addr_add_multiple	- Add device addresses from another device
- *	@to_dev: device to which addresses will be added
- *	@from_dev: device from which addresses will be added
- *	@addr_type: address type - 0 means type will be used from from_dev
- *
- *	Add device addresses of the one device to another.
- **
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_add_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-				     to_dev->addr_len, addr_type);
-	if (!err)
-		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_addr_add_multiple);
-
-/**
- *	dev_addr_del_multiple	- Delete device addresses by another device
- *	@to_dev: device where the addresses will be deleted
- *	@from_dev: device by which addresses the addresses will be deleted
- *	@addr_type: address type - 0 means type will used from from_dev
- *
- *	Deletes addresses in to device by the list of addresses in from device.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_addr_del_multiple(struct net_device *to_dev,
-			  struct net_device *from_dev,
-			  unsigned char addr_type)
-{
-	ASSERT_RTNL();
-
-	if (from_dev->addr_len != to_dev->addr_len)
-		return -EINVAL;
-	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
-			       to_dev->addr_len, addr_type);
-	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
-	return 0;
-}
-EXPORT_SYMBOL(dev_addr_del_multiple);
-
 /* multicast addresses handling functions */
 
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -4336,57 +4028,6 @@ int __dev_addr_add(struct dev_addr_list **list, int *count,
 	return 0;
 }
 
-/**
- *	dev_unicast_delete	- Release secondary unicast address.
- *	@dev: device
- *	@addr: address to delete
- *
- *	Release reference to a secondary unicast address and remove it
- *	from the device if the reference count drops to zero.
- *
- * 	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_delete(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	netif_addr_lock_bh(dev);
-	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_delete);
-
-/**
- *	dev_unicast_add		- add a secondary unicast address
- *	@dev: device
- *	@addr: address to add
- *
- *	Add a secondary unicast address to the device or increase
- *	the reference count if it already exists.
- *
- *	The caller must hold the rtnl_mutex.
- */
-int dev_unicast_add(struct net_device *dev, void *addr)
-{
-	int err;
-
-	ASSERT_RTNL();
-
-	netif_addr_lock_bh(dev);
-	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
-			    NETDEV_HW_ADDR_T_UNICAST);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_add);
 
 int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
 		    struct dev_addr_list **from, int *from_count)
@@ -4436,71 +4077,6 @@ void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
 }
 EXPORT_SYMBOL_GPL(__dev_addr_unsync);
 
-/**
- *	dev_unicast_sync - Synchronize device's unicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- *	Add newly added addresses to the destination device and release
- *	addresses that have no users left. The source device must be
- *	locked by netif_tx_lock_bh.
- *
- *	This function is intended to be called from the dev->set_rx_mode
- *	function of layered software devices.
- */
-int dev_unicast_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	if (to->addr_len != from->addr_len)
-		return -EINVAL;
-
-	netif_addr_lock_bh(to);
-	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
-	if (!err)
-		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
-	return err;
-}
-EXPORT_SYMBOL(dev_unicast_sync);
-
-/**
- *	dev_unicast_unsync - Remove synchronized addresses from the destination device
- *	@to: destination device
- *	@from: source device
- *
- *	Remove all addresses that were added to the destination device by
- *	dev_unicast_sync(). This function is intended to be called from the
- *	dev->stop function of layered software devices.
- */
-void dev_unicast_unsync(struct net_device *to, struct net_device *from)
-{
-	if (to->addr_len != from->addr_len)
-		return;
-
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
-	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
-	__dev_set_rx_mode(to);
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_unicast_unsync);
-
-void dev_unicast_flush(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-	__hw_addr_flush(&dev->uc);
-	netif_addr_unlock_bh(dev);
-}
-EXPORT_SYMBOL(dev_unicast_flush);
-
-static void dev_unicast_init(struct net_device *dev)
-{
-	__hw_addr_init(&dev->uc);
-}
-
-
 static void __dev_addr_discard(struct dev_addr_list **list)
 {
 	struct dev_addr_list *tmp;
@@ -5153,7 +4729,7 @@ static void rollback_registered_many(struct list_head *head)
 		/*
 		 *	Flush the unicast and multicast chains
 		 */
-		dev_unicast_flush(dev);
+		dev_uc_flush(dev);
 		dev_addr_discard(dev);
 
 		if (dev->netdev_ops->ndo_uninit)
@@ -5734,7 +5310,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (dev_addr_init(dev))
 		goto free_rx;
 
-	dev_unicast_init(dev);
+	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
 
@@ -5968,7 +5544,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	/*
 	 *	Flush the unicast and multicast chains
 	 */
-	dev_unicast_flush(dev);
+	dev_uc_flush(dev);
 	dev_addr_discard(dev);
 
 	netdev_unregister_kobject(dev);
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
new file mode 100644
index 000000000000..7e52b6d18add
--- /dev/null
+++ b/net/core/dev_addr_lists.c
@@ -0,0 +1,478 @@
+/*
+ * net/core/dev_addr_lists.c - Functions for handling net device lists
+ * Copyright (c) 2010 Jiri Pirko <jpirko@redhat.com>
+ *
+ * This file contains functions for working with unicast, multicast and device
+ * address lists.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/netdevice.h>
+#include <linux/rtnetlink.h>
+#include <linux/list.h>
+
+/*
+ * General list handling functions
+ */
+
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+	int alloc_size;
+
+	if (addr_len > MAX_ADDR_LEN)
+		return -EINVAL;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    ha->type == addr_type) {
+			ha->refcount++;
+			return 0;
+		}
+	}
+
+	/* No match: allocate a new entry, at least L1_CACHE_BYTES in size. */
+	alloc_size = sizeof(*ha);
+	if (alloc_size < L1_CACHE_BYTES)
+		alloc_size = L1_CACHE_BYTES;
+	ha = kmalloc(alloc_size, GFP_ATOMIC);
+	if (!ha)
+		return -ENOMEM;
+	memcpy(ha->addr, addr, addr_len);
+	ha->type = addr_type;
+	ha->refcount = 1;
+	ha->synced = false;
+	list_add_tail_rcu(&ha->list, &list->list);
+	list->count++;
+	return 0;
+}
+
+static void ha_rcu_free(struct rcu_head *head)
+{
+	struct netdev_hw_addr *ha;
+
+	ha = container_of(head, struct netdev_hw_addr, rcu_head);
+	kfree(ha);
+}
+
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+
+	list_for_each_entry(ha, &list->list, list) {
+		if (!memcmp(ha->addr, addr, addr_len) &&
+		    (ha->type == addr_type || !addr_type)) {
+			if (--ha->refcount)
+				return 0;
+			list_del_rcu(&ha->list);
+			call_rcu(&ha->rcu_head, ha_rcu_free);
+			list->count--;
+			return 0;
+		}
+	}
+	return -ENOENT;
+}
+
+static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len,
+				  unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha, *ha2;
+	unsigned char type;
+
+	list_for_each_entry(ha, &from_list->list, list) {
+		type = addr_type ? addr_type : ha->type;
+		err = __hw_addr_add(to_list, ha->addr, addr_len, type);
+		if (err)
+			goto unroll;
+	}
+	return 0;
+
+unroll:
+	list_for_each_entry(ha2, &from_list->list, list) {
+		if (ha2 == ha)
+			break;
+		type = addr_type ? addr_type : ha2->type;
+		__hw_addr_del(to_list, ha2->addr, addr_len, type);
+	}
+	return err;
+}
+
+static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len,
+				   unsigned char addr_type)
+{
+	struct netdev_hw_addr *ha;
+
+	/* Drop one reference in to_list per from_list entry; addr_type 0 means
+	 * each entry is matched using its own type, as in the add path. */
+	list_for_each_entry(ha, &from_list->list, list)
+		__hw_addr_del(to_list, ha->addr, addr_len,
+			      addr_type ? addr_type : ha->type);
+}
+
+static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
+			  int addr_len)
+{
+	int err = 0;
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (!ha->synced) {
+			err = __hw_addr_add(to_list, ha->addr,
+					    addr_len, ha->type);
+			if (err)
+				break;
+			ha->synced = true;
+			ha->refcount++;
+		} else if (ha->refcount == 1) {
+			__hw_addr_del(to_list, ha->addr, addr_len, ha->type);
+			__hw_addr_del(from_list, ha->addr, addr_len, ha->type);
+		}
+	}
+	return err;
+}
+
+static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
+			     int addr_len)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &from_list->list, list) {
+		if (ha->synced) {
+			__hw_addr_del(to_list, ha->addr,
+				      addr_len, ha->type);
+			ha->synced = false;
+			__hw_addr_del(from_list, ha->addr,
+				      addr_len, ha->type);
+		}
+	}
+}
+
+static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+{
+	struct netdev_hw_addr *ha, *tmp;
+
+	list_for_each_entry_safe(ha, tmp, &list->list, list) {
+		list_del_rcu(&ha->list);
+		call_rcu(&ha->rcu_head, ha_rcu_free);
+	}
+	list->count = 0;
+}
+
+static void __hw_addr_init(struct netdev_hw_addr_list *list)
+{
+	INIT_LIST_HEAD(&list->list);
+	list->count = 0;
+}
+
+/*
+ * Device addresses handling functions
+ */
+
+/**
+ *	dev_addr_flush - Flush device address list
+ *	@dev: device
+ *
+ *	Flush device address list and reset ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+void dev_addr_flush(struct net_device *dev)
+{
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_flush(&dev->dev_addrs);
+	dev->dev_addr = NULL;
+}
+EXPORT_SYMBOL(dev_addr_flush);
+
+/**
+ *	dev_addr_init - Init device address list
+ *	@dev: device
+ *
+ *	Init device address list and create the first element,
+ *	used by ->dev_addr.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_init(struct net_device *dev)
+{
+	unsigned char addr[MAX_ADDR_LEN];
+	struct netdev_hw_addr *ha;
+	int err;
+
+	/* rtnl_mutex must be held here */
+
+	__hw_addr_init(&dev->dev_addrs);
+	memset(addr, 0, sizeof(addr));
+	err = __hw_addr_add(&dev->dev_addrs, addr, sizeof(addr),
+			    NETDEV_HW_ADDR_T_LAN);
+	if (!err) {
+		/*
+		 * Get the first (previously created) address from the list
+		 * and set dev_addr pointer to this location.
+		 */
+		ha = list_first_entry(&dev->dev_addrs.list,
+				      struct netdev_hw_addr, list);
+		dev->dev_addr = ha->addr;
+	}
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_init);
+
+/**
+ *	dev_addr_add - Add a device address
+ *	@dev: device
+ *	@addr: address to add
+ *	@addr_type: address type
+ *
+ *	Add a device address to the device or increase the reference count if
+ *	it already exists.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	err = __hw_addr_add(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add);
+
+/**
+ *	dev_addr_del - Release a device address.
+ *	@dev: device
+ *	@addr: address to delete
+ *	@addr_type: address type
+ *
+ *	Release reference to a device address and remove it from the device
+ *	if the reference count drops to zero.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del(struct net_device *dev, unsigned char *addr,
+		 unsigned char addr_type)
+{
+	int err;
+	struct netdev_hw_addr *ha;
+
+	ASSERT_RTNL();
+
+	/*
+	 * dev->dev_addr points into the first list entry, so refuse to drop
+	 * that entry's last reference; any other address may be released.
+	 */
+	ha = list_first_entry(&dev->dev_addrs.list,
+			      struct netdev_hw_addr, list);
+	if (!memcmp(ha->addr, addr, dev->addr_len) &&
+	    (ha->type == addr_type || !addr_type) && ha->refcount == 1)
+		return -ENOENT;
+
+	err = __hw_addr_del(&dev->dev_addrs, addr, dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_del);
+
+/**
+ *	dev_addr_add_multiple - Add device addresses from another device
+ *	@to_dev: device to which addresses will be added
+ *	@from_dev: device from which addresses will be added
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Add device addresses of the one device to another.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_add_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	int err;
+
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	err = __hw_addr_add_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+				     to_dev->addr_len, addr_type);
+	if (!err)
+		call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_addr_add_multiple);
+
+/**
+ *	dev_addr_del_multiple - Delete device addresses by another device
+ *	@to_dev: device where the addresses will be deleted
+ *	@from_dev: device whose address list selects what to delete
+ *	@addr_type: address type - 0 means type will be used from from_dev
+ *
+ *	Deletes from to_dev the addresses found in from_dev's address list.
+ *
+ *	The caller must hold the rtnl_mutex.
+ */
+int dev_addr_del_multiple(struct net_device *to_dev,
+			  struct net_device *from_dev,
+			  unsigned char addr_type)
+{
+	ASSERT_RTNL();
+
+	if (from_dev->addr_len != to_dev->addr_len)
+		return -EINVAL;
+	__hw_addr_del_multiple(&to_dev->dev_addrs, &from_dev->dev_addrs,
+			       to_dev->addr_len, addr_type);
+	call_netdevice_notifiers(NETDEV_CHANGEADDR, to_dev);
+	return 0;
+}
+EXPORT_SYMBOL(dev_addr_del_multiple);
+
+/*
+ * Unicast list handling functions
+ */
+
+/**
+ *	dev_uc_add - Add a secondary unicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a secondary unicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_uc_add(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_add);
+
+/**
+ *	dev_uc_del - Release secondary unicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a secondary unicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_uc_del(struct net_device *dev, unsigned char *addr)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del(&dev->uc, addr, dev->addr_len,
+			    NETDEV_HW_ADDR_T_UNICAST);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_del);
+
+/**
+ *	dev_uc_sync - Synchronize device's unicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_tx_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_rx_mode
+ *	function of layered software devices.
+ */
+int dev_uc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->uc, &from->uc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_uc_sync);
+
+/**
+ *	dev_uc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_uc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_uc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->uc, &from->uc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_uc_unsync);
+
+/**
+ *	dev_uc_flush - Flush unicast addresses
+ *	@dev: device
+ *
+ *	Flush unicast addresses.
+ */
+void dev_uc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->uc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_uc_flush);
+
+/**
+ *	dev_uc_init - Init unicast address list
+ *	@dev: device
+ *
+ *	Init unicast address list.
+ */
+void dev_uc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->uc);
+}
+EXPORT_SYMBOL(dev_uc_init);
+
+/*
+ * Multicast list handling functions
+ */
+
+/* To be filled here */
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 2175e6d5cc8d..8fdca56bb08f 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -67,7 +67,7 @@ static int dsa_slave_open(struct net_device *dev)
 		return -ENETDOWN;
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr)) {
-		err = dev_unicast_add(master, dev->dev_addr);
+		err = dev_uc_add(master, dev->dev_addr);
 		if (err < 0)
 			goto out;
 	}
@@ -90,7 +90,7 @@ clear_allmulti:
 		dev_set_allmulti(master, -1);
 del_unicast:
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 out:
 	return err;
 }
@@ -101,14 +101,14 @@ static int dsa_slave_close(struct net_device *dev)
 	struct net_device *master = p->parent->dst->master_netdev;
 
 	dev_mc_unsync(master, dev);
-	dev_unicast_unsync(master, dev);
+	dev_uc_unsync(master, dev);
 	if (dev->flags & IFF_ALLMULTI)
 		dev_set_allmulti(master, -1);
 	if (dev->flags & IFF_PROMISC)
 		dev_set_promiscuity(master, -1);
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 
 	return 0;
 }
@@ -130,7 +130,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev)
 	struct net_device *master = p->parent->dst->master_netdev;
 
 	dev_mc_sync(master, dev);
-	dev_unicast_sync(master, dev);
+	dev_uc_sync(master, dev);
 }
 
 static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
@@ -147,13 +147,13 @@ static int dsa_slave_set_mac_address(struct net_device *dev, void *a)
 		goto out;
 
 	if (compare_ether_addr(addr->sa_data, master->dev_addr)) {
-		err = dev_unicast_add(master, addr->sa_data);
+		err = dev_uc_add(master, addr->sa_data);
 		if (err < 0)
 			return err;
 	}
 
 	if (compare_ether_addr(dev->dev_addr, master->dev_addr))
-		dev_unicast_delete(master, dev->dev_addr);
+		dev_uc_del(master, dev->dev_addr);
 
 out:
 	memcpy(dev->dev_addr, addr->sa_data, ETH_ALEN);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 1612d417d10c..48c1e0ae565f 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1705,9 +1705,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_unicast_add(dev, i->addr);
+			return dev_uc_add(dev, i->addr);
 		else
-			return dev_unicast_delete(dev, i->addr);
+			return dev_uc_del(dev, i->addr);
 		break;
 	default:
 		break;
-- 
cgit v1.2.3


From 22bedad3ce112d5ca1eaf043d4990fa2ed698c87 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 1 Apr 2010 21:22:57 +0000
Subject: net: convert multicast list to list_head

Converts the list and the core manipulating with it to be the same as uc_list.

+uses two functions for adding/removing mc address (normal and "global"
 variant) instead of a function parameter.
+removes dev_mcast.c completely.
+exposes netdev_hw_addr_list_* macros along with __hw_addr_* functions for
 manipulation with lists on a sandbox (used in bonding and 80211 drivers)

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/hw/nes/nes_nic.c            |   7 +-
 drivers/infiniband/ulp/ipoib/ipoib_multicast.c |   9 +-
 drivers/media/dvb/dvb-core/dvb_net.c           |  10 +-
 drivers/net/3c505.c                            |   7 +-
 drivers/net/3c523.c                            |   7 +-
 drivers/net/3c527.c                            |   6 +-
 drivers/net/7990.c                             |   6 +-
 drivers/net/8139cp.c                           |   6 +-
 drivers/net/8139too.c                          |   6 +-
 drivers/net/82596.c                            |   6 +-
 drivers/net/a2065.c                            |   6 +-
 drivers/net/amd8111e.c                         |   6 +-
 drivers/net/arm/am79c961a.c                    |   6 +-
 drivers/net/arm/at91_ether.c                   |   6 +-
 drivers/net/arm/ixp4xx_eth.c                   |   8 +-
 drivers/net/arm/ks8695net.c                    |  10 +-
 drivers/net/at1700.c                           |   6 +-
 drivers/net/atl1c/atl1c_main.c                 |   6 +-
 drivers/net/atl1e/atl1e_main.c                 |   6 +-
 drivers/net/atlx/atl2.c                        |   6 +-
 drivers/net/atlx/atlx.c                        |   6 +-
 drivers/net/atp.c                              |   6 +-
 drivers/net/au1000_eth.c                       |   6 +-
 drivers/net/b44.c                              |   6 +-
 drivers/net/bcm63xx_enet.c                     |   6 +-
 drivers/net/benet/be_cmds.c                    |   6 +-
 drivers/net/bfin_mac.c                         |   6 +-
 drivers/net/bmac.c                             |  12 +-
 drivers/net/bnx2.c                             |   5 +-
 drivers/net/bnx2x_main.c                       |  18 +-
 drivers/net/bonding/bond_main.c                | 157 +++++--------
 drivers/net/bonding/bonding.h                  |   2 +-
 drivers/net/cassini.c                          |  12 +-
 drivers/net/chelsio/pm3393.c                   |   7 +-
 drivers/net/cpmac.c                            |  16 +-
 drivers/net/cris/eth_v10.c                     |   6 +-
 drivers/net/cxgb3/xgmac.c                      |   8 +-
 drivers/net/davinci_emac.c                     |   7 +-
 drivers/net/declance.c                         |   6 +-
 drivers/net/defxx.c                            |   6 +-
 drivers/net/depca.c                            |   6 +-
 drivers/net/dl2k.c                             |   6 +-
 drivers/net/dm9000.c                           |   6 +-
 drivers/net/e100.c                             |   6 +-
 drivers/net/e1000/e1000_main.c                 |   7 +-
 drivers/net/e1000e/netdev.c                    |   7 +-
 drivers/net/eepro.c                            |   6 +-
 drivers/net/eexpress.c                         |   6 +-
 drivers/net/ehea/ehea_main.c                   |   6 +-
 drivers/net/enic/enic_main.c                   |   6 +-
 drivers/net/epic100.c                          |   6 +-
 drivers/net/ethoc.c                            |   6 +-
 drivers/net/ewrk3.c                            |   6 +-
 drivers/net/fealnx.c                           |   6 +-
 drivers/net/fec.c                              |  10 +-
 drivers/net/fec_mpc52xx.c                      |   6 +-
 drivers/net/forcedeth.c                        |  10 +-
 drivers/net/fs_enet/mac-fcc.c                  |   6 +-
 drivers/net/fs_enet/mac-fec.c                  |   6 +-
 drivers/net/fs_enet/mac-scc.c                  |   6 +-
 drivers/net/gianfar.c                          |   9 +-
 drivers/net/greth.c                            |   6 +-
 drivers/net/hamachi.c                          |   8 +-
 drivers/net/hp100.c                            |   6 +-
 drivers/net/ibm_newemac/core.c                 |  11 +-
 drivers/net/ibmlana.c                          |   8 +-
 drivers/net/ibmveth.c                          |   6 +-
 drivers/net/igb/igb_main.c                     |   6 +-
 drivers/net/igbvf/netdev.c                     |   6 +-
 drivers/net/ioc3-eth.c                         |   6 +-
 drivers/net/ipg.c                              |   6 +-
 drivers/net/iseries_veth.c                     |   6 +-
 drivers/net/ixgb/ixgb_main.c                   |   6 +-
 drivers/net/ixgbe/ixgbe_common.c               |   6 +-
 drivers/net/ixgbevf/vf.c                       |   6 +-
 drivers/net/jme.c                              |   6 +-
 drivers/net/korina.c                           |   6 +-
 drivers/net/ks8851.c                           |   6 +-
 drivers/net/ks8851_mll.c                       |  12 +-
 drivers/net/ksz884x.c                          |  11 +-
 drivers/net/lib82596.c                         |   6 +-
 drivers/net/lib8390.c                          |   6 +-
 drivers/net/ll_temac_main.c                    |  16 +-
 drivers/net/lp486e.c                           |   6 +-
 drivers/net/macb.c                             |   6 +-
 drivers/net/mace.c                             |   6 +-
 drivers/net/macmace.c                          |   6 +-
 drivers/net/mlx4/en_netdev.c                   |   6 +-
 drivers/net/mv643xx_eth.c                      |   6 +-
 drivers/net/myri10ge/myri10ge.c                |   8 +-
 drivers/net/natsemi.c                          |   6 +-
 drivers/net/netxen/netxen_nic_hw.c             |  12 +-
 drivers/net/ni52.c                             |   6 +-
 drivers/net/niu.c                              |   5 +-
 drivers/net/octeon/octeon_mgmt.c               |   6 +-
 drivers/net/pci-skeleton.c                     |   6 +-
 drivers/net/pcmcia/axnet_cs.c                  |   6 +-
 drivers/net/pcmcia/fmvj18x_cs.c                |   6 +-
 drivers/net/pcmcia/nmclan_cs.c                 |   6 +-
 drivers/net/pcmcia/smc91c92_cs.c               |   8 +-
 drivers/net/pcmcia/xirc2ps_cs.c                |   6 +-
 drivers/net/pcnet32.c                          |   6 +-
 drivers/net/ps3_gelic_net.c                    |   6 +-
 drivers/net/qlcnic/qlcnic_hw.c                 |   6 +-
 drivers/net/qlge/qlge_main.c                   |   6 +-
 drivers/net/r6040.c                            |  10 +-
 drivers/net/r8169.c                            |   6 +-
 drivers/net/s2io.c                             |   8 +-
 drivers/net/sb1250-mac.c                       |   6 +-
 drivers/net/sc92031.c                          |   6 +-
 drivers/net/sfc/efx.c                          |   6 +-
 drivers/net/sis190.c                           |   6 +-
 drivers/net/sis900.c                           |  10 +-
 drivers/net/skfp/skfddi.c                      |  12 +-
 drivers/net/skge.c                             |  12 +-
 drivers/net/sky2.c                             |   6 +-
 drivers/net/smc911x.c                          |  13 +-
 drivers/net/smc9194.c                          |  13 +-
 drivers/net/smc91x.c                           |   8 +-
 drivers/net/smsc911x.c                         |   6 +-
 drivers/net/smsc9420.c                         |   6 +-
 drivers/net/sonic.c                            |   6 +-
 drivers/net/spider_net.c                       |   6 +-
 drivers/net/starfire.c                         |  10 +-
 drivers/net/stmmac/dwmac100.c                  |   6 +-
 drivers/net/stmmac/dwmac1000_core.c            |   6 +-
 drivers/net/sun3_82586.c                       |   6 +-
 drivers/net/sunbmac.c                          |   6 +-
 drivers/net/sundance.c                         |   6 +-
 drivers/net/sungem.c                           |   6 +-
 drivers/net/sunhme.c                           |  12 +-
 drivers/net/sunlance.c                         |   6 +-
 drivers/net/sunqe.c                            |   6 +-
 drivers/net/sunvnet.c                          |   8 +-
 drivers/net/tc35815.c                          |   6 +-
 drivers/net/tehuti.c                           |   6 +-
 drivers/net/tg3.c                              |   6 +-
 drivers/net/tlan.c                             |   8 +-
 drivers/net/tokenring/3c359.c                  |  12 +-
 drivers/net/tokenring/ibmtr.c                  |  12 +-
 drivers/net/tokenring/lanstreamer.c            |  12 +-
 drivers/net/tokenring/olympic.c                |  12 +-
 drivers/net/tokenring/tms380tr.c               |  12 +-
 drivers/net/tsi108_eth.c                       |   6 +-
 drivers/net/tulip/de2104x.c                    |  12 +-
 drivers/net/tulip/de4x5.c                      |  10 +-
 drivers/net/tulip/dmfe.c                       |  12 +-
 drivers/net/tulip/tulip_core.c                 |  27 ++-
 drivers/net/tulip/uli526x.c                    |   6 +-
 drivers/net/tulip/winbond-840.c                |  12 +-
 drivers/net/typhoon.c                          |   6 +-
 drivers/net/ucc_geth.c                         |   8 +-
 drivers/net/usb/asix.c                         |  16 +-
 drivers/net/usb/catc.c                         |   6 +-
 drivers/net/usb/dm9601.c                       |   6 +-
 drivers/net/usb/mcs7830.c                      |   6 +-
 drivers/net/usb/smsc75xx.c                     |   6 +-
 drivers/net/usb/smsc95xx.c                     |   6 +-
 drivers/net/via-rhine.c                        |   6 +-
 drivers/net/via-velocity.c                     |   6 +-
 drivers/net/virtio_net.c                       |   5 +-
 drivers/net/vmxnet3/vmxnet3_drv.c              |   6 +-
 drivers/net/vxge/vxge-main.c                   |   6 +-
 drivers/net/wireless/adm8211.c                 |  12 +-
 drivers/net/wireless/ath/ar9170/main.c         |  14 +-
 drivers/net/wireless/ath/ath5k/base.c          |  17 +-
 drivers/net/wireless/libertas/main.c           |  12 +-
 drivers/net/wireless/libertas_tf/main.c        |  14 +-
 drivers/net/wireless/mwl8k.c                   |  24 +-
 drivers/net/wireless/orinoco/hw.c              |   6 +-
 drivers/net/wireless/orinoco/hw.h              |   1 -
 drivers/net/wireless/ray_cs.c                  |  12 +-
 drivers/net/wireless/rndis_wlan.c              |   6 +-
 drivers/net/wireless/rtl818x/rtl8180_dev.c     |   6 +-
 drivers/net/wireless/rtl818x/rtl8187_dev.c     |   4 +-
 drivers/net/wireless/wl12xx/wl1271_main.c      |  25 +-
 drivers/net/wireless/zd1201.c                  |   6 +-
 drivers/net/wireless/zd1211rw/zd_mac.c         |  13 +-
 drivers/net/yellowfin.c                        |  12 +-
 drivers/s390/net/qeth_l2_main.c                |   5 +-
 drivers/scsi/fcoe/fcoe.c                       |   4 +-
 drivers/staging/arlan/arlan-main.c             |   9 +-
 drivers/staging/et131x/et131x_netdev.c         |   6 +-
 drivers/staging/slicoss/slicoss.c              |   6 +-
 drivers/staging/vt6655/device_main.c           |   6 +-
 drivers/staging/vt6656/main_usb.c              |   6 +-
 drivers/staging/wavelan/wavelan.c              |  10 +-
 drivers/staging/wavelan/wavelan_cs.c           |  12 +-
 drivers/staging/winbond/wbusb.c                |   6 +-
 drivers/staging/wlags49_h2/wl_netdev.c         |  12 +-
 include/linux/netdevice.h                      |  82 +++----
 include/net/mac80211.h                         |   2 +-
 net/802/garp.c                                 |   4 +-
 net/appletalk/ddp.c                            |   2 +-
 net/bluetooth/bnep/netdev.c                    |   8 +-
 net/core/Makefile                              |   5 +-
 net/core/dev.c                                 | 145 +-----------
 net/core/dev_addr_lists.c                      | 305 +++++++++++++++++++++++--
 net/core/dev_mcast.c                           | 232 -------------------
 net/decnet/dn_dev.c                            |  12 +-
 net/ipv4/igmp.c                                |   4 +-
 net/ipv4/netfilter/ipt_CLUSTERIP.c             |   4 +-
 net/ipv6/mcast.c                               |   4 +-
 net/mac80211/driver-ops.h                      |   8 +-
 net/mac80211/ieee80211_i.h                     |   3 +-
 net/mac80211/iface.c                           |   6 +-
 net/mac80211/main.c                            |   2 +-
 net/packet/af_packet.c                         |   4 +-
 208 files changed, 1137 insertions(+), 1327 deletions(-)
 delete mode 100644 net/core/dev_mcast.c

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c
index 91fdde382e82..ce9ef6bc865c 100644
--- a/drivers/infiniband/hw/nes/nes_nic.c
+++ b/drivers/infiniband/hw/nes/nes_nic.c
@@ -876,7 +876,7 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 	if (!mc_all_on) {
 		char *addrs;
 		int i;
-		struct dev_mc_list *mcaddr;
+		struct netdev_hw_addr *ha;
 
 		addrs = kmalloc(ETH_ALEN * mc_count, GFP_ATOMIC);
 		if (!addrs) {
@@ -884,9 +884,8 @@ static void nes_netdev_set_multicast_list(struct net_device *netdev)
 			goto unlock;
 		}
 		i = 0;
-		netdev_for_each_mc_addr(mcaddr, netdev)
-			memcpy(get_addr(addrs, i++),
-			       mcaddr->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev)
+			memcpy(get_addr(addrs, i++), ha->addr, ETH_ALEN);
 
 		perfect_filter_register_address = NES_IDX_PERFECT_FILTER_LOW +
 						pft_entries_preallocated * 0x8;
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
index 19eba3c877cb..c8a0f7dab5b0 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
@@ -783,7 +783,7 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 	struct ipoib_dev_priv *priv =
 		container_of(work, struct ipoib_dev_priv, restart_task);
 	struct net_device *dev = priv->dev;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct ipoib_mcast *mcast, *tmcast;
 	LIST_HEAD(remove_list);
 	unsigned long flags;
@@ -808,14 +808,13 @@ void ipoib_mcast_restart_task(struct work_struct *work)
 		clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
 
 	/* Mark all of the entries that are found or don't exist */
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		union ib_gid mgid;
 
-		if (!ipoib_mcast_addr_is_valid(mclist->dmi_addr,
-					       dev->broadcast))
+		if (!ipoib_mcast_addr_is_valid(ha->addr, dev->broadcast))
 			continue;
 
-		memcpy(mgid.raw, mclist->dmi_addr + 4, sizeof mgid);
+		memcpy(mgid.raw, ha->addr + 4, sizeof mgid);
 
 		mcast = __ipoib_mcast_find(dev, &mgid);
 		if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
diff --git a/drivers/media/dvb/dvb-core/dvb_net.c b/drivers/media/dvb/dvb-core/dvb_net.c
index dba1c84058b7..cccea412088b 100644
--- a/drivers/media/dvb/dvb-core/dvb_net.c
+++ b/drivers/media/dvb/dvb-core/dvb_net.c
@@ -1109,14 +1109,14 @@ static int dvb_net_feed_stop(struct net_device *dev)
 }
 
 
-static int dvb_set_mc_filter (struct net_device *dev, struct dev_mc_list *mc)
+static int dvb_set_mc_filter(struct net_device *dev, unsigned char *addr)
 {
 	struct dvb_net_priv *priv = netdev_priv(dev);
 
 	if (priv->multi_num == DVB_NET_MULTICAST_MAX)
 		return -ENOMEM;
 
-	memcpy(priv->multi_macs[priv->multi_num], mc->dmi_addr, 6);
+	memcpy(priv->multi_macs[priv->multi_num], addr, ETH_ALEN);
 
 	priv->multi_num++;
 	return 0;
@@ -1140,7 +1140,7 @@ static void wq_set_multicast_list (struct work_struct *work)
 		dprintk("%s: allmulti mode\n", dev->name);
 		priv->rx_mode = RX_MODE_ALL_MULTI;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 
 		dprintk("%s: set_mc_list, %d entries\n",
 			dev->name, netdev_mc_count(dev));
@@ -1148,8 +1148,8 @@ static void wq_set_multicast_list (struct work_struct *work)
 		priv->rx_mode = RX_MODE_MULTI;
 		priv->multi_num = 0;
 
-		netdev_for_each_mc_addr(mc, dev)
-			dvb_set_mc_filter(dev, mc);
+		netdev_for_each_mc_addr(ha, dev)
+			dvb_set_mc_filter(dev, ha->addr);
 	}
 
 	netif_addr_unlock_bh(dev);
diff --git a/drivers/net/3c505.c b/drivers/net/3c505.c
index 04b5bba19021..81c8b31e629f 100644
--- a/drivers/net/3c505.c
+++ b/drivers/net/3c505.c
@@ -1216,7 +1216,7 @@ static int elp_close(struct net_device *dev)
 static void elp_set_mc_list(struct net_device *dev)
 {
 	elp_device *adapter = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 	unsigned long flags;
 
@@ -1231,8 +1231,9 @@ static void elp_set_mc_list(struct net_device *dev)
 		adapter->tx_pcb.command = CMD_LOAD_MULTICAST_LIST;
 		adapter->tx_pcb.length = 6 * netdev_mc_count(dev);
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
-			memcpy(adapter->tx_pcb.data.multicast[i++], dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev)
+			memcpy(adapter->tx_pcb.data.multicast[i++],
+			       ha->addr, 6);
 		adapter->got[CMD_LOAD_MULTICAST_LIST] = 0;
 		if (!send_pcb(dev, &adapter->tx_pcb))
 			pr_err("%s: couldn't send set_multicast command\n", dev->name);
diff --git a/drivers/net/3c523.c b/drivers/net/3c523.c
index beed4fa10c6e..966cb12e1e0a 100644
--- a/drivers/net/3c523.c
+++ b/drivers/net/3c523.c
@@ -625,7 +625,7 @@ static int init586(struct net_device *dev)
 	volatile struct iasetup_cmd_struct *ias_cmd;
 	volatile struct tdr_cmd_struct *tdr_cmd;
 	volatile struct mcsetup_cmd_struct *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs = netdev_mc_count(dev);
 
 	ptr = (void *) ((char *) p->scb + sizeof(struct scb_struct));
@@ -788,8 +788,9 @@ static int init586(struct net_device *dev)
 			mc_cmd->cmd_link = 0xffff;
 			mc_cmd->mc_cnt = num_addrs * 6;
 			i = 0;
-			netdev_for_each_mc_addr(dmi, dev)
-				memcpy((char *) mc_cmd->mc_list[i++], dmi->dmi_addr, 6);
+			netdev_for_each_mc_addr(ha, dev)
+				memcpy((char *) mc_cmd->mc_list[i++],
+				       ha->addr, 6);
 			p->scb->cbl_offset = make16(mc_cmd);
 			p->scb->cmd = CUC_START;
 			elmc_id_attn586();
diff --git a/drivers/net/3c527.c b/drivers/net/3c527.c
index 5c07b147ec99..38395dfa4963 100644
--- a/drivers/net/3c527.c
+++ b/drivers/net/3c527.c
@@ -1533,7 +1533,7 @@ static void do_mc32_set_multicast_list(struct net_device *dev, int retry)
 	{
 		unsigned char block[62];
 		unsigned char *bp;
-		struct dev_mc_list *dmc;
+		struct netdev_hw_addr *ha;
 
 		if(retry==0)
 			lp->mc_list_valid = 0;
@@ -1543,8 +1543,8 @@ static void do_mc32_set_multicast_list(struct net_device *dev, int retry)
 			block[0]=netdev_mc_count(dev);
 			bp=block+2;
 
-			netdev_for_each_mc_addr(dmc, dev) {
-				memcpy(bp, dmc->dmi_addr, 6);
+			netdev_for_each_mc_addr(ha, dev) {
+				memcpy(bp, ha->addr, 6);
 				bp+=6;
 			}
 			if(mc32_command_nowait(dev, 2, block,
diff --git a/drivers/net/7990.c b/drivers/net/7990.c
index 4e9a5a20b6a6..818837d8ffe7 100644
--- a/drivers/net/7990.c
+++ b/drivers/net/7990.c
@@ -595,7 +595,7 @@ static void lance_load_multicast (struct net_device *dev)
         struct lance_private *lp = netdev_priv(dev);
         volatile struct lance_init_block *ib = lp->init_block;
         volatile u16 *mcast_table = (u16 *)&ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
         char *addrs;
         u32 crc;
 
@@ -610,8 +610,8 @@ static void lance_load_multicast (struct net_device *dev)
         ib->filter [1] = 0;
 
         /* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-                addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
                 /* multicast address? */
                 if (!(*addrs & 1))
diff --git a/drivers/net/8139cp.c b/drivers/net/8139cp.c
index 3d4406b16658..e4e2aef11017 100644
--- a/drivers/net/8139cp.c
+++ b/drivers/net/8139cp.c
@@ -909,11 +909,11 @@ static void __cp_set_rx_mode (struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index b4efc913978b..f61784c3c4a2 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -2502,11 +2502,11 @@ static void __set_rx_mode (struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
diff --git a/drivers/net/82596.c b/drivers/net/82596.c
index f94d17d78bb0..3a28b1f451d9 100644
--- a/drivers/net/82596.c
+++ b/drivers/net/82596.c
@@ -1542,7 +1542,7 @@ static void set_multicast_list(struct net_device *dev)
 	}
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		unsigned char *cp;
 		struct mc_cmd *cmd;
 
@@ -1552,10 +1552,10 @@ static void set_multicast_list(struct net_device *dev)
 		cmd->cmd.command = CmdMulticastList;
 		cmd->mc_cnt = cnt * ETH_ALEN;
 		cp = cmd->mc_addrs;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (!cnt--)
 				break;
-			memcpy(cp, dmi->dmi_addr, ETH_ALEN);
+			memcpy(cp, ha->addr, ETH_ALEN);
 			if (i596_debug > 1)
 				DEB(DEB_MULTI,printk(KERN_INFO "%s: Adding address %pM\n",
 						dev->name, cp));
diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index bd4d829eca12..7cce643793c3 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -603,7 +603,7 @@ static void lance_load_multicast (struct net_device *dev)
 	struct lance_private *lp = netdev_priv(dev);
 	volatile struct lance_init_block *ib = lp->init_block;
 	volatile u16 *mcast_table = (u16 *)&ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -618,8 +618,8 @@ static void lance_load_multicast (struct net_device *dev)
 	ib->filter [1] = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/amd8111e.c b/drivers/net/amd8111e.c
index b8a59d255b49..d002c764a26c 100644
--- a/drivers/net/amd8111e.c
+++ b/drivers/net/amd8111e.c
@@ -1377,7 +1377,7 @@ list to the device.
 */
 static void amd8111e_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	struct amd8111e_priv *lp = netdev_priv(dev);
 	u32 mc_filter[2] ;
 	int bit_num;
@@ -1408,8 +1408,8 @@ static void amd8111e_set_multicast_list(struct net_device *dev)
 	/* load all the multicast addresses in the logic filter */
 	lp->options |= OPTION_MULTICAST_ENABLE;
 	mc_filter[1] = mc_filter[0] = 0;
-	netdev_for_each_mc_addr(mc_ptr, dev) {
-		bit_num = (ether_crc_le(ETH_ALEN, mc_ptr->dmi_addr) >> 26) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		bit_num = (ether_crc_le(ETH_ALEN, ha->addr) >> 26) & 0x3f;
 		mc_filter[bit_num >> 5] |= 1 << (bit_num & 31);
 	}
 	amd8111e_writeq(*(u64*)mc_filter,lp->mmio+ LADRF);
diff --git a/drivers/net/arm/am79c961a.c b/drivers/net/arm/am79c961a.c
index f1f58c5e27bf..a4b5b08276f8 100644
--- a/drivers/net/arm/am79c961a.c
+++ b/drivers/net/arm/am79c961a.c
@@ -383,12 +383,12 @@ static void am79c961_setmulticastlist (struct net_device *dev)
 	} else if (dev->flags & IFF_ALLMULTI) {
 		memset(multi_hash, 0xff, sizeof(multi_hash));
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		memset(multi_hash, 0x00, sizeof(multi_hash));
 
-		netdev_for_each_mc_addr(dmi, dev)
-			am79c961_mc_hash(dmi->dmi_addr, multi_hash);
+		netdev_for_each_mc_addr(ha, dev)
+			am79c961_mc_hash(ha->addr, multi_hash);
 	}
 
 	spin_lock_irqsave(&priv->chip_lock, flags);
diff --git a/drivers/net/arm/at91_ether.c b/drivers/net/arm/at91_ether.c
index 8b23d5a175bf..f31e8b6cbf73 100644
--- a/drivers/net/arm/at91_ether.c
+++ b/drivers/net/arm/at91_ether.c
@@ -556,14 +556,14 @@ static int hash_get_index(__u8 *addr)
  */
 static void at91ether_sethashtable(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ixp4xx_eth.c
index 6e2ae1d06df1..f9d168775d06 100644
--- a/drivers/net/arm/ixp4xx_eth.c
+++ b/drivers/net/arm/ixp4xx_eth.c
@@ -735,7 +735,7 @@ static int eth_xmit(struct sk_buff *skb, struct net_device *dev)
 static void eth_set_mcast_list(struct net_device *dev)
 {
 	struct port *port = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u8 diffs[ETH_ALEN], *addr;
 	int i;
 
@@ -748,11 +748,11 @@ static void eth_set_mcast_list(struct net_device *dev)
 	memset(diffs, 0, ETH_ALEN);
 
 	addr = NULL;
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (!addr)
-			addr = mclist->dmi_addr; /* first MAC address */
+			addr = ha->addr; /* first MAC address */
 		for (i = 0; i < ETH_ALEN; i++)
-			diffs[i] |= addr[i] ^ mclist->dmi_addr[i];
+			diffs[i] |= addr[i] ^ ha->addr[i];
 	}
 
 	for (i = 0; i < ETH_ALEN; i++) {
diff --git a/drivers/net/arm/ks8695net.c b/drivers/net/arm/ks8695net.c
index e7810b74f396..6ec245c6394b 100644
--- a/drivers/net/arm/ks8695net.c
+++ b/drivers/net/arm/ks8695net.c
@@ -331,16 +331,16 @@ ks8695_init_partial_multicast(struct ks8695_priv *ksp,
 {
 	u32 low, high;
 	int i;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	i = 0;
-	netdev_for_each_mc_addr(dmi, ndev) {
+	netdev_for_each_mc_addr(ha, ndev) {
 		/* Ran out of space in chip? */
 		BUG_ON(i == KS8695_NR_ADDRESSES);
 
-		low = (dmi->dmi_addr[2] << 24) | (dmi->dmi_addr[3] << 16) |
-		      (dmi->dmi_addr[4] << 8) | (dmi->dmi_addr[5]);
-		high = (dmi->dmi_addr[0] << 8) | (dmi->dmi_addr[1]);
+		low = (ha->addr[2] << 24) | (ha->addr[3] << 16) |
+		      (ha->addr[4] << 8) | (ha->addr[5]);
+		high = (ha->addr[0] << 8) | (ha->addr[1]);
 
 		ks8695_writereg(ksp, KS8695_AAL_(i), low);
 		ks8695_writereg(ksp, KS8695_AAH_(i), AAH_E | high);
diff --git a/drivers/net/at1700.c b/drivers/net/at1700.c
index 309843ab8869..e2a549a60e25 100644
--- a/drivers/net/at1700.c
+++ b/drivers/net/at1700.c
@@ -848,12 +848,12 @@ set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0x00, sizeof(mc_filter));
 		outb(1, ioaddr + RX_MODE);	/* Ignore almost all multicasts. */
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit =
-				ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26;
+				ether_crc_le(ETH_ALEN, ha->addr) >> 26;
 			mc_filter[bit >> 3] |= (1 << bit);
 		}
 		outb(0x02, ioaddr + RX_MODE);	/* Use normal mode. */
diff --git a/drivers/net/atl1c/atl1c_main.c b/drivers/net/atl1c/atl1c_main.c
index a5508e1b261b..3d7051135c3a 100644
--- a/drivers/net/atl1c/atl1c_main.c
+++ b/drivers/net/atl1c/atl1c_main.c
@@ -354,7 +354,7 @@ static void atl1c_set_multi(struct net_device *netdev)
 {
 	struct atl1c_adapter *adapter = netdev_priv(netdev);
 	struct atl1c_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 mac_ctrl_data;
 	u32 hash_value;
 
@@ -377,8 +377,8 @@ static void atl1c_set_multi(struct net_device *netdev)
 	AT_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl1c_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl1c_hash_mc_addr(hw, ha->addr);
 		atl1c_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atl1e/atl1e_main.c b/drivers/net/atl1e/atl1e_main.c
index 7231b5779122..b6605d433e91 100644
--- a/drivers/net/atl1e/atl1e_main.c
+++ b/drivers/net/atl1e/atl1e_main.c
@@ -284,7 +284,7 @@ static void atl1e_set_multi(struct net_device *netdev)
 {
 	struct atl1e_adapter *adapter = netdev_priv(netdev);
 	struct atl1e_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 mac_ctrl_data = 0;
 	u32 hash_value;
 
@@ -307,8 +307,8 @@ static void atl1e_set_multi(struct net_device *netdev)
 	AT_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl1e_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl1e_hash_mc_addr(hw, ha->addr);
 		atl1e_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atlx/atl2.c b/drivers/net/atlx/atl2.c
index 199f2c9ce749..078d9d1b427c 100644
--- a/drivers/net/atlx/atl2.c
+++ b/drivers/net/atlx/atl2.c
@@ -135,7 +135,7 @@ static void atl2_set_multi(struct net_device *netdev)
 {
 	struct atl2_adapter *adapter = netdev_priv(netdev);
 	struct atl2_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	u32 hash_value;
 
@@ -157,8 +157,8 @@ static void atl2_set_multi(struct net_device *netdev)
 	ATL2_WRITE_REG_ARRAY(hw, REG_RX_HASH_TABLE, 1, 0);
 
 	/* comoute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atl2_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atl2_hash_mc_addr(hw, ha->addr);
 		atl2_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atlx/atlx.c b/drivers/net/atlx/atlx.c
index 72f3306352e2..f979ea2d6d3c 100644
--- a/drivers/net/atlx/atlx.c
+++ b/drivers/net/atlx/atlx.c
@@ -123,7 +123,7 @@ static void atlx_set_multi(struct net_device *netdev)
 {
 	struct atlx_adapter *adapter = netdev_priv(netdev);
 	struct atlx_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	u32 hash_value;
 
@@ -144,8 +144,8 @@ static void atlx_set_multi(struct net_device *netdev)
 	iowrite32(0, (hw->hw_addr + REG_RX_HASH_TABLE) + (1 << 2));
 
 	/* compute mc addresses' hash value ,and put it into hash table */
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
-		hash_value = atlx_hash_mc_addr(hw, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash_value = atlx_hash_mc_addr(hw, ha->addr);
 		atlx_hash_set(hw, hash_value);
 	}
 }
diff --git a/drivers/net/atp.c b/drivers/net/atp.c
index 6ad16205dc17..0d730c8329d4 100644
--- a/drivers/net/atp.c
+++ b/drivers/net/atp.c
@@ -883,11 +883,11 @@ static void set_rx_mode_8012(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		new_mode = CMR2h_Normal;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+		netdev_for_each_mc_addr(ha, dev) {
+			int filterbit = ether_crc_le(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
 		}
 		new_mode = CMR2h_Normal;
diff --git a/drivers/net/au1000_eth.c b/drivers/net/au1000_eth.c
index 4da191b87b0d..29631593cc23 100644
--- a/drivers/net/au1000_eth.c
+++ b/drivers/net/au1000_eth.c
@@ -957,12 +957,12 @@ static void au1000_multicast_list(struct net_device *dev)
 		aup->mac->control &= ~MAC_PROMISCUOUS;
 		printk(KERN_INFO "%s: Pass all multicast\n", dev->name);
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 mc_filter[2];	/* Multicast hash filter */
 
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev)
-			set_bit(ether_crc(ETH_ALEN, mclist->dmi_addr)>>26,
+		netdev_for_each_mc_addr(ha, dev)
+			set_bit(ether_crc(ETH_ALEN, ha->addr)>>26,
 					(long *)mc_filter);
 		aup->mac->multi_hash_high = mc_filter[1];
 		aup->mac->multi_hash_low = mc_filter[0];
diff --git a/drivers/net/b44.c b/drivers/net/b44.c
index 332c60356285..b2c5fd7b63af 100644
--- a/drivers/net/b44.c
+++ b/drivers/net/b44.c
@@ -1680,15 +1680,15 @@ static struct net_device_stats *b44_get_stats(struct net_device *dev)
 
 static int __b44_load_mcast(struct b44 *bp, struct net_device *dev)
 {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i, num_ents;
 
 	num_ents = min_t(int, netdev_mc_count(dev), B44_MCAST_TABLE_SIZE);
 	i = 0;
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i == num_ents)
 			break;
-		__b44_cam_write(bp, mclist->dmi_addr, i++ + 1);
+		__b44_cam_write(bp, ha->addr, i++ + 1);
 	}
 	return i+1;
 }
diff --git a/drivers/net/bcm63xx_enet.c b/drivers/net/bcm63xx_enet.c
index 37eb8021de1c..51733404478e 100644
--- a/drivers/net/bcm63xx_enet.c
+++ b/drivers/net/bcm63xx_enet.c
@@ -603,7 +603,7 @@ static int bcm_enet_set_mac_address(struct net_device *dev, void *p)
 static void bcm_enet_set_multicast_list(struct net_device *dev)
 {
 	struct bcm_enet_priv *priv;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	u32 val;
 	int i;
 
@@ -631,14 +631,14 @@ static void bcm_enet_set_multicast_list(struct net_device *dev)
 	}
 
 	i = 0;
-	netdev_for_each_mc_addr(mc_list, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		u8 *dmi_addr;
 		u32 tmp;
 
 		if (i == 3)
 			break;
 		/* update perfect match registers */
-		dmi_addr = mc_list->dmi_addr;
+		dmi_addr = ha->addr;
 		tmp = (dmi_addr[2] << 24) | (dmi_addr[3] << 16) |
 			(dmi_addr[4] << 8) | dmi_addr[5];
 		enet_writel(priv, tmp, ENET_PML_REG(i + 1));
diff --git a/drivers/net/benet/be_cmds.c b/drivers/net/benet/be_cmds.c
index 9f53d9e86e2b..61a9afdb83f4 100644
--- a/drivers/net/benet/be_cmds.c
+++ b/drivers/net/benet/be_cmds.c
@@ -1159,13 +1159,13 @@ int be_cmd_multicast_set(struct be_adapter *adapter, u32 if_id,
 	req->interface_id = if_id;
 	if (netdev) {
 		int i;
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 
 		req->num_mac = cpu_to_le16(netdev_mc_count(netdev));
 
 		i = 0;
-		netdev_for_each_mc_addr(mc, netdev)
-			memcpy(req->mac[i].byte, mc->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev)
+			memcpy(req->mac[i].byte, ha->addr, ETH_ALEN);
 	} else {
 		req->promiscuous = 1;
 	}
diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c
index 587f93cf03f6..c488cea8f455 100644
--- a/drivers/net/bfin_mac.c
+++ b/drivers/net/bfin_mac.c
@@ -812,14 +812,14 @@ static void bfin_mac_timeout(struct net_device *dev)
 static void bfin_mac_multicast_hash(struct net_device *dev)
 {
 	u32 emac_hashhi, emac_hashlo;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
 	emac_hashhi = emac_hashlo = 0;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* skip non-multicast addresses */
 		if (!(*addrs & 1))
diff --git a/drivers/net/bmac.c b/drivers/net/bmac.c
index 119468e76323..1245e9835765 100644
--- a/drivers/net/bmac.c
+++ b/drivers/net/bmac.c
@@ -971,7 +971,7 @@ bmac_remove_multi(struct net_device *dev,
  */
 static void bmac_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct bmac_data *bp = netdev_priv(dev);
 	int num_addrs = netdev_mc_count(dev);
 	unsigned short rx_cfg;
@@ -1000,8 +1000,8 @@ static void bmac_set_multicast(struct net_device *dev)
 			rx_cfg = bmac_rx_on(dev, 0, 0);
 			XXDEBUG(("bmac: multi disabled, rx_cfg=%#08x\n", rx_cfg));
 		} else {
-			netdev_for_each_mc_addr(dmi, dev)
-				bmac_addhash(bp, dmi->dmi_addr);
+			netdev_for_each_mc_addr(ha, dev)
+				bmac_addhash(bp, ha->addr);
 			bmac_update_hash_table_mask(dev, bp);
 			rx_cfg = bmac_rx_on(dev, 1, 0);
 			XXDEBUG(("bmac: multi enabled, rx_cfg=%#08x\n", rx_cfg));
@@ -1015,7 +1015,7 @@ static void bmac_set_multicast(struct net_device *dev)
 
 static void bmac_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i;
 	unsigned short rx_cfg;
@@ -1039,8 +1039,8 @@ static void bmac_set_multicast(struct net_device *dev)
 
 		for(i = 0; i < 4; i++) hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if(!(*addrs & 1))
 				continue;
diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c
index 381887ba677c..0b69ffb7951d 100644
--- a/drivers/net/bnx2.c
+++ b/drivers/net/bnx2.c
@@ -3544,7 +3544,6 @@ bnx2_set_rx_mode(struct net_device *dev)
 	}
 	else {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *mclist;
 		u32 mc_filter[NUM_MC_HASH_REGISTERS];
 		u32 regidx;
 		u32 bit;
@@ -3552,8 +3551,8 @@ bnx2_set_rx_mode(struct net_device *dev)
 
 		memset(mc_filter, 0, 4 * NUM_MC_HASH_REGISTERS);
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			bit = crc & 0xff;
 			regidx = (bit & 0xe0) >> 5;
 			bit &= 0x1f;
diff --git a/drivers/net/bnx2x_main.c b/drivers/net/bnx2x_main.c
index f4ea99d06c77..fa9275c2ef5c 100644
--- a/drivers/net/bnx2x_main.c
+++ b/drivers/net/bnx2x_main.c
@@ -11496,21 +11496,21 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
 	else { /* some multicasts */
 		if (CHIP_IS_E1(bp)) {
 			int i, old, offset;
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 			struct mac_configuration_cmd *config =
 						bnx2x_sp(bp, mcast_config);
 
 			i = 0;
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				config->config_table[i].
 					cam_entry.msb_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[0]);
+					swab16(*(u16 *)&ha->addr[0]);
 				config->config_table[i].
 					cam_entry.middle_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[2]);
+					swab16(*(u16 *)&ha->addr[2]);
 				config->config_table[i].
 					cam_entry.lsb_mac_addr =
-					swab16(*(u16 *)&mclist->dmi_addr[4]);
+					swab16(*(u16 *)&ha->addr[4]);
 				config->config_table[i].cam_entry.flags =
 							cpu_to_le16(port);
 				config->config_table[i].
@@ -11564,18 +11564,18 @@ static void bnx2x_set_rx_mode(struct net_device *dev)
 				      0);
 		} else { /* E1H */
 			/* Accept one or more multicasts */
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 			u32 mc_filter[MC_HASH_SIZE];
 			u32 crc, bit, regidx;
 			int i;
 
 			memset(mc_filter, 0, 4 * MC_HASH_SIZE);
 
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				DP(NETIF_MSG_IFUP, "Adding mcast MAC: %pM\n",
-				   mclist->dmi_addr);
+				   ha->addr);
 
-				crc = crc32c_le(0, mclist->dmi_addr, ETH_ALEN);
+				crc = crc32c_le(0, ha->addr, ETH_ALEN);
 				bit = (crc >> 24) & 0xff;
 				regidx = bit >> 5;
 				bit &= 0x1f;
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d6ae63b2cf00..22682f1c8473 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -761,32 +761,6 @@ static int bond_check_dev_link(struct bonding *bond,
 
 /*----------------------------- Multicast list ------------------------------*/
 
-/*
- * Returns 0 if dmi1 and dmi2 are the same, non-0 otherwise
- */
-static inline int bond_is_dmi_same(const struct dev_mc_list *dmi1,
-				   const struct dev_mc_list *dmi2)
-{
-	return memcmp(dmi1->dmi_addr, dmi2->dmi_addr, dmi1->dmi_addrlen) == 0 &&
-			dmi1->dmi_addrlen == dmi2->dmi_addrlen;
-}
-
-/*
- * returns dmi entry if found, NULL otherwise
- */
-static struct dev_mc_list *bond_mc_list_find_dmi(struct dev_mc_list *dmi,
-						 struct dev_mc_list *mc_list)
-{
-	struct dev_mc_list *idmi;
-
-	for (idmi = mc_list; idmi; idmi = idmi->next) {
-		if (bond_is_dmi_same(dmi, idmi))
-			return idmi;
-	}
-
-	return NULL;
-}
-
 /*
  * Push the promiscuity flag down to appropriate slaves
  */
@@ -839,18 +813,18 @@ static int bond_set_allmulti(struct bonding *bond, int inc)
  * Add a Multicast address to slaves
  * according to mode
  */
-static void bond_mc_add(struct bonding *bond, void *addr, int alen)
+static void bond_mc_add(struct bonding *bond, void *addr)
 {
 	if (USES_PRIMARY(bond->params.mode)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave)
-			dev_mc_add(bond->curr_active_slave->dev, addr, alen, 0);
+			dev_mc_add(bond->curr_active_slave->dev, addr);
 	} else {
 		struct slave *slave;
 		int i;
 
 		bond_for_each_slave(bond, slave, i)
-			dev_mc_add(slave->dev, addr, alen, 0);
+			dev_mc_add(slave->dev, addr);
 	}
 }
 
@@ -858,18 +832,17 @@ static void bond_mc_add(struct bonding *bond, void *addr, int alen)
  * Remove a multicast address from slave
  * according to mode
  */
-static void bond_mc_delete(struct bonding *bond, void *addr, int alen)
+static void bond_mc_del(struct bonding *bond, void *addr)
 {
 	if (USES_PRIMARY(bond->params.mode)) {
 		/* write lock already acquired */
 		if (bond->curr_active_slave)
-			dev_mc_delete(bond->curr_active_slave->dev, addr,
-				      alen, 0);
+			dev_mc_del(bond->curr_active_slave->dev, addr);
 	} else {
 		struct slave *slave;
 		int i;
 		bond_for_each_slave(bond, slave, i) {
-			dev_mc_delete(slave->dev, addr, alen, 0);
+			dev_mc_del(slave->dev, addr);
 		}
 	}
 }
@@ -895,50 +868,6 @@ static void bond_resend_igmp_join_requests(struct bonding *bond)
 	rcu_read_unlock();
 }
 
-/*
- * Totally destroys the mc_list in bond
- */
-static void bond_mc_list_destroy(struct bonding *bond)
-{
-	struct dev_mc_list *dmi;
-
-	dmi = bond->mc_list;
-	while (dmi) {
-		bond->mc_list = dmi->next;
-		kfree(dmi);
-		dmi = bond->mc_list;
-	}
-
-	bond->mc_list = NULL;
-}
-
-/*
- * Copy all the Multicast addresses from src to the bonding device dst
- */
-static int bond_mc_list_copy(struct dev_mc_list *mc_list, struct bonding *bond,
-			     gfp_t gfp_flag)
-{
-	struct dev_mc_list *dmi, *new_dmi;
-
-	for (dmi = mc_list; dmi; dmi = dmi->next) {
-		new_dmi = kmalloc(sizeof(struct dev_mc_list), gfp_flag);
-
-		if (!new_dmi) {
-			/* FIXME: Potential memory leak !!! */
-			return -ENOMEM;
-		}
-
-		new_dmi->next = bond->mc_list;
-		bond->mc_list = new_dmi;
-		new_dmi->dmi_addrlen = dmi->dmi_addrlen;
-		memcpy(new_dmi->dmi_addr, dmi->dmi_addr, dmi->dmi_addrlen);
-		new_dmi->dmi_users = dmi->dmi_users;
-		new_dmi->dmi_gusers = dmi->dmi_gusers;
-	}
-
-	return 0;
-}
-
 /*
  * flush all members of flush->mc_list from device dev->mc_list
  */
@@ -946,16 +875,16 @@ static void bond_mc_list_flush(struct net_device *bond_dev,
 			       struct net_device *slave_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
-	for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
-		dev_mc_delete(slave_dev, dmi->dmi_addr, dmi->dmi_addrlen, 0);
+	netdev_for_each_mc_addr(ha, bond_dev)
+		dev_mc_del(slave_dev, ha->addr);
 
 	if (bond->params.mode == BOND_MODE_8023AD) {
 		/* del lacpdu mc addr from mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
-		dev_mc_delete(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
+		dev_mc_del(slave_dev, lacpdu_multicast);
 	}
 }
 
@@ -969,7 +898,7 @@ static void bond_mc_list_flush(struct net_device *bond_dev,
 static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 			 struct slave *old_active)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	if (!USES_PRIMARY(bond->params.mode))
 		/* nothing to do -  mc list is already up-to-date on
@@ -984,9 +913,8 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(old_active->dev, -1);
 
-		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_delete(old_active->dev, dmi->dmi_addr,
-				      dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond->dev)
+			dev_mc_del(old_active->dev, ha->addr);
 	}
 
 	if (new_active) {
@@ -997,9 +925,8 @@ static void bond_mc_swap(struct bonding *bond, struct slave *new_active,
 		if (bond->dev->flags & IFF_ALLMULTI)
 			dev_set_allmulti(new_active->dev, 1);
 
-		for (dmi = bond->dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_add(new_active->dev, dmi->dmi_addr,
-				   dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond->dev)
+			dev_mc_add(new_active->dev, ha->addr);
 		bond_resend_igmp_join_requests(bond);
 	}
 }
@@ -1406,7 +1333,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 	struct bonding *bond = netdev_priv(bond_dev);
 	const struct net_device_ops *slave_ops = slave_dev->netdev_ops;
 	struct slave *new_slave = NULL;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct sockaddr addr;
 	int link_reporting;
 	int old_features = bond_dev->features;
@@ -1492,7 +1419,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 			/* Flush unicast and multicast addresses */
 			dev_uc_flush(bond_dev);
-			dev_addr_discard(bond_dev);
+			dev_mc_flush(bond_dev);
 
 			if (slave_dev->type != ARPHRD_ETHER)
 				bond_setup_by_slave(bond_dev, slave_dev);
@@ -1601,9 +1528,8 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 
 		netif_addr_lock_bh(bond_dev);
 		/* upload master's mc_list to new slave */
-		for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next)
-			dev_mc_add(slave_dev, dmi->dmi_addr,
-				   dmi->dmi_addrlen, 0);
+		netdev_for_each_mc_addr(ha, bond_dev)
+			dev_mc_add(slave_dev, ha->addr);
 		netif_addr_unlock_bh(bond_dev);
 	}
 
@@ -1611,7 +1537,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
 		/* add lacpdu mc addr to mc list */
 		u8 lacpdu_multicast[ETH_ALEN] = MULTICAST_LACPDU_ADDR;
 
-		dev_mc_add(slave_dev, lacpdu_multicast, ETH_ALEN, 0);
+		dev_mc_add(slave_dev, lacpdu_multicast);
 	}
 
 	bond_add_vlans_on_slave(bond, slave_dev);
@@ -3913,10 +3839,24 @@ static int bond_do_ioctl(struct net_device *bond_dev, struct ifreq *ifr, int cmd
 	return res;
 }
 
+static bool bond_addr_in_mc_list(unsigned char *addr,
+				 struct netdev_hw_addr_list *list,
+				 int addrlen)
+{
+	struct netdev_hw_addr *ha;
+
+	netdev_hw_addr_list_for_each(ha, list)
+		if (!memcmp(ha->addr, addr, addrlen))
+			return true;
+
+	return false;
+}
+
 static void bond_set_multicast_list(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
+	bool found;
 
 	/*
 	 * Do promisc before checking multicast_mode
@@ -3951,20 +3891,25 @@ static void bond_set_multicast_list(struct net_device *bond_dev)
 	bond->flags = bond_dev->flags;
 
 	/* looking for addresses to add to slaves' mc list */
-	for (dmi = bond_dev->mc_list; dmi; dmi = dmi->next) {
-		if (!bond_mc_list_find_dmi(dmi, bond->mc_list))
-			bond_mc_add(bond, dmi->dmi_addr, dmi->dmi_addrlen);
+	netdev_for_each_mc_addr(ha, bond_dev) {
+		found = bond_addr_in_mc_list(ha->addr, &bond->mc_list,
+					     bond_dev->addr_len);
+		if (!found)
+			bond_mc_add(bond, ha->addr);
 	}
 
 	/* looking for addresses to delete from slaves' list */
-	for (dmi = bond->mc_list; dmi; dmi = dmi->next) {
-		if (!bond_mc_list_find_dmi(dmi, bond_dev->mc_list))
-			bond_mc_delete(bond, dmi->dmi_addr, dmi->dmi_addrlen);
+	netdev_hw_addr_list_for_each(ha, &bond->mc_list) {
+		found = bond_addr_in_mc_list(ha->addr, &bond_dev->mc,
+					     bond_dev->addr_len);
+		if (!found)
+			bond_mc_del(bond, ha->addr);
 	}
 
 	/* save master's multicast list */
-	bond_mc_list_destroy(bond);
-	bond_mc_list_copy(bond_dev->mc_list, bond, GFP_ATOMIC);
+	__hw_addr_flush(&bond->mc_list);
+	__hw_addr_add_multiple(&bond->mc_list, &bond_dev->mc,
+			       bond_dev->addr_len, NETDEV_HW_ADDR_T_MULTICAST);
 
 	read_unlock(&bond->lock);
 }
@@ -4534,9 +4479,7 @@ static void bond_uninit(struct net_device *bond_dev)
 	if (bond->wq)
 		destroy_workqueue(bond->wq);
 
-	netif_addr_lock_bh(bond_dev);
-	bond_mc_list_destroy(bond);
-	netif_addr_unlock_bh(bond_dev);
+	__hw_addr_flush(&bond->mc_list);
 }
 
 /*------------------------- Module initialization ---------------------------*/
@@ -4908,6 +4851,8 @@ static int bond_init(struct net_device *bond_dev)
 	list_add_tail(&bond->bond_list, &bn->dev_list);
 
 	bond_prepare_sysfs_group(bond);
+
+	__hw_addr_init(&bond->mc_list);
 	return 0;
 }
 
diff --git a/drivers/net/bonding/bonding.h b/drivers/net/bonding/bonding.h
index 257a7a4dfce9..2aa336720591 100644
--- a/drivers/net/bonding/bonding.h
+++ b/drivers/net/bonding/bonding.h
@@ -202,7 +202,7 @@ struct bonding {
 	char     proc_file_name[IFNAMSIZ];
 #endif /* CONFIG_PROC_FS */
 	struct   list_head bond_list;
-	struct   dev_mc_list *mc_list;
+	struct   netdev_hw_addr_list mc_list;
 	int      (*xmit_hash_policy)(struct sk_buff *, int);
 	__be32   master_ip;
 	u16      flags;
diff --git a/drivers/net/cassini.c b/drivers/net/cassini.c
index 9bd155e4111c..bd857a20a755 100644
--- a/drivers/net/cassini.c
+++ b/drivers/net/cassini.c
@@ -2957,20 +2957,20 @@ static void cas_process_mc_list(struct cas *cp)
 {
 	u16 hash_table[16];
 	u32 crc;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i = 1;
 
 	memset(hash_table, 0, sizeof(hash_table));
-	netdev_for_each_mc_addr(dmi, cp->dev) {
+	netdev_for_each_mc_addr(ha, cp->dev) {
 		if (i <= CAS_MC_EXACT_MATCH_SIZE) {
 			/* use the alternate mac address registers for the
 			 * first 15 multicast addresses
 			 */
-			writel((dmi->dmi_addr[4] << 8) | dmi->dmi_addr[5],
+			writel((ha->addr[4] << 8) | ha->addr[5],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 0));
-			writel((dmi->dmi_addr[2] << 8) | dmi->dmi_addr[3],
+			writel((ha->addr[2] << 8) | ha->addr[3],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 1));
-			writel((dmi->dmi_addr[0] << 8) | dmi->dmi_addr[1],
+			writel((ha->addr[0] << 8) | ha->addr[1],
 			       cp->regs + REG_MAC_ADDRN(i*3 + 2));
 			i++;
 		}
@@ -2978,7 +2978,7 @@ static void cas_process_mc_list(struct cas *cp)
 			/* use hw hash table for the next series of
 			 * multicast addresses
 			 */
-			crc = ether_crc_le(ETH_ALEN, dmi->dmi_addr);
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			crc >>= 24;
 			hash_table[crc >> 4] |= 1 << (15 - (crc & 0xf));
 		}
diff --git a/drivers/net/chelsio/pm3393.c b/drivers/net/chelsio/pm3393.c
index a6eb30a6e2b9..13fd9628db1d 100644
--- a/drivers/net/chelsio/pm3393.c
+++ b/drivers/net/chelsio/pm3393.c
@@ -376,12 +376,13 @@ static int pm3393_set_rx_mode(struct cmac *cmac, struct t1_rx_mode *rm)
 		rx_mode |= SUNI1x10GEXP_BITMSK_RXXG_MHASH_EN;
 	} else if (t1_rx_mode_mc_cnt(rm)) {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int bit;
 		u16 mc_filter[4] = { 0, };
 
-		netdev_for_each_mc_addr(dmi, t1_get_netdev(rm)) {
-			bit = (ether_crc(ETH_ALEN, dmi->dmi_addr) >> 23) & 0x3f; /* bit[23:28] */
+		netdev_for_each_mc_addr(ha, t1_get_netdev(rm)) {
+			/* bit[23:28] */
+			bit = (ether_crc(ETH_ALEN, ha->addr) >> 23) & 0x3f;
 			mc_filter[bit >> 4] |= 1 << (bit & 0xf);
 		}
 		pmwrite(cmac, SUNI1x10GEXP_REG_RXXG_MULTICAST_HASH_LOW, mc_filter[0]);
diff --git a/drivers/net/cpmac.c b/drivers/net/cpmac.c
index 60777fd90b33..bdfff784645c 100644
--- a/drivers/net/cpmac.c
+++ b/drivers/net/cpmac.c
@@ -328,7 +328,7 @@ static int cpmac_config(struct net_device *dev, struct ifmap *map)
 
 static void cpmac_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *iter;
+	struct netdev_hw_addr *ha;
 	u8 tmp;
 	u32 mbp, bit, hash[2] = { 0, };
 	struct cpmac_priv *priv = netdev_priv(dev);
@@ -348,19 +348,19 @@ static void cpmac_set_multicast_list(struct net_device *dev)
 			 * cpmac uses some strange mac address hashing
 			 * (not crc32)
 			 */
-			netdev_for_each_mc_addr(iter, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				bit = 0;
-				tmp = iter->dmi_addr[0];
+				tmp = ha->addr[0];
 				bit  ^= (tmp >> 2) ^ (tmp << 4);
-				tmp = iter->dmi_addr[1];
+				tmp = ha->addr[1];
 				bit  ^= (tmp >> 4) ^ (tmp << 2);
-				tmp = iter->dmi_addr[2];
+				tmp = ha->addr[2];
 				bit  ^= (tmp >> 6) ^ tmp;
-				tmp = iter->dmi_addr[3];
+				tmp = ha->addr[3];
 				bit  ^= (tmp >> 2) ^ (tmp << 4);
-				tmp = iter->dmi_addr[4];
+				tmp = ha->addr[4];
 				bit  ^= (tmp >> 4) ^ (tmp << 2);
-				tmp = iter->dmi_addr[5];
+				tmp = ha->addr[5];
 				bit  ^= (tmp >> 6) ^ tmp;
 				bit &= 0x3f;
 				hash[bit / 32] |= 1 << (bit % 32);
diff --git a/drivers/net/cris/eth_v10.c b/drivers/net/cris/eth_v10.c
index dd24aadb778c..59110bc119a8 100644
--- a/drivers/net/cris/eth_v10.c
+++ b/drivers/net/cris/eth_v10.c
@@ -1596,16 +1596,16 @@ set_multicast_list(struct net_device *dev)
 	} else {
 		/* MC mode, receive normal and MC packets */
 		char hash_ix;
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *baddr;
 
 		lo_bits = 0x00000000ul;
 		hi_bits = 0x00000000ul;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* Calculate the hash index for the GA registers */
 
 			hash_ix = 0;
-			baddr = dmi->dmi_addr;
+			baddr = ha->addr;
 			hash_ix ^= (*baddr) & 0x3f;
 			hash_ix ^= ((*baddr) >> 6) & 0x03;
 			++baddr;
diff --git a/drivers/net/cxgb3/xgmac.c b/drivers/net/cxgb3/xgmac.c
index c142a2132e9f..3af19a550372 100644
--- a/drivers/net/cxgb3/xgmac.c
+++ b/drivers/net/cxgb3/xgmac.c
@@ -311,16 +311,16 @@ int t3_mac_set_rx_mode(struct cmac *mac, struct net_device *dev)
 	if (dev->flags & IFF_ALLMULTI)
 		hash_lo = hash_hi = 0xffffffff;
 	else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int exact_addr_idx = mac->nucast;
 
 		hash_lo = hash_hi = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			if (exact_addr_idx < EXACT_ADDR_FILTERS)
 				set_addr_filter(mac, exact_addr_idx++,
-						dmi->dmi_addr);
+						ha->addr);
 			else {
-				int hash = hash_hw_addr(dmi->dmi_addr);
+				int hash = hash_hw_addr(ha->addr);
 
 				if (hash < 32)
 					hash_lo |= (1 << hash);
diff --git a/drivers/net/davinci_emac.c b/drivers/net/davinci_emac.c
index 2b8edd2efbf6..1f9df5c6a75a 100644
--- a/drivers/net/davinci_emac.c
+++ b/drivers/net/davinci_emac.c
@@ -952,13 +952,14 @@ static void emac_dev_mcast_set(struct net_device *ndev)
 			emac_add_mcast(priv, EMAC_ALL_MULTI_SET, NULL);
 		}
 		if (!netdev_mc_empty(ndev)) {
-			struct dev_mc_list *mc_ptr;
+			struct netdev_hw_addr *ha;
+
 			mbp_enable = (mbp_enable | EMAC_MBP_RXMCAST);
 			emac_add_mcast(priv, EMAC_ALL_MULTI_CLR, NULL);
 			/* program multicast address list into EMAC hardware */
-			netdev_for_each_mc_addr(mc_ptr, ndev) {
+			netdev_for_each_mc_addr(ha, ndev) {
 				emac_add_mcast(priv, EMAC_MULTICAST_ADD,
-					       (u8 *) mc_ptr->dmi_addr);
+					       (u8 *) ha->addr);
 			}
 		} else {
 			mbp_enable = (mbp_enable & ~EMAC_MBP_RXMCAST);
diff --git a/drivers/net/declance.c b/drivers/net/declance.c
index 8cf3cc6f20e2..fb3f0984c289 100644
--- a/drivers/net/declance.c
+++ b/drivers/net/declance.c
@@ -940,7 +940,7 @@ static void lance_load_multicast(struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	volatile u16 *ib = (volatile u16 *)dev->mem_start;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -959,8 +959,8 @@ static void lance_load_multicast(struct net_device *dev)
 	*lib_ptr(ib, filter[3], lp->type) = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/defxx.c b/drivers/net/defxx.c
index ed53a8d45f4e..e5667c55844e 100644
--- a/drivers/net/defxx.c
+++ b/drivers/net/defxx.c
@@ -2195,7 +2195,7 @@ static void dfx_ctl_set_multicast_list(struct net_device *dev)
 {
 	DFX_board_t *bp = netdev_priv(dev);
 	int					i;			/* used as index in for loop */
-	struct dev_mc_list	*dmi;		/* ptr to multicast addr entry */
+	struct netdev_hw_addr *ha;
 
 	/* Enable LLC frame promiscuous mode, if necessary */
 
@@ -2241,9 +2241,9 @@ static void dfx_ctl_set_multicast_list(struct net_device *dev)
 		/* Copy addresses to multicast address table, then update adapter CAM */
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			memcpy(&bp->mc_table[i++ * FDDI_K_ALEN],
-			       dmi->dmi_addr, FDDI_K_ALEN);
+			       ha->addr, FDDI_K_ALEN);
 
 		if (dfx_ctl_update_cam(bp) != DFX_K_SUCCESS)
 			{
diff --git a/drivers/net/depca.c b/drivers/net/depca.c
index 744c1928dfca..a88300a0d1e8 100644
--- a/drivers/net/depca.c
+++ b/drivers/net/depca.c
@@ -1272,7 +1272,7 @@ static void set_multicast_list(struct net_device *dev)
 static void SetMulticastFilter(struct net_device *dev)
 {
 	struct depca_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i, j, bit, byte;
 	u16 hashcode;
@@ -1287,8 +1287,8 @@ static void SetMulticastFilter(struct net_device *dev)
 			lp->init_block.mcast_table[i] = 0;
 		}
 		/* Add multicast addresses */
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 			if ((*addrs & 0x01) == 1) {	/* multicast address? */
 				crc = ether_crc(ETH_ALEN, addrs);
 				hashcode = (crc & 1);	/* hashcode is 6 LSb of CRC ... */
diff --git a/drivers/net/dl2k.c b/drivers/net/dl2k.c
index b05bad829827..6579225dbd91 100644
--- a/drivers/net/dl2k.c
+++ b/drivers/net/dl2k.c
@@ -1132,14 +1132,14 @@ set_multicast (struct net_device *dev)
 		/* Receive broadcast and multicast frames */
 		rx_mode = ReceiveBroadcast | ReceiveMulticast | ReceiveUnicast;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		/* Receive broadcast frames and multicast frames filtering
 		   by Hashtable */
 		rx_mode =
 		    ReceiveBroadcast | ReceiveMulticastHash | ReceiveUnicast;
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int bit, index = 0;
-			int crc = ether_crc_le (ETH_ALEN, mclist->dmi_addr);
+			int crc = ether_crc_le(ETH_ALEN, ha->addr);
 			/* The inverted high significant 6 bits of CRC are
 			   used as an index to hashtable */
 			for (bit = 0; bit < 6; bit++)
diff --git a/drivers/net/dm9000.c b/drivers/net/dm9000.c
index 1c67f1138ca7..989f2beb123b 100644
--- a/drivers/net/dm9000.c
+++ b/drivers/net/dm9000.c
@@ -724,7 +724,7 @@ static void
 dm9000_hash_table(struct net_device *dev)
 {
 	board_info_t *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	int i, oft;
 	u32 hash_val;
 	u16 hash_table[4];
@@ -752,8 +752,8 @@ dm9000_hash_table(struct net_device *dev)
 		rcr |= RCR_ALL;
 
 	/* the multicast address in Hash Table : 64 bits */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		hash_val = ether_crc_le(6, mcptr->dmi_addr) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		hash_val = ether_crc_le(6, ha->addr) & 0x3f;
 		hash_table[hash_val / 16] |= (u16) 1 << (hash_val % 16);
 	}
 
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index c0cd57656681..3e8d0005540f 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -1545,16 +1545,16 @@ static int e100_hw_init(struct nic *nic)
 static void e100_multi(struct nic *nic, struct cb *cb, struct sk_buff *skb)
 {
 	struct net_device *netdev = nic->netdev;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u16 i, count = min(netdev_mc_count(netdev), E100_MAX_MULTICAST_ADDRS);
 
 	cb->command = cpu_to_le16(cb_multi);
 	cb->u.multi.count = cpu_to_le16(count * ETH_ALEN);
 	i = 0;
-	netdev_for_each_mc_addr(list, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == count)
 			break;
-		memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &list->dmi_addr,
+		memcpy(&cb->u.multi.addr[i++ * ETH_ALEN], &ha->addr,
 			ETH_ALEN);
 	}
 }
diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c
index 8be6faee43e6..41330349b07a 100644
--- a/drivers/net/e1000/e1000_main.c
+++ b/drivers/net/e1000/e1000_main.c
@@ -2101,7 +2101,6 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 	struct e1000_hw *hw = &adapter->hw;
 	struct netdev_hw_addr *ha;
 	bool use_uc = false;
-	struct dev_addr_list *mc_ptr;
 	u32 rctl;
 	u32 hash_value;
 	int i, rar_entries = E1000_RAR_ENTRIES;
@@ -2161,17 +2160,17 @@ static void e1000_set_rx_mode(struct net_device *netdev)
 
 	WARN_ON(i == rar_entries);
 
-	netdev_for_each_mc_addr(mc_ptr, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == rar_entries) {
 			/* load any remaining addresses into the hash table */
 			u32 hash_reg, hash_bit, mta;
-			hash_value = e1000_hash_mc_addr(hw, mc_ptr->da_addr);
+			hash_value = e1000_hash_mc_addr(hw, ha->addr);
 			hash_reg = (hash_value >> 5) & 0x7F;
 			hash_bit = hash_value & 0x1F;
 			mta = (1 << hash_bit);
 			mcarray[hash_reg] |= mta;
 		} else {
-			e1000_rar_set(hw, mc_ptr->da_addr, i++);
+			e1000_rar_set(hw, ha->addr, i++);
 		}
 	}
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index 5304959ae1f7..02f7d20f3c80 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2567,7 +2567,7 @@ static void e1000_set_multi(struct net_device *netdev)
 {
 	struct e1000_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list;
 	u32 rctl;
 	int i;
@@ -2599,9 +2599,8 @@ static void e1000_set_multi(struct net_device *netdev)
 
 		/* prepare a packed array of only addresses. */
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, netdev)
-			memcpy(mta_list + (i++ * ETH_ALEN),
-			       mc_ptr->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, netdev)
+			memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 		e1000_update_mc_addr_list(hw, mta_list, i);
 		kfree(mta_list);
diff --git a/drivers/net/eepro.c b/drivers/net/eepro.c
index 15d6266b80c3..b01e6997403b 100644
--- a/drivers/net/eepro.c
+++ b/drivers/net/eepro.c
@@ -1287,7 +1287,7 @@ set_multicast_list(struct net_device *dev)
 	struct eepro_local *lp = netdev_priv(dev);
 	short ioaddr = dev->base_addr;
 	unsigned short mode;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int mc_count = netdev_mc_count(dev);
 
 	if (dev->flags&(IFF_ALLMULTI|IFF_PROMISC) || mc_count > 63)
@@ -1332,8 +1332,8 @@ set_multicast_list(struct net_device *dev)
 		outw(0, ioaddr + IO_PORT);
 		outw(6 * (mc_count + 1), ioaddr + IO_PORT);
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			eaddrs = (unsigned short *) dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			eaddrs = (unsigned short *) ha->addr;
 			outw(*eaddrs++, ioaddr + IO_PORT);
 			outw(*eaddrs++, ioaddr + IO_PORT);
 			outw(*eaddrs++, ioaddr + IO_PORT);
diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
index 7013dc8a6cbc..b3882fd8db6d 100644
--- a/drivers/net/eexpress.c
+++ b/drivers/net/eexpress.c
@@ -1576,7 +1576,7 @@ static void eexp_hw_init586(struct net_device *dev)
 
 static void eexp_setup_filter(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned short ioaddr = dev->base_addr;
 	int count = netdev_mc_count(dev);
 	int i;
@@ -1589,8 +1589,8 @@ static void eexp_setup_filter(struct net_device *dev)
 	outw(CONF_NR_MULTICAST & ~31, ioaddr+SM_PTR);
 	outw(6*count, ioaddr+SHADOW(CONF_NR_MULTICAST));
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
-		unsigned short *data = (unsigned short *) dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		unsigned short *data = (unsigned short *) ha->addr;
 
 		if (i == count)
 			break;
diff --git a/drivers/net/ehea/ehea_main.c b/drivers/net/ehea/ehea_main.c
index b004eaba3d7b..b97411aaa77c 100644
--- a/drivers/net/ehea/ehea_main.c
+++ b/drivers/net/ehea/ehea_main.c
@@ -1966,7 +1966,7 @@ static void ehea_add_multicast_entry(struct ehea_port *port, u8 *mc_mac_addr)
 static void ehea_set_multicast_list(struct net_device *dev)
 {
 	struct ehea_port *port = netdev_priv(dev);
-	struct dev_mc_list *k_mcl_entry;
+	struct netdev_hw_addr *ha;
 	int ret;
 
 	if (dev->flags & IFF_PROMISC) {
@@ -1997,8 +1997,8 @@ static void ehea_set_multicast_list(struct net_device *dev)
 			goto out;
 		}
 
-		netdev_for_each_mc_addr(k_mcl_entry, dev)
-			ehea_add_multicast_entry(port, k_mcl_entry->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			ehea_add_multicast_entry(port, ha->addr);
 
 	}
 out:
diff --git a/drivers/net/enic/enic_main.c b/drivers/net/enic/enic_main.c
index 6d70c349c954..1232887c243d 100644
--- a/drivers/net/enic/enic_main.c
+++ b/drivers/net/enic/enic_main.c
@@ -822,7 +822,7 @@ static int enic_set_mac_addr(struct net_device *netdev, char *addr)
 static void enic_set_multicast_list(struct net_device *netdev)
 {
 	struct enic *enic = netdev_priv(netdev);
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	int directed = 1;
 	int multicast = (netdev->flags & IFF_MULTICAST) ? 1 : 0;
 	int broadcast = (netdev->flags & IFF_BROADCAST) ? 1 : 0;
@@ -852,10 +852,10 @@ static void enic_set_multicast_list(struct net_device *netdev)
 	 */
 
 	i = 0;
-	netdev_for_each_mc_addr(list, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == mc_count)
 			break;
-		memcpy(mc_addr[i++], list->dmi_addr, ETH_ALEN);
+		memcpy(mc_addr[i++], ha->addr, ETH_ALEN);
 	}
 
 	for (i = 0; i < enic->mc_count; i++) {
diff --git a/drivers/net/epic100.c b/drivers/net/epic100.c
index 39c271b6be44..f6584a1ad3bc 100644
--- a/drivers/net/epic100.c
+++ b/drivers/net/epic100.c
@@ -1400,12 +1400,12 @@ static void set_rx_mode(struct net_device *dev)
 		outl(0x0004, ioaddr + RxCtrl);
 		return;
 	} else {					/* Never executed, for now. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit_nr =
-				ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+				ether_crc_le(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit_nr >> 3] |= (1 << bit_nr);
 		}
 	}
diff --git a/drivers/net/ethoc.c b/drivers/net/ethoc.c
index 209742304e20..f6be5aeaf94c 100644
--- a/drivers/net/ethoc.c
+++ b/drivers/net/ethoc.c
@@ -755,7 +755,7 @@ static void ethoc_set_multicast_list(struct net_device *dev)
 {
 	struct ethoc *priv = netdev_priv(dev);
 	u32 mode = ethoc_read(priv, MODER);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u32 hash[2] = { 0, 0 };
 
 	/* set loopback mode if requested */
@@ -783,8 +783,8 @@ static void ethoc_set_multicast_list(struct net_device *dev)
 		hash[0] = 0xffffffff;
 		hash[1] = 0xffffffff;
 	} else {
-		netdev_for_each_mc_addr(mc, dev) {
-			u32 crc = ether_crc(ETH_ALEN, mc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 crc = ether_crc(ETH_ALEN, ha->addr);
 			int bit = (crc >> 26) & 0x3f;
 			hash[bit >> 5] |= 1 << (bit & 0x1f);
 		}
diff --git a/drivers/net/ewrk3.c b/drivers/net/ewrk3.c
index a2bade586886..11ba70f49971 100644
--- a/drivers/net/ewrk3.c
+++ b/drivers/net/ewrk3.c
@@ -1169,7 +1169,7 @@ static void set_multicast_list(struct net_device *dev)
 static void SetMulticastFilter(struct net_device *dev)
 {
 	struct ewrk3_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u_long iobase = dev->base_addr;
 	int i;
 	char *addrs, bit, byte;
@@ -1213,8 +1213,8 @@ static void SetMulticastFilter(struct net_device *dev)
 		}
 
 		/* Update table */
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 			if ((*addrs & 0x01) == 1) {	/* multicast address? */
 				crc = ether_crc_le(ETH_ALEN, addrs);
 				hashcode = crc & ((1 << 9) - 1);	/* hashcode is 9 LSb of CRC */
diff --git a/drivers/net/fealnx.c b/drivers/net/fealnx.c
index 9d5ad08a119f..e8a2705237bf 100644
--- a/drivers/net/fealnx.c
+++ b/drivers/net/fealnx.c
@@ -1792,12 +1792,12 @@ static void __set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = CR_W_AB | CR_W_AM;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit;
-			bit = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26) ^ 0x3F;
+			bit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
 			mc_filter[bit >> 5] |= (1 << bit);
 		}
 		rx_mode = CR_W_AB | CR_W_AM;
diff --git a/drivers/net/fec.c b/drivers/net/fec.c
index 848eb1968abf..2b1651aee13f 100644
--- a/drivers/net/fec.c
+++ b/drivers/net/fec.c
@@ -954,7 +954,7 @@ fec_enet_close(struct net_device *dev)
 static void set_multicast_list(struct net_device *dev)
 {
 	struct fec_enet_private *fep = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned int i, bit, data, crc, tmp;
 	unsigned char hash;
 
@@ -984,16 +984,16 @@ static void set_multicast_list(struct net_device *dev)
 	writel(0, fep->hwp + FEC_GRP_HASH_TABLE_HIGH);
 	writel(0, fep->hwp + FEC_GRP_HASH_TABLE_LOW);
 
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		/* Only support group multicast for now */
-		if (!(dmi->dmi_addr[0] & 1))
+		if (!(ha->addr[0] & 1))
 			continue;
 
 		/* calculate crc32 value of mac address */
 		crc = 0xffffffff;
 
-		for (i = 0; i < dmi->dmi_addrlen; i++) {
-			data = dmi->dmi_addr[i];
+		for (i = 0; i < dev->addr_len; i++) {
+			data = ha->addr[i];
 			for (bit = 0; bit < 8; bit++, data >>= 1) {
 				crc = (crc >> 1) ^
 				(((crc ^ data) & 1) ? CRC32_POLY : 0);
diff --git a/drivers/net/fec_mpc52xx.c b/drivers/net/fec_mpc52xx.c
index 704155e2bdd1..667ba1391b9d 100644
--- a/drivers/net/fec_mpc52xx.c
+++ b/drivers/net/fec_mpc52xx.c
@@ -574,12 +574,12 @@ static void mpc52xx_fec_set_multicast_list(struct net_device *dev)
 			out_be32(&fec->gaddr2, 0xffffffff);
 		} else {
 			u32 crc;
-			struct dev_mc_list *dmi;
+			struct netdev_hw_addr *ha;
 			u32 gaddr1 = 0x00000000;
 			u32 gaddr2 = 0x00000000;
 
-			netdev_for_each_mc_addr(dmi, dev) {
-				crc = ether_crc_le(6, dmi->dmi_addr) >> 26;
+			netdev_for_each_mc_addr(ha, dev) {
+				crc = ether_crc_le(6, ha->addr) >> 26;
 				if (crc >= 32)
 					gaddr1 |= 1 << (crc-32);
 				else
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index ca05e5662029..6a2b64f0a7db 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -3103,12 +3103,14 @@ static void nv_set_multicast(struct net_device *dev)
 			if (dev->flags & IFF_ALLMULTI) {
 				alwaysOn[0] = alwaysOn[1] = alwaysOff[0] = alwaysOff[1] = 0;
 			} else {
-				struct dev_mc_list *walk;
+				struct netdev_hw_addr *ha;
 
-				netdev_for_each_mc_addr(walk, dev) {
+				netdev_for_each_mc_addr(ha, dev) {
+					unsigned char *addr = ha->addr;
 					u32 a, b;
-					a = le32_to_cpu(*(__le32 *) walk->dmi_addr);
-					b = le16_to_cpu(*(__le16 *) (&walk->dmi_addr[4]));
+
+					a = le32_to_cpu(*(__le32 *) addr);
+					b = le16_to_cpu(*(__le16 *) (&addr[4]));
 					alwaysOn[0] &= a;
 					alwaysOff[0] &= ~a;
 					alwaysOn[1] &= b;
diff --git a/drivers/net/fs_enet/mac-fcc.c b/drivers/net/fs_enet/mac-fcc.c
index cf4f674f9e2e..b3bad7c15d02 100644
--- a/drivers/net/fs_enet/mac-fcc.c
+++ b/drivers/net/fs_enet/mac-fcc.c
@@ -231,12 +231,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/fs_enet/mac-fec.c b/drivers/net/fs_enet/mac-fec.c
index cd2c6cca5f24..75974c6d201b 100644
--- a/drivers/net/fs_enet/mac-fec.c
+++ b/drivers/net/fs_enet/mac-fec.c
@@ -232,12 +232,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/fs_enet/mac-scc.c b/drivers/net/fs_enet/mac-scc.c
index c490a466cae1..0ab6a346a193 100644
--- a/drivers/net/fs_enet/mac-scc.c
+++ b/drivers/net/fs_enet/mac-scc.c
@@ -224,12 +224,12 @@ static void set_multicast_finish(struct net_device *dev)
 
 static void set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *pmc;
+	struct netdev_hw_addr *ha;
 
 	if ((dev->flags & IFF_PROMISC) == 0) {
 		set_multicast_start(dev);
-		netdev_for_each_mc_addr(pmc, dev)
-			set_multicast_one(dev, pmc->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			set_multicast_one(dev, ha->addr);
 		set_multicast_finish(dev);
 	} else
 		set_promiscuous_mode(dev);
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index b6715553cf17..fdd26c2b1a2f 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -2797,7 +2797,7 @@ static void adjust_link(struct net_device *dev)
  * whenever dev->flags is changed */
 static void gfar_set_multi(struct net_device *dev)
 {
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	struct gfar_private *priv = netdev_priv(dev);
 	struct gfar __iomem *regs = priv->gfargrp[0].regs;
 	u32 tempval;
@@ -2870,13 +2870,12 @@ static void gfar_set_multi(struct net_device *dev)
 			return;
 
 		/* Parse the list, and set the appropriate bits */
-		netdev_for_each_mc_addr(mc_ptr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (idx < em_num) {
-				gfar_set_mac_for_addr(dev, idx,
-						mc_ptr->dmi_addr);
+				gfar_set_mac_for_addr(dev, idx, ha->addr);
 				idx++;
 			} else
-				gfar_set_hash_for_addr(dev, mc_ptr->dmi_addr);
+				gfar_set_hash_for_addr(dev, ha->addr);
 		}
 	}
 
diff --git a/drivers/net/greth.c b/drivers/net/greth.c
index c5e0d28a6de9..fbe6ab6b919b 100644
--- a/drivers/net/greth.c
+++ b/drivers/net/greth.c
@@ -988,7 +988,7 @@ static u32 greth_hash_get_index(__u8 *addr)
 
 static void greth_set_hash_filter(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	struct greth_private *greth = netdev_priv(dev);
 	struct greth_regs *regs = (struct greth_regs *) greth->regs;
 	u32 mc_filter[2];
@@ -996,8 +996,8 @@ static void greth_set_hash_filter(struct net_device *dev)
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = greth_hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = greth_hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/hamachi.c b/drivers/net/hamachi.c
index 373546dd0831..2bfcca6d180c 100644
--- a/drivers/net/hamachi.c
+++ b/drivers/net/hamachi.c
@@ -1858,12 +1858,12 @@ static void set_rx_mode(struct net_device *dev)
 		/* Too many to match, or accept all multicasts. */
 		writew(0x000B, ioaddr + AddrMode);
 	} else if (!netdev_mc_empty(dev)) { /* Must use the CAM filter. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int i = 0;
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			writel(*(u32*)(mclist->dmi_addr), ioaddr + 0x100 + i*8);
-			writel(0x20000 | (*(u16*)&mclist->dmi_addr[4]),
+		netdev_for_each_mc_addr(ha, dev) {
+			writel(*(u32 *)(ha->addr), ioaddr + 0x100 + i*8);
+			writel(0x20000 | (*(u16 *)&ha->addr[4]),
 				   ioaddr + 0x104 + i*8);
 			i++;
 		}
diff --git a/drivers/net/hp100.c b/drivers/net/hp100.c
index b766a69bf0ca..86b2b4332341 100644
--- a/drivers/net/hp100.c
+++ b/drivers/net/hp100.c
@@ -2100,15 +2100,15 @@ static void hp100_set_multicast_list(struct net_device *dev)
 		} else {
 			int i, idx;
 			u_char *addrs;
-			struct dev_mc_list *dmi;
+			struct netdev_hw_addr *ha;
 
 			memset(&lp->hash_bytes, 0x00, 8);
 #ifdef HP100_DEBUG
 			printk("hp100: %s: computing hash filter - mc_count = %i\n",
 			       dev->name, netdev_mc_count(dev));
 #endif
-			netdev_for_each_mc_addr(dmi, dev) {
-				addrs = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				addrs = ha->addr;
 				if ((*addrs & 0x01) == 0x01) {	/* multicast address? */
 #ifdef HP100_DEBUG
 					printk("hp100: %s: multicast = %pM, ",
diff --git a/drivers/net/ibm_newemac/core.c b/drivers/net/ibm_newemac/core.c
index d8533a4ef825..40c78507ef16 100644
--- a/drivers/net/ibm_newemac/core.c
+++ b/drivers/net/ibm_newemac/core.c
@@ -388,18 +388,19 @@ static void emac_hash_mc(struct emac_instance *dev)
 	const int regs = EMAC_XAHT_REGS(dev);
 	u32 *gaht_base = emac_gaht_base(dev);
 	u32 gaht_temp[regs];
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	DBG(dev, "hash_mc %d" NL, netdev_mc_count(dev->ndev));
 
 	memset(gaht_temp, 0, sizeof (gaht_temp));
 
-	netdev_for_each_mc_addr(dmi, dev->ndev) {
+	netdev_for_each_mc_addr(ha, dev->ndev) {
 		int slot, reg, mask;
-		DBG2(dev, "mc %pM" NL, dmi->dmi_addr);
+		DBG2(dev, "mc %pM" NL, ha->addr);
 
-		slot = EMAC_XAHT_CRC_TO_SLOT(dev, ether_crc(ETH_ALEN, dmi->dmi_addr));
+		slot = EMAC_XAHT_CRC_TO_SLOT(dev,
+					     ether_crc(ETH_ALEN, ha->addr));
 		reg = EMAC_XAHT_SLOT_TO_REG(dev, slot);
 		mask = EMAC_XAHT_SLOT_TO_MASK(dev, slot);
 
@@ -1176,7 +1177,7 @@ static int emac_open(struct net_device *ndev)
 		netif_carrier_on(dev->ndev);
 
 	/* Required for Pause packet support in EMAC */
-	dev_mc_add(ndev, default_mcast_addr, sizeof(default_mcast_addr), 1);
+	dev_mc_add_global(ndev, default_mcast_addr);
 
 	emac_configure(dev);
 	mal_poll_add(dev->mal, &dev->commac);
diff --git a/drivers/net/ibmlana.c b/drivers/net/ibmlana.c
index b5d0f4e973f7..76949e08ee84 100644
--- a/drivers/net/ibmlana.c
+++ b/drivers/net/ibmlana.c
@@ -385,7 +385,7 @@ static void InitBoard(struct net_device *dev)
 	int camcnt;
 	camentry_t cams[16];
 	u32 cammask;
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	u16 rcrval;
 
 	/* reset the SONIC */
@@ -420,8 +420,8 @@ static void InitBoard(struct net_device *dev)
 	/* start putting the multicast addresses into the CAM list.  Stop if
 	   it is full. */
 
-	netdev_for_each_mc_addr(mcptr, dev) {
-		putcam(cams, &camcnt, mcptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		putcam(cams, &camcnt, ha->addr);
 		if (camcnt == 16)
 			break;
 	}
@@ -479,7 +479,7 @@ static void InitBoard(struct net_device *dev)
 	/* if still multicast addresses left or ALLMULTI is set, set the multicast
 	   enable bit */
 
-	if ((dev->flags & IFF_ALLMULTI) || (mcptr != NULL))
+	if ((dev->flags & IFF_ALLMULTI) || netdev_mc_count(dev) > camcnt)
 		rcrval |= RCREG_AMC;
 
 	/* promiscous mode ? */
diff --git a/drivers/net/ibmveth.c b/drivers/net/ibmveth.c
index 0bc777bac9b4..f468590ed454 100644
--- a/drivers/net/ibmveth.c
+++ b/drivers/net/ibmveth.c
@@ -1072,7 +1072,7 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 			ibmveth_error_printk("h_multicast_ctrl rc=%ld when entering promisc mode\n", lpar_rc);
 		}
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		/* clear the filter table & disable filtering */
 		lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 					   IbmVethMcastEnableRecv |
@@ -1083,10 +1083,10 @@ static void ibmveth_set_multicast_list(struct net_device *netdev)
 			ibmveth_error_printk("h_multicast_ctrl rc=%ld when attempting to clear filter table\n", lpar_rc);
 		}
 		/* add the addresses to the filter table */
-		netdev_for_each_mc_addr(mclist, netdev) {
+		netdev_for_each_mc_addr(ha, netdev) {
 			// add the multicast address to the filter table
 			unsigned long mcast_addr = 0;
-			memcpy(((char *)&mcast_addr)+2, mclist->dmi_addr, 6);
+			memcpy(((char *)&mcast_addr)+2, ha->addr, 6);
 			lpar_rc = h_multicast_ctrl(adapter->vdev->unit_address,
 						   IbmVethMcastAddFilter,
 						   mcast_addr);
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index ea875709f053..78cc742e233f 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -2864,7 +2864,7 @@ static int igb_write_mc_addr_list(struct net_device *netdev)
 {
 	struct igb_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list;
 	int i;
 
@@ -2881,8 +2881,8 @@ static int igb_write_mc_addr_list(struct net_device *netdev)
 
 	/* The shared function expects a packed array of only addresses. */
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 	igb_update_mc_addr_list(hw, mta_list, i);
 	kfree(mta_list);
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index a77afd8a14bb..ea8abf5c1ef2 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -1399,7 +1399,7 @@ static void igbvf_set_multi(struct net_device *netdev)
 {
 	struct igbvf_adapter *adapter = netdev_priv(netdev);
 	struct e1000_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8  *mta_list = NULL;
 	int i;
 
@@ -1414,8 +1414,8 @@ static void igbvf_set_multi(struct net_device *netdev)
 
 	/* prepare a packed array of only addresses. */
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, netdev)
+		memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
 
 	hw->mac.ops.update_mc_addr_list(hw, mta_list, i, 0, 0);
 	kfree(mta_list);
diff --git a/drivers/net/ioc3-eth.c b/drivers/net/ioc3-eth.c
index 70871b9b045a..57d873da9789 100644
--- a/drivers/net/ioc3-eth.c
+++ b/drivers/net/ioc3-eth.c
@@ -1664,7 +1664,7 @@ static int ioc3_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 
 static void ioc3_set_multicast_list(struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ioc3_private *ip = netdev_priv(dev);
 	struct ioc3 *ioc3 = ip->regs;
 	u64 ehar = 0;
@@ -1688,8 +1688,8 @@ static void ioc3_set_multicast_list(struct net_device *dev)
 			ip->ehar_h = 0xffffffff;
 			ip->ehar_l = 0xffffffff;
 		} else {
-			netdev_for_each_mc_addr(dmi, dev) {
-				char *addr = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				char *addr = ha->addr;
 
 				if (!(*addr & 1))
 					continue;
diff --git a/drivers/net/ipg.c b/drivers/net/ipg.c
index 0d7ad3f2d0f3..67cfc7d9d895 100644
--- a/drivers/net/ipg.c
+++ b/drivers/net/ipg.c
@@ -569,7 +569,7 @@ static int ipg_config_autoneg(struct net_device *dev)
 static void ipg_nic_set_multicast_list(struct net_device *dev)
 {
 	void __iomem *ioaddr = ipg_ioaddr(dev);
-	struct dev_mc_list *mc_list_ptr;
+	struct netdev_hw_addr *ha;
 	unsigned int hashindex;
 	u32 hashtable[2];
 	u8 receivemode;
@@ -608,9 +608,9 @@ static void ipg_nic_set_multicast_list(struct net_device *dev)
 	hashtable[1] = 0x00000000;
 
 	/* Cycle through all multicast addresses to filter. */
-	netdev_for_each_mc_addr(mc_list_ptr, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		/* Calculate CRC result for each multicast address. */
-		hashindex = crc32_le(0xffffffff, mc_list_ptr->dmi_addr,
+		hashindex = crc32_le(0xffffffff, ha->addr,
 				     ETH_ALEN);
 
 		/* Use only the least significant 6 bits. */
diff --git a/drivers/net/iseries_veth.c b/drivers/net/iseries_veth.c
index e6e972d9b7ca..cd65b8629bcc 100644
--- a/drivers/net/iseries_veth.c
+++ b/drivers/net/iseries_veth.c
@@ -961,15 +961,15 @@ static void veth_set_multicast_list(struct net_device *dev)
 			(netdev_mc_count(dev) > VETH_MAX_MCAST)) {
 		port->promiscuous = 1;
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		port->promiscuous = 0;
 
 		/* Update table */
 		port->num_mcast = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			u8 *addr = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			u8 *addr = ha->addr;
 			u64 xaddr = 0;
 
 			if (addr[0] & 0x01) {/* multicast address? */
diff --git a/drivers/net/ixgb/ixgb_main.c b/drivers/net/ixgb/ixgb_main.c
index c9fef65cb98b..912dd1d5772c 100644
--- a/drivers/net/ixgb/ixgb_main.c
+++ b/drivers/net/ixgb/ixgb_main.c
@@ -1058,7 +1058,7 @@ ixgb_set_multi(struct net_device *netdev)
 {
 	struct ixgb_adapter *adapter = netdev_priv(netdev);
 	struct ixgb_hw *hw = &adapter->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u32 rctl;
 	int i;
 
@@ -1089,9 +1089,9 @@ ixgb_set_multi(struct net_device *netdev)
 		IXGB_WRITE_REG(hw, RCTL, rctl);
 
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, netdev)
+		netdev_for_each_mc_addr(ha, netdev)
 			memcpy(&mta[i++ * IXGB_ETH_LENGTH_OF_ADDRESS],
-			       mc_ptr->dmi_addr, IXGB_ETH_LENGTH_OF_ADDRESS);
+			       ha->addr, IXGB_ETH_LENGTH_OF_ADDRESS);
 
 		ixgb_mc_addr_list_update(hw, mta, netdev_mc_count(netdev), 0);
 	}
diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c
index 4d1c3a429457..6eb5814ca7da 100644
--- a/drivers/net/ixgbe/ixgbe_common.c
+++ b/drivers/net/ixgbe/ixgbe_common.c
@@ -1494,7 +1494,7 @@ static void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr)
 s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
 				      struct net_device *netdev)
 {
-	struct dev_addr_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 i;
 
 	/*
@@ -1510,9 +1510,9 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw,
 		IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0);
 
 	/* Add the new addresses */
-	netdev_for_each_mc_addr(dmi, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		hw_dbg(hw, " Adding the multicast addresses:\n");
-		ixgbe_set_mta(hw, dmi->dmi_addr);
+		ixgbe_set_mta(hw, ha->addr);
 	}
 
 	/* Enable mta */
diff --git a/drivers/net/ixgbevf/vf.c b/drivers/net/ixgbevf/vf.c
index f457c52b5ed4..852e9c4fd934 100644
--- a/drivers/net/ixgbevf/vf.c
+++ b/drivers/net/ixgbevf/vf.c
@@ -259,7 +259,7 @@ static s32 ixgbevf_set_rar_vf(struct ixgbe_hw *hw, u32 index, u8 *addr,
 static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 					  struct net_device *netdev)
 {
-	struct dev_addr_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ixgbe_mbx_info *mbx = &hw->mbx;
 	u32 msgbuf[IXGBE_VFMAILBOX_SIZE];
 	u16 *vector_list = (u16 *)&msgbuf[1];
@@ -281,10 +281,10 @@ static s32 ixgbevf_update_mc_addr_list_vf(struct ixgbe_hw *hw,
 	msgbuf[0] |= cnt << IXGBE_VT_MSGINFO_SHIFT;
 
 	i = 0;
-	netdev_for_each_mc_addr(dmi, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == cnt)
 			break;
-		vector_list[i++] = ixgbevf_mta_vector(hw, dmi->dmi_addr);
+		vector_list[i++] = ixgbevf_mta_vector(hw, ha->addr);
 	}
 
 	mbx->ops.write_posted(hw, msgbuf, IXGBE_VFMAILBOX_SIZE);
diff --git a/drivers/net/jme.c b/drivers/net/jme.c
index c0b59a555384..10e816d2caff 100644
--- a/drivers/net/jme.c
+++ b/drivers/net/jme.c
@@ -2009,12 +2009,12 @@ jme_set_multi(struct net_device *netdev)
 	} else if (netdev->flags & IFF_ALLMULTI) {
 		jme->reg_rxmcs |= RXMCS_ALLMULFRAME;
 	} else if (netdev->flags & IFF_MULTICAST) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int bit_nr;
 
 		jme->reg_rxmcs |= RXMCS_MULFRAME | RXMCS_MULFILTERED;
-		netdev_for_each_mc_addr(mclist, netdev) {
-			bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3F;
+		netdev_for_each_mc_addr(ha, netdev) {
+			bit_nr = ether_crc(ETH_ALEN, ha->addr) & 0x3F;
 			mc_hash[bit_nr >> 5] |= 1 << (bit_nr & 0x1F);
 		}
 
diff --git a/drivers/net/korina.c b/drivers/net/korina.c
index edaedc7aa03f..26bf1b76b997 100644
--- a/drivers/net/korina.c
+++ b/drivers/net/korina.c
@@ -482,7 +482,7 @@ static void korina_multicast_list(struct net_device *dev)
 {
 	struct korina_private *lp = netdev_priv(dev);
 	unsigned long flags;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 recognise = ETH_ARC_AB;	/* always accept broadcasts */
 	int i;
 
@@ -502,8 +502,8 @@ static void korina_multicast_list(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/ks8851.c b/drivers/net/ks8851.c
index 66be4e449f02..4dcd61f81ec2 100644
--- a/drivers/net/ks8851.c
+++ b/drivers/net/ks8851.c
@@ -956,13 +956,13 @@ static void ks8851_set_rx_mode(struct net_device *dev)
 		rxctrl.rxcr1 = (RXCR1_RXME | RXCR1_RXAE |
 				RXCR1_RXPAFMA | RXCR1_RXMAFMA);
 	} else if (dev->flags & IFF_MULTICAST && !netdev_mc_empty(dev)) {
-		struct dev_mc_list *mcptr;
+		struct netdev_hw_addr *ha;
 		u32 crc;
 
 		/* accept some multicast */
 
-		netdev_for_each_mc_addr(mcptr, dev) {
-			crc = ether_crc(ETH_ALEN, mcptr->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc(ETH_ALEN, ha->addr);
 			crc >>= (32 - 6);  /* get top six bits */
 
 			rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf));
diff --git a/drivers/net/ks8851_mll.c b/drivers/net/ks8851_mll.c
index ee3fe30b2ad7..70a3d98f4bd7 100644
--- a/drivers/net/ks8851_mll.c
+++ b/drivers/net/ks8851_mll.c
@@ -362,7 +362,6 @@ static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
 
 #define MAX_MCAST_LST			32
 #define HW_MCAST_SIZE			8
-#define MAC_ADDR_LEN			6
 
 /**
  * union ks_tx_hdr - tx header data
@@ -450,7 +449,7 @@ struct ks_net {
 	u16			promiscuous;
 	u16			all_mcast;
 	u16			mcast_lst_size;
-	u8			mcast_lst[MAX_MCAST_LST][MAC_ADDR_LEN];
+	u8			mcast_lst[MAX_MCAST_LST][ETH_ALEN];
 	u8			mcast_bits[HW_MCAST_SIZE];
 	u8			mac_addr[6];
 	u8                      fid;
@@ -1170,7 +1169,7 @@ static void ks_set_mcast(struct ks_net *ks, u16 mcast)
 static void ks_set_rx_mode(struct net_device *netdev)
 {
 	struct ks_net *ks = netdev_priv(netdev);
-	struct dev_mc_list *ptr;
+	struct netdev_hw_addr *ha;
 
 	/* Turn on/off promiscuous mode. */
 	if ((netdev->flags & IFF_PROMISC) == IFF_PROMISC)
@@ -1187,13 +1186,12 @@ static void ks_set_rx_mode(struct net_device *netdev)
 		if (netdev_mc_count(netdev) <= MAX_MCAST_LST) {
 			int i = 0;
 
-			netdev_for_each_mc_addr(ptr, netdev) {
-				if (!(*ptr->dmi_addr & 1))
+			netdev_for_each_mc_addr(ha, netdev) {
+				if (!(*ha->addr & 1))
 					continue;
 				if (i >= MAX_MCAST_LST)
 					break;
-				memcpy(ks->mcast_lst[i++], ptr->dmi_addr,
-				MAC_ADDR_LEN);
+				memcpy(ks->mcast_lst[i++], ha->addr, ETH_ALEN);
 			}
 			ks->mcast_lst_size = (u8)i;
 			ks_set_grpaddr(ks);
diff --git a/drivers/net/ksz884x.c b/drivers/net/ksz884x.c
index b843bf7d8c17..348769521615 100644
--- a/drivers/net/ksz884x.c
+++ b/drivers/net/ksz884x.c
@@ -5763,7 +5763,7 @@ static void netdev_set_rx_mode(struct net_device *dev)
 	struct dev_priv *priv = netdev_priv(dev);
 	struct dev_info *hw_priv = priv->adapter;
 	struct ksz_hw *hw = &hw_priv->hw;
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	int multicast = (dev->flags & IFF_ALLMULTI);
 
 	dev_set_promiscuous(dev, priv, hw, (dev->flags & IFF_PROMISC));
@@ -5780,7 +5780,7 @@ static void netdev_set_rx_mode(struct net_device *dev)
 		int i = 0;
 
 		/* List too big to support so turn on all multicast mode. */
-		if (dev->mc_count > MAX_MULTICAST_LIST) {
+		if (netdev_mc_count(dev) > MAX_MULTICAST_LIST) {
 			if (MAX_MULTICAST_LIST != hw->multi_list_size) {
 				hw->multi_list_size = MAX_MULTICAST_LIST;
 				++hw->all_multi;
@@ -5789,13 +5789,12 @@ static void netdev_set_rx_mode(struct net_device *dev)
 			return;
 		}
 
-		netdev_for_each_mc_addr(mc_ptr, dev) {
-			if (!(*mc_ptr->dmi_addr & 1))
+		netdev_for_each_mc_addr(ha, dev) {
+			if (!(*ha->addr & 1))
 				continue;
 			if (i >= MAX_MULTICAST_LIST)
 				break;
-			memcpy(hw->multi_list[i++], mc_ptr->dmi_addr,
-				MAC_ADDR_LEN);
+			memcpy(hw->multi_list[i++], ha->addr, MAC_ADDR_LEN);
 		}
 		hw->multi_list_size = (u8) i;
 		hw_set_grp_addr(hw);
diff --git a/drivers/net/lib82596.c b/drivers/net/lib82596.c
index 443c39a3732f..fddaf921885e 100644
--- a/drivers/net/lib82596.c
+++ b/drivers/net/lib82596.c
@@ -1388,7 +1388,7 @@ static void set_multicast_list(struct net_device *dev)
 	}
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		unsigned char *cp;
 		struct mc_cmd *cmd;
 
@@ -1396,10 +1396,10 @@ static void set_multicast_list(struct net_device *dev)
 		cmd->cmd.command = SWAP16(CmdMulticastList);
 		cmd->mc_cnt = SWAP16(netdev_mc_count(dev) * 6);
 		cp = cmd->mc_addrs;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (!cnt--)
 				break;
-			memcpy(cp, dmi->dmi_addr, 6);
+			memcpy(cp, ha->addr, 6);
 			if (i596_debug > 1)
 				DEB(DEB_MULTI,
 				    printk(KERN_DEBUG
diff --git a/drivers/net/lib8390.c b/drivers/net/lib8390.c
index 56f66f485400..526dc9cbc3c6 100644
--- a/drivers/net/lib8390.c
+++ b/drivers/net/lib8390.c
@@ -905,10 +905,10 @@ static struct net_device_stats *__ei_get_stats(struct net_device *dev)
 
 static inline void make_mc_bits(u8 *bits, struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		u32 crc = ether_crc(ETH_ALEN, dmi->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		u32 crc = ether_crc(ETH_ALEN, ha->addr);
 		/*
 		 * The 8390 uses the 6 most significant bits of the
 		 * CRC to index the multicast table.
diff --git a/drivers/net/ll_temac_main.c b/drivers/net/ll_temac_main.c
index a18e3485476e..30474d6b15c3 100644
--- a/drivers/net/ll_temac_main.c
+++ b/drivers/net/ll_temac_main.c
@@ -250,20 +250,20 @@ static void temac_set_multicast_list(struct net_device *ndev)
 		temac_indirect_out32(lp, XTE_AFM_OFFSET, XTE_AFM_EPPRM_MASK);
 		dev_info(&ndev->dev, "Promiscuous mode enabled.\n");
 	} else if (!netdev_mc_empty(ndev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		i = 0;
-		netdev_for_each_mc_addr(mclist, ndev) {
+		netdev_for_each_mc_addr(ha, ndev) {
 			if (i >= MULTICAST_CAM_TABLE_NUM)
 				break;
-			multi_addr_msw = ((mclist->dmi_addr[3] << 24) |
-					  (mclist->dmi_addr[2] << 16) |
-					  (mclist->dmi_addr[1] << 8) |
-					  (mclist->dmi_addr[0]));
+			multi_addr_msw = ((ha->addr[3] << 24) |
+					  (ha->addr[2] << 16) |
+					  (ha->addr[1] << 8) |
+					  (ha->addr[0]));
 			temac_indirect_out32(lp, XTE_MAW0_OFFSET,
 					     multi_addr_msw);
-			multi_addr_lsw = ((mclist->dmi_addr[5] << 8) |
-					  (mclist->dmi_addr[4]) | (i << 16));
+			multi_addr_lsw = ((ha->addr[5] << 8) |
+					  (ha->addr[4]) | (i << 16));
 			temac_indirect_out32(lp, XTE_MAW1_OFFSET,
 					     multi_addr_lsw);
 			i++;
diff --git a/drivers/net/lp486e.c b/drivers/net/lp486e.c
index 3e3cc04defd0..72379c5439dc 100644
--- a/drivers/net/lp486e.c
+++ b/drivers/net/lp486e.c
@@ -1256,7 +1256,7 @@ static void set_multicast_list(struct net_device *dev) {
 			dev->name, netdev_mc_count(dev));
 
 	if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *cp;
 		cmd = kmalloc(sizeof(struct i596_cmd) + 2 +
 			      netdev_mc_count(dev) * 6, GFP_ATOMIC);
@@ -1267,8 +1267,8 @@ static void set_multicast_list(struct net_device *dev) {
 		cmd->command = CmdMulticastList;
 		*((unsigned short *) (cmd + 1)) = netdev_mc_count(dev) * 6;
 		cp = ((char *)(cmd + 1))+2;
-		netdev_for_each_mc_addr(dmi, dev) {
-			memcpy(cp, dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(cp, ha->addr, 6);
 			cp += 6;
 		}
 		if (i596_debug & LOG_SRCDST)
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index c8a18a6203c8..eab121945d7c 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -882,15 +882,15 @@ static int hash_get_index(__u8 *addr)
  */
 static void macb_sethashtable(struct net_device *dev)
 {
-	struct dev_mc_list *curr;
+	struct netdev_hw_addr *ha;
 	unsigned long mc_filter[2];
 	unsigned int bitnr;
 	struct macb *bp = netdev_priv(dev);
 
 	mc_filter[0] = mc_filter[1] = 0;
 
-	netdev_for_each_mc_addr(curr, dev) {
-		bitnr = hash_get_index(curr->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		bitnr = hash_get_index(ha->addr);
 		mc_filter[bitnr >> 5] |= 1 << (bitnr & 31);
 	}
 
diff --git a/drivers/net/mace.c b/drivers/net/mace.c
index ab5f0bf6d1ae..2328a7399dcb 100644
--- a/drivers/net/mace.c
+++ b/drivers/net/mace.c
@@ -598,7 +598,7 @@ static void mace_set_multicast(struct net_device *dev)
 	mp->maccc |= PROM;
     } else {
 	unsigned char multicast_filter[8];
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	if (dev->flags & IFF_ALLMULTI) {
 	    for (i = 0; i < 8; i++)
@@ -606,8 +606,8 @@ static void mace_set_multicast(struct net_device *dev)
 	} else {
 	    for (i = 0; i < 8; i++)
 		multicast_filter[i] = 0;
-	    netdev_for_each_mc_addr(dmi, dev) {
-	        crc = ether_crc_le(6, dmi->dmi_addr);
+	    netdev_for_each_mc_addr(ha, dev) {
+	        crc = ether_crc_le(6, ha->addr);
 		i = crc >> 26;	/* bit number in multicast_filter */
 		multicast_filter[i >> 3] |= 1 << (i & 7);
 	    }
diff --git a/drivers/net/macmace.c b/drivers/net/macmace.c
index 13ba8f4afb7e..8a50c67e5928 100644
--- a/drivers/net/macmace.c
+++ b/drivers/net/macmace.c
@@ -508,7 +508,7 @@ static void mace_set_multicast(struct net_device *dev)
 		mb->maccc |= PROM;
 	} else {
 		unsigned char multicast_filter[8];
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 
 		if (dev->flags & IFF_ALLMULTI) {
 			for (i = 0; i < 8; i++) {
@@ -517,8 +517,8 @@ static void mace_set_multicast(struct net_device *dev)
 		} else {
 			for (i = 0; i < 8; i++)
 				multicast_filter[i] = 0;
-			netdev_for_each_mc_addr(dmi, dev) {
-				crc = ether_crc_le(6, dmi->dmi_addr);
+			netdev_for_each_mc_addr(ha, dev) {
+				crc = ether_crc_le(6, ha->addr);
 				/* bit number in multicast_filter */
 				i = crc >> 26;
 				multicast_filter[i >> 3] |= 1 << (i & 7);
diff --git a/drivers/net/mlx4/en_netdev.c b/drivers/net/mlx4/en_netdev.c
index 5eb96fe6ec5c..455464223b43 100644
--- a/drivers/net/mlx4/en_netdev.c
+++ b/drivers/net/mlx4/en_netdev.c
@@ -168,7 +168,7 @@ static void mlx4_en_clear_list(struct net_device *dev)
 static void mlx4_en_cache_mclist(struct net_device *dev)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	char *mc_addrs;
 	int mc_addrs_cnt = netdev_mc_count(dev);
 	int i;
@@ -179,8 +179,8 @@ static void mlx4_en_cache_mclist(struct net_device *dev)
 		return;
 	}
 	i = 0;
-	netdev_for_each_mc_addr(mclist, dev)
-		memcpy(mc_addrs + i++ * ETH_ALEN, mclist->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(mc_addrs + i++ * ETH_ALEN, ha->addr, ETH_ALEN);
 	priv->mc_addrs = mc_addrs;
 	priv->mc_addrs_cnt = mc_addrs_cnt;
 }
diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
index c97b6e4365a9..d5ebe43b0e65 100644
--- a/drivers/net/mv643xx_eth.c
+++ b/drivers/net/mv643xx_eth.c
@@ -1769,7 +1769,7 @@ static void mv643xx_eth_program_multicast_filter(struct net_device *dev)
 	struct mv643xx_eth_private *mp = netdev_priv(dev);
 	u32 *mc_spec;
 	u32 *mc_other;
-	struct dev_addr_list *addr;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	if (dev->flags & (IFF_PROMISC | IFF_ALLMULTI)) {
@@ -1794,8 +1794,8 @@ oom:
 	memset(mc_spec, 0, 0x100);
 	memset(mc_other, 0, 0x100);
 
-	netdev_for_each_mc_addr(addr, dev) {
-		u8 *a = addr->da_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		u8 *a = ha->addr;
 		u32 *table;
 		int entry;
 
diff --git a/drivers/net/myri10ge/myri10ge.c b/drivers/net/myri10ge/myri10ge.c
index e84dd3ee9c5a..6d33adf988dc 100644
--- a/drivers/net/myri10ge/myri10ge.c
+++ b/drivers/net/myri10ge/myri10ge.c
@@ -3001,7 +3001,7 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 {
 	struct myri10ge_priv *mgp = netdev_priv(dev);
 	struct myri10ge_cmd cmd;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	__be32 data[2] = { 0, 0 };
 	int err;
 
@@ -3038,8 +3038,8 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 	}
 
 	/* Walk the multicast list, and add each address */
-	netdev_for_each_mc_addr(mc_list, dev) {
-		memcpy(data, &mc_list->dmi_addr, 6);
+	netdev_for_each_mc_addr(ha, dev) {
+		memcpy(data, &ha->addr, 6);
 		cmd.data0 = ntohl(data[0]);
 		cmd.data1 = ntohl(data[1]);
 		err = myri10ge_send_cmd(mgp, MXGEFW_JOIN_MULTICAST_GROUP,
@@ -3047,7 +3047,7 @@ static void myri10ge_set_multicast_list(struct net_device *dev)
 
 		if (err != 0) {
 			netdev_err(dev, "Failed MXGEFW_JOIN_MULTICAST_GROUP, error status:%d %pM\n",
-				   err, mc_list->dmi_addr);
+				   err, ha->addr);
 			goto abort;
 		}
 	}
diff --git a/drivers/net/natsemi.c b/drivers/net/natsemi.c
index e52038783245..9250bf6573ec 100644
--- a/drivers/net/natsemi.c
+++ b/drivers/net/natsemi.c
@@ -2493,12 +2493,12 @@ static void __set_rx_mode(struct net_device *dev)
 		rx_mode = RxFilterEnable | AcceptBroadcast
 			| AcceptAllMulticast | AcceptMyPhys;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int i;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int b = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 23) & 0x1ff;
+		netdev_for_each_mc_addr(ha, dev) {
+			int b = (ether_crc(ETH_ALEN, ha->addr) >> 23) & 0x1ff;
 			mc_filter[b/8] |= (1 << (b & 0x07));
 		}
 		rx_mode = RxFilterEnable | AcceptBroadcast
diff --git a/drivers/net/netxen/netxen_nic_hw.c b/drivers/net/netxen/netxen_nic_hw.c
index a945591298a8..781ca893ee0a 100644
--- a/drivers/net/netxen/netxen_nic_hw.c
+++ b/drivers/net/netxen/netxen_nic_hw.c
@@ -537,7 +537,7 @@ netxen_nic_set_mcast_addr(struct netxen_adapter *adapter,
 void netxen_p2_nic_set_multi(struct net_device *netdev)
 {
 	struct netxen_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 null_addr[6];
 	int i;
 
@@ -571,8 +571,8 @@ void netxen_p2_nic_set_multi(struct net_device *netdev)
 	netxen_nic_enable_mcast_filter(adapter);
 
 	i = 0;
-	netdev_for_each_mc_addr(mc_ptr, netdev)
-		netxen_nic_set_mcast_addr(adapter, i++, mc_ptr->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev)
+		netxen_nic_set_mcast_addr(adapter, i++, ha->addr);
 
 	/* Clear out remaining addresses */
 	while (i < adapter->max_mc_count)
@@ -680,7 +680,7 @@ static int nx_p3_nic_add_mac(struct netxen_adapter *adapter,
 void netxen_p3_nic_set_multi(struct net_device *netdev)
 {
 	struct netxen_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u32 mode = VPORT_MISS_MODE_DROP;
 	LIST_HEAD(del_list);
@@ -707,8 +707,8 @@ void netxen_p3_nic_set_multi(struct net_device *netdev)
 	}
 
 	if (!netdev_mc_empty(netdev)) {
-		netdev_for_each_mc_addr(mc_ptr, netdev)
-			nx_p3_nic_add_mac(adapter, mc_ptr->dmi_addr, &del_list);
+		netdev_for_each_mc_addr(ha, netdev)
+			nx_p3_nic_add_mac(adapter, ha->addr, &del_list);
 	}
 
 send_fw_cmd:
diff --git a/drivers/net/ni52.c b/drivers/net/ni52.c
index 05c29c2cef2a..a76fabe26292 100644
--- a/drivers/net/ni52.c
+++ b/drivers/net/ni52.c
@@ -596,7 +596,7 @@ static int init586(struct net_device *dev)
 	struct iasetup_cmd_struct __iomem *ias_cmd;
 	struct tdr_cmd_struct __iomem *tdr_cmd;
 	struct mcsetup_cmd_struct __iomem *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs = netdev_mc_count(dev);
 
 	ptr = p->scb + 1;
@@ -725,8 +725,8 @@ static int init586(struct net_device *dev)
 		writew(num_addrs * 6, &mc_cmd->mc_cnt);
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
-			memcpy_toio(mc_cmd->mc_list[i++], dmi->dmi_addr, 6);
+		netdev_for_each_mc_addr(ha, dev)
+			memcpy_toio(mc_cmd->mc_list[i++], ha->addr, 6);
 
 		writew(make16(mc_cmd), &p->scb->cbl_offset);
 		writeb(CUC_START, &p->scb->cmd_cuc);
diff --git a/drivers/net/niu.c b/drivers/net/niu.c
index 0678f3106cbc..7b52c466cf48 100644
--- a/drivers/net/niu.c
+++ b/drivers/net/niu.c
@@ -6313,7 +6313,6 @@ static void niu_set_rx_mode(struct net_device *dev)
 {
 	struct niu *np = netdev_priv(dev);
 	int i, alt_cnt, err;
-	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
 	unsigned long flags;
 	u16 hash[16] = { 0, };
@@ -6365,8 +6364,8 @@ static void niu_set_rx_mode(struct net_device *dev)
 		for (i = 0; i < 16; i++)
 			hash[i] = 0xffff;
 	} else if (!netdev_mc_empty(dev)) {
-		netdev_for_each_mc_addr(addr, dev) {
-			u32 crc = ether_crc_le(ETH_ALEN, addr->da_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 crc = ether_crc_le(ETH_ALEN, ha->addr);
 
 			crc >>= 24;
 			hash[crc >> 4] |= (1 << (15 - (crc & 0xf)));
diff --git a/drivers/net/octeon/octeon_mgmt.c b/drivers/net/octeon/octeon_mgmt.c
index be368e5cbf75..ee894ed35f7f 100644
--- a/drivers/net/octeon/octeon_mgmt.c
+++ b/drivers/net/octeon/octeon_mgmt.c
@@ -474,7 +474,7 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 	unsigned int cam_mode = 1; /* 1 - Accept on CAM match */
 	unsigned int multicast_mode = 1; /* 1 - Reject all multicast.  */
 	struct octeon_mgmt_cam_state cam_state;
-	struct dev_addr_list *list;
+	struct netdev_hw_addr *ha;
 	struct list_head *pos;
 	int available_cam_entries;
 
@@ -510,8 +510,8 @@ static void octeon_mgmt_set_rx_filtering(struct net_device *netdev)
 		}
 	}
 	if (multicast_mode == 0) {
-		netdev_for_each_mc_addr(list, netdev)
-			octeon_mgmt_cam_state_add(&cam_state, list->da_addr);
+		netdev_for_each_mc_addr(ha, netdev)
+			octeon_mgmt_cam_state_add(&cam_state, ha->addr);
 	}
 
 
diff --git a/drivers/net/pci-skeleton.c b/drivers/net/pci-skeleton.c
index 36785853a149..dc3b4c7914fd 100644
--- a/drivers/net/pci-skeleton.c
+++ b/drivers/net/pci-skeleton.c
@@ -1813,12 +1813,12 @@ static void netdrv_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 		}
diff --git a/drivers/net/pcmcia/axnet_cs.c b/drivers/net/pcmcia/axnet_cs.c
index 09291e60d309..333cb3344d54 100644
--- a/drivers/net/pcmcia/axnet_cs.c
+++ b/drivers/net/pcmcia/axnet_cs.c
@@ -1623,11 +1623,11 @@ static struct net_device_stats *get_stats(struct net_device *dev)
  
 static inline void make_mc_bits(u8 *bits, struct net_device *dev)
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32 crc;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		crc = ether_crc(ETH_ALEN, dmi->dmi_addr);
+	netdev_for_each_mc_addr(ha, dev) {
+		crc = ether_crc(ETH_ALEN, ha->addr);
 		/* 
 		 * The 8390 uses the 6 most significant bits of the
 		 * CRC to index the multicast table.
diff --git a/drivers/net/pcmcia/fmvj18x_cs.c b/drivers/net/pcmcia/fmvj18x_cs.c
index b9dc80b9d04a..6734f7d6da98 100644
--- a/drivers/net/pcmcia/fmvj18x_cs.c
+++ b/drivers/net/pcmcia/fmvj18x_cs.c
@@ -1196,11 +1196,11 @@ static void set_rx_mode(struct net_device *dev)
 	memset(mc_filter, 0x00, sizeof(mc_filter));
 	outb(1, ioaddr + RX_MODE);	/* Ignore almost all multicasts. */
     } else {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 
 	memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-	    unsigned int bit = ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+	    unsigned int bit = ether_crc_le(ETH_ALEN, ha->addr) >> 26;
 	    mc_filter[bit >> 3] |= (1 << (bit & 7));
 	}
 	outb(2, ioaddr + RX_MODE);	/* Use normal mode. */
diff --git a/drivers/net/pcmcia/nmclan_cs.c b/drivers/net/pcmcia/nmclan_cs.c
index c717b143f11a..c516c1996354 100644
--- a/drivers/net/pcmcia/nmclan_cs.c
+++ b/drivers/net/pcmcia/nmclan_cs.c
@@ -1475,7 +1475,7 @@ static void set_multicast_list(struct net_device *dev)
 {
   mace_private *lp = netdev_priv(dev);
   int adr[ETHER_ADDR_LEN] = {0}; /* Ethernet address */
-  struct dev_mc_list *dmi;
+  struct netdev_hw_addr *ha;
 
 #ifdef PCMCIA_DEBUG
   {
@@ -1495,8 +1495,8 @@ static void set_multicast_list(struct net_device *dev)
   if (num_addrs > 0) {
     /* Calculate multicast logical address filter */
     memset(lp->multicast_ladrf, 0, MACE_LADRF_LEN);
-    netdev_for_each_mc_addr(dmi, dev) {
-      memcpy(adr, dmi->dmi_addr, ETHER_ADDR_LEN);
+    netdev_for_each_mc_addr(ha, dev) {
+      memcpy(adr, ha->addr, ETHER_ADDR_LEN);
       BuildLAF(lp->multicast_ladrf, adr);
     }
   }
diff --git a/drivers/net/pcmcia/smc91c92_cs.c b/drivers/net/pcmcia/smc91c92_cs.c
index 5adc662c4bfb..f45c626003a4 100644
--- a/drivers/net/pcmcia/smc91c92_cs.c
+++ b/drivers/net/pcmcia/smc91c92_cs.c
@@ -1616,12 +1616,12 @@ static void set_rx_mode(struct net_device *dev)
 	rx_cfg_setting = RxStripCRC | RxEnable | RxAllMulti;
     else {
 	if (!netdev_mc_empty(dev)) {
-	    struct dev_mc_list *mc_addr;
+	    struct netdev_hw_addr *ha;
 
-	    netdev_for_each_mc_addr(mc_addr, dev) {
-		u_int position = ether_crc(6, mc_addr->dmi_addr);
+	    netdev_for_each_mc_addr(ha, dev) {
+		u_int position = ether_crc(6, ha->addr);
 #ifndef final_version		/* Verify multicast address. */
-		if ((mc_addr->dmi_addr[0] & 1) == 0)
+		if ((ha->addr[0] & 1) == 0)
 		    continue;
 #endif
 		multicast_table[position >> 29] |= 1 << ((position >> 26) & 7);
diff --git a/drivers/net/pcmcia/xirc2ps_cs.c b/drivers/net/pcmcia/xirc2ps_cs.c
index 4d1802e457be..656be931207a 100644
--- a/drivers/net/pcmcia/xirc2ps_cs.c
+++ b/drivers/net/pcmcia/xirc2ps_cs.c
@@ -1398,7 +1398,7 @@ static void set_addresses(struct net_device *dev)
 {
 	unsigned int ioaddr = dev->base_addr;
 	local_info_t *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct set_address_info sa_info;
 	int i;
 
@@ -1413,10 +1413,10 @@ static void set_addresses(struct net_device *dev)
 
 	set_address(&sa_info, dev->dev_addr);
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i++ == 9)
 			break;
-		set_address(&sa_info, dmi->dmi_addr);
+		set_address(&sa_info, ha->addr);
 	}
 	while (i++ < 9)
 		set_address(&sa_info, dev->dev_addr);
diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
index 084d78dd1637..a2254f749a9a 100644
--- a/drivers/net/pcnet32.c
+++ b/drivers/net/pcnet32.c
@@ -2590,7 +2590,7 @@ static void pcnet32_load_multicast(struct net_device *dev)
 	struct pcnet32_private *lp = netdev_priv(dev);
 	volatile struct pcnet32_init_block *ib = lp->init_block;
 	volatile __le16 *mcast_table = (__le16 *)ib->filter;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned long ioaddr = dev->base_addr;
 	char *addrs;
 	int i;
@@ -2611,8 +2611,8 @@ static void pcnet32_load_multicast(struct net_device *dev)
 	ib->filter[1] = 0;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/ps3_gelic_net.c b/drivers/net/ps3_gelic_net.c
index a602da6418c8..fed5df9c5ea1 100644
--- a/drivers/net/ps3_gelic_net.c
+++ b/drivers/net/ps3_gelic_net.c
@@ -546,7 +546,7 @@ out:
 void gelic_net_set_multi(struct net_device *netdev)
 {
 	struct gelic_card *card = netdev_card(netdev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	unsigned int i;
 	uint8_t *p;
 	u64 addr;
@@ -580,9 +580,9 @@ void gelic_net_set_multi(struct net_device *netdev)
 	}
 
 	/* set multicast addresses */
-	netdev_for_each_mc_addr(mc, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		addr = 0;
-		p = mc->dmi_addr;
+		p = ha->addr;
 		for (i = 0; i < ETH_ALEN; i++) {
 			addr <<= 8;
 			addr |= *p++;
diff --git a/drivers/net/qlcnic/qlcnic_hw.c b/drivers/net/qlcnic/qlcnic_hw.c
index 14c999ab4222..9a1daa4ebe22 100644
--- a/drivers/net/qlcnic/qlcnic_hw.c
+++ b/drivers/net/qlcnic/qlcnic_hw.c
@@ -415,7 +415,7 @@ static int qlcnic_nic_add_mac(struct qlcnic_adapter *adapter, u8 *addr)
 void qlcnic_set_multi(struct net_device *netdev)
 {
 	struct qlcnic_adapter *adapter = netdev_priv(netdev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	u8 bcast_addr[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
 	u32 mode = VPORT_MISS_MODE_DROP;
 
@@ -434,8 +434,8 @@ void qlcnic_set_multi(struct net_device *netdev)
 	}
 
 	if (!netdev_mc_empty(netdev)) {
-		netdev_for_each_mc_addr(mc_ptr, netdev) {
-			qlcnic_nic_add_mac(adapter, mc_ptr->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			qlcnic_nic_add_mac(adapter, ha->addr);
 		}
 	}
 
diff --git a/drivers/net/qlge/qlge_main.c b/drivers/net/qlge/qlge_main.c
index fd34f266c0a8..76df96813a7b 100644
--- a/drivers/net/qlge/qlge_main.c
+++ b/drivers/net/qlge/qlge_main.c
@@ -4207,7 +4207,7 @@ static struct net_device_stats *qlge_get_stats(struct net_device
 static void qlge_set_multicast_list(struct net_device *ndev)
 {
 	struct ql_adapter *qdev = (struct ql_adapter *)netdev_priv(ndev);
-	struct dev_mc_list *mc_ptr;
+	struct netdev_hw_addr *ha;
 	int i, status;
 
 	status = ql_sem_spinlock(qdev, SEM_RT_IDX_MASK);
@@ -4271,8 +4271,8 @@ static void qlge_set_multicast_list(struct net_device *ndev)
 		if (status)
 			goto exit;
 		i = 0;
-		netdev_for_each_mc_addr(mc_ptr, ndev) {
-			if (ql_set_mac_addr_reg(qdev, (u8 *) mc_ptr->dmi_addr,
+		netdev_for_each_mc_addr(ha, ndev) {
+			if (ql_set_mac_addr_reg(qdev, (u8 *) ha->addr,
 						MAC_ADDR_TYPE_MULTI_MAC, i)) {
 				netif_err(qdev, hw, qdev->ndev,
 					  "Failed to loadmulticast address.\n");
diff --git a/drivers/net/r6040.c b/drivers/net/r6040.c
index 15d5373dc8f3..f5a0e963e688 100644
--- a/drivers/net/r6040.c
+++ b/drivers/net/r6040.c
@@ -938,7 +938,7 @@ static void r6040_multicast_list(struct net_device *dev)
 	u16 *adrp;
 	u16 reg;
 	unsigned long flags;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	/* MAC Address */
@@ -973,8 +973,8 @@ static void r6040_multicast_list(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
@@ -994,9 +994,9 @@ static void r6040_multicast_list(struct net_device *dev)
 	}
 	/* Multicast Address 1~4 case */
 	i = 0;
-	netdev_for_each_mc_addr(dmi, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (i < MCAST_MAX) {
-			adrp = (u16 *) dmi->dmi_addr;
+			adrp = (u16 *) ha->addr;
 			iowrite16(adrp[0], ioaddr + MID_1L + 8 * i);
 			iowrite16(adrp[1], ioaddr + MID_1M + 8 * i);
 			iowrite16(adrp[2], ioaddr + MID_1H + 8 * i);
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
index f7ffa5d8ffeb..64cd250f642d 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
@@ -4785,12 +4785,12 @@ static void rtl_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
 		}
diff --git a/drivers/net/s2io.c b/drivers/net/s2io.c
index 2eb7f8a0d926..ad5a6a873b29 100644
--- a/drivers/net/s2io.c
+++ b/drivers/net/s2io.c
@@ -4964,7 +4964,7 @@ static struct net_device_stats *s2io_get_stats(struct net_device *dev)
 static void s2io_set_multicast(struct net_device *dev)
 {
 	int i, j, prev_cnt;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct s2io_nic *sp = netdev_priv(dev);
 	struct XENA_dev_config __iomem *bar0 = sp->bar0;
 	u64 val64 = 0, multi_mac = 0x010203040506ULL, mask =
@@ -5093,12 +5093,12 @@ static void s2io_set_multicast(struct net_device *dev)
 
 		/* Create the new Rx filter list and update the same in H/W. */
 		i = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			memcpy(sp->usr_addrs[i].addr, mclist->dmi_addr,
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(sp->usr_addrs[i].addr, ha->addr,
 			       ETH_ALEN);
 			mac_addr = 0;
 			for (j = 0; j < ETH_ALEN; j++) {
-				mac_addr |= mclist->dmi_addr[j];
+				mac_addr |= ha->addr[j];
 				mac_addr <<= 8;
 			}
 			mac_addr >>= 8;
diff --git a/drivers/net/sb1250-mac.c b/drivers/net/sb1250-mac.c
index 9944e5d662c0..f7de960ca06f 100644
--- a/drivers/net/sb1250-mac.c
+++ b/drivers/net/sb1250-mac.c
@@ -2112,7 +2112,7 @@ static void sbmac_setmulti(struct sbmac_softc *sc)
 	uint64_t reg;
 	void __iomem *port;
 	int idx;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct net_device *dev = sc->sbm_dev;
 
 	/*
@@ -2161,10 +2161,10 @@ static void sbmac_setmulti(struct sbmac_softc *sc)
 	 * XXX if the table overflows */
 
 	idx = 1;		/* skip station address */
-	netdev_for_each_mc_addr(mclist, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		if (idx == MAC_ADDR_COUNT)
 			break;
-		reg = sbmac_addr2reg(mclist->dmi_addr);
+		reg = sbmac_addr2reg(ha->addr);
 		port = sc->sbm_base + R_MAC_ADDR_BASE+(idx * sizeof(uint64_t));
 		__raw_writeq(reg, port);
 		idx++;
diff --git a/drivers/net/sc92031.c b/drivers/net/sc92031.c
index d87c4787fffa..1b3260588933 100644
--- a/drivers/net/sc92031.c
+++ b/drivers/net/sc92031.c
@@ -433,13 +433,13 @@ static void _sc92031_set_mar(struct net_device *dev)
 	    (dev->flags & IFF_ALLMULTI))
 		mar0 = mar1 = 0xffffffff;
 	else if (dev->flags & IFF_MULTICAST) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
-		netdev_for_each_mc_addr(mc_list, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 crc;
 			unsigned bit = 0;
 
-			crc = ~ether_crc(ETH_ALEN, mc_list->dmi_addr);
+			crc = ~ether_crc(ETH_ALEN, ha->addr);
 			crc >>= 24;
 
 			if (crc & 0x01)	bit |= 0x02;
diff --git a/drivers/net/sfc/efx.c b/drivers/net/sfc/efx.c
index 88f2fb193abe..e07b82b266d9 100644
--- a/drivers/net/sfc/efx.c
+++ b/drivers/net/sfc/efx.c
@@ -1602,7 +1602,7 @@ static int efx_set_mac_address(struct net_device *net_dev, void *data)
 static void efx_set_multicast_list(struct net_device *net_dev)
 {
 	struct efx_nic *efx = netdev_priv(net_dev);
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	union efx_multicast_hash *mc_hash = &efx->multicast_hash;
 	u32 crc;
 	int bit;
@@ -1614,8 +1614,8 @@ static void efx_set_multicast_list(struct net_device *net_dev)
 		memset(mc_hash, 0xff, sizeof(*mc_hash));
 	} else {
 		memset(mc_hash, 0x00, sizeof(*mc_hash));
-		netdev_for_each_mc_addr(mc_list, net_dev) {
-			crc = ether_crc_le(ETH_ALEN, mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, net_dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			bit = crc & (EFX_MCAST_HASH_ENTRIES - 1);
 			set_bit_le(bit, mc_hash->byte);
 		}
diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index 760d9e83a465..6a05b93ae6c9 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -848,13 +848,13 @@ static void sis190_set_rx_mode(struct net_device *dev)
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0xffffffff;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		rx_mode = AcceptBroadcast | AcceptMyPhys;
 		mc_filter[1] = mc_filter[0] = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int bit_nr =
-				ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+				ether_crc(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 			rx_mode |= AcceptMulticast;
 		}
diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c
index bc7ff411aad5..6293592635be 100644
--- a/drivers/net/sis900.c
+++ b/drivers/net/sis900.c
@@ -2298,12 +2298,14 @@ static void set_rx_mode(struct net_device *net_dev)
 		/* Accept Broadcast packet, destination address matchs our
 		 * MAC address, use Receive Filter to reject unwanted MCAST
 		 * packets */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		rx_mode = RFAAB;
 
-		netdev_for_each_mc_addr(mclist, net_dev) {
-			unsigned int bit_nr =
-				sis900_mcast_bitnr(mclist->dmi_addr, sis_priv->chipset_rev);
+		netdev_for_each_mc_addr(ha, net_dev) {
+			unsigned int bit_nr;
+
+			bit_nr = sis900_mcast_bitnr(ha->addr,
+						    sis_priv->chipset_rev);
 			mc_filter[bit_nr >> 4] |= (1 << (bit_nr & 0xf));
 		}
 	}
diff --git a/drivers/net/skfp/skfddi.c b/drivers/net/skfp/skfddi.c
index 1921a54ea995..7e5b9f310fb2 100644
--- a/drivers/net/skfp/skfddi.c
+++ b/drivers/net/skfp/skfddi.c
@@ -852,7 +852,7 @@ static void skfp_ctl_set_multicast_list(struct net_device *dev)
 static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev)
 {
 	struct s_smc *smc = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 	/* Enable promiscuous mode, if necessary */
 	if (dev->flags & IFF_PROMISC) {
@@ -876,13 +876,13 @@ static void skfp_ctl_set_multicast_list_wo_lock(struct net_device *dev)
 				/* use exact filtering */
 
 				// point to first multicast addr
-				netdev_for_each_mc_addr(dmi, dev) {
-					mac_add_multicast(smc, 
-							  (struct fddi_addr *)dmi->dmi_addr, 
-							  1);
+				netdev_for_each_mc_addr(ha, dev) {
+					mac_add_multicast(smc,
+						(struct fddi_addr *)ha->addr,
+						1);
 
 					pr_debug(KERN_INFO "ENABLE MC ADDRESS: %pMF\n",
-						dmi->dmi_addr);
+						ha->addr);
 				}
 
 			} else {	// more MC addresses than HW supports
diff --git a/drivers/net/skge.c b/drivers/net/skge.c
index bd8c5e8413b1..de5ef3877baf 100644
--- a/drivers/net/skge.c
+++ b/drivers/net/skge.c
@@ -2917,7 +2917,7 @@ static void genesis_set_multicast(struct net_device *dev)
 	struct skge_port *skge = netdev_priv(dev);
 	struct skge_hw *hw = skge->hw;
 	int port = skge->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u32 mode;
 	u8 filter[8];
 
@@ -2937,8 +2937,8 @@ static void genesis_set_multicast(struct net_device *dev)
 		    skge->flow_status == FLOW_STAT_SYMMETRIC)
 			genesis_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			genesis_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			genesis_add_filter(filter, ha->addr);
 	}
 
 	xm_write32(hw, port, XM_MODE, mode);
@@ -2956,7 +2956,7 @@ static void yukon_set_multicast(struct net_device *dev)
 	struct skge_port *skge = netdev_priv(dev);
 	struct skge_hw *hw = skge->hw;
 	int port = skge->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	int rx_pause = (skge->flow_status == FLOW_STAT_REM_SEND ||
 			skge->flow_status == FLOW_STAT_SYMMETRIC);
 	u16 reg;
@@ -2979,8 +2979,8 @@ static void yukon_set_multicast(struct net_device *dev)
 		if (rx_pause)
 			yukon_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			yukon_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			yukon_add_filter(filter, ha->addr);
 	}
 
 
diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c
index f1c0ec308bfe..5b97edb7a35f 100644
--- a/drivers/net/sky2.c
+++ b/drivers/net/sky2.c
@@ -3627,7 +3627,7 @@ static void sky2_set_multicast(struct net_device *dev)
 	struct sky2_port *sky2 = netdev_priv(dev);
 	struct sky2_hw *hw = sky2->hw;
 	unsigned port = sky2->port;
-	struct dev_mc_list *list;
+	struct netdev_hw_addr *ha;
 	u16 reg;
 	u8 filter[8];
 	int rx_pause;
@@ -3651,8 +3651,8 @@ static void sky2_set_multicast(struct net_device *dev)
 		if (rx_pause)
 			sky2_add_filter(filter, pause_mc_addr);
 
-		netdev_for_each_mc_addr(list, dev)
-			sky2_add_filter(filter, list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			sky2_add_filter(filter, ha->addr);
 	}
 
 	gma_write16(hw, port, GM_MC_ADDR_H1,
diff --git a/drivers/net/smc911x.c b/drivers/net/smc911x.c
index 0f97b5af91e4..6278734c735f 100644
--- a/drivers/net/smc911x.c
+++ b/drivers/net/smc911x.c
@@ -1341,7 +1341,7 @@ static void smc911x_set_multicast_list(struct net_device *dev)
 	 * within that register.
 	 */
 	else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 
 		/* Set the Hash perfec mode */
 		mcr |= MAC_CR_HPFILT_;
@@ -1349,19 +1349,16 @@ static void smc911x_set_multicast_list(struct net_device *dev)
 		/* start with a table of all zeros: reject all */
 		memset(multicast_table, 0, sizeof(multicast_table));
 
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 position;
 
-			/* do we have a pointer here? */
-			if (!cur_addr)
-				break;
 			/* make sure this is a multicast address -
 				shouldn't this be a given if we have it here ? */
-			if (!(*cur_addr->dmi_addr & 1))
-				 continue;
+			if (!(*ha->addr & 1))
+				continue;
 
 			/* upper 6 bits are used as hash index */
-			position = ether_crc(ETH_ALEN, cur_addr->dmi_addr)>>26;
+			position = ether_crc(ETH_ALEN, ha->addr)>>26;
 
 			multicast_table[position>>5] |= 1 << (position&0x1f);
 		}
diff --git a/drivers/net/smc9194.c b/drivers/net/smc9194.c
index ee1487791d55..9e5e9e792e86 100644
--- a/drivers/net/smc9194.c
+++ b/drivers/net/smc9194.c
@@ -417,7 +417,7 @@ static void smc_shutdown( int ioaddr )
 
 
 /*
- . Function: smc_setmulticast( int ioaddr, int count, dev_mc_list * adds )
+ . Function: smc_setmulticast( int ioaddr, struct net_device *dev )
  . Purpose:
  .    This sets the internal hardware table to filter out unwanted multicast
  .    packets before they take up memory.
@@ -438,26 +438,23 @@ static void smc_setmulticast(int ioaddr, struct net_device *dev)
 {
 	int			i;
 	unsigned char		multicast_table[ 8 ];
-	struct dev_mc_list *cur_addr;
+	struct netdev_hw_addr *ha;
 	/* table for flipping the order of 3 bits */
 	unsigned char invert3[] = { 0, 4, 2, 6, 1, 5, 3, 7 };
 
 	/* start with a table of all zeros: reject all */
 	memset( multicast_table, 0, sizeof( multicast_table ) );
 
-	netdev_for_each_mc_addr(cur_addr, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		int position;
 
-		/* do we have a pointer here? */
-		if ( !cur_addr )
-			break;
 		/* make sure this is a multicast address - shouldn't this
 		   be a given if we have it here ? */
-		if ( !( *cur_addr->dmi_addr & 1 ) )
+		if (!(*ha->addr & 1))
 			continue;
 
 		/* only use the low order bits */
-		position = ether_crc_le(6, cur_addr->dmi_addr) & 0x3f;
+		position = ether_crc_le(6, ha->addr) & 0x3f;
 
 		/* do some messy swapping to put the bit in the right spot */
 		multicast_table[invert3[position&7]] |=
diff --git a/drivers/net/smc91x.c b/drivers/net/smc91x.c
index 2e8133809dc0..20c756577631 100644
--- a/drivers/net/smc91x.c
+++ b/drivers/net/smc91x.c
@@ -1413,7 +1413,7 @@ static void smc_set_multicast_list(struct net_device *dev)
 	 * within that register.
 	 */
 	else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 
 		/* table for flipping the order of 3 bits */
 		static const unsigned char invert3[] = {0, 4, 2, 6, 1, 5, 3, 7};
@@ -1421,16 +1421,16 @@ static void smc_set_multicast_list(struct net_device *dev)
 		/* start with a table of all zeros: reject all */
 		memset(multicast_table, 0, sizeof(multicast_table));
 
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			int position;
 
 			/* make sure this is a multicast address -
 		   	   shouldn't this be a given if we have it here ? */
-			if (!(*cur_addr->dmi_addr & 1))
+			if (!(*ha->addr & 1))
 				continue;
 
 			/* only use the low order bits */
-			position = crc32_le(~0, cur_addr->dmi_addr, 6) & 0x3f;
+			position = crc32_le(~0, ha->addr, 6) & 0x3f;
 
 			/* do some messy swapping to put the bit in the right spot */
 			multicast_table[invert3[position&7]] |=
diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
index 4fd1d8b38788..d6e3a12d65d3 100644
--- a/drivers/net/smsc911x.c
+++ b/drivers/net/smsc911x.c
@@ -1383,13 +1383,13 @@ static void smsc911x_set_multicast_list(struct net_device *dev)
 		/* Enabling specific multicast addresses */
 		unsigned int hash_high = 0;
 		unsigned int hash_low = 0;
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		pdata->set_bits_mask = MAC_CR_HPFILT_;
 		pdata->clear_bits_mask = (MAC_CR_PRMS_ | MAC_CR_MCPAS_);
 
-		netdev_for_each_mc_addr(mc_list, dev) {
-			unsigned int bitnum = smsc911x_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			unsigned int bitnum = smsc911x_hash(ha->addr);
 			unsigned int mask = 0x01 << (bitnum & 0x1F);
 
 			if (bitnum & 0x20)
diff --git a/drivers/net/smsc9420.c b/drivers/net/smsc9420.c
index 34fa10d8ad40..5409ec1a5fd7 100644
--- a/drivers/net/smsc9420.c
+++ b/drivers/net/smsc9420.c
@@ -1063,12 +1063,12 @@ static void smsc9420_set_multicast_list(struct net_device *dev)
 		mac_cr |= MAC_CR_MCPAS_;
 		mac_cr &= (~MAC_CR_HPFILT_);
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 hash_lo = 0, hash_hi = 0;
 
 		smsc_dbg(HW, "Multicast filter enabled");
-		netdev_for_each_mc_addr(mc_list, dev) {
-			u32 bit_num = smsc9420_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			u32 bit_num = smsc9420_hash(ha->addr);
 			u32 mask = 1 << (bit_num & 0x1F);
 
 			if (bit_num & 0x20)
diff --git a/drivers/net/sonic.c b/drivers/net/sonic.c
index 287c251075e5..e5d67327d707 100644
--- a/drivers/net/sonic.c
+++ b/drivers/net/sonic.c
@@ -531,7 +531,7 @@ static void sonic_multicast_list(struct net_device *dev)
 {
 	struct sonic_local *lp = netdev_priv(dev);
 	unsigned int rcr;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char *addr;
 	int i;
 
@@ -550,8 +550,8 @@ static void sonic_multicast_list(struct net_device *dev)
 				       netdev_mc_count(dev));
 			sonic_set_cam_enable(dev, 1);  /* always enable our own address */
 			i = 1;
-			netdev_for_each_mc_addr(dmi, dev) {
-				addr = dmi->dmi_addr;
+			netdev_for_each_mc_addr(ha, dev) {
+				addr = ha->addr;
 				sonic_cda_put(dev, i, SONIC_CD_CAP0, addr[1] << 8 | addr[0]);
 				sonic_cda_put(dev, i, SONIC_CD_CAP1, addr[3] << 8 | addr[2]);
 				sonic_cda_put(dev, i, SONIC_CD_CAP2, addr[5] << 8 | addr[4]);
diff --git a/drivers/net/spider_net.c b/drivers/net/spider_net.c
index 5ba9d989f8fc..c04c8f9a2d96 100644
--- a/drivers/net/spider_net.c
+++ b/drivers/net/spider_net.c
@@ -625,7 +625,7 @@ spider_net_get_multicast_hash(struct net_device *netdev, __u8 *addr)
 static void
 spider_net_set_multi(struct net_device *netdev)
 {
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u8 hash;
 	int i;
 	u32 reg;
@@ -646,8 +646,8 @@ spider_net_set_multi(struct net_device *netdev)
 	hash = spider_net_get_multicast_hash(netdev, netdev->broadcast); */
 	set_bit(0xfd, bitmask);
 
-	netdev_for_each_mc_addr(mc, netdev) {
-		hash = spider_net_get_multicast_hash(netdev, mc->dmi_addr);
+	netdev_for_each_mc_addr(ha, netdev) {
+		hash = spider_net_get_multicast_hash(netdev, ha->addr);
 		set_bit(hash, bitmask);
 	}
 
diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c
index 6dfa69899019..8a6d27cdc0bd 100644
--- a/drivers/net/starfire.c
+++ b/drivers/net/starfire.c
@@ -1766,7 +1766,7 @@ static void set_rx_mode(struct net_device *dev)
 	struct netdev_private *np = netdev_priv(dev);
 	void __iomem *ioaddr = np->base;
 	u32 rx_mode = MinVLANPrio;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 #ifdef VLAN_SUPPORT
 
@@ -1804,8 +1804,8 @@ static void set_rx_mode(struct net_device *dev)
 		/* Use the 16 element perfect filter, skip first two entries. */
 		void __iomem *filter_addr = ioaddr + PerfFilterTable + 2 * 16;
 		__be16 *eaddrs;
-		netdev_for_each_mc_addr(mclist, dev) {
-			eaddrs = (__be16 *)mclist->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			eaddrs = (__be16 *) ha->addr;
 			writew(be16_to_cpu(eaddrs[2]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[1]), filter_addr); filter_addr += 4;
 			writew(be16_to_cpu(eaddrs[0]), filter_addr); filter_addr += 8;
@@ -1825,10 +1825,10 @@ static void set_rx_mode(struct net_device *dev)
 		__le16 mc_filter[32] __attribute__ ((aligned(sizeof(long))));	/* Multicast hash filter */
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The chip uses the upper 9 CRC bits
 			   as index into the hash table */
-			int bit_nr = ether_crc_le(ETH_ALEN, mclist->dmi_addr) >> 23;
+			int bit_nr = ether_crc_le(ETH_ALEN, ha->addr) >> 23;
 			__le32 *fptr = (__le32 *) &mc_filter[(bit_nr >> 4) & ~1];
 
 			*fptr |= cpu_to_le32(1 << (bit_nr & 31));
diff --git a/drivers/net/stmmac/dwmac100.c b/drivers/net/stmmac/dwmac100.c
index a183de29c39a..c0a1c9df3ac7 100644
--- a/drivers/net/stmmac/dwmac100.c
+++ b/drivers/net/stmmac/dwmac100.c
@@ -316,7 +316,7 @@ static void dwmac100_set_filter(struct net_device *dev)
 			   MAC_CONTROL_HO | MAC_CONTROL_HP);
 	} else {
 		u32 mc_filter[2];
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		/* Perfect filter mode for physical address and Hash
 		   filter for multicast */
@@ -325,11 +325,11 @@ static void dwmac100_set_filter(struct net_device *dev)
 			   MAC_CONTROL_IF | MAC_CONTROL_HO);
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The upper 6 bits of the calculated CRC are used to
 			 * index the contens of the hash table */
 			int bit_nr =
-			    ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+			    ether_crc(ETH_ALEN, ha->addr) >> 26;
 			/* The most significant bit determines the register to
 			 * use (H/L) while the other 5 bits determine the bit
 			 * within the register. */
diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c
index a6538ae4694c..bf73fda6adfb 100644
--- a/drivers/net/stmmac/dwmac1000_core.c
+++ b/drivers/net/stmmac/dwmac1000_core.c
@@ -94,17 +94,17 @@ static void dwmac1000_set_filter(struct net_device *dev)
 		writel(0xffffffff, ioaddr + GMAC_HASH_LOW);
 	} else if (!netdev_mc_empty(dev)) {
 		u32 mc_filter[2];
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		/* Hash filter for multicast */
 		value = GMAC_FRAME_FILTER_HMC;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* The upper 6 bits of the calculated CRC are used to
 			   index the contens of the hash table */
 			int bit_nr =
-			    bitrev32(~crc32_le(~0, mclist->dmi_addr, 6)) >> 26;
+			    bitrev32(~crc32_le(~0, ha->addr, 6)) >> 26;
 			/* The most significant bit determines the register to
 			 * use (H/L) while the other 5 bits determine the bit
 			 * within the register. */
diff --git a/drivers/net/sun3_82586.c b/drivers/net/sun3_82586.c
index 2f6a760e5f21..2cfa065c39c0 100644
--- a/drivers/net/sun3_82586.c
+++ b/drivers/net/sun3_82586.c
@@ -413,7 +413,7 @@ static int init586(struct net_device *dev)
 	volatile struct iasetup_cmd_struct *ias_cmd;
 	volatile struct tdr_cmd_struct *tdr_cmd;
 	volatile struct mcsetup_cmd_struct *mc_cmd;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	int num_addrs=netdev_mc_count(dev);
 
 	ptr = (void *) ((char *)p->scb + sizeof(struct scb_struct));
@@ -537,9 +537,9 @@ static int init586(struct net_device *dev)
 		mc_cmd->mc_cnt = swab16(num_addrs * 6);
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev)
+		netdev_for_each_mc_addr(ha, dev)
 			memcpy((char *) mc_cmd->mc_list[i++],
-			       dmi->dmi_addr, ETH_ALEN);
+			       ha->addr, ETH_ALEN);
 
 		p->scb->cbl_offset = make16(mc_cmd);
 		p->scb->cmd_cuc = CUC_START;
diff --git a/drivers/net/sunbmac.c b/drivers/net/sunbmac.c
index a0bd361d5eca..4dd159b4116b 100644
--- a/drivers/net/sunbmac.c
+++ b/drivers/net/sunbmac.c
@@ -999,7 +999,7 @@ static void bigmac_set_multicast(struct net_device *dev)
 {
 	struct bigmac *bp = netdev_priv(dev);
 	void __iomem *bregs = bp->bregs;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	int i;
 	u32 tmp, crc;
@@ -1028,8 +1028,8 @@ static void bigmac_set_multicast(struct net_device *dev)
 		for (i = 0; i < 4; i++)
 			hash_table[i] = 0;
 
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sundance.c b/drivers/net/sundance.c
index a855934dfc3b..47e8cce10d1e 100644
--- a/drivers/net/sundance.c
+++ b/drivers/net/sundance.c
@@ -1523,13 +1523,13 @@ static void set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = AcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		int bit;
 		int index;
 		int crc;
 		memset (mc_filter, 0, sizeof (mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = ether_crc_le (ETH_ALEN, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc_le(ETH_ALEN, ha->addr);
 			for (index=0, bit=0; bit < 6; bit++, crc <<= 1)
 				if (crc & 0x80000000) index |= 1 << bit;
 			mc_filter[index/16] |= (1 << (index % 16));
diff --git a/drivers/net/sungem.c b/drivers/net/sungem.c
index 70196bc5fe61..58a27541ae39 100644
--- a/drivers/net/sungem.c
+++ b/drivers/net/sungem.c
@@ -1846,12 +1846,12 @@ static u32 gem_setup_multicast(struct gem *gp)
 	} else {
 		u16 hash_table[16];
 		u32 crc;
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, gp->dev) {
-			char *addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, gp->dev) {
+			char *addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunhme.c b/drivers/net/sunhme.c
index b17dbb11bd67..20deb14e98ff 100644
--- a/drivers/net/sunhme.c
+++ b/drivers/net/sunhme.c
@@ -1523,13 +1523,13 @@ static int happy_meal_init(struct happy_meal *hp)
 		hme_write32(hp, bregs + BMAC_HTABLE3, 0xffff);
 	} else if ((hp->dev->flags & IFF_PROMISC) == 0) {
 		u16 hash_table[4];
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		char *addrs;
 		u32 crc;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, hp->dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, hp->dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
@@ -2362,7 +2362,7 @@ static void happy_meal_set_multicast(struct net_device *dev)
 {
 	struct happy_meal *hp = netdev_priv(dev);
 	void __iomem *bregs = hp->bigmacregs;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 
@@ -2380,8 +2380,8 @@ static void happy_meal_set_multicast(struct net_device *dev)
 		u16 hash_table[4];
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunlance.c b/drivers/net/sunlance.c
index d7c73f478ef5..674570584bd5 100644
--- a/drivers/net/sunlance.c
+++ b/drivers/net/sunlance.c
@@ -1170,7 +1170,7 @@ static int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void lance_load_multicast(struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	char *addrs;
 	u32 crc;
 	u32 val;
@@ -1195,8 +1195,8 @@ static void lance_load_multicast(struct net_device *dev)
 		return;
 
 	/* Add addresses */
-	netdev_for_each_mc_addr(dmi, dev) {
-		addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrs = ha->addr;
 
 		/* multicast address? */
 		if (!(*addrs & 1))
diff --git a/drivers/net/sunqe.c b/drivers/net/sunqe.c
index be637dce944c..239f09772191 100644
--- a/drivers/net/sunqe.c
+++ b/drivers/net/sunqe.c
@@ -627,7 +627,7 @@ static int qe_start_xmit(struct sk_buff *skb, struct net_device *dev)
 static void qe_set_multicast(struct net_device *dev)
 {
 	struct sunqe *qep = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u8 new_mconfig = qep->mconfig;
 	char *addrs;
 	int i;
@@ -651,8 +651,8 @@ static void qe_set_multicast(struct net_device *dev)
 		u8 *hbytes = (unsigned char *) &hash_table[0];
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(dmi, dev) {
-			addrs = dmi->dmi_addr;
+		netdev_for_each_mc_addr(ha, dev) {
+			addrs = ha->addr;
 
 			if (!(*addrs & 1))
 				continue;
diff --git a/drivers/net/sunvnet.c b/drivers/net/sunvnet.c
index 6b1b7cea7f6b..6cf8b06be5cd 100644
--- a/drivers/net/sunvnet.c
+++ b/drivers/net/sunvnet.c
@@ -763,12 +763,12 @@ static struct vnet_mcast_entry *__vnet_mc_find(struct vnet *vp, u8 *addr)
 
 static void __update_mc_list(struct vnet *vp, struct net_device *dev)
 {
-	struct dev_addr_list *p;
+	struct netdev_hw_addr *ha;
 
-	netdev_for_each_mc_addr(p, dev) {
+	netdev_for_each_mc_addr(ha, dev) {
 		struct vnet_mcast_entry *m;
 
-		m = __vnet_mc_find(vp, p->dmi_addr);
+		m = __vnet_mc_find(vp, ha->addr);
 		if (m) {
 			m->hit = 1;
 			continue;
@@ -778,7 +778,7 @@ static void __update_mc_list(struct vnet *vp, struct net_device *dev)
 			m = kzalloc(sizeof(*m), GFP_ATOMIC);
 			if (!m)
 				continue;
-			memcpy(m->addr, p->dmi_addr, ETH_ALEN);
+			memcpy(m->addr, ha->addr, ETH_ALEN);
 			m->hit = 1;
 
 			m->next = vp->mcast_list;
diff --git a/drivers/net/tc35815.c b/drivers/net/tc35815.c
index 49bd84c0d583..36149ddace46 100644
--- a/drivers/net/tc35815.c
+++ b/drivers/net/tc35815.c
@@ -1954,16 +1954,16 @@ tc35815_set_multicast_list(struct net_device *dev)
 		/* Disable promiscuous mode, use normal mode. */
 		tc_writel(CAM_CompEn | CAM_BroadAcc | CAM_GroupAcc, &tr->CAM_Ctl);
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *cur_addr;
+		struct netdev_hw_addr *ha;
 		int i;
 		int ena_bits = CAM_Ena_Bit(CAM_ENTRY_SOURCE);
 
 		tc_writel(0, &tr->CAM_Ctl);
 		/* Walk the address list, and load the filter */
 		i = 0;
-		netdev_for_each_mc_addr(cur_addr, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			/* entry 0,1 is reserved. */
-			tc35815_set_cam_entry(dev, i + 2, cur_addr->dmi_addr);
+			tc35815_set_cam_entry(dev, i + 2, ha->addr);
 			ena_bits |= CAM_Ena_Bit(i + 2);
 			i++;
 		}
diff --git a/drivers/net/tehuti.c b/drivers/net/tehuti.c
index 068a47174fca..a38aede5c8d6 100644
--- a/drivers/net/tehuti.c
+++ b/drivers/net/tehuti.c
@@ -808,7 +808,7 @@ static void bdx_setmulti(struct net_device *ndev)
 			WRITE_REG(priv, regRX_MCST_HASH0 + i * 4, ~0);
 	} else if (!netdev_mc_empty(ndev)) {
 		u8 hash;
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 reg, val;
 
 		/* set IMF to deny all multicast frames */
@@ -825,10 +825,10 @@ static void bdx_setmulti(struct net_device *ndev)
 		 * into RX_MAC_MCST regs. we skip this phase now and accept ALL
 		 * multicast frames throu IMF */
 		/* accept the rest of addresses throu IMF */
-		netdev_for_each_mc_addr(mclist, ndev) {
+		netdev_for_each_mc_addr(ha, ndev) {
 			hash = 0;
 			for (i = 0; i < ETH_ALEN; i++)
-				hash ^= mclist->dmi_addr[i];
+				hash ^= ha->addr[i];
 			reg = regRX_MCST_HASH0 + ((hash >> 5) << 2);
 			val = READ_REG(priv, reg);
 			val |= (1 << (hash % 32));
diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index 22cf1c446de3..aff11f84f525 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -9457,14 +9457,14 @@ static void __tg3_set_rx_mode(struct net_device *dev)
 		tg3_set_multi (tp, 0);
 	} else {
 		/* Accept one or more multicast(s). */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u32 mc_filter[4] = { 0, };
 		u32 regidx;
 		u32 bit;
 		u32 crc;
 
-		netdev_for_each_mc_addr(mclist, dev) {
-			crc = calc_crc (mclist->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = calc_crc(ha->addr, ETH_ALEN);
 			bit = ~crc & 0x7f;
 			regidx = (bit & 0x60) >> 5;
 			bit &= 0x1f;
diff --git a/drivers/net/tlan.c b/drivers/net/tlan.c
index 05ea30a94e89..8ffec22b74bf 100644
--- a/drivers/net/tlan.c
+++ b/drivers/net/tlan.c
@@ -1314,7 +1314,7 @@ static struct net_device_stats *TLan_GetStats( struct net_device *dev )
 
 static void TLan_SetMulticastList( struct net_device *dev )
 {
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	u32			hash1 = 0;
 	u32			hash2 = 0;
 	int			i;
@@ -1336,12 +1336,12 @@ static void TLan_SetMulticastList( struct net_device *dev )
 			TLan_DioWrite32( dev->base_addr, TLAN_HASH_2, 0xFFFFFFFF );
 		} else {
 			i = 0;
-			netdev_for_each_mc_addr(dmi, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				if ( i < 3 ) {
 					TLan_SetMac( dev, i + 1,
-						     (char *) &dmi->dmi_addr );
+						     (char *) &ha->addr);
 				} else {
-					offset = TLan_HashFunc( (u8 *) &dmi->dmi_addr );
+					offset = TLan_HashFunc((u8 *)&ha->addr);
 					if ( offset < 32 )
 						hash1 |= ( 1 << offset );
 					else
diff --git a/drivers/net/tokenring/3c359.c b/drivers/net/tokenring/3c359.c
index 8c54d89e60d7..4673e38c52a9 100644
--- a/drivers/net/tokenring/3c359.c
+++ b/drivers/net/tokenring/3c359.c
@@ -1390,7 +1390,7 @@ static int xl_close(struct net_device *dev)
 static void xl_set_rx_mode(struct net_device *dev) 
 {
 	struct xl_private *xl_priv = netdev_priv(dev);
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[4] ; 
 	u16 options ; 
 
@@ -1407,11 +1407,11 @@ static void xl_set_rx_mode(struct net_device *dev)
 
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ;
 
-	netdev_for_each_mc_addr(dmi, dev) {
-                dev_mc_address[0] |= dmi->dmi_addr[2] ;
-                dev_mc_address[1] |= dmi->dmi_addr[3] ;
-                dev_mc_address[2] |= dmi->dmi_addr[4] ;
-                dev_mc_address[3] |= dmi->dmi_addr[5] ;
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
         }
 
 	if (memcmp(xl_priv->xl_functional_addr,dev_mc_address,4) != 0) { /* Options have changed, run the command */
diff --git a/drivers/net/tokenring/ibmtr.c b/drivers/net/tokenring/ibmtr.c
index 1a0967246e2f..eebdaae24328 100644
--- a/drivers/net/tokenring/ibmtr.c
+++ b/drivers/net/tokenring/ibmtr.c
@@ -986,7 +986,7 @@ static void open_sap(unsigned char type, struct net_device *dev)
 static void tok_set_multicast_list(struct net_device *dev)
 {
 	struct tok_info *ti = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	unsigned char address[4];
 
 	int i;
@@ -995,11 +995,11 @@ static void tok_set_multicast_list(struct net_device *dev)
 	/*BMS ifconfig tr down or hot unplug a PCMCIA card ??hownowbrowncow*/
 	if (/*BMSHELPdev->start == 0 ||*/ ti->open_status != OPEN) return;
 	address[0] = address[1] = address[2] = address[3] = 0;
-	netdev_for_each_mc_addr(mclist, dev) {
-		address[0] |= mclist->dmi_addr[2];
-		address[1] |= mclist->dmi_addr[3];
-		address[2] |= mclist->dmi_addr[4];
-		address[3] |= mclist->dmi_addr[5];
+	netdev_for_each_mc_addr(ha, dev) {
+		address[0] |= ha->addr[2];
+		address[1] |= ha->addr[3];
+		address[2] |= ha->addr[4];
+		address[3] |= ha->addr[5];
 	}
 	SET_PAGE(ti->srb_page);
 	for (i = 0; i < sizeof(struct srb_set_funct_addr); i++)
diff --git a/drivers/net/tokenring/lanstreamer.c b/drivers/net/tokenring/lanstreamer.c
index 01c780f25e98..88c893100c2b 100644
--- a/drivers/net/tokenring/lanstreamer.c
+++ b/drivers/net/tokenring/lanstreamer.c
@@ -1266,7 +1266,7 @@ static void streamer_set_rx_mode(struct net_device *dev)
 	    netdev_priv(dev);
 	__u8 __iomem *streamer_mmio = streamer_priv->streamer_mmio;
 	__u8 options = 0;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[5];
 
 	writel(streamer_priv->srb, streamer_mmio + LAPA);
@@ -1302,11 +1302,11 @@ static void streamer_set_rx_mode(struct net_device *dev)
 	writel(streamer_priv->srb,streamer_mmio+LAPA);
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ; 
   
-	netdev_for_each_mc_addr(dmi, dev) {
-   	        dev_mc_address[0] |= dmi->dmi_addr[2] ; 
-		dev_mc_address[1] |= dmi->dmi_addr[3] ; 
-		dev_mc_address[2] |= dmi->dmi_addr[4] ; 
-		dev_mc_address[3] |= dmi->dmi_addr[5] ; 
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
 	}
   
 	writew(htons(SRB_SET_FUNC_ADDRESS << 8),streamer_mmio+LAPDINC);
diff --git a/drivers/net/tokenring/olympic.c b/drivers/net/tokenring/olympic.c
index 3decaf4b6cf2..3d2fbe60b46e 100644
--- a/drivers/net/tokenring/olympic.c
+++ b/drivers/net/tokenring/olympic.c
@@ -1139,7 +1139,7 @@ static void olympic_set_rx_mode(struct net_device *dev)
    	u8 __iomem *olympic_mmio = olympic_priv->olympic_mmio ; 
 	u8 options = 0; 
 	u8 __iomem *srb;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	unsigned char dev_mc_address[4] ; 
 
 	writel(olympic_priv->srb,olympic_mmio+LAPA);
@@ -1177,11 +1177,11 @@ static void olympic_set_rx_mode(struct net_device *dev)
 
 	dev_mc_address[0] = dev_mc_address[1] = dev_mc_address[2] = dev_mc_address[3] = 0 ; 
 
-	netdev_for_each_mc_addr(dmi, dev) {
-		dev_mc_address[0] |= dmi->dmi_addr[2] ; 
-		dev_mc_address[1] |= dmi->dmi_addr[3] ; 
-		dev_mc_address[2] |= dmi->dmi_addr[4] ; 
-		dev_mc_address[3] |= dmi->dmi_addr[5] ; 
+	netdev_for_each_mc_addr(ha, dev) {
+		dev_mc_address[0] |= ha->addr[2];
+		dev_mc_address[1] |= ha->addr[3];
+		dev_mc_address[2] |= ha->addr[4];
+		dev_mc_address[3] |= ha->addr[5];
 	}
 
 	writeb(SRB_SET_FUNC_ADDRESS,srb+0);
diff --git a/drivers/net/tokenring/tms380tr.c b/drivers/net/tokenring/tms380tr.c
index b545e20d2898..9fa2c36ff904 100644
--- a/drivers/net/tokenring/tms380tr.c
+++ b/drivers/net/tokenring/tms380tr.c
@@ -1212,17 +1212,17 @@ static void tms380tr_set_multicast_list(struct net_device *dev)
 		}
 		else
 		{
-			struct dev_mc_list *mclist;
+			struct netdev_hw_addr *ha;
 
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				((char *)(&tp->ocpl.FunctAddr))[0] |=
-					mclist->dmi_addr[2];
+					ha->addr[2];
 				((char *)(&tp->ocpl.FunctAddr))[1] |=
-					mclist->dmi_addr[3];
+					ha->addr[3];
 				((char *)(&tp->ocpl.FunctAddr))[2] |=
-					mclist->dmi_addr[4];
+					ha->addr[4];
 				((char *)(&tp->ocpl.FunctAddr))[3] |=
-					mclist->dmi_addr[5];
+					ha->addr[5];
 			}
 		}
 		tms380tr_exec_cmd(dev, OC_SET_FUNCT_ADDR);
diff --git a/drivers/net/tsi108_eth.c b/drivers/net/tsi108_eth.c
index 80333a4d3287..1366541c30a2 100644
--- a/drivers/net/tsi108_eth.c
+++ b/drivers/net/tsi108_eth.c
@@ -1186,15 +1186,15 @@ static void tsi108_set_rx_mode(struct net_device *dev)
 
 	if (dev->flags & IFF_ALLMULTI || !netdev_mc_empty(dev)) {
 		int i;
-		struct dev_mc_list *mc;
+		struct netdev_hw_addr *ha;
 		rxcfg |= TSI108_EC_RXCFG_MFE | TSI108_EC_RXCFG_MC_HASH;
 
 		memset(data->mc_hash, 0, sizeof(data->mc_hash));
 
-		netdev_for_each_mc_addr(mc, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			u32 hash, crc;
 
-			crc = ether_crc(6, mc->dmi_addr);
+			crc = ether_crc(6, ha->addr);
 			hash = crc >> 23;
 			__set_bit(hash, &data->mc_hash[0]);
 		}
diff --git a/drivers/net/tulip/de2104x.c b/drivers/net/tulip/de2104x.c
index cb429723b2c8..a68b91764627 100644
--- a/drivers/net/tulip/de2104x.c
+++ b/drivers/net/tulip/de2104x.c
@@ -670,15 +670,15 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 {
 	struct de_private *de = netdev_priv(dev);
 	u16 hash_table[32];
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 	u16 *eaddrs;
 
 	memset(hash_table, 0, sizeof(hash_table));
 	set_bit_le(255, hash_table); 			/* Broadcast entry */
 	/* This should work on big-endian machines as well. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+	netdev_for_each_mc_addr(ha, dev) {
+		int index = ether_crc_le(ETH_ALEN, ha->addr) & 0x1ff;
 
 		set_bit_le(index, hash_table);
 	}
@@ -699,13 +699,13 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
 {
 	struct de_private *de = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u16 *eaddrs;
 
 	/* We have <= 14 addresses so we can use the wonderful
 	   16 address perfect filtering of the Tulip. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		eaddrs = (u16 *)mclist->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		eaddrs = (u16 *) ha->addr;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
diff --git a/drivers/net/tulip/de4x5.c b/drivers/net/tulip/de4x5.c
index 55ade8949a68..cac4bcc83187 100644
--- a/drivers/net/tulip/de4x5.c
+++ b/drivers/net/tulip/de4x5.c
@@ -1951,7 +1951,7 @@ static void
 SetMulticastFilter(struct net_device *dev)
 {
     struct de4x5_private *lp = netdev_priv(dev);
-    struct dev_mc_list *dmi;
+    struct netdev_hw_addr *ha;
     u_long iobase = dev->base_addr;
     int i, bit, byte;
     u16 hashcode;
@@ -1966,8 +1966,8 @@ SetMulticastFilter(struct net_device *dev)
     if ((dev->flags & IFF_ALLMULTI) || (netdev_mc_count(dev) > 14)) {
 	omr |= OMR_PM;                       /* Pass all multicasts */
     } else if (lp->setup_f == HASH_PERF) {   /* Hash Filtering */
-	netdev_for_each_mc_addr(dmi, dev) {
-	    addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+	    addrs = ha->addr;
 	    if ((*addrs & 0x01) == 1) {      /* multicast address? */
 		crc = ether_crc_le(ETH_ALEN, addrs);
 		hashcode = crc & HASH_BITS;  /* hashcode is 9 LSb of CRC */
@@ -1983,8 +1983,8 @@ SetMulticastFilter(struct net_device *dev)
 	    }
 	}
     } else {                                 /* Perfect filtering */
-	netdev_for_each_mc_addr(dmi, dev) {
-	    addrs = dmi->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+	    addrs = ha->addr;
 	    for (i=0; i<ETH_ALEN; i++) {
 		*(pa + (i&1)) = *addrs++;
 		if (i & 0x01) pa += 4;
diff --git a/drivers/net/tulip/dmfe.c b/drivers/net/tulip/dmfe.c
index 95b38d803e9b..a039c8ab98b2 100644
--- a/drivers/net/tulip/dmfe.c
+++ b/drivers/net/tulip/dmfe.c
@@ -1454,7 +1454,7 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr)
 
 static void dm9132_id_table(struct DEVICE *dev)
 {
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	u16 * addrptr;
 	unsigned long ioaddr = dev->base_addr+0xc0;		/* ID Table */
 	u32 hash_val;
@@ -1478,8 +1478,8 @@ static void dm9132_id_table(struct DEVICE *dev)
 	hash_table[3] = 0x8000;
 
 	/* the multicast address in Hash Table : 64 bits */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		hash_val = cal_CRC((char *) mcptr->dmi_addr, 6, 0) & 0x3f;
+	netdev_for_each_mc_addr(ha, dev) {
+		hash_val = cal_CRC((char *) ha->addr, 6, 0) & 0x3f;
 		hash_table[hash_val / 16] |= (u16) 1 << (hash_val % 16);
 	}
 
@@ -1497,7 +1497,7 @@ static void dm9132_id_table(struct DEVICE *dev)
 static void send_filter_frame(struct DEVICE *dev)
 {
 	struct dmfe_board_info *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	struct tx_desc *txptr;
 	u16 * addrptr;
 	u32 * suptr;
@@ -1520,8 +1520,8 @@ static void send_filter_frame(struct DEVICE *dev)
 	*suptr++ = 0xffff;
 
 	/* fit the multicast address */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		addrptr = (u16 *) mcptr->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrptr = (u16 *) ha->addr;
 		*suptr++ = addrptr[0];
 		*suptr++ = addrptr[1];
 		*suptr++ = addrptr[2];
diff --git a/drivers/net/tulip/tulip_core.c b/drivers/net/tulip/tulip_core.c
index 7f544ef2f5fc..c4b7cd726b69 100644
--- a/drivers/net/tulip/tulip_core.c
+++ b/drivers/net/tulip/tulip_core.c
@@ -990,15 +990,15 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 {
 	struct tulip_private *tp = netdev_priv(dev);
 	u16 hash_table[32];
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 	u16 *eaddrs;
 
 	memset(hash_table, 0, sizeof(hash_table));
 	set_bit_le(255, hash_table); 			/* Broadcast entry */
 	/* This should work on big-endian machines as well. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		int index = ether_crc_le(ETH_ALEN, mclist->dmi_addr) & 0x1ff;
+	netdev_for_each_mc_addr(ha, dev) {
+		int index = ether_crc_le(ETH_ALEN, ha->addr) & 0x1ff;
 
 		set_bit_le(index, hash_table);
 	}
@@ -1018,13 +1018,13 @@ static void build_setup_frame_hash(u16 *setup_frm, struct net_device *dev)
 static void build_setup_frame_perfect(u16 *setup_frm, struct net_device *dev)
 {
 	struct tulip_private *tp = netdev_priv(dev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	u16 *eaddrs;
 
 	/* We have <= 14 addresses so we can use the wonderful
 	   16 address perfect filtering of the Tulip. */
-	netdev_for_each_mc_addr(mclist, dev) {
-		eaddrs = (u16 *)mclist->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		eaddrs = (u16 *) ha->addr;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
 		*setup_frm++ = *eaddrs; *setup_frm++ = *eaddrs++;
@@ -1061,7 +1061,7 @@ static void set_rx_mode(struct net_device *dev)
 	} else	if (tp->flags & MC_HASH_ONLY) {
 		/* Some work-alikes have only a 64-entry hash filter table. */
 		/* Should verify correctness on big-endian/__powerpc__ */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		if (netdev_mc_count(dev) > 64) {
 			/* Arbitrary non-effective limit. */
 			tp->csr6 |= AcceptAllMulticast;
@@ -1069,18 +1069,21 @@ static void set_rx_mode(struct net_device *dev)
 		} else {
 			u32 mc_filter[2] = {0, 0};		 /* Multicast hash filter */
 			int filterbit;
-			netdev_for_each_mc_addr(mclist, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				if (tp->flags & COMET_MAC_ADDR)
-					filterbit = ether_crc_le(ETH_ALEN, mclist->dmi_addr);
+					filterbit = ether_crc_le(ETH_ALEN,
+								 ha->addr);
 				else
-					filterbit = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+					filterbit = ether_crc(ETH_ALEN,
+							      ha->addr) >> 26;
 				filterbit &= 0x3f;
 				mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
 				if (tulip_debug > 2)
 					dev_info(&dev->dev,
 						 "Added filter for %pM  %08x bit %d\n",
-						 mclist->dmi_addr,
-						 ether_crc(ETH_ALEN, mclist->dmi_addr), filterbit);
+						 ha->addr,
+						 ether_crc(ETH_ALEN, ha->addr),
+						 filterbit);
 			}
 			if (mc_filter[0] == tp->mc_filter[0]  &&
 				mc_filter[1] == tp->mc_filter[1])
diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
index 0ab05af237e5..b79d908fe34e 100644
--- a/drivers/net/tulip/uli526x.c
+++ b/drivers/net/tulip/uli526x.c
@@ -1392,7 +1392,7 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr)
 static void send_filter_frame(struct net_device *dev, int mc_cnt)
 {
 	struct uli526x_board_info *db = netdev_priv(dev);
-	struct dev_mc_list *mcptr;
+	struct netdev_hw_addr *ha;
 	struct tx_desc *txptr;
 	u16 * addrptr;
 	u32 * suptr;
@@ -1415,8 +1415,8 @@ static void send_filter_frame(struct net_device *dev, int mc_cnt)
 	*suptr++ = 0xffff << FLT_SHIFT;
 
 	/* fit the multicast address */
-	netdev_for_each_mc_addr(mcptr, dev) {
-		addrptr = (u16 *) mcptr->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addrptr = (u16 *) ha->addr;
 		*suptr++ = addrptr[0] << FLT_SHIFT;
 		*suptr++ = addrptr[1] << FLT_SHIFT;
 		*suptr++ = addrptr[2] << FLT_SHIFT;
diff --git a/drivers/net/tulip/winbond-840.c b/drivers/net/tulip/winbond-840.c
index 304f43866c44..3e3822c98a0d 100644
--- a/drivers/net/tulip/winbond-840.c
+++ b/drivers/net/tulip/winbond-840.c
@@ -1367,13 +1367,15 @@ static u32 __set_rx_mode(struct net_device *dev)
 		memset(mc_filter, 0xff, sizeof(mc_filter));
 		rx_mode = RxAcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int filterbit = (ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26) ^ 0x3F;
-			filterbit &= 0x3f;
-			mc_filter[filterbit >> 5] |= 1 << (filterbit & 31);
+		netdev_for_each_mc_addr(ha, dev) {
+			int filbit;
+
+			filbit = (ether_crc(ETH_ALEN, ha->addr) >> 26) ^ 0x3F;
+			filbit &= 0x3f;
+			mc_filter[filbit >> 5] |= 1 << (filbit & 31);
 		}
 		rx_mode = RxAcceptBroadcast | AcceptMulticast | AcceptMyPhys;
 	}
diff --git a/drivers/net/typhoon.c b/drivers/net/typhoon.c
index cd24e5f2b2a2..3053f85aa066 100644
--- a/drivers/net/typhoon.c
+++ b/drivers/net/typhoon.c
@@ -921,11 +921,11 @@ typhoon_set_rx_mode(struct net_device *dev)
 		/* Too many to match, or accept all multicasts. */
 		filter |= TYPHOON_RX_FILTER_ALL_MCAST;
 	} else if (!netdev_mc_empty(dev)) {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit = ether_crc(ETH_ALEN, mclist->dmi_addr) & 0x3f;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit = ether_crc(ETH_ALEN, ha->addr) & 0x3f;
 			mc_filter[bit >> 5] |= 1 << (bit & 0x1f);
 		}
 
diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
index 75743a76bbe8..081f76bff341 100644
--- a/drivers/net/ucc_geth.c
+++ b/drivers/net/ucc_geth.c
@@ -1999,7 +1999,7 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
 static void ucc_geth_set_multi(struct net_device *dev)
 {
 	struct ucc_geth_private *ugeth;
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 	struct ucc_fast __iomem *uf_regs;
 	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
 
@@ -2028,16 +2028,16 @@ static void ucc_geth_set_multi(struct net_device *dev)
 			out_be32(&p_82xx_addr_filt->gaddr_h, 0x0);
 			out_be32(&p_82xx_addr_filt->gaddr_l, 0x0);
 
-			netdev_for_each_mc_addr(dmi, dev) {
+			netdev_for_each_mc_addr(ha, dev) {
 				/* Only support group multicast for now.
 				 */
-				if (!(dmi->dmi_addr[0] & 1))
+				if (!(ha->addr[0] & 1))
 					continue;
 
 				/* Ask CPM to run CRC and set bit in
 				 * filter mask.
 				 */
-				hw_add_addr_in_hash(ugeth, dmi->dmi_addr);
+				hw_add_addr_in_hash(ugeth, ha->addr);
 			}
 		}
 	}
diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
index 9e05639435f2..763364f0972e 100644
--- a/drivers/net/usb/asix.c
+++ b/drivers/net/usb/asix.c
@@ -557,16 +557,14 @@ static void asix_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE);
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits =
-			    ether_crc(ETH_ALEN,
-				      mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |=
 			    1 << (crc_bits & 7);
 		}
@@ -793,16 +791,14 @@ static void ax88172_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		memset(data->multi_filter, 0, AX_MCAST_FILTER_SIZE);
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits =
-			    ether_crc(ETH_ALEN,
-				      mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |=
 			    1 << (crc_bits & 7);
 		}
diff --git a/drivers/net/usb/catc.c b/drivers/net/usb/catc.c
index 96f1ebe0d348..0c48ff97f47c 100644
--- a/drivers/net/usb/catc.c
+++ b/drivers/net/usb/catc.c
@@ -629,7 +629,7 @@ static void catc_multicast(unsigned char *addr, u8 *multicast)
 static void catc_set_multicast_list(struct net_device *netdev)
 {
 	struct catc *catc = netdev_priv(netdev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	u8 broadcast[6];
 	u8 rx = RxEnable | RxPolarity | RxMultiCast;
 
@@ -647,8 +647,8 @@ static void catc_set_multicast_list(struct net_device *netdev)
 	if (netdev->flags & IFF_ALLMULTI) {
 		memset(catc->multicast, 0xff, 64);
 	} else {
-		netdev_for_each_mc_addr(mc, netdev) {
-			u32 crc = ether_crc_le(6, mc->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 crc = ether_crc_le(6, ha->addr);
 			if (!catc->is_f5u011) {
 				catc->multicast[(crc >> 3) & 0x3f] |= 1 << (crc & 7);
 			} else {
diff --git a/drivers/net/usb/dm9601.c b/drivers/net/usb/dm9601.c
index 269339769f47..4eb1fb31ff0b 100644
--- a/drivers/net/usb/dm9601.c
+++ b/drivers/net/usb/dm9601.c
@@ -386,10 +386,10 @@ static void dm9601_set_multicast(struct net_device *net)
 		   netdev_mc_count(net) > DM_MAX_MCAST) {
 		rx_ctl |= 0x04;
 	} else if (!netdev_mc_empty(net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
-		netdev_for_each_mc_addr(mc_list, net) {
-			u32 crc = ether_crc(ETH_ALEN, mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			u32 crc = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			hashes[crc >> 3] |= 1 << (crc & 0x7);
 		}
 	}
diff --git a/drivers/net/usb/mcs7830.c b/drivers/net/usb/mcs7830.c
index 70978219e98a..0dc92c8ba4b2 100644
--- a/drivers/net/usb/mcs7830.c
+++ b/drivers/net/usb/mcs7830.c
@@ -452,12 +452,12 @@ static void mcs7830_data_set_multicast(struct net_device *net)
 		 * for our 8 byte filter buffer
 		 * to avoid allocating memory that
 		 * is tricky to free later */
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 		u32 crc_bits;
 
 		/* Build the multicast hash filter. */
-		netdev_for_each_mc_addr(mc_list, net) {
-			crc_bits = ether_crc(ETH_ALEN, mc_list->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, net) {
+			crc_bits = ether_crc(ETH_ALEN, ha->addr) >> 26;
 			data->multi_filter[crc_bits >> 3] |= 1 << (crc_bits & 7);
 		}
 	}
diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c
index 300e3e764fa2..b8b00d06ea79 100644
--- a/drivers/net/usb/smsc75xx.c
+++ b/drivers/net/usb/smsc75xx.c
@@ -444,14 +444,14 @@ static void smsc75xx_set_multicast(struct net_device *netdev)
 		netif_dbg(dev, drv, dev->net, "receive all multicast enabled");
 		pdata->rfe_ctl |= RFE_CTL_AM | RFE_CTL_DPF;
 	} else if (!netdev_mc_empty(dev->net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		netif_dbg(dev, drv, dev->net, "receive multicast hash filter");
 
 		pdata->rfe_ctl |= RFE_CTL_MHF | RFE_CTL_DPF;
 
-		netdev_for_each_mc_addr(mc_list, netdev) {
-			u32 bitnum = smsc75xx_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 bitnum = smsc75xx_hash(ha->addr);
 			pdata->multicast_hash_table[bitnum / 32] |=
 				(1 << (bitnum % 32));
 		}
diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
index 73f9a31cf94d..ccd55ca3a06a 100644
--- a/drivers/net/usb/smsc95xx.c
+++ b/drivers/net/usb/smsc95xx.c
@@ -384,13 +384,13 @@ static void smsc95xx_set_multicast(struct net_device *netdev)
 		pdata->mac_cr |= MAC_CR_MCPAS_;
 		pdata->mac_cr &= ~(MAC_CR_PRMS_ | MAC_CR_HPFILT_);
 	} else if (!netdev_mc_empty(dev->net)) {
-		struct dev_mc_list *mc_list;
+		struct netdev_hw_addr *ha;
 
 		pdata->mac_cr |= MAC_CR_HPFILT_;
 		pdata->mac_cr &= ~(MAC_CR_PRMS_ | MAC_CR_MCPAS_);
 
-		netdev_for_each_mc_addr(mc_list, netdev) {
-			u32 bitnum = smsc95xx_hash(mc_list->dmi_addr);
+		netdev_for_each_mc_addr(ha, netdev) {
+			u32 bitnum = smsc95xx_hash(ha->addr);
 			u32 mask = 0x01 << (bitnum & 0x1F);
 			if (bitnum & 0x20)
 				hash_hi |= mask;
diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c
index 50f881aa3939..d9133c62a2f5 100644
--- a/drivers/net/via-rhine.c
+++ b/drivers/net/via-rhine.c
@@ -1704,11 +1704,11 @@ static void rhine_set_rx_mode(struct net_device *dev)
 		iowrite32(0xffffffff, ioaddr + MulticastFilter1);
 		rx_mode = 0x0C;
 	} else {
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 
 		memset(mc_filter, 0, sizeof(mc_filter));
-		netdev_for_each_mc_addr(mclist, dev) {
-			int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+		netdev_for_each_mc_addr(ha, dev) {
+			int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 			mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
 		}
diff --git a/drivers/net/via-velocity.c b/drivers/net/via-velocity.c
index 3a486f3bad3d..91f3b841288c 100644
--- a/drivers/net/via-velocity.c
+++ b/drivers/net/via-velocity.c
@@ -1126,7 +1126,7 @@ static void velocity_set_multi(struct net_device *dev)
 	struct mac_regs __iomem *regs = vptr->mac_regs;
 	u8 rx_mode;
 	int i;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 
 	if (dev->flags & IFF_PROMISC) {	/* Set promiscuous. */
 		writel(0xffffffff, &regs->MARCAM[0]);
@@ -1142,8 +1142,8 @@ static void velocity_set_multi(struct net_device *dev)
 		mac_get_cam_mask(regs, vptr->mCAMmask);
 
 		i = 0;
-		netdev_for_each_mc_addr(mclist, dev) {
-			mac_set_cam(regs, i + offset, mclist->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev) {
+			mac_set_cam(regs, i + offset, ha->addr);
 			vptr->mCAMmask[(offset + i) / 8] |= 1 << ((offset + i) & 7);
 			i++;
 		}
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 3f5be35dea25..ecec9a8527f3 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -721,7 +721,6 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 	struct scatterlist sg[2];
 	u8 promisc, allmulti;
 	struct virtio_net_ctrl_mac *mac_data;
-	struct dev_addr_list *addr;
 	struct netdev_hw_addr *ha;
 	int uc_count;
 	int mc_count;
@@ -778,8 +777,8 @@ static void virtnet_set_rx_mode(struct net_device *dev)
 
 	mac_data->entries = mc_count;
 	i = 0;
-	netdev_for_each_mc_addr(addr, dev)
-		memcpy(&mac_data->macs[i++][0], addr->da_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN);
 
 	sg_set_buf(&sg[1], mac_data,
 		   sizeof(mac_data->entries) + (mc_count * ETH_ALEN));
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index cff3485d9673..58dfa367bf86 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1675,11 +1675,11 @@ vmxnet3_copy_mc(struct net_device *netdev)
 		/* We may be called with BH disabled */
 		buf = kmalloc(sz, GFP_ATOMIC);
 		if (buf) {
-			struct dev_mc_list *mc;
+			struct netdev_hw_addr *ha;
 			int i = 0;
 
-			netdev_for_each_mc_addr(mc, netdev)
-				memcpy(buf + i++ * ETH_ALEN, mc->dmi_addr,
+			netdev_for_each_mc_addr(ha, netdev)
+				memcpy(buf + i++ * ETH_ALEN, ha->addr,
 				       ETH_ALEN);
 		}
 	}
diff --git a/drivers/net/vxge/vxge-main.c b/drivers/net/vxge/vxge-main.c
index e5f2d3ee0df3..37836a10d093 100644
--- a/drivers/net/vxge/vxge-main.c
+++ b/drivers/net/vxge/vxge-main.c
@@ -1117,7 +1117,7 @@ vxge_tx_term(void *dtrh, enum vxge_hw_txdl_state state, void *userdata)
  */
 static void vxge_set_multicast(struct net_device *dev)
 {
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	struct vxgedev *vdev;
 	int i, mcast_cnt = 0;
 	struct __vxge_hw_device  *hldev;
@@ -1217,8 +1217,8 @@ static void vxge_set_multicast(struct net_device *dev)
 		}
 
 		/* Add new ones */
-		netdev_for_each_mc_addr(mclist, dev) {
-			memcpy(mac_info.macaddr, mclist->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy(mac_info.macaddr, ha->addr, ETH_ALEN);
 			for (vpath_idx = 0; vpath_idx < vdev->no_of_vpath;
 					vpath_idx++) {
 				mac_info.vpath_no = vpath_idx;
diff --git a/drivers/net/wireless/adm8211.c b/drivers/net/wireless/adm8211.c
index 547912e6843f..2277998b7264 100644
--- a/drivers/net/wireless/adm8211.c
+++ b/drivers/net/wireless/adm8211.c
@@ -1317,21 +1317,19 @@ static void adm8211_bss_info_changed(struct ieee80211_hw *dev,
 }
 
 static u64 adm8211_prepare_multicast(struct ieee80211_hw *hw,
-				     int mc_count, struct dev_addr_list *mclist)
+				     struct netdev_hw_addr_list *mc_list)
 {
-	unsigned int bit_nr, i;
+	unsigned int bit_nr;
 	u32 mc_filter[2];
+	struct netdev_hw_addr *ha;
 
 	mc_filter[1] = mc_filter[0] = 0;
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
+		bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
 
 		bit_nr &= 0x3F;
 		mc_filter[bit_nr >> 5] |= 1 << (bit_nr & 31);
-		mclist = mclist->next;
 	}
 
 	return mc_filter[0] | ((u64)(mc_filter[1]) << 32);
diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c
index 257c734733d1..b43d4b006d7e 100644
--- a/drivers/net/wireless/ath/ar9170/main.c
+++ b/drivers/net/wireless/ath/ar9170/main.c
@@ -2045,21 +2045,17 @@ out:
 	return err;
 }
 
-static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				       struct dev_addr_list *mclist)
+static u64 ar9170_op_prepare_multicast(struct ieee80211_hw *hw,
+				       struct netdev_hw_addr_list *mc_list)
 {
 	u64 mchash;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	/* always get broadcast frames */
 	mchash = 1ULL << (0xff >> 2);
 
-	for (i = 0; i < mc_count; i++) {
-		if (WARN_ON(!mclist))
-			break;
-		mchash |= 1ULL << (mclist->dmi_addr[5] >> 2);
-		mclist = mclist->next;
-	}
+	netdev_hw_addr_list_for_each(ha, mc_list)
+		mchash |= 1ULL << (ha->addr[5] >> 2);
 
 	return mchash;
 }
diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
index b142a78ed1e5..53a2340f52bc 100644
--- a/drivers/net/wireless/ath/ath5k/base.c
+++ b/drivers/net/wireless/ath/ath5k/base.c
@@ -230,7 +230,7 @@ static void ath5k_remove_interface(struct ieee80211_hw *hw,
 		struct ieee80211_vif *vif);
 static int ath5k_config(struct ieee80211_hw *hw, u32 changed);
 static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mc_list);
+				   struct netdev_hw_addr_list *mc_list);
 static void ath5k_configure_filter(struct ieee80211_hw *hw,
 		unsigned int changed_flags,
 		unsigned int *new_flags,
@@ -2999,22 +2999,20 @@ unlock:
 }
 
 static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	u32 mfilt[2], val;
-	int i;
 	u8 pos;
+	struct netdev_hw_addr *ha;
 
 	mfilt[0] = 0;
 	mfilt[1] = 1;
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
 		/* calculate XOR of eight 6-bit values */
-		val = get_unaligned_le32(mclist->dmi_addr + 0);
+		val = get_unaligned_le32(ha->addr + 0);
 		pos = (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
-		val = get_unaligned_le32(mclist->dmi_addr + 3);
+		val = get_unaligned_le32(ha->addr + 3);
 		pos ^= (val >> 18) ^ (val >> 12) ^ (val >> 6) ^ val;
 		pos &= 0x3f;
 		mfilt[pos / 32] |= (1 << (pos % 32));
@@ -3022,8 +3020,7 @@ static u64 ath5k_prepare_multicast(struct ieee80211_hw *hw,
 		* but not sure, needs testing, if we do use this we'd
 		* neet to inform below to not reset the mcast */
 		/* ath5k_hw_set_mcast_filterindex(ah,
-		 *      mclist->dmi_addr[5]); */
-		mclist = mclist->next;
+		 *      ha->addr[5]); */
 	}
 
 	return ((u64)(mfilt[1]) << 32) | mfilt[0];
diff --git a/drivers/net/wireless/libertas/main.c b/drivers/net/wireless/libertas/main.c
index 3c889f43d909..ff7b0d0cca56 100644
--- a/drivers/net/wireless/libertas/main.c
+++ b/drivers/net/wireless/libertas/main.c
@@ -318,7 +318,7 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 			       struct net_device *dev, int nr_addrs)
 {
 	int i = nr_addrs;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 	int cnt;
 
 	if ((dev->flags & (IFF_UP|IFF_MULTICAST)) != (IFF_UP|IFF_MULTICAST))
@@ -326,19 +326,19 @@ static int lbs_add_mcast_addrs(struct cmd_ds_mac_multicast_adr *cmd,
 
 	netif_addr_lock_bh(dev);
 	cnt = netdev_mc_count(dev);
-	netdev_for_each_mc_addr(mc_list, dev) {
-		if (mac_in_list(cmd->maclist, nr_addrs, mc_list->dmi_addr)) {
+	netdev_for_each_mc_addr(ha, dev) {
+		if (mac_in_list(cmd->maclist, nr_addrs, ha->addr)) {
 			lbs_deb_net("mcast address %s:%pM skipped\n", dev->name,
-				    mc_list->dmi_addr);
+				    ha->addr);
 			cnt--;
 			continue;
 		}
 
 		if (i == MRVDRV_MAX_MULTICAST_LIST_SIZE)
 			break;
-		memcpy(&cmd->maclist[6*i], mc_list->dmi_addr, ETH_ALEN);
+		memcpy(&cmd->maclist[6*i], ha->addr, ETH_ALEN);
 		lbs_deb_net("mcast address %s:%pM added to filter\n", dev->name,
-			    mc_list->dmi_addr);
+			    ha->addr);
 		i++;
 		cnt--;
 	}
diff --git a/drivers/net/wireless/libertas_tf/main.c b/drivers/net/wireless/libertas_tf/main.c
index 6ab30033c26c..c20eef6b95c2 100644
--- a/drivers/net/wireless/libertas_tf/main.c
+++ b/drivers/net/wireless/libertas_tf/main.c
@@ -367,22 +367,20 @@ static int lbtf_op_config(struct ieee80211_hw *hw, u32 changed)
 }
 
 static u64 lbtf_op_prepare_multicast(struct ieee80211_hw *hw,
-				     int mc_count, struct dev_addr_list *mclist)
+				     struct netdev_hw_addr_list *mc_list)
 {
 	struct lbtf_private *priv = hw->priv;
 	int i;
+	struct netdev_hw_addr *ha;
+	int mc_count = netdev_hw_addr_list_count(mc_list);
 
 	if (!mc_count || mc_count > MRVDRV_MAX_MULTICAST_LIST_SIZE)
 		return mc_count;
 
 	priv->nr_of_multicastmacaddr = mc_count;
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		memcpy(&priv->multicastlist[i], mclist->da_addr,
-				ETH_ALEN);
-		mclist = mclist->next;
-	}
+	i = 0;
+	netdev_hw_addr_list_for_each(ha, mc_list)
+		memcpy(&priv->multicastlist[i++], ha->addr, ETH_ALEN);
 
 	return mc_count;
 }
diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c
index ac65e13eb0de..6599fd15e675 100644
--- a/drivers/net/wireless/mwl8k.c
+++ b/drivers/net/wireless/mwl8k.c
@@ -1938,11 +1938,15 @@ struct mwl8k_cmd_mac_multicast_adr {
 
 static struct mwl8k_cmd_pkt *
 __mwl8k_cmd_mac_multicast_adr(struct ieee80211_hw *hw, int allmulti,
-			      int mc_count, struct dev_addr_list *mclist)
+			      struct netdev_hw_addr_list *mc_list)
 {
 	struct mwl8k_priv *priv = hw->priv;
 	struct mwl8k_cmd_mac_multicast_adr *cmd;
 	int size;
+	int mc_count = 0;
+
+	if (mc_list)
+		mc_count = netdev_hw_addr_list_count(mc_list);
 
 	if (allmulti || mc_count > priv->num_mcaddrs) {
 		allmulti = 1;
@@ -1963,17 +1967,13 @@ __mwl8k_cmd_mac_multicast_adr(struct ieee80211_hw *hw, int allmulti,
 	if (allmulti) {
 		cmd->action |= cpu_to_le16(MWL8K_ENABLE_RX_ALL_MULTICAST);
 	} else if (mc_count) {
-		int i;
+		struct netdev_hw_addr *ha;
+		int i = 0;
 
 		cmd->action |= cpu_to_le16(MWL8K_ENABLE_RX_MULTICAST);
 		cmd->numaddr = cpu_to_le16(mc_count);
-		for (i = 0; i < mc_count && mclist; i++) {
-			if (mclist->da_addrlen != ETH_ALEN) {
-				kfree(cmd);
-				return NULL;
-			}
-			memcpy(cmd->addr[i], mclist->da_addr, ETH_ALEN);
-			mclist = mclist->next;
+		netdev_hw_addr_list_for_each(ha, mc_list) {
+			memcpy(cmd->addr[i++], ha->addr, ETH_ALEN);
 		}
 	}
 
@@ -3552,7 +3552,7 @@ mwl8k_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 }
 
 static u64 mwl8k_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	struct mwl8k_cmd_pkt *cmd;
 
@@ -3563,7 +3563,7 @@ static u64 mwl8k_prepare_multicast(struct ieee80211_hw *hw,
 	 * we'll end up throwing this packet away and creating a new
 	 * one in mwl8k_configure_filter().
 	 */
-	cmd = __mwl8k_cmd_mac_multicast_adr(hw, 0, mc_count, mclist);
+	cmd = __mwl8k_cmd_mac_multicast_adr(hw, 0, mc_list);
 
 	return (unsigned long)cmd;
 }
@@ -3686,7 +3686,7 @@ static void mwl8k_configure_filter(struct ieee80211_hw *hw,
 	 */
 	if (*total_flags & FIF_ALLMULTI) {
 		kfree(cmd);
-		cmd = __mwl8k_cmd_mac_multicast_adr(hw, 1, 0, NULL);
+		cmd = __mwl8k_cmd_mac_multicast_adr(hw, 1, NULL);
 	}
 
 	if (cmd != NULL) {
diff --git a/drivers/net/wireless/orinoco/hw.c b/drivers/net/wireless/orinoco/hw.c
index 883b8f868626..9f657afaa3e5 100644
--- a/drivers/net/wireless/orinoco/hw.c
+++ b/drivers/net/wireless/orinoco/hw.c
@@ -1056,14 +1056,14 @@ int __orinoco_hw_set_multicast_list(struct orinoco_private *priv,
 	 * group address if either we want to multicast, or if we were
 	 * multicasting and want to stop */
 	if (!promisc && (mc_count || priv->mc_count)) {
-		struct dev_mc_list *p;
+		struct netdev_hw_addr *ha;
 		struct hermes_multicast mclist;
 		int i = 0;
 
-		netdev_for_each_mc_addr(p, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (i == mc_count)
 				break;
-			memcpy(mclist.addr[i++], p->dmi_addr, ETH_ALEN);
+			memcpy(mclist.addr[i++], ha->addr, ETH_ALEN);
 		}
 
 		err = hermes_write_ltv(hw, USER_BAP,
diff --git a/drivers/net/wireless/orinoco/hw.h b/drivers/net/wireless/orinoco/hw.h
index 9799a1d14a63..97af71e79950 100644
--- a/drivers/net/wireless/orinoco/hw.h
+++ b/drivers/net/wireless/orinoco/hw.h
@@ -22,7 +22,6 @@
 
 /* Forward declarations */
 struct orinoco_private;
-struct dev_addr_list;
 
 int determine_fw_capabilities(struct orinoco_private *priv, char *fw_name,
 			      size_t fw_name_len, u32 *hw_ver);
diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index 4f5bdb528ef7..54680a3a5acb 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -1875,17 +1875,17 @@ static void ray_update_multi_list(struct net_device *dev, int all)
 		writeb(0xff, &pccs->var);
 		local->num_multi = 0xff;
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i = 0;
 
 		/* Copy the kernel's list of MC addresses to card */
-		netdev_for_each_mc_addr(dmi, dev) {
-			memcpy_toio(p, dmi->dmi_addr, ETH_ALEN);
+		netdev_for_each_mc_addr(ha, dev) {
+			memcpy_toio(p, ha->addr, ETH_ALEN);
 			dev_dbg(&link->dev,
 			      "ray_update_multi add addr %02x%02x%02x%02x%02x%02x\n",
-			      dmi->dmi_addr[0], dmi->dmi_addr[1],
-			      dmi->dmi_addr[2], dmi->dmi_addr[3],
-			      dmi->dmi_addr[4], dmi->dmi_addr[5]);
+			      ha->addr[0], ha->addr[1],
+			      ha->addr[2], ha->addr[3],
+			      ha->addr[4], ha->addr[5]);
 			p += ETH_ALEN;
 			i++;
 		}
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index aceb95ef7274..8ab9f094747b 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -1545,7 +1545,7 @@ static int remove_key(struct usbnet *usbdev, int index, const u8 *bssid)
 static void set_multicast_list(struct usbnet *usbdev)
 {
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	__le32 filter, basefilter;
 	int ret;
 	char *mc_addrs = NULL;
@@ -1584,9 +1584,9 @@ static void set_multicast_list(struct usbnet *usbdev)
 			return;
 		}
 
-		netdev_for_each_mc_addr(mclist, usbdev->net)
+		netdev_for_each_mc_addr(ha, usbdev->net)
 			memcpy(mc_addrs + i++ * ETH_ALEN,
-			       mclist->dmi_addr, ETH_ALEN);
+			       ha->addr, ETH_ALEN);
 	}
 	netif_addr_unlock_bh(usbdev->net);
 
diff --git a/drivers/net/wireless/rtl818x/rtl8180_dev.c b/drivers/net/wireless/rtl818x/rtl8180_dev.c
index 2b928ecf47bd..fb8a62f2b9b4 100644
--- a/drivers/net/wireless/rtl818x/rtl8180_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8180_dev.c
@@ -723,10 +723,10 @@ static void rtl8180_bss_info_changed(struct ieee80211_hw *dev,
 	        priv->rf->conf_erp(dev, info);
 }
 
-static u64 rtl8180_prepare_multicast(struct ieee80211_hw *dev, int mc_count,
-				     struct dev_addr_list *mc_list)
+static u64 rtl8180_prepare_multicast(struct ieee80211_hw *dev,
+				     struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void rtl8180_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/net/wireless/rtl818x/rtl8187_dev.c b/drivers/net/wireless/rtl818x/rtl8187_dev.c
index 0fb850e0c656..441d817ed489 100644
--- a/drivers/net/wireless/rtl818x/rtl8187_dev.c
+++ b/drivers/net/wireless/rtl818x/rtl8187_dev.c
@@ -1193,9 +1193,9 @@ static void rtl8187_bss_info_changed(struct ieee80211_hw *dev,
 }
 
 static u64 rtl8187_prepare_multicast(struct ieee80211_hw *dev,
-				     int mc_count, struct dev_addr_list *mc_list)
+				     struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void rtl8187_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c
index 3daba6c0c77f..6036d0206fec 100644
--- a/drivers/net/wireless/wl12xx/wl1271_main.c
+++ b/drivers/net/wireless/wl12xx/wl1271_main.c
@@ -1266,11 +1266,11 @@ struct wl1271_filter_params {
 	u8 mc_list[ACX_MC_ADDRESS_GROUP_MAX][ETH_ALEN];
 };
 
-static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				       struct dev_addr_list *mc_list)
+static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw,
+				       struct netdev_hw_addr_list *mc_list)
 {
 	struct wl1271_filter_params *fp;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	fp = kzalloc(sizeof(*fp), GFP_ATOMIC);
 	if (!fp) {
@@ -1279,21 +1279,16 @@ static u64 wl1271_op_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
 	}
 
 	/* update multicast filtering parameters */
-	fp->enabled = true;
-	if (mc_count > ACX_MC_ADDRESS_GROUP_MAX) {
-		mc_count = 0;
-		fp->enabled = false;
-	}
-
 	fp->mc_list_length = 0;
-	for (i = 0; i < mc_count; i++) {
-		if (mc_list->da_addrlen == ETH_ALEN) {
+	if (netdev_hw_addr_list_count(mc_list) > ACX_MC_ADDRESS_GROUP_MAX) {
+		fp->enabled = false;
+	} else {
+		fp->enabled = true;
+		netdev_hw_addr_list_for_each(ha, mc_list) {
 			memcpy(fp->mc_list[fp->mc_list_length],
-			       mc_list->da_addr, ETH_ALEN);
+					ha->addr, ETH_ALEN);
 			fp->mc_list_length++;
-		} else
-			wl1271_warning("Unknown mc address length.");
-		mc_list = mc_list->next;
+		}
 	}
 
 	return (u64)(unsigned long)fp;
diff --git a/drivers/net/wireless/zd1201.c b/drivers/net/wireless/zd1201.c
index 6917286edcae..74530b2d672c 100644
--- a/drivers/net/wireless/zd1201.c
+++ b/drivers/net/wireless/zd1201.c
@@ -875,7 +875,7 @@ static struct iw_statistics *zd1201_get_wireless_stats(struct net_device *dev)
 static void zd1201_set_multicast(struct net_device *dev)
 {
 	struct zd1201 *zd = netdev_priv(dev);
-	struct dev_mc_list *mc;
+	struct netdev_hw_addr *ha;
 	unsigned char reqbuf[ETH_ALEN*ZD1201_MAXMULTI];
 	int i;
 
@@ -883,8 +883,8 @@ static void zd1201_set_multicast(struct net_device *dev)
 		return;
 
 	i = 0;
-	netdev_for_each_mc_addr(mc, dev)
-		memcpy(reqbuf + i++ * ETH_ALEN, mc->dmi_addr, ETH_ALEN);
+	netdev_for_each_mc_addr(ha, dev)
+		memcpy(reqbuf + i++ * ETH_ALEN, ha->addr, ETH_ALEN);
 	zd1201_setconfig(zd, ZD1201_RID_CNFGROUPADDRESS, reqbuf,
 			 netdev_mc_count(dev) * ETH_ALEN, 0);
 }
diff --git a/drivers/net/wireless/zd1211rw/zd_mac.c b/drivers/net/wireless/zd1211rw/zd_mac.c
index 00e09e26c826..6d95e4d74d7f 100644
--- a/drivers/net/wireless/zd1211rw/zd_mac.c
+++ b/drivers/net/wireless/zd1211rw/zd_mac.c
@@ -947,20 +947,17 @@ static void set_rx_filter_handler(struct work_struct *work)
 }
 
 static u64 zd_op_prepare_multicast(struct ieee80211_hw *hw,
-				   int mc_count, struct dev_addr_list *mclist)
+				   struct netdev_hw_addr_list *mc_list)
 {
 	struct zd_mac *mac = zd_hw_mac(hw);
 	struct zd_mc_hash hash;
-	int i;
+	struct netdev_hw_addr *ha;
 
 	zd_mc_clear(&hash);
 
-	for (i = 0; i < mc_count; i++) {
-		if (!mclist)
-			break;
-		dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", mclist->dmi_addr);
-		zd_mc_add_addr(&hash, mclist->dmi_addr);
-		mclist = mclist->next;
+	netdev_hw_addr_list_for_each(ha, mc_list) {
+		dev_dbg_f(zd_mac_dev(mac), "mc addr %pM\n", ha->addr);
+		zd_mc_add_addr(&hash, ha->addr);
 	}
 
 	return hash.low | ((u64)hash.high << 32);
diff --git a/drivers/net/yellowfin.c b/drivers/net/yellowfin.c
index 7d4107f5eeb0..34c91cf5d839 100644
--- a/drivers/net/yellowfin.c
+++ b/drivers/net/yellowfin.c
@@ -1300,25 +1300,25 @@ static void set_rx_mode(struct net_device *dev)
 		/* Too many to filter well, or accept all multicasts. */
 		iowrite16(0x000B, ioaddr + AddrMode);
 	} else if (!netdev_mc_empty(dev)) { /* Must use the multicast hash table. */
-		struct dev_mc_list *mclist;
+		struct netdev_hw_addr *ha;
 		u16 hash_table[4];
 		int i;
 
 		memset(hash_table, 0, sizeof(hash_table));
-		netdev_for_each_mc_addr(mclist, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			unsigned int bit;
 
 			/* Due to a bug in the early chip versions, multiple filter
 			   slots must be set for each address. */
 			if (yp->drv_flags & HasMulticastBug) {
-				bit = (ether_crc_le(3, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(3, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
-				bit = (ether_crc_le(4, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(4, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
-				bit = (ether_crc_le(5, mclist->dmi_addr) >> 3) & 0x3f;
+				bit = (ether_crc_le(5, ha->addr) >> 3) & 0x3f;
 				hash_table[bit >> 4] |= (1 << bit);
 			}
-			bit = (ether_crc_le(6, mclist->dmi_addr) >> 3) & 0x3f;
+			bit = (ether_crc_le(6, ha->addr) >> 3) & 0x3f;
 			hash_table[bit >> 4] |= (1 << bit);
 		}
 		/* Copy the hash table to the chip. */
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 7576ad5a833a..945f3e0a9f06 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -607,7 +607,6 @@ static int qeth_l2_set_mac_address(struct net_device *dev, void *p)
 static void qeth_l2_set_multicast_list(struct net_device *dev)
 {
 	struct qeth_card *card = dev->ml_priv;
-	struct dev_addr_list *dm;
 	struct netdev_hw_addr *ha;
 
 	if (card->info.type == QETH_CARD_TYPE_OSN)
@@ -619,8 +618,8 @@ static void qeth_l2_set_multicast_list(struct net_device *dev)
 		return;
 	qeth_l2_del_all_mc(card);
 	spin_lock_bh(&card->mclock);
-	netdev_for_each_mc_addr(dm, dev)
-		qeth_l2_add_mc(card, dm->da_addr, 0);
+	netdev_for_each_mc_addr(ha, dev)
+		qeth_l2_add_mc(card, ha->addr, 0);
 
 	netdev_for_each_uc_addr(ha, dev)
 		qeth_l2_add_mc(card, ha->addr, 1);
diff --git a/drivers/scsi/fcoe/fcoe.c b/drivers/scsi/fcoe/fcoe.c
index de33e38a4059..37de40e01684 100644
--- a/drivers/scsi/fcoe/fcoe.c
+++ b/drivers/scsi/fcoe/fcoe.c
@@ -311,7 +311,7 @@ static int fcoe_interface_setup(struct fcoe_interface *fcoe,
 	dev_uc_add(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_add(netdev, fip->ctl_src_addr);
-	dev_mc_add(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
+	dev_mc_add(netdev, FIP_ALL_ENODE_MACS);
 
 	/*
 	 * setup the receive function from ethernet driver
@@ -397,7 +397,7 @@ void fcoe_interface_cleanup(struct fcoe_interface *fcoe)
 	dev_uc_del(netdev, flogi_maddr);
 	if (fip->spma)
 		dev_uc_del(netdev, fip->ctl_src_addr);
-	dev_mc_delete(netdev, FIP_ALL_ENODE_MACS, ETH_ALEN, 0);
+	dev_mc_del(netdev, FIP_ALL_ENODE_MACS);
 
 	/* Tell the LLD we are done w/ FCoE */
 	ops = netdev->netdev_ops;
diff --git a/drivers/staging/arlan/arlan-main.c b/drivers/staging/arlan/arlan-main.c
index 88fdd53cf5d3..80284522c42b 100644
--- a/drivers/staging/arlan/arlan-main.c
+++ b/drivers/staging/arlan/arlan-main.c
@@ -1458,7 +1458,7 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 				!netdev_mc_empty(dev))
 			{
 				char hw_dst_addr[6];
-				struct dev_mc_list *dmi;
+				struct netdev_hw_addr *ha;
 				int i;
 
 				memcpy_fromio(hw_dst_addr, arlan->ultimateDestAddress, 6);
@@ -1469,12 +1469,13 @@ static void arlan_rx_interrupt(struct net_device *dev, u_char rxStatus, u_short
 							printk(KERN_ERR "%s mcast 0x0100 \n", dev->name);
 						else if (hw_dst_addr[1] == 0x40)
 							printk(KERN_ERR "%s m/bcast 0x0140 \n", dev->name);
-					netdev_for_each_mc_entry(dmi, dev) {
+					netdev_for_each_mc_entry(ha, dev) {
 						if (arlan_debug & ARLAN_DEBUG_HEADER_DUMP)
 							printk(KERN_ERR "%s mcl %pM\n",
-							       dev->name, dmi->dmi_addr);
+							       dev->name,
+							       ha->addr);
 						for (i = 0; i < 6; i++)
-							if (dmi->dmi_addr[i] != hw_dst_addr[i])
+							if (ha->addr[i] != hw_dst_addr[i])
 								break;
 						if (i == 6)
 							break;
diff --git a/drivers/staging/et131x/et131x_netdev.c b/drivers/staging/et131x/et131x_netdev.c
index 40f8954dde47..2fb89cddef16 100644
--- a/drivers/staging/et131x/et131x_netdev.c
+++ b/drivers/staging/et131x/et131x_netdev.c
@@ -405,7 +405,7 @@ void et131x_multicast(struct net_device *netdev)
 	struct et131x_adapter *adapter = netdev_priv(netdev);
 	uint32_t PacketFilter = 0;
 	unsigned long flags;
-	struct dev_mc_list *mclist;
+	struct netdev_hw_addr *ha;
 	int i;
 
 	spin_lock_irqsave(&adapter->Lock, flags);
@@ -450,10 +450,10 @@ void et131x_multicast(struct net_device *netdev)
 
 	/* Set values in the private adapter struct */
 	i = 0;
-	netdev_for_each_mc_addr(mclist, netdev) {
+	netdev_for_each_mc_addr(ha, netdev) {
 		if (i == NIC_MAX_MCAST_LIST)
 			break;
-		memcpy(adapter->MCList[i++], mclist->dmi_addr, ETH_ALEN);
+		memcpy(adapter->MCList[i++], ha->addr, ETH_ALEN);
 	}
 	adapter->MCAddressCount = i;
 
diff --git a/drivers/staging/slicoss/slicoss.c b/drivers/staging/slicoss/slicoss.c
index 7daeced317c4..bebf0fd2af85 100644
--- a/drivers/staging/slicoss/slicoss.c
+++ b/drivers/staging/slicoss/slicoss.c
@@ -1367,12 +1367,12 @@ static void slic_mcast_set_list(struct net_device *dev)
 	struct adapter *adapter = netdev_priv(dev);
 	int status = STATUS_SUCCESS;
 	char *addresses;
-	struct dev_mc_list *mc_list;
+	struct netdev_hw_addr *ha;
 
 	ASSERT(adapter);
 
-	netdev_for_each_mc_addr(mc_list, dev) {
-		addresses = (char *) &mc_list->dmi_addr;
+	netdev_for_each_mc_addr(ha, dev) {
+		addresses = (char *) &ha->addr;
 		status = slic_mcast_add_list(adapter, addresses);
 		if (status != STATUS_SUCCESS)
 			break;
diff --git a/drivers/staging/vt6655/device_main.c b/drivers/staging/vt6655/device_main.c
index 1d643653a7ed..b698de40a60e 100644
--- a/drivers/staging/vt6655/device_main.c
+++ b/drivers/staging/vt6655/device_main.c
@@ -3079,7 +3079,7 @@ static void device_set_multi(struct net_device *dev) {
 
     PSMgmtObject     pMgmt = pDevice->pMgmt;
     u32              mc_filter[2];
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
 
 
     VNSvInPortB(pDevice->PortOffset + MAC_REG_RCR, &(pDevice->byRxMode));
@@ -3099,8 +3099,8 @@ static void device_set_multi(struct net_device *dev) {
     }
     else {
         memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-            int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+            int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
             mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
         }
         MACvSelectPage1(pDevice->PortOffset);
diff --git a/drivers/staging/vt6656/main_usb.c b/drivers/staging/vt6656/main_usb.c
index a8e1adbc9592..49270db98fbb 100644
--- a/drivers/staging/vt6656/main_usb.c
+++ b/drivers/staging/vt6656/main_usb.c
@@ -1596,7 +1596,7 @@ static void device_set_multi(struct net_device *dev) {
     PSMgmtObject     pMgmt = &(pDevice->sMgmtObj);
     u32              mc_filter[2];
     int              ii;
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
     BYTE             pbyData[8] = {0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff};
     BYTE             byTmpMode = 0;
     int              rc;
@@ -1632,8 +1632,8 @@ static void device_set_multi(struct net_device *dev) {
     }
     else {
         memset(mc_filter, 0, sizeof(mc_filter));
-	netdev_for_each_mc_addr(mclist, dev) {
-            int bit_nr = ether_crc(ETH_ALEN, mclist->dmi_addr) >> 26;
+	netdev_for_each_mc_addr(ha, dev) {
+            int bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
             mc_filter[bit_nr >> 5] |= cpu_to_le32(1 << (bit_nr & 31));
         }
         for (ii = 0; ii < 4; ii++) {
diff --git a/drivers/staging/wavelan/wavelan.c b/drivers/staging/wavelan/wavelan.c
index 54ca63196fdd..f44ef351647b 100644
--- a/drivers/staging/wavelan/wavelan.c
+++ b/drivers/staging/wavelan/wavelan.c
@@ -3419,7 +3419,7 @@ static void wv_82586_config(struct net_device * dev)
 	ac_cfg_t cfg;		/* Configure action */
 	ac_ias_t ias;		/* IA-setup action */
 	ac_mcs_t mcs;		/* Multicast setup */
-	struct dev_mc_list *dmi;
+	struct netdev_hw_addr *ha;
 
 #ifdef DEBUG_CONFIG_TRACE
 	printk(KERN_DEBUG "%s: ->wv_82586_config()\n", dev->name);
@@ -3531,16 +3531,16 @@ static void wv_82586_config(struct net_device * dev)
 
 	/* Any address to set? */
 	if (lp->mc_count) {
-		netdev_for_each_mc_addr(dmi, dev)
-			outsw(PIOP1(ioaddr), (u16 *) dmi->dmi_addr,
+		netdev_for_each_mc_addr(ha, dev)
+			outsw(PIOP1(ioaddr), (u16 *) ha->addr,
 			      WAVELAN_ADDR_SIZE >> 1);
 
 #ifdef DEBUG_CONFIG_INFO
 		printk(KERN_DEBUG
 		       "%s: wv_82586_config(): set %d multicast addresses:\n",
 		       dev->name, lp->mc_count);
-		netdev_for_each_mc_addr(dmi, dev)
-			printk(KERN_DEBUG " %pM\n", dmi->dmi_addr);
+		netdev_for_each_mc_addr(ha, dev)
+			printk(KERN_DEBUG " %pM\n", ha->addr);
 #endif
 	}
 
diff --git a/drivers/staging/wavelan/wavelan_cs.c b/drivers/staging/wavelan/wavelan_cs.c
index 04f691d127b4..a90132a204e6 100644
--- a/drivers/staging/wavelan/wavelan_cs.c
+++ b/drivers/staging/wavelan/wavelan_cs.c
@@ -3591,20 +3591,20 @@ wv_82593_config(struct net_device *	dev)
     /* If roaming is enabled, join the "Beacon Request" multicast group... */
     /* But only if it's not in there already! */
   if(do_roaming)
-    dev_mc_add(dev,WAVELAN_BEACON_ADDRESS, WAVELAN_ADDR_SIZE, 1);
+    dev_mc_add(dev, WAVELAN_BEACON_ADDRESS);
 #endif	/* WAVELAN_ROAMING */
 
   /* If any multicast address to set */
   if(lp->mc_count)
     {
-      struct dev_mc_list *dmi;
+      struct netdev_hw_addr *ha;
       int			addrs_len = WAVELAN_ADDR_SIZE * lp->mc_count;
 
 #ifdef DEBUG_CONFIG_INFO
       printk(KERN_DEBUG "%s: wv_hw_config(): set %d multicast addresses:\n",
 	     dev->name, lp->mc_count);
-      netdev_for_each_mc_addr(dmi, dev)
-	printk(KERN_DEBUG " %pM\n", dmi->dmi_addr);
+      netdev_for_each_mc_addr(ha, dev)
+	printk(KERN_DEBUG " %pM\n", ha->addr);
 #endif
 
       /* Initialize adapter's ethernet multicast addresses */
@@ -3612,8 +3612,8 @@ wv_82593_config(struct net_device *	dev)
       outb(((TX_BASE >> 8) & PIORH_MASK) | PIORH_SEL_TX, PIORH(base));
       outb(addrs_len & 0xff, PIOP(base));	/* byte count lsb */
       outb((addrs_len >> 8), PIOP(base));	/* byte count msb */
-      netdev_for_each_mc_addr(dmi, dev)
-	outsb(PIOP(base), dmi->dmi_addr, dmi->dmi_addrlen);
+      netdev_for_each_mc_addr(ha, dev)
+	outsb(PIOP(base), ha->addr, dev->addr_len);
 
       /* reset transmit DMA pointer */
       hacr_write_slow(base, HACR_PWR_STAT | HACR_TX_DMA_RESET);
diff --git a/drivers/staging/winbond/wbusb.c b/drivers/staging/winbond/wbusb.c
index 3482eec18651..5d9499bba9cc 100644
--- a/drivers/staging/winbond/wbusb.c
+++ b/drivers/staging/winbond/wbusb.c
@@ -92,10 +92,10 @@ static int wbsoft_get_stats(struct ieee80211_hw *hw,
 	return 0;
 }
 
-static u64 wbsoft_prepare_multicast(struct ieee80211_hw *hw, int mc_count,
-				    struct dev_addr_list *mc_list)
+static u64 wbsoft_prepare_multicast(struct ieee80211_hw *hw,
+				    struct netdev_hw_addr_list *mc_list)
 {
-	return mc_count;
+	return netdev_hw_addr_list_count(mc_list);
 }
 
 static void wbsoft_configure_filter(struct ieee80211_hw *dev,
diff --git a/drivers/staging/wlags49_h2/wl_netdev.c b/drivers/staging/wlags49_h2/wl_netdev.c
index fa082d90fcad..d7532e89f5fc 100644
--- a/drivers/staging/wlags49_h2/wl_netdev.c
+++ b/drivers/staging/wlags49_h2/wl_netdev.c
@@ -1049,7 +1049,7 @@ void wl_multicast( struct net_device *dev )
 //;?seems reasonable that even an AP-only driver could afford this small additional footprint
 
     int                 x;
-    struct dev_mc_list *mclist;
+    struct netdev_hw_addr *ha;
     struct wl_private   *lp = wl_priv(dev);
     unsigned long       flags;
     /*------------------------------------------------------------------------*/
@@ -1072,9 +1072,9 @@ void wl_multicast( struct net_device *dev )
 
         DBG_PRINT( "  mc_count: %d\n", netdev_mc_count(dev));
 
-	netdev_for_each_mc_addr(mclist, dev)
-            DBG_PRINT( "    %s (%d)\n", DbgHwAddr(mclist->dmi_addr),
-                       mclist->dmi_addrlen );
+	netdev_for_each_mc_addr(ha, dev)
+            DBG_PRINT("    %s (%d)\n", DbgHwAddr(ha->addr),
+		      dev->addr_len);
     }
 #endif /* DBG */
 
@@ -1119,9 +1119,9 @@ void wl_multicast( struct net_device *dev )
                 lp->ltvRecord.typ = CFG_GROUP_ADDR;
 
 		x = 0;
-		netdev_for_each_mc_addr(mclist, dev)
+		netdev_for_each_mc_addr(ha, dev)
                     memcpy(&(lp->ltvRecord.u.u8[x++ * ETH_ALEN]),
-                           mclist->dmi_addr, ETH_ALEN);
+			   ha->addr, ETH_ALEN);
                 DBG_PRINT( "Setting multicast list\n" );
                 hcf_put_info( &( lp->hcfCtx ), (LTVP)&( lp->ltvRecord ));
             } else {
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 60f0c83192fe..a343a21ba8b9 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -228,25 +228,6 @@ struct netif_rx_stats {
 
 DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
 
-struct dev_addr_list {
-	struct dev_addr_list	*next;
-	u8			da_addr[MAX_ADDR_LEN];
-	u8			da_addrlen;
-	u8			da_synced;
-	int			da_users;
-	int			da_gusers;
-};
-
-/*
- *	We tag multicasts with these structures.
- */
-
-#define dev_mc_list	dev_addr_list
-#define dmi_addr	da_addr
-#define dmi_addrlen	da_addrlen
-#define dmi_users	da_users
-#define dmi_gusers	da_gusers
-
 struct netdev_hw_addr {
 	struct list_head	list;
 	unsigned char		addr[MAX_ADDR_LEN];
@@ -255,8 +236,10 @@ struct netdev_hw_addr {
 #define NETDEV_HW_ADDR_T_SAN		2
 #define NETDEV_HW_ADDR_T_SLAVE		3
 #define NETDEV_HW_ADDR_T_UNICAST	4
+#define NETDEV_HW_ADDR_T_MULTICAST	5
 	int			refcount;
 	bool			synced;
+	bool			global_use;
 	struct rcu_head		rcu_head;
 };
 
@@ -265,16 +248,20 @@ struct netdev_hw_addr_list {
 	int			count;
 };
 
-#define netdev_uc_count(dev) ((dev)->uc.count)
-#define netdev_uc_empty(dev) ((dev)->uc.count == 0)
-#define netdev_for_each_uc_addr(ha, dev) \
-	list_for_each_entry(ha, &dev->uc.list, list)
+#define netdev_hw_addr_list_count(l) ((l)->count)
+#define netdev_hw_addr_list_empty(l) (netdev_hw_addr_list_count(l) == 0)
+#define netdev_hw_addr_list_for_each(ha, l) \
+	list_for_each_entry(ha, &(l)->list, list)
 
-#define netdev_mc_count(dev) ((dev)->mc_count)
-#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0)
+#define netdev_uc_count(dev) netdev_hw_addr_list_count(&(dev)->uc)
+#define netdev_uc_empty(dev) netdev_hw_addr_list_empty(&(dev)->uc)
+#define netdev_for_each_uc_addr(ha, dev) \
+	netdev_hw_addr_list_for_each(ha, &(dev)->uc)
 
+#define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
+#define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
 #define netdev_for_each_mc_addr(mclist, dev) \
-	for (mclist = dev->mc_list; mclist; mclist = mclist->next)
+	netdev_hw_addr_list_for_each(mclist, &(dev)->mc)
 
 struct hh_cache {
 	struct hh_cache *hh_next;	/* Next entry			     */
@@ -862,12 +849,10 @@ struct net_device {
 	unsigned char		addr_len;	/* hardware address length	*/
 	unsigned short          dev_id;		/* for shared network cards */
 
-	struct netdev_hw_addr_list	uc;	/* Secondary unicast
-						   mac addresses */
-	int			uc_promisc;
 	spinlock_t		addr_list_lock;
-	struct dev_addr_list	*mc_list;	/* Multicast mac addresses	*/
-	int			mc_count;	/* Number of installed mcasts	*/
+	struct netdev_hw_addr_list	uc;	/* Unicast mac addresses */
+	struct netdev_hw_addr_list	mc;	/* Multicast mac addresses */
+	int			uc_promisc;
 	unsigned int		promiscuity;
 	unsigned int		allmulti;
 
@@ -1980,6 +1965,22 @@ extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 extern int		register_netdev(struct net_device *dev);
 extern void		unregister_netdev(struct net_device *dev);
 
+/* General hardware address lists handling functions */
+extern int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+				  struct netdev_hw_addr_list *from_list,
+				  int addr_len, unsigned char addr_type);
+extern void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+				   struct netdev_hw_addr_list *from_list,
+				   int addr_len, unsigned char addr_type);
+extern int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+			  struct netdev_hw_addr_list *from_list,
+			  int addr_len);
+extern void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+			     struct netdev_hw_addr_list *from_list,
+			     int addr_len);
+extern void __hw_addr_flush(struct netdev_hw_addr_list *list);
+extern void __hw_addr_init(struct netdev_hw_addr_list *list);
+
 /* Functions used for device addresses handling */
 extern int dev_addr_add(struct net_device *dev, unsigned char *addr,
 			unsigned char addr_type);
@@ -2002,18 +2003,19 @@ extern void dev_uc_unsync(struct net_device *to, struct net_device *from);
 extern void dev_uc_flush(struct net_device *dev);
 extern void dev_uc_init(struct net_device *dev);
 
+/* Functions used for multicast addresses handling */
+extern int dev_mc_add(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_add_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_del(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_del_global(struct net_device *dev, unsigned char *addr);
+extern int dev_mc_sync(struct net_device *to, struct net_device *from);
+extern void dev_mc_unsync(struct net_device *to, struct net_device *from);
+extern void dev_mc_flush(struct net_device *dev);
+extern void dev_mc_init(struct net_device *dev);
+
 /* Functions used for secondary unicast and multicast support */
 extern void		dev_set_rx_mode(struct net_device *dev);
 extern void		__dev_set_rx_mode(struct net_device *dev);
-extern int 		dev_mc_delete(struct net_device *dev, void *addr, int alen, int all);
-extern int		dev_mc_add(struct net_device *dev, void *addr, int alen, int newonly);
-extern int		dev_mc_sync(struct net_device *to, struct net_device *from);
-extern void		dev_mc_unsync(struct net_device *to, struct net_device *from);
-extern void		dev_addr_discard(struct net_device *dev);
-extern int 		__dev_addr_delete(struct dev_addr_list **list, int *count, void *addr, int alen, int all);
-extern int		__dev_addr_add(struct dev_addr_list **list, int *count, void *addr, int alen, int newonly);
-extern int		__dev_addr_sync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
-extern void		__dev_addr_unsync(struct dev_addr_list **to, int *to_count, struct dev_addr_list **from, int *from_count);
 extern int		dev_set_promiscuity(struct net_device *dev, int inc);
 extern int		dev_set_allmulti(struct net_device *dev, int inc);
 extern void		netdev_state_change(struct net_device *dev);
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 1a8f50af49a0..20823d04e03c 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1626,7 +1626,7 @@ struct ieee80211_ops {
 				 struct ieee80211_bss_conf *info,
 				 u32 changed);
 	u64 (*prepare_multicast)(struct ieee80211_hw *hw,
-				 int mc_count, struct dev_addr_list *mc_list);
+				 struct netdev_hw_addr_list *mc_list);
 	void (*configure_filter)(struct ieee80211_hw *hw,
 				 unsigned int changed_flags,
 				 unsigned int *total_flags,
diff --git a/net/802/garp.c b/net/802/garp.c
index 1dcb0660c49d..78cff9ec2cb4 100644
--- a/net/802/garp.c
+++ b/net/802/garp.c
@@ -575,7 +575,7 @@ int garp_init_applicant(struct net_device *dev, struct garp_application *appl)
 	if (!app)
 		goto err2;
 
-	err = dev_mc_add(dev, appl->proto.group_address, ETH_ALEN, 0);
+	err = dev_mc_add(dev, appl->proto.group_address);
 	if (err < 0)
 		goto err3;
 
@@ -615,7 +615,7 @@ void garp_uninit_applicant(struct net_device *dev, struct garp_application *appl
 	garp_pdu_queue(app);
 	garp_queue_xmit(app);
 
-	dev_mc_delete(dev, appl->proto.group_address, ETH_ALEN, 0);
+	dev_mc_del(dev, appl->proto.group_address);
 	kfree(app);
 	garp_release_port(dev);
 }
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9fc4da56fb1d..1d15a60b23af 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -781,7 +781,7 @@ static int atif_ioctl(int cmd, void __user *arg)
 						atrtr_create(&rtdef, dev);
 					}
 			}
-			dev_mc_add(dev, aarp_mcast, 6, 1);
+			dev_mc_add_global(dev, aarp_mcast);
 			return 0;
 
 		case SIOCGIFADDR:
diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c
index 326ab453edb7..260a9507e542 100644
--- a/net/bluetooth/bnep/netdev.c
+++ b/net/bluetooth/bnep/netdev.c
@@ -87,7 +87,7 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		memcpy(__skb_put(skb, ETH_ALEN), dev->broadcast, ETH_ALEN);
 		r->len = htons(ETH_ALEN * 2);
 	} else {
-		struct dev_mc_list *dmi;
+		struct netdev_hw_addr *ha;
 		int i, len = skb->len;
 
 		if (dev->flags & IFF_BROADCAST) {
@@ -98,11 +98,11 @@ static void bnep_net_set_mc_list(struct net_device *dev)
 		/* FIXME: We should group addresses here. */
 
 		i = 0;
-		netdev_for_each_mc_addr(dmi, dev) {
+		netdev_for_each_mc_addr(ha, dev) {
 			if (i == BNEP_MAX_MULTICAST_FILTERS)
 				break;
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
-			memcpy(__skb_put(skb, ETH_ALEN), dmi->dmi_addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
+			memcpy(__skb_put(skb, ETH_ALEN), ha->addr, ETH_ALEN);
 		}
 		r->len = htons(skb->len - len);
 	}
diff --git a/net/core/Makefile b/net/core/Makefile
index 0a899f1aadb9..51c3eec850ef 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -7,9 +7,8 @@ obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \
 
 obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
 
-obj-y		     += dev.o ethtool.o dev_mcast.o dst.o netevent.o \
-			neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
-			dev_addr_lists.o
+obj-y		     += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \
+			neighbour.o rtnetlink.o utils.o link_watch.o filter.o
 
 obj-$(CONFIG_XFRM) += flow.o
 obj-y += net-sysfs.o
diff --git a/net/core/dev.c b/net/core/dev.c
index 949c62dba719..2a9b7dd0bb6e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3968,140 +3968,6 @@ void dev_set_rx_mode(struct net_device *dev)
 	netif_addr_unlock_bh(dev);
 }
 
-/* multicast addresses handling functions */
-
-int __dev_addr_delete(struct dev_addr_list **list, int *count,
-		      void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (; (da = *list) != NULL; list = &da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    alen == da->da_addrlen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 0;
-				if (old_glbl == 0)
-					break;
-			}
-			if (--da->da_users)
-				return 0;
-
-			*list = da->next;
-			kfree(da);
-			(*count)--;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int __dev_addr_add(struct dev_addr_list **list, int *count,
-		   void *addr, int alen, int glbl)
-{
-	struct dev_addr_list *da;
-
-	for (da = *list; da != NULL; da = da->next) {
-		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
-		    da->da_addrlen == alen) {
-			if (glbl) {
-				int old_glbl = da->da_gusers;
-				da->da_gusers = 1;
-				if (old_glbl)
-					return 0;
-			}
-			da->da_users++;
-			return 0;
-		}
-	}
-
-	da = kzalloc(sizeof(*da), GFP_ATOMIC);
-	if (da == NULL)
-		return -ENOMEM;
-	memcpy(da->da_addr, addr, alen);
-	da->da_addrlen = alen;
-	da->da_users = 1;
-	da->da_gusers = glbl ? 1 : 0;
-	da->next = *list;
-	*list = da;
-	(*count)++;
-	return 0;
-}
-
-
-int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
-		    struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-	int err = 0;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (!da->da_synced) {
-			err = __dev_addr_add(to, to_count,
-					     da->da_addr, da->da_addrlen, 0);
-			if (err < 0)
-				break;
-			da->da_synced = 1;
-			da->da_users++;
-		} else if (da->da_users == 1) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-	return err;
-}
-EXPORT_SYMBOL_GPL(__dev_addr_sync);
-
-void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
-		       struct dev_addr_list **from, int *from_count)
-{
-	struct dev_addr_list *da, *next;
-
-	da = *from;
-	while (da != NULL) {
-		next = da->next;
-		if (da->da_synced) {
-			__dev_addr_delete(to, to_count,
-					  da->da_addr, da->da_addrlen, 0);
-			da->da_synced = 0;
-			__dev_addr_delete(from, from_count,
-					  da->da_addr, da->da_addrlen, 0);
-		}
-		da = next;
-	}
-}
-EXPORT_SYMBOL_GPL(__dev_addr_unsync);
-
-static void __dev_addr_discard(struct dev_addr_list **list)
-{
-	struct dev_addr_list *tmp;
-
-	while (*list != NULL) {
-		tmp = *list;
-		*list = tmp->next;
-		if (tmp->da_users > tmp->da_gusers)
-			printk("__dev_addr_discard: address leakage! "
-			       "da_users=%d\n", tmp->da_users);
-		kfree(tmp);
-	}
-}
-
-void dev_addr_discard(struct net_device *dev)
-{
-	netif_addr_lock_bh(dev);
-
-	__dev_addr_discard(&dev->mc_list);
-	netdev_mc_count(dev) = 0;
-
-	netif_addr_unlock_bh(dev);
-}
-EXPORT_SYMBOL(dev_addr_discard);
-
 /**
  *	dev_get_flags - get flags reported to userspace
  *	@dev: device
@@ -4412,8 +4278,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
-		return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
-				  dev->addr_len, 1);
+		return dev_mc_add_global(dev, ifr->ifr_hwaddr.sa_data);
 
 	case SIOCDELMULTI:
 		if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
@@ -4421,8 +4286,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
 			return -EINVAL;
 		if (!netif_device_present(dev))
 			return -ENODEV;
-		return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
-				     dev->addr_len, 1);
+		return dev_mc_del_global(dev, ifr->ifr_hwaddr.sa_data);
 
 	case SIOCSIFTXQLEN:
 		if (ifr->ifr_qlen < 0)
@@ -4730,7 +4594,7 @@ static void rollback_registered_many(struct list_head *head)
 		 *	Flush the unicast and multicast chains
 		 */
 		dev_uc_flush(dev);
-		dev_addr_discard(dev);
+		dev_mc_flush(dev);
 
 		if (dev->netdev_ops->ndo_uninit)
 			dev->netdev_ops->ndo_uninit(dev);
@@ -5310,6 +5174,7 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
 	if (dev_addr_init(dev))
 		goto free_rx;
 
+	dev_mc_init(dev);
 	dev_uc_init(dev);
 
 	dev_net_set(dev, &init_net);
@@ -5545,7 +5410,7 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char
 	 *	Flush the unicast and multicast chains
 	 */
 	dev_uc_flush(dev);
-	dev_addr_discard(dev);
+	dev_mc_flush(dev);
 
 	netdev_unregister_kobject(dev);
 
diff --git a/net/core/dev_addr_lists.c b/net/core/dev_addr_lists.c
index 7e52b6d18add..37d5975e18a3 100644
--- a/net/core/dev_addr_lists.c
+++ b/net/core/dev_addr_lists.c
@@ -19,8 +19,9 @@
  * General list handling functions
  */
 
-static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_add_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
 	int alloc_size;
@@ -31,6 +32,13 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
 	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    ha->type == addr_type) {
+			if (global) {
+				/* check if addr is already used as global */
+				if (ha->global_use)
+					return 0;
+				else
+					ha->global_use = true;
+			}
 			ha->refcount++;
 			return 0;
 		}
@@ -46,12 +54,19 @@ static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
 	memcpy(ha->addr, addr, addr_len);
 	ha->type = addr_type;
 	ha->refcount = 1;
+	ha->global_use = global;
 	ha->synced = false;
 	list_add_tail_rcu(&ha->list, &list->list);
 	list->count++;
 	return 0;
 }
 
+static int __hw_addr_add(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_add_ex(list, addr, addr_len, addr_type, false);
+}
+
 static void ha_rcu_free(struct rcu_head *head)
 {
 	struct netdev_hw_addr *ha;
@@ -60,14 +75,21 @@ static void ha_rcu_free(struct rcu_head *head)
 	kfree(ha);
 }
 
-static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
-			 int addr_len, unsigned char addr_type)
+static int __hw_addr_del_ex(struct netdev_hw_addr_list *list,
+			    unsigned char *addr, int addr_len,
+			    unsigned char addr_type, bool global)
 {
 	struct netdev_hw_addr *ha;
 
 	list_for_each_entry(ha, &list->list, list) {
 		if (!memcmp(ha->addr, addr, addr_len) &&
 		    (ha->type == addr_type || !addr_type)) {
+			if (global) {
+				if (!ha->global_use)
+					break;
+				else
+					ha->global_use = false;
+			}
 			if (--ha->refcount)
 				return 0;
 			list_del_rcu(&ha->list);
@@ -79,10 +101,15 @@ static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
 	return -ENOENT;
 }
 
-static int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
-				  struct netdev_hw_addr_list *from_list,
-				  int addr_len,
-				  unsigned char addr_type)
+static int __hw_addr_del(struct netdev_hw_addr_list *list, unsigned char *addr,
+			 int addr_len, unsigned char addr_type)
+{
+	return __hw_addr_del_ex(list, addr, addr_len, addr_type, false);
+}
+
+int __hw_addr_add_multiple(struct netdev_hw_addr_list *to_list,
+			   struct netdev_hw_addr_list *from_list,
+			   int addr_len, unsigned char addr_type)
 {
 	int err;
 	struct netdev_hw_addr *ha, *ha2;
@@ -105,11 +132,11 @@ unroll:
 	}
 	return err;
 }
+EXPORT_SYMBOL(__hw_addr_add_multiple);
 
-static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
-				   struct netdev_hw_addr_list *from_list,
-				   int addr_len,
-				   unsigned char addr_type)
+void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
+			    struct netdev_hw_addr_list *from_list,
+			    int addr_len, unsigned char addr_type)
 {
 	struct netdev_hw_addr *ha;
 	unsigned char type;
@@ -119,10 +146,11 @@ static void __hw_addr_del_multiple(struct netdev_hw_addr_list *to_list,
 		__hw_addr_del(to_list, ha->addr, addr_len, addr_type);
 	}
 }
+EXPORT_SYMBOL(__hw_addr_del_multiple);
 
-static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
-			  struct netdev_hw_addr_list *from_list,
-			  int addr_len)
+int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
+		   struct netdev_hw_addr_list *from_list,
+		   int addr_len)
 {
 	int err = 0;
 	struct netdev_hw_addr *ha, *tmp;
@@ -142,10 +170,11 @@ static int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
 	}
 	return err;
 }
+EXPORT_SYMBOL(__hw_addr_sync);
 
-static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
-			     struct netdev_hw_addr_list *from_list,
-			     int addr_len)
+void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
+		      struct netdev_hw_addr_list *from_list,
+		      int addr_len)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -159,8 +188,9 @@ static void __hw_addr_unsync(struct netdev_hw_addr_list *to_list,
 		}
 	}
 }
+EXPORT_SYMBOL(__hw_addr_unsync);
 
-static void __hw_addr_flush(struct netdev_hw_addr_list *list)
+void __hw_addr_flush(struct netdev_hw_addr_list *list)
 {
 	struct netdev_hw_addr *ha, *tmp;
 
@@ -170,12 +200,14 @@ static void __hw_addr_flush(struct netdev_hw_addr_list *list)
 	}
 	list->count = 0;
 }
+EXPORT_SYMBOL(__hw_addr_flush);
 
-static void __hw_addr_init(struct netdev_hw_addr_list *list)
+void __hw_addr_init(struct netdev_hw_addr_list *list)
 {
 	INIT_LIST_HEAD(&list->list);
 	list->count = 0;
 }
+EXPORT_SYMBOL(__hw_addr_init);
 
 /*
  * Device addresses handling functions
@@ -475,4 +507,235 @@ EXPORT_SYMBOL(dev_uc_init);
  * Multicast list handling functions
  */
 
-/* To be filled here */
+static int __dev_mc_add(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_add_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+/**
+ *	dev_mc_add - Add a multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a multicast address to the device or increase
+ *	the reference count if it already exists.
+ */
+int dev_mc_add(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_add);
+
+/**
+ *	dev_mc_add_global - Add a global multicast address
+ *	@dev: device
+ *	@addr: address to add
+ *
+ *	Add a global multicast address to the device.
+ */
+int dev_mc_add_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_add(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_add_global);
+
+static int __dev_mc_del(struct net_device *dev, unsigned char *addr,
+			bool global)
+{
+	int err;
+
+	netif_addr_lock_bh(dev);
+	err = __hw_addr_del_ex(&dev->mc, addr, dev->addr_len,
+			       NETDEV_HW_ADDR_T_MULTICAST, global);
+	if (!err)
+		__dev_set_rx_mode(dev);
+	netif_addr_unlock_bh(dev);
+	return err;
+}
+
+/**
+ *	dev_mc_del - Delete a multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, false);
+}
+EXPORT_SYMBOL(dev_mc_del);
+
+/**
+ *	dev_mc_del_global - Delete a global multicast address.
+ *	@dev: device
+ *	@addr: address to delete
+ *
+ *	Release reference to a multicast address and remove it
+ *	from the device if the reference count drops to zero.
+ */
+int dev_mc_del_global(struct net_device *dev, unsigned char *addr)
+{
+	return __dev_mc_del(dev, addr, true);
+}
+EXPORT_SYMBOL(dev_mc_del_global);
+
+/**
+ *	dev_mc_sync - Synchronize device's multicast list to another device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Add newly added addresses to the destination device and release
+ *	addresses that have no users left. The source device must be
+ *	locked by netif_addr_lock_bh.
+ *
+ *	This function is intended to be called from the dev->set_multicast_list
+ *	or dev->set_rx_mode function of layered software devices.
+ */
+int dev_mc_sync(struct net_device *to, struct net_device *from)
+{
+	int err = 0;
+
+	if (to->addr_len != from->addr_len)
+		return -EINVAL;
+
+	netif_addr_lock_bh(to);
+	err = __hw_addr_sync(&to->mc, &from->mc, to->addr_len);
+	if (!err)
+		__dev_set_rx_mode(to);
+	netif_addr_unlock_bh(to);
+	return err;
+}
+EXPORT_SYMBOL(dev_mc_sync);
+
+/**
+ *	dev_mc_unsync - Remove synchronized addresses from the destination device
+ *	@to: destination device
+ *	@from: source device
+ *
+ *	Remove all addresses that were added to the destination device by
+ *	dev_mc_sync(). This function is intended to be called from the
+ *	dev->stop function of layered software devices.
+ */
+void dev_mc_unsync(struct net_device *to, struct net_device *from)
+{
+	if (to->addr_len != from->addr_len)
+		return;
+
+	netif_addr_lock_bh(from);
+	netif_addr_lock(to);
+	__hw_addr_unsync(&to->mc, &from->mc, to->addr_len);
+	__dev_set_rx_mode(to);
+	netif_addr_unlock(to);
+	netif_addr_unlock_bh(from);
+}
+EXPORT_SYMBOL(dev_mc_unsync);
+
+/**
+ *	dev_mc_flush - Flush multicast addresses
+ *	@dev: device
+ *
+ *	Flush multicast addresses.
+ */
+void dev_mc_flush(struct net_device *dev)
+{
+	netif_addr_lock_bh(dev);
+	__hw_addr_flush(&dev->mc);
+	netif_addr_unlock_bh(dev);
+}
+EXPORT_SYMBOL(dev_mc_flush);
+
+/**
+ *	dev_mc_init - Init multicast address list
+ *	@dev: device
+ *
+ *	Init multicast address list.
+ */
+void dev_mc_init(struct net_device *dev)
+{
+	__hw_addr_init(&dev->mc);
+}
+EXPORT_SYMBOL(dev_mc_init);
+
+#ifdef CONFIG_PROC_FS
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+
+static int dev_mc_seq_show(struct seq_file *seq, void *v)
+{
+	struct netdev_hw_addr *ha;
+	struct net_device *dev = v;
+
+	if (v == SEQ_START_TOKEN)
+		return 0;
+
+	netif_addr_lock_bh(dev);
+	netdev_for_each_mc_addr(ha, dev) {
+		int i;
+
+		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
+			   dev->name, ha->refcount, ha->global_use);
+
+		for (i = 0; i < dev->addr_len; i++)
+			seq_printf(seq, "%02x", ha->addr[i]);
+
+		seq_putc(seq, '\n');
+	}
+	netif_addr_unlock_bh(dev);
+	return 0;
+}
+
+static const struct seq_operations dev_mc_seq_ops = {
+	.start = dev_seq_start,
+	.next  = dev_seq_next,
+	.stop  = dev_seq_stop,
+	.show  = dev_mc_seq_show,
+};
+
+static int dev_mc_seq_open(struct inode *inode, struct file *file)
+{
+	return seq_open_net(inode, file, &dev_mc_seq_ops,
+			    sizeof(struct seq_net_private));
+}
+
+static const struct file_operations dev_mc_seq_fops = {
+	.owner	 = THIS_MODULE,
+	.open    = dev_mc_seq_open,
+	.read    = seq_read,
+	.llseek  = seq_lseek,
+	.release = seq_release_net,
+};
+
+#endif
+
+static int __net_init dev_mc_net_init(struct net *net)
+{
+	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
+		return -ENOMEM;
+	return 0;
+}
+
+static void __net_exit dev_mc_net_exit(struct net *net)
+{
+	proc_net_remove(net, "dev_mcast");
+}
+
+static struct pernet_operations __net_initdata dev_mc_net_ops = {
+	.init = dev_mc_net_init,
+	.exit = dev_mc_net_exit,
+};
+
+void __init dev_mcast_init(void)
+{
+	register_pernet_subsys(&dev_mc_net_ops);
+}
+
diff --git a/net/core/dev_mcast.c b/net/core/dev_mcast.c
deleted file mode 100644
index 3dc295beb483..000000000000
--- a/net/core/dev_mcast.c
+++ /dev/null
@@ -1,232 +0,0 @@
-/*
- *	Linux NET3:	Multicast List maintenance.
- *
- *	Authors:
- *		Tim Kordas <tjk@nostromo.eeap.cwru.edu>
- *		Richard Underwood <richard@wuzz.demon.co.uk>
- *
- *	Stir fried together from the IP multicast and CAP patches above
- *		Alan Cox <alan@lxorguk.ukuu.org.uk>
- *
- *	Fixes:
- *		Alan Cox	:	Update the device on a real delete
- *					rather than any time but...
- *		Alan Cox	:	IFF_ALLMULTI support.
- *		Alan Cox	: 	New format set_multicast_list() calls.
- *		Gleb Natapov    :       Remove dev_mc_lock.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <asm/uaccess.h>
-#include <asm/system.h>
-#include <linux/bitops.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/mm.h>
-#include <linux/socket.h>
-#include <linux/sockios.h>
-#include <linux/in.h>
-#include <linux/errno.h>
-#include <linux/interrupt.h>
-#include <linux/if_ether.h>
-#include <linux/inet.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/init.h>
-#include <net/net_namespace.h>
-#include <net/ip.h>
-#include <net/route.h>
-#include <linux/skbuff.h>
-#include <net/sock.h>
-#include <net/arp.h>
-
-
-/*
- *	Device multicast list maintenance.
- *
- *	This is used both by IP and by the user level maintenance functions.
- *	Unlike BSD we maintain a usage count on a given multicast address so
- *	that a casual user application can add/delete multicasts used by
- *	protocols without doing damage to the protocols when it deletes the
- *	entries. It also helps IP as it tracks overlapping maps.
- *
- *	Device mc lists are changed by bh at least if IPv6 is enabled,
- *	so that it must be bh protected.
- *
- *	We block accesses to device mc filters with netif_tx_lock.
- */
-
-/*
- *	Delete a device level multicast
- */
-
-int dev_mc_delete(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	err = __dev_addr_delete(&dev->mc_list, &dev->mc_count,
-				addr, alen, glbl);
-	if (!err) {
-		/*
-		 *	We have altered the list, so the card
-		 *	loaded filter is now wrong. Fix it
-		 */
-
-		__dev_set_rx_mode(dev);
-	}
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/*
- *	Add a device level multicast
- */
-
-int dev_mc_add(struct net_device *dev, void *addr, int alen, int glbl)
-{
-	int err;
-
-	netif_addr_lock_bh(dev);
-	if (alen != dev->addr_len)
-		err = -EINVAL;
-	else
-		err = __dev_addr_add(&dev->mc_list, &dev->mc_count, addr, alen, glbl);
-	if (!err)
-		__dev_set_rx_mode(dev);
-	netif_addr_unlock_bh(dev);
-	return err;
-}
-
-/**
- *	dev_mc_sync	- Synchronize device's multicast list to another device
- *	@to: destination device
- *	@from: source device
- *
- * 	Add newly added addresses to the destination device and release
- * 	addresses that have no users left. The source device must be
- * 	locked by netif_tx_lock_bh.
- *
- *	This function is intended to be called from the dev->set_multicast_list
- *	or dev->set_rx_mode function of layered software devices.
- */
-int dev_mc_sync(struct net_device *to, struct net_device *from)
-{
-	int err = 0;
-
-	netif_addr_lock_bh(to);
-	err = __dev_addr_sync(&to->mc_list, &to->mc_count,
-			      &from->mc_list, &from->mc_count);
-	if (!err)
-		__dev_set_rx_mode(to);
-	netif_addr_unlock_bh(to);
-
-	return err;
-}
-EXPORT_SYMBOL(dev_mc_sync);
-
-
-/**
- * 	dev_mc_unsync	- Remove synchronized addresses from the destination
- * 			  device
- *	@to: destination device
- *	@from: source device
- *
- * 	Remove all addresses that were added to the destination device by
- * 	dev_mc_sync(). This function is intended to be called from the
- * 	dev->stop function of layered software devices.
- */
-void dev_mc_unsync(struct net_device *to, struct net_device *from)
-{
-	netif_addr_lock_bh(from);
-	netif_addr_lock(to);
-
-	__dev_addr_unsync(&to->mc_list, &to->mc_count,
-			  &from->mc_list, &from->mc_count);
-	__dev_set_rx_mode(to);
-
-	netif_addr_unlock(to);
-	netif_addr_unlock_bh(from);
-}
-EXPORT_SYMBOL(dev_mc_unsync);
-
-#ifdef CONFIG_PROC_FS
-static int dev_mc_seq_show(struct seq_file *seq, void *v)
-{
-	struct dev_addr_list *m;
-	struct net_device *dev = v;
-
-	if (v == SEQ_START_TOKEN)
-		return 0;
-
-	netif_addr_lock_bh(dev);
-	for (m = dev->mc_list; m; m = m->next) {
-		int i;
-
-		seq_printf(seq, "%-4d %-15s %-5d %-5d ", dev->ifindex,
-			   dev->name, m->dmi_users, m->dmi_gusers);
-
-		for (i = 0; i < m->dmi_addrlen; i++)
-			seq_printf(seq, "%02x", m->dmi_addr[i]);
-
-		seq_putc(seq, '\n');
-	}
-	netif_addr_unlock_bh(dev);
-	return 0;
-}
-
-static const struct seq_operations dev_mc_seq_ops = {
-	.start = dev_seq_start,
-	.next  = dev_seq_next,
-	.stop  = dev_seq_stop,
-	.show  = dev_mc_seq_show,
-};
-
-static int dev_mc_seq_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &dev_mc_seq_ops,
-			    sizeof(struct seq_net_private));
-}
-
-static const struct file_operations dev_mc_seq_fops = {
-	.owner	 = THIS_MODULE,
-	.open    = dev_mc_seq_open,
-	.read    = seq_read,
-	.llseek  = seq_lseek,
-	.release = seq_release_net,
-};
-
-#endif
-
-static int __net_init dev_mc_net_init(struct net *net)
-{
-	if (!proc_net_fops_create(net, "dev_mcast", 0, &dev_mc_seq_fops))
-		return -ENOMEM;
-	return 0;
-}
-
-static void __net_exit dev_mc_net_exit(struct net *net)
-{
-	proc_net_remove(net, "dev_mcast");
-}
-
-static struct pernet_operations __net_initdata dev_mc_net_ops = {
-	.init = dev_mc_net_init,
-	.exit = dev_mc_net_exit,
-};
-
-void __init dev_mcast_init(void)
-{
-	register_pernet_subsys(&dev_mc_net_ops);
-}
-
-EXPORT_SYMBOL(dev_mc_add);
-EXPORT_SYMBOL(dev_mc_delete);
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index 238af093495b..f3e4734d207f 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -349,7 +349,7 @@ static void dn_dev_del_ifa(struct dn_dev *dn_db, struct dn_ifaddr **ifap, int de
 	if (dn_db->dev->type == ARPHRD_ETHER) {
 		if (ifa1->ifa_local != dn_eth2dn(dev->dev_addr)) {
 			dn_dn2eth(mac_addr, ifa1->ifa_local);
-			dev_mc_delete(dev, mac_addr, ETH_ALEN, 0);
+			dev_mc_del(dev, mac_addr);
 		}
 	}
 
@@ -380,7 +380,7 @@ static int dn_dev_insert_ifa(struct dn_dev *dn_db, struct dn_ifaddr *ifa)
 	if (dev->type == ARPHRD_ETHER) {
 		if (ifa->ifa_local != dn_eth2dn(dev->dev_addr)) {
 			dn_dn2eth(mac_addr, ifa->ifa_local);
-			dev_mc_add(dev, mac_addr, ETH_ALEN, 0);
+			dev_mc_add(dev, mac_addr);
 		}
 	}
 
@@ -1000,9 +1000,9 @@ static int dn_eth_up(struct net_device *dev)
 	struct dn_dev *dn_db = dev->dn_ptr;
 
 	if (dn_db->parms.forwarding == 0)
-		dev_mc_add(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+		dev_mc_add(dev, dn_rt_all_end_mcast);
 	else
-		dev_mc_add(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+		dev_mc_add(dev, dn_rt_all_rt_mcast);
 
 	dn_db->use_long = 1;
 
@@ -1014,9 +1014,9 @@ static void dn_eth_down(struct net_device *dev)
 	struct dn_dev *dn_db = dev->dn_ptr;
 
 	if (dn_db->parms.forwarding == 0)
-		dev_mc_delete(dev, dn_rt_all_end_mcast, ETH_ALEN, 0);
+		dev_mc_del(dev, dn_rt_all_end_mcast);
 	else
-		dev_mc_delete(dev, dn_rt_all_rt_mcast, ETH_ALEN, 0);
+		dev_mc_del(dev, dn_rt_all_rt_mcast);
 }
 
 static void dn_dev_set_timer(struct net_device *dev);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 63bf298ca109..51824c42b775 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -997,7 +997,7 @@ static void ip_mc_filter_add(struct in_device *in_dev, __be32 addr)
 	   --ANK
 	   */
 	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_add(dev, buf, dev->addr_len, 0);
+		dev_mc_add(dev, buf);
 }
 
 /*
@@ -1010,7 +1010,7 @@ static void ip_mc_filter_del(struct in_device *in_dev, __be32 addr)
 	struct net_device *dev = in_dev->dev;
 
 	if (arp_mc_map(addr, buf, dev, 0) == 0)
-		dev_mc_delete(dev, buf, dev->addr_len, 0);
+		dev_mc_del(dev, buf);
 }
 
 #ifdef CONFIG_IP_MULTICAST
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 0886f96c736b..a2208b7b313d 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -87,7 +87,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
 		list_del(&c->list);
 		write_unlock_bh(&clusterip_lock);
 
-		dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+		dev_mc_del(c->dev, c->clustermac);
 		dev_put(c->dev);
 
 		/* In case anyone still accesses the file, the open/close
@@ -396,7 +396,7 @@ static bool clusterip_tg_check(const struct xt_tgchk_param *par)
 				dev_put(dev);
 				return false;
 			}
-			dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+			dev_mc_add(config->dev, config->clustermac);
 		}
 	}
 	cipinfo->config = config;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index bcd971915969..37d1868c0064 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -714,7 +714,7 @@ static void igmp6_group_added(struct ifmcaddr6 *mc)
 	if (!(mc->mca_flags&MAF_LOADED)) {
 		mc->mca_flags |= MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_add(dev, buf, dev->addr_len, 0);
+			dev_mc_add(dev, buf);
 	}
 	spin_unlock_bh(&mc->mca_lock);
 
@@ -740,7 +740,7 @@ static void igmp6_group_dropped(struct ifmcaddr6 *mc)
 	if (mc->mca_flags&MAF_LOADED) {
 		mc->mca_flags &= ~MAF_LOADED;
 		if (ndisc_mc_map(&mc->mca_addr, buf, dev, 0) == 0)
-			dev_mc_delete(dev, buf, dev->addr_len, 0);
+			dev_mc_del(dev, buf);
 	}
 
 	if (mc->mca_flags & MAF_NOREPORT)
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index c3d844093a2f..9179196da264 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -84,16 +84,14 @@ static inline void drv_bss_info_changed(struct ieee80211_local *local,
 }
 
 static inline u64 drv_prepare_multicast(struct ieee80211_local *local,
-					int mc_count,
-					struct dev_addr_list *mc_list)
+					struct netdev_hw_addr_list *mc_list)
 {
 	u64 ret = 0;
 
 	if (local->ops->prepare_multicast)
-		ret = local->ops->prepare_multicast(&local->hw, mc_count,
-						    mc_list);
+		ret = local->ops->prepare_multicast(&local->hw, mc_list);
 
-	trace_drv_prepare_multicast(local, mc_count, ret);
+	trace_drv_prepare_multicast(local, mc_list->count, ret);
 
 	return ret;
 }
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index ab369e2a5282..7fdacf9408b1 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -646,8 +646,7 @@ struct ieee80211_local {
 	struct work_struct recalc_smps;
 
 	/* aggregated multicast list */
-	struct dev_addr_list *mc_list;
-	int mc_count;
+	struct netdev_hw_addr_list mc_list;
 
 	bool tim_in_locked_section; /* see ieee80211_beacon_get() */
 
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index b4ec59a8dc03..00f3a93c6b04 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -412,8 +412,7 @@ static int ieee80211_stop(struct net_device *dev)
 
 	netif_addr_lock_bh(dev);
 	spin_lock_bh(&local->filter_lock);
-	__dev_addr_unsync(&local->mc_list, &local->mc_count,
-			  &dev->mc_list, &dev->mc_count);
+	__hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
 	netif_addr_unlock_bh(dev);
 
@@ -596,8 +595,7 @@ static void ieee80211_set_multicast_list(struct net_device *dev)
 		sdata->flags ^= IEEE80211_SDATA_PROMISC;
 	}
 	spin_lock_bh(&local->filter_lock);
-	__dev_addr_sync(&local->mc_list, &local->mc_count,
-			&dev->mc_list, &dev->mc_count);
+	__hw_addr_sync(&local->mc_list, &dev->mc, dev->addr_len);
 	spin_unlock_bh(&local->filter_lock);
 	ieee80211_queue_work(&local->hw, &local->reconfig_filter);
 }
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 06c33b68d8e5..84ad249a4e2e 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -71,7 +71,7 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
 	spin_lock_bh(&local->filter_lock);
 	changed_flags = local->filter_flags ^ new_flags;
 
-	mc = drv_prepare_multicast(local, local->mc_count, local->mc_list);
+	mc = drv_prepare_multicast(local, &local->mc_list);
 	spin_unlock_bh(&local->filter_lock);
 
 	/* be a bit nasty */
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 48c1e0ae565f..b0f037cc899c 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1691,9 +1691,9 @@ static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
 		if (i->alen != dev->addr_len)
 			return -EINVAL;
 		if (what > 0)
-			return dev_mc_add(dev, i->addr, i->alen, 0);
+			return dev_mc_add(dev, i->addr);
 		else
-			return dev_mc_delete(dev, i->addr, i->alen, 0);
+			return dev_mc_del(dev, i->addr);
 		break;
 	case PACKET_MR_PROMISC:
 		return dev_set_promiscuity(dev, what);
-- 
cgit v1.2.3


From fd558d186df2c13a22455373858bae634a4795af Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:33 +0000
Subject: l2tp: Split pppol2tp patch into separate l2tp and ppp parts

This patch splits the pppol2tp driver into separate L2TP and PPP parts
to prepare for L2TPv3 support. In L2TPv3, protocols other than PPP can
be carried, so this split creates a common L2TP core that will handle
the common L2TP bits which protocol support modules such as PPP will
use.

Note that the existing pppol2tp module is split into l2tp_core and
l2tp_ppp by this change.

There are no feature changes here. Internally, however, there are
significant changes, mostly to handle the separation of PPP-specific
data from the L2TP session and to provide hooks in the core for
modules like PPP to access.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/Kconfig         |    7 +-
 include/linux/if_pppol2tp.h |    2 +-
 net/Kconfig                 |    1 +
 net/Makefile                |    2 +-
 net/l2tp/Kconfig            |   28 +
 net/l2tp/Makefile           |    5 +-
 net/l2tp/l2tp_core.c        | 1227 ++++++++++++++++++++
 net/l2tp/l2tp_core.h        |  254 ++++
 net/l2tp/l2tp_ppp.c         | 1658 ++++++++++++++++++++++++++
 net/l2tp/pppol2tp.c         | 2680 -------------------------------------------
 10 files changed, 3175 insertions(+), 2689 deletions(-)
 create mode 100644 net/l2tp/Kconfig
 create mode 100644 net/l2tp/l2tp_core.c
 create mode 100644 net/l2tp/l2tp_core.h
 create mode 100644 net/l2tp/l2tp_ppp.c
 delete mode 100644 net/l2tp/pppol2tp.c

(limited to 'include/linux')

diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 2cffcb84a241..a55bff26a264 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -3158,17 +3158,12 @@ config PPPOATM
 
 config PPPOL2TP
 	tristate "PPP over L2TP (EXPERIMENTAL)"
-	depends on EXPERIMENTAL && PPP && INET
+	depends on EXPERIMENTAL && L2TP && PPP
 	help
 	  Support for PPP-over-L2TP socket family. L2TP is a protocol
 	  used by ISPs and enterprises to tunnel PPP traffic over UDP
 	  tunnels. L2TP is replacing PPTP for VPN uses.
 
-	  This kernel component handles only L2TP data packets: a
-	  userland daemon handles L2TP the control protocol (tunnel
-	  and session setup). One such daemon is OpenL2TP
-	  (http://openl2tp.sourceforge.net/).
-
 config SLIP
 	tristate "SLIP (serial line) support"
 	---help---
diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index c58baea4a25b..1a1fb6e5d933 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -2,7 +2,7 @@
  * Linux PPP over L2TP (PPPoL2TP) Socket Implementation (RFC 2661)
  *
  * This file supplies definitions required by the PPP over L2TP driver
- * (pppol2tp.c).  All version information wrt this file is located in pppol2tp.c
+ * (l2tp_ppp.c).  All version information wrt this file is located in l2tp_ppp.c
  *
  * License:
  *		This program is free software; you can redistribute it and/or
diff --git a/net/Kconfig b/net/Kconfig
index e10d55c8ee5c..0d68b40fc0e6 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -186,6 +186,7 @@ source "net/sctp/Kconfig"
 source "net/rds/Kconfig"
 source "net/tipc/Kconfig"
 source "net/atm/Kconfig"
+source "net/l2tp/Kconfig"
 source "net/802/Kconfig"
 source "net/bridge/Kconfig"
 source "net/dsa/Kconfig"
diff --git a/net/Makefile b/net/Makefile
index 13ca77e0eb08..cb7bdc1210cb 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -40,7 +40,7 @@ obj-$(CONFIG_BT)		+= bluetooth/
 obj-$(CONFIG_SUNRPC)		+= sunrpc/
 obj-$(CONFIG_AF_RXRPC)		+= rxrpc/
 obj-$(CONFIG_ATM)		+= atm/
-obj-$(CONFIG_PPPOL2TP)		+= l2tp/
+obj-$(CONFIG_L2TP)		+= l2tp/
 obj-$(CONFIG_DECNET)		+= decnet/
 obj-$(CONFIG_ECONET)		+= econet/
 obj-$(CONFIG_PHONET)		+= phonet/
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
new file mode 100644
index 000000000000..ec88c5cdc397
--- /dev/null
+++ b/net/l2tp/Kconfig
@@ -0,0 +1,28 @@
+#
+# Layer Two Tunneling Protocol (L2TP)
+#
+
+menuconfig L2TP
+	tristate "Layer Two Tunneling Protocol (L2TP)"
+	depends on INET
+	---help---
+	  Layer Two Tunneling Protocol
+
+	  From RFC 2661 <http://www.ietf.org/rfc/rfc2661.txt>.
+
+	  L2TP facilitates the tunneling of packets across an
+	  intervening network in a way that is as transparent as
+	  possible to both end-users and applications.
+
+	  L2TP is often used to tunnel PPP traffic over IP
+	  tunnels. One IP tunnel may carry thousands of individual PPP
+	  connections. L2TP is also used as a VPN protocol, popular
+	  with home workers to connect to their offices.
+
+	  The kernel component handles only L2TP data packets: a
+	  userland daemon handles the L2TP control protocol (tunnel
+	  and session setup). One such daemon is OpenL2TP
+	  (http://openl2tp.org/).
+
+	  If you don't need L2TP, say N. To compile all L2TP code as
+	  modules, choose M here.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index 9af41e898a04..c91f208b1693 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -2,4 +2,7 @@
 # Makefile for the L2TP.
 #
 
-obj-$(CONFIG_PPPOL2TP) += pppol2tp.o
+obj-$(CONFIG_L2TP) += l2tp_core.o
+
+# Build l2tp as modules if L2TP is M
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
new file mode 100644
index 000000000000..4b6da3689893
--- /dev/null
+++ b/net/l2tp/l2tp_core.c
@@ -0,0 +1,1227 @@
+/*
+ * L2TP core.
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * This file contains some code of the original L2TPv2 pppol2tp
+ * driver, which has the following copyright:
+ *
+ * Authors:	Martijn van Oosterhout <kleptog@svana.org>
+ *		James Chapman (jchapman@katalix.com)
+ * Contributors:
+ *		Michal Ostrowski <mostrows@speakeasy.net>
+ *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
+ *		David S. Miller (davem@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/file.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+#include "l2tp_core.h"
+
+#define L2TP_DRV_VERSION	"V2.0"
+
+/* L2TP header constants */
+#define L2TP_HDRFLAG_T	   0x8000
+#define L2TP_HDRFLAG_L	   0x4000
+#define L2TP_HDRFLAG_S	   0x0800
+#define L2TP_HDRFLAG_O	   0x0200
+#define L2TP_HDRFLAG_P	   0x0100
+
+#define L2TP_HDR_VER_MASK  0x000F
+#define L2TP_HDR_VER_2	   0x0002
+
+/* L2TPv3 default L2-specific sublayer */
+#define L2TP_SLFLAG_S	   0x40000000
+#define L2TP_SL_SEQ_MASK   0x00ffffff
+
+#define L2TP_HDR_SIZE_SEQ		10
+#define L2TP_HDR_SIZE_NOSEQ		6
+
+/* Default trace flags */
+#define L2TP_DEFAULT_DEBUG_FLAGS	0
+
+#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "L2TP: " _fmt, ##args);		\
+	} while (0)
+
+/* Private data stored for received packets in the skb.
+ */
+struct l2tp_skb_cb {
+	u16			ns;
+	u16			has_seq;
+	u16			length;
+	unsigned long		expires;
+};
+
+#define L2TP_SKB_CB(skb)	((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
+
+static atomic_t l2tp_tunnel_count;
+static atomic_t l2tp_session_count;
+
+/* per-net private data for this module */
+static unsigned int l2tp_net_id;
+struct l2tp_net {
+	struct list_head l2tp_tunnel_list;
+	rwlock_t l2tp_tunnel_list_lock;
+};
+
+static inline struct l2tp_net *l2tp_pernet(struct net *net)
+{
+	/* A NULL namespace is a caller bug; trap it before the lookup. */
+	BUG_ON(net == NULL);
+	return net_generic(net, l2tp_net_id);
+}
+
+/* Session hash list.
+ * The session_id SHOULD be random according to RFC2661, but several
+ * L2TP implementations (Cisco and Microsoft) use incrementing
+ * session_ids.  So we do a real hash on the session_id, rather than a
+ * simple bitmask.
+ */
+static inline struct hlist_head *
+l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
+{
+	return tunnel->session_hlist + hash_32(session_id, L2TP_HASH_BITS);
+}
+
+/* Lookup a session by id in the tunnel's session hash.
+ * Returns NULL if no session with that id is hashed.
+ */
+struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id)
+{
+	struct hlist_head *bucket = l2tp_session_id_hash(tunnel, session_id);
+	struct l2tp_session *found = NULL;
+	struct l2tp_session *session;
+	struct hlist_node *pos;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	hlist_for_each_entry(session, pos, bucket, hlist) {
+		if (session->session_id == session_id) {
+			found = session;
+			break;
+		}
+	}
+	read_unlock_bh(&tunnel->hlist_lock);
+	return found;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find);
+
+struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
+{
+	struct l2tp_session *found = NULL;
+	struct l2tp_session *session;
+	struct hlist_node *pos;
+	int bucket;
+	int seen = 0;
+
+	read_lock_bh(&tunnel->hlist_lock);
+	for (bucket = 0; bucket < L2TP_HASH_SIZE; bucket++) {
+		hlist_for_each_entry(session, pos, &tunnel->session_hlist[bucket], hlist) {
+			if (seen++ >= nth) {
+				found = session;
+				goto unlock;
+			}
+		}
+	}
+unlock:
+	read_unlock_bh(&tunnel->hlist_lock);
+	return found;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
+
+/* Lookup a tunnel by id on the per-net tunnel list; NULL if absent.
+ */
+struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *found = NULL;
+	struct l2tp_tunnel *tunnel;
+
+	read_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+		if (tunnel->tunnel_id == tunnel_id) {
+			found = tunnel;
+			break;
+		}
+	}
+	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	return found;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_find);
+
+struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	struct l2tp_tunnel *found = NULL;
+	struct l2tp_tunnel *tunnel;
+	int seen = 0;
+
+	read_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_for_each_entry(tunnel, &pn->l2tp_tunnel_list, list) {
+		if (seen++ >= nth) {
+			found = tunnel;
+			break;
+		}
+	}
+	read_unlock_bh(&pn->l2tp_tunnel_list_lock);
+
+	return found;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_find_nth);
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Queue a skb in order. We come here only if the skb has an L2TP sequence
+ * number.
+ */
+static void l2tp_recv_queue_skb(struct l2tp_session *session, struct sk_buff *skb)
+{
+	struct sk_buff_head *q = &session->reorder_q;
+	struct sk_buff *pos, *next;
+	u16 ns = L2TP_SKB_CB(skb)->ns;
+
+	spin_lock_bh(&q->lock);
+	skb_queue_walk_safe(q, pos, next) {
+		if (L2TP_SKB_CB(pos)->ns <= ns)
+			continue;
+
+		/* First queued skb with a larger ns: slot in ahead of it. */
+		__skb_queue_before(q, pos, skb);
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
+		       session->name, ns, L2TP_SKB_CB(pos)->ns, skb_queue_len(q));
+		session->stats.rx_oos_packets++;
+		goto unlock;
+	}
+
+	__skb_queue_tail(q, skb);
+unlock:
+	spin_unlock_bh(&q->lock);
+}
+
+/* Dequeue a single skb.
+ */
+static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff *skb)
+{
+	struct l2tp_skb_cb *cb = L2TP_SKB_CB(skb);
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	int length = cb->length;
+
+	/* The skb is about to change hands, so detach it from the socket
+	 * receive buffer that currently accounts for it. */
+	skb_orphan(skb);
+
+	session->stats.rx_packets++;
+	session->stats.rx_bytes += length;
+	tunnel->stats.rx_packets++;
+	tunnel->stats.rx_bytes += length;
+
+	/* A sequenced packet advances our Nr */
+	if (cb->has_seq) {
+		session->nr++;
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: updated nr to %hu\n", session->name, session->nr);
+	}
+
+	/* Hand over to the session's private receive handler, or drop */
+	if (session->recv_skb == NULL)
+		kfree_skb(skb);
+	else
+		(*session->recv_skb)(session, skb, length);
+
+	if (session->deref)
+		(*session->deref)(session);
+}
+
+/* Dequeue skbs from the session's reorder_q, subject to packet order.
+ * Skbs that have been in the queue for too long are simply discarded.
+ */
+static void l2tp_recv_dequeue(struct l2tp_session *session)
+{
+	struct sk_buff *skb;
+	struct sk_buff *tmp;
+
+	/* If the pkt at the head of the queue has the nr that we
+	 * expect to send up next, dequeue it and any other
+	 * in-sequence packets behind it.
+	 */
+start:
+	spin_lock_bh(&session->reorder_q.lock);
+	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
+		if (time_after(jiffies, L2TP_SKB_CB(skb)->expires)) {
+			session->stats.rx_seq_discards++;
+			session->stats.rx_errors++;
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+			       "%s: oos pkt %hu len %d discarded (too old), "
+			       "waiting for %hu, reorder_q_len=%d\n",
+			       session->name, L2TP_SKB_CB(skb)->ns,
+			       L2TP_SKB_CB(skb)->length, session->nr,
+			       skb_queue_len(&session->reorder_q));
+			__skb_unlink(skb, &session->reorder_q);
+			kfree_skb(skb);
+			if (session->deref)
+				(*session->deref)(session);
+			continue;
+		}
+
+		if (L2TP_SKB_CB(skb)->has_seq) {
+			if (L2TP_SKB_CB(skb)->ns != session->nr) {
+				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: holding oos pkt %hu len %d, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, L2TP_SKB_CB(skb)->ns,
+				       L2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto out;
+			}
+		}
+		__skb_unlink(skb, &session->reorder_q);
+		/* Process the skb with the queue lock released so other
+		 * contexts can use the queue. They may unlink the entry
+		 * cached in tmp meanwhile, so restart the walk afterwards.
+		 */
+		spin_unlock_bh(&session->reorder_q.lock);
+		l2tp_recv_dequeue_skb(session, skb);
+		goto start;
+	}
+out:
+	spin_unlock_bh(&session->reorder_q.lock);
+}
+
+static inline int l2tp_verify_udp_checksum(struct sock *sk,
+					   struct sk_buff *skb)
+{
+	struct udphdr *uh = udp_hdr(skb);
+	struct inet_sock *inet = inet_sk(sk);
+	__wsum pseudo;
+
+	/* Nothing to verify: checks disabled, already validated, or no csum */
+	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
+		return 0;
+
+	pseudo = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr,
+				    ntohs(uh->len), IPPROTO_UDP, 0);
+
+	/* A complete sum is already in skb->csum: add the pseudo header */
+	if (skb->ip_summed == CHECKSUM_COMPLETE &&
+	    !csum_fold(csum_add(pseudo, skb->csum)))
+		return 0;
+
+	/* Otherwise seed with the pseudo header and sum the packet now */
+	skb->csum = pseudo;
+	return __skb_checksum_complete(skb);
+}
+
+/* Internal UDP receive frame. Do the real work of receiving an L2TP data frame
+ * here. The skb is not on a list when we get here.
+ * Returns 0 if the packet was a data packet and was successfully passed on.
+ * Returns 1 if the packet was not a good data packet and could not be
+ * forwarded.  All such packets are passed up to userspace to deal with.
+ */
+int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
+		       int (*payload_hook)(struct sk_buff *skb))
+{
+	struct l2tp_session *session = NULL;
+	unsigned char *ptr, *optr;
+	u16 hdrflags;
+	u32 tunnel_id, session_id;
+	int length;
+	int offset;
+	u16 version;
+	u16 ns, nr;
+
+	if (tunnel->sock && l2tp_verify_udp_checksum(tunnel->sock, skb))
+		goto discard_bad_csum;
+
+	/* UDP always verifies the packet length. */
+	__skb_pull(skb, sizeof(struct udphdr));
+
+	/* Short packet? */
+	if (!pskb_may_pull(skb, L2TP_HDR_SIZE_SEQ)) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
+		goto error;
+	}
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & L2TP_MSG_DATA) {
+		length = min(32u, skb->len);
+		if (!pskb_may_pull(skb, length))
+			goto error;
+
+		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
+
+		offset = 0;
+		do {
+			printk(" %02X", skb->data[offset]);
+		} while (++offset < length);
+
+		printk("\n");
+	}
+
+	/* Point to L2TP header. This must come after the pulls above:
+	 * pskb_may_pull() may reallocate the header and move skb->data.
+	 */
+	optr = ptr = skb->data;
+
+	/* Get L2TP header flags */
+	hdrflags = ntohs(*(__be16 *)ptr);
+
+	/* Check protocol version */
+	version = hdrflags & L2TP_HDR_VER_MASK;
+	if (version != tunnel->version) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: recv protocol version mismatch: got %d expected %d\n",
+		       tunnel->name, version, tunnel->version);
+		goto error;
+	}
+
+	/* Get length of L2TP packet */
+	length = skb->len;
+
+	/* If type is control packet, it is handled by userspace. */
+	if (hdrflags & L2TP_HDRFLAG_T) {
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: recv control packet, len=%d\n", tunnel->name, length);
+		goto error;
+	}
+
+	/* Skip flags */
+	ptr += 2;
+
+	/* If length is present, skip it */
+	if (hdrflags & L2TP_HDRFLAG_L)
+		ptr += 2;
+
+	/* Extract tunnel and session ID */
+	tunnel_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+	session_id = ntohs(*(__be16 *) ptr);
+	ptr += 2;
+
+	/* Find the session context */
+	session = l2tp_session_find(tunnel, session_id);
+	if (!session) {
+		/* Not found? Pass to userspace to deal with */
+		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
+		       "%s: no session found (%hu/%hu). Passing up.\n",
+		       tunnel->name, tunnel_id, session_id);
+		goto error;
+	}
+
+	/* The ref count is increased since we now hold a pointer to
+	 * the session. Take care to decrement the refcnt when exiting
+	 * this function from now on...
+	 */
+	l2tp_session_inc_refcount(session);
+	if (session->ref)
+		(*session->ref)(session);
+
+	/* Handle the optional sequence numbers. Sequence numbers are
+	 * in different places for L2TPv2 and L2TPv3.
+	 *
+	 * If we are the LAC, enable/disable sequence numbers under
+	 * the control of the LNS.  If no sequence numbers present but
+	 * we were expecting them, discard frame.
+	 */
+	ns = nr = 0;
+	L2TP_SKB_CB(skb)->has_seq = 0;
+	if (hdrflags & L2TP_HDRFLAG_S) {
+		ns = (u16) ntohs(*(__be16 *) ptr);
+		ptr += 2;
+		nr = ntohs(*(__be16 *) ptr);
+		ptr += 2;
+
+		/* Store L2TP info in the skb */
+		L2TP_SKB_CB(skb)->ns = ns;
+		L2TP_SKB_CB(skb)->has_seq = 1;
+
+		PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
+		       session->name, ns, nr, session->nr);
+	}
+
+	if (L2TP_SKB_CB(skb)->has_seq) {
+		/* Received a packet with sequence numbers. If we're the LAC
+		 * and are not yet sending sequence numbers, start doing so.
+		 */
+		if ((!session->lns_mode) && (!session->send_seq)) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to enable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = -1;
+		}
+	} else {
+		/* No sequence numbers.
+		 * If user has configured mandatory sequence numbers, discard.
+		 */
+		if (session->recv_seq) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+
+		/* If we're the LAC and we're sending sequence numbers, the
+		 * LNS has requested that we no longer send sequence numbers.
+		 * If we're the LNS and we're sending sequence numbers, the
+		 * LAC is broken. Discard the frame.
+		 */
+		if ((!session->lns_mode) && (session->send_seq)) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_INFO,
+			       "%s: requested to disable seq numbers by LNS\n",
+			       session->name);
+			session->send_seq = 0;
+		} else if (session->send_seq) {
+			PRINTK(session->debug, L2TP_MSG_SEQ, KERN_WARNING,
+			       "%s: recv data has no seq numbers when required. "
+			       "Discarding\n", session->name);
+			session->stats.rx_seq_discards++;
+			goto discard;
+		}
+	}
+
+	/* If offset bit set, skip it. */
+	if (hdrflags & L2TP_HDRFLAG_O) {
+		offset = ntohs(*(__be16 *)ptr);
+		ptr += 2 + offset;
+	}
+
+	offset = ptr - optr;
+	if (!pskb_may_pull(skb, offset))
+		goto discard;
+
+	__skb_pull(skb, offset);
+
+	/* If caller wants to process the payload before we queue the
+	 * packet, do so now.
+	 */
+	if (payload_hook && (*payload_hook)(skb))
+		goto discard;
+
+	/* Prepare skb for adding to the session's reorder_q.  Hold
+	 * packets for max reorder_timeout or 1 second if not
+	 * reordering.
+	 */
+	L2TP_SKB_CB(skb)->length = length;
+	L2TP_SKB_CB(skb)->expires = jiffies +
+		(session->reorder_timeout ? session->reorder_timeout : HZ);
+
+	/* Add packet to the session's receive queue. Reordering is done here, if
+	 * enabled. Saved L2TP protocol info is stored in skb->cb[].
+	 */
+	if (L2TP_SKB_CB(skb)->has_seq) {
+		if (session->reorder_timeout != 0) {
+			/* Packet reordering enabled. Add skb to session's
+			 * reorder queue, in order of ns.
+			 */
+			l2tp_recv_queue_skb(session, skb);
+		} else {
+			/* Packet reordering disabled. Discard out-of-sequence
+			 * packets
+			 */
+			if (L2TP_SKB_CB(skb)->ns != session->nr) {
+				session->stats.rx_seq_discards++;
+				PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+				       "%s: oos pkt %hu len %d discarded, "
+				       "waiting for %hu, reorder_q_len=%d\n",
+				       session->name, L2TP_SKB_CB(skb)->ns,
+				       L2TP_SKB_CB(skb)->length, session->nr,
+				       skb_queue_len(&session->reorder_q));
+				goto discard;
+			}
+			skb_queue_tail(&session->reorder_q, skb);
+		}
+	} else {
+		/* No sequence numbers. Add the skb to the tail of the
+		 * reorder queue. This ensures that it will be
+		 * delivered after all previous sequenced skbs.
+		 */
+		skb_queue_tail(&session->reorder_q, skb);
+	}
+
+	/* Try to dequeue as many skbs from reorder_q as we can. */
+	l2tp_recv_dequeue(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+
+discard:
+	session->stats.rx_errors++;
+	kfree_skb(skb);
+
+	if (session->deref)
+		(*session->deref)(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+
+discard_bad_csum:
+	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
+	UDP_INC_STATS_USER(tunnel->l2tp_net, UDP_MIB_INERRORS, 0);
+	tunnel->stats.rx_errors++;
+	kfree_skb(skb);
+
+	return 0;
+
+error:
+	/* Put UDP header back */
+	__skb_push(skb, sizeof(struct udphdr));
+
+	return 1;
+}
+EXPORT_SYMBOL_GPL(l2tp_udp_recv_core);
+
+/* UDP encapsulation receive handler. See net/ipv4/udp.c.
+ * Return codes:
+ * 0 : success.
+ * <0: error
+ * >0: skb should be passed up to userspace as UDP.
+ */
+int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
+{
+	struct l2tp_tunnel *tunnel = l2tp_sock_to_tunnel(sk);
+	int pass_up;
+
+	/* No tunnel context on this socket: leave the skb to UDP */
+	if (tunnel == NULL)
+		return 1;
+
+	PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_DEBUG,
+	       "%s: received %d bytes\n", tunnel->name, skb->len);
+
+	/* A non-zero result from the core means the skb still has to
+	 * go up the stack as ordinary UDP.
+	 */
+	pass_up = l2tp_udp_recv_core(tunnel, skb, tunnel->recv_payload_hook) ? 1 : 0;
+
+	/* The socket is released whichever way the packet went */
+	sock_put(sk);
+
+	return pass_up;
+}
+EXPORT_SYMBOL_GPL(l2tp_udp_encap_recv);
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Build an L2TP header for the session into the buffer provided.
+ */
+static void l2tp_build_l2tpv2_header(struct l2tp_tunnel *tunnel,
+				     struct l2tp_session *session,
+				     void *buf)
+{
+	__be16 *hdr = buf;
+	u32 peer_tid = tunnel->peer_tunnel_id;
+	u32 peer_sid = session->peer_session_id;
+	u16 flags = session->send_seq ?
+		(L2TP_HDR_VER_2 | L2TP_HDRFLAG_S) : L2TP_HDR_VER_2;
+
+	/* Fixed part: flags/version, then the peer's tunnel and session ids */
+	hdr[0] = htons(flags);
+	hdr[1] = htons(peer_tid);
+	hdr[2] = htons(peer_sid);
+	if (!session->send_seq)
+		return;
+
+	/* Optional part: our Ns, followed by an Nr field written as zero */
+	hdr[3] = htons(session->ns);
+	hdr[4] = 0;
+	session->ns++;
+	PRINTK(session->debug, L2TP_MSG_SEQ, KERN_DEBUG,
+	       "%s: updated ns to %hu\n", session->name, session->ns);
+}
+
+void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf)
+{
+	/* Only the L2TPv2 header layout is implemented here */
+	BUG_ON(session->tunnel->version != L2TP_HDR_VER_2);
+
+	l2tp_build_l2tpv2_header(session->tunnel, session, buf);
+}
+EXPORT_SYMBOL_GPL(l2tp_build_l2tp_header);
+
+int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len)
+{
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	unsigned int len = skb->len;
+	int error;
+
+	/* Debug trace; the ns just sent is one behind session->ns */
+	if (session->send_seq)
+		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes, ns=%hu\n", session->name,
+		       data_len, session->ns - 1);
+	else
+		PRINTK(session->debug, L2TP_MSG_DATA, KERN_DEBUG,
+		       "%s: send %Zd bytes\n", session->name, data_len);
+
+	if (session->debug & L2TP_MSG_DATA) {
+		int i;
+		unsigned char *datap = skb->data + sizeof(struct udphdr);
+		/* Dump at most the first 32 bytes following the UDP header */
+		printk(KERN_DEBUG "%s: xmit:", session->name);
+		for (i = 0; i < (len - sizeof(struct udphdr)); i++) {
+			printk(" %02X", *datap++);
+			if (i == 31) {
+				printk(" ...");
+				break;
+			}
+		}
+		printk("\n");
+	}
+
+	/* Queue the packet to IP for output */
+	error = ip_queue_xmit(skb, 1);
+
+	/* Update tunnel and session stats; len is skb->len captured on entry */
+	if (error >= 0) {
+		tunnel->stats.tx_packets++;
+		tunnel->stats.tx_bytes += len;
+		session->stats.tx_packets++;
+		session->stats.tx_bytes += len;
+	} else {
+		tunnel->stats.tx_errors++;
+		session->stats.tx_errors++;
+	}
+	/* Transmit failures are only counted; the caller always sees 0 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_xmit_core);
+
+/* skb destructor: runs when the skb is freed and drops its socket reference. */
+static void l2tp_sock_wfree(struct sk_buff *skb)
+{
+	struct sock *owner = skb->sk;
+	sock_put(owner);
+}
+
+/* Tie a transmitted data skb to the tunnel socket by holding a socket
+ * reference for it; no accounting is done.
+ */
+static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
+{
+	skb->destructor = l2tp_sock_wfree;
+	skb->sk = sk;
+	sock_hold(sk);
+}
+
+/* Wrap skb in L2TP and UDP headers and send it on the tunnel socket. Any
+ * ppp header must already be in the skb data. Always returns 0.
+ */
+int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
+{
+	int data_len = skb->len;
+	struct sock *sk = session->tunnel->sock;
+	struct udphdr *uh;
+	unsigned int udp_len;
+	struct inet_sock *inet;
+	__wsum csum;
+	int old_headroom;
+	int new_headroom;
+	int headroom;
+
+	/* Check that there's enough headroom in the skb to insert IP,
+	 * UDP and L2TP headers. If not enough, expand it to
+	 * make room. Adjust truesize.
+	 */
+	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
+		sizeof(struct udphdr) + hdr_len;
+	old_headroom = skb_headroom(skb);
+	if (skb_cow_head(skb, headroom))
+		goto abort;
+
+	new_headroom = skb_headroom(skb);
+	skb_orphan(skb);
+	skb->truesize += new_headroom - old_headroom;
+
+	/* Setup L2TP header */
+	l2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
+	udp_len = sizeof(struct udphdr) + hdr_len + data_len;
+
+	/* Setup UDP header */
+	inet = inet_sk(sk);
+	__skb_push(skb, sizeof(*uh));
+	skb_reset_transport_header(skb);
+	uh = udp_hdr(skb);
+	uh->source = inet->inet_sport;
+	uh->dest = inet->inet_dport;
+	uh->len = htons(udp_len);
+	/* Zero for now; filled in below when a checksum is wanted */
+	uh->check = 0;
+
+	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
+			      IPSKB_REROUTED);
+	nf_reset(skb);
+
+	/* Get routing info from the tunnel socket */
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
+	l2tp_skb_set_owner_w(skb, sk);
+
+	/* Calculate UDP checksum if configured to do so */
+	if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+		skb->ip_summed = CHECKSUM_NONE;
+	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+		skb->ip_summed = CHECKSUM_COMPLETE;
+		csum = skb_checksum(skb, 0, udp_len, 0);
+		uh->check = csum_tcpudp_magic(inet->inet_saddr,
+					      inet->inet_daddr,
+					      udp_len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	} else {
+		skb->ip_summed = CHECKSUM_PARTIAL;
+		skb->csum_start = skb_transport_header(skb) - skb->head;
+		skb->csum_offset = offsetof(struct udphdr, check);
+		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+					       inet->inet_daddr,
+					       udp_len, IPPROTO_UDP, 0);
+	}
+	/* Hand over for transmission; the return value is not used */
+	l2tp_xmit_core(session, skb, data_len);
+	/* NOTE(review): skb is not freed here if skb_cow_head() failed - confirm the caller does */
+abort:
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_xmit_skb);
+
+/*****************************************************************************
+ * Tunnel and session create/destroy.
+ *****************************************************************************/
+
+/* Tunnel socket destruct hook.
+ * The tunnel context is deleted only when all session sockets have been
+ * closed.
+ */
+void l2tp_tunnel_destruct(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel = sk->sk_user_data;
+	struct udp_sock *up = udp_sk(sk);
+
+	/* Socket carries no tunnel context: nothing for us to undo */
+	if (tunnel == NULL)
+		return;
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing...\n", tunnel->name);
+
+	/* Take down every session attached to the tunnel first */
+	l2tp_tunnel_closeall(tunnel);
+
+	/* Clear the UDP encapsulation hooks. See net/ipv4/udp.c */
+	up->encap_type = 0;
+	up->encap_rcv = NULL;
+
+	/* Unhook the tunnel from the socket and put back the
+	 * destructor that was saved in old_sk_destruct.
+	 */
+	tunnel->sock = NULL;
+	sk->sk_destruct = tunnel->old_sk_destruct;
+	sk->sk_user_data = NULL;
+
+	/* Chain to the original destructor, if there was one */
+	if (sk->sk_destruct)
+		(*sk->sk_destruct)(sk);
+
+	/* We're done with the socket: release our tunnel reference */
+	l2tp_tunnel_dec_refcount(tunnel);
+}
+EXPORT_SYMBOL(l2tp_tunnel_destruct);
+
+/* Unhash every session of the tunnel and invoke its session_close hook.
+ */
+void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel)
+{
+	int hash;
+	struct hlist_node *walk;
+	struct hlist_node *tmp;
+	struct l2tp_session *session;
+
+	BUG_ON(tunnel == NULL);
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: closing all sessions...\n", tunnel->name);
+
+	write_lock_bh(&tunnel->hlist_lock);
+	for (hash = 0; hash < L2TP_HASH_SIZE; hash++) {
+again:
+		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
+			session = hlist_entry(walk, struct l2tp_session, hlist);
+
+			PRINTK(session->debug, L2TP_MSG_CONTROL, KERN_INFO,
+			       "%s: closing session\n", session->name);
+
+			hlist_del_init(&session->hlist);
+
+			/* Since we should hold the sock lock while
+			 * doing any unbinding, we need to release the
+			 * lock we're holding before taking that lock.
+			 * Call the session's ref hook first (presumably
+			 * pinning its socket) so nothing disappears meanwhile.
+			 */
+			if (session->ref != NULL)
+				(*session->ref)(session);
+
+			write_unlock_bh(&tunnel->hlist_lock);
+			/* The hooks below run without hlist_lock held */
+			if (session->session_close != NULL)
+				(*session->session_close)(session);
+
+			if (session->deref != NULL)
+				(*session->deref)(session);
+
+			write_lock_bh(&tunnel->hlist_lock);
+
+			/* Now restart from the beginning of this hash
+			 * chain.  We always remove a session from the
+			 * list so we are guaranteed to make forward
+			 * progress.
+			 */
+			goto again;
+		}
+	}
+	write_unlock_bh(&tunnel->hlist_lock);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_closeall);
+
+/* Really kill the tunnel.
+ * Come here only when all sessions have been cleared from the tunnel.
+ */
+void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
+{
+	struct l2tp_net *l2tp_ns = l2tp_pernet(tunnel->l2tp_net);
+
+	BUG_ON(atomic_read(&tunnel->ref_count));
+	BUG_ON(tunnel->sock);
+
+	PRINTK(tunnel->debug, L2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: free...\n", tunnel->name);
+
+	/* Unlink from the per-net tunnel list so lookups stop finding us */
+	write_lock_bh(&l2tp_ns->l2tp_tunnel_list_lock);
+	list_del_init(&tunnel->list);
+	write_unlock_bh(&l2tp_ns->l2tp_tunnel_list_lock);
+	/* Off the list: adjust the global count and release the memory */
+	atomic_dec(&l2tp_tunnel_count);
+	kfree(tunnel);
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_free);
+
+int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
+{
+	struct l2tp_tunnel *tunnel = NULL;
+	int err;
+	struct socket *sock = NULL;
+	struct sock *sk = NULL;
+	struct l2tp_net *pn;
+
+	/* Get the tunnel socket from the fd, which was opened by
+	 * the userspace L2TP daemon.
+	 */
+	err = -EBADF;
+	sock = sockfd_lookup(fd, &err);
+	if (!sock) {
+		printk(KERN_ERR "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
+		       tunnel_id, fd, err);
+		goto err;
+	}
+
+	sk = sock->sk;
+
+	/* Quick sanity checks */
+	err = -EPROTONOSUPPORT;
+	if (sk->sk_protocol != IPPROTO_UDP) {
+		printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+		goto err;
+	}
+	err = -EAFNOSUPPORT;
+	if (sock->ops->family != AF_INET) {
+		printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n",
+		       tunnel_id, fd, sock->ops->family, AF_INET);
+		goto err;
+	}
+
+	/* Check if this socket has already been prepped */
+	tunnel = (struct l2tp_tunnel *)sk->sk_user_data;
+	if (tunnel != NULL) {
+		err = -EBUSY;
+		goto err;
+	}
+
+	/* Only L2TPv2 is handled; don't report the stale errno set above */
+	err = -EINVAL;
+	if (version != L2TP_HDR_VER_2)
+		goto err;
+
+	tunnel = kzalloc(sizeof(struct l2tp_tunnel), GFP_KERNEL);
+	if (tunnel == NULL) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	tunnel->version = version;
+	tunnel->tunnel_id = tunnel_id;
+	tunnel->peer_tunnel_id = peer_tunnel_id;
+	tunnel->debug = L2TP_DEFAULT_DEBUG_FLAGS;
+
+	tunnel->magic = L2TP_TUNNEL_MAGIC;
+	sprintf(&tunnel->name[0], "tunl %u", tunnel_id);
+	rwlock_init(&tunnel->hlist_lock);
+
+	/* The net we belong to */
+	tunnel->l2tp_net = net;
+	pn = l2tp_pernet(net);
+
+	if (cfg)
+		tunnel->debug = cfg->debug;
+
+	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+	udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+	udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+
+	sk->sk_user_data = tunnel;
+
+	/* Hook on the tunnel socket destructor so that we can cleanup
+	 * if the tunnel socket goes away.
+	 */
+	tunnel->old_sk_destruct = sk->sk_destruct;
+	sk->sk_destruct = &l2tp_tunnel_destruct;
+	tunnel->sock = sk;
+	sk->sk_allocation = GFP_ATOMIC;
+
+	/* Add tunnel to our list */
+	INIT_LIST_HEAD(&tunnel->list);
+	write_lock_bh(&pn->l2tp_tunnel_list_lock);
+	list_add(&tunnel->list, &pn->l2tp_tunnel_list);
+	write_unlock_bh(&pn->l2tp_tunnel_list_lock);
+	atomic_inc(&l2tp_tunnel_count);
+
+	/* Bump the reference count. The tunnel context is deleted
+	 * only when this drops to zero.
+	 */
+	l2tp_tunnel_inc_refcount(tunnel);
+	err = 0;
+err:
+	if (tunnelp)
+		*tunnelp = tunnel;
+
+	if (sock)
+		sockfd_put(sock);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
+
+/* Really kill the session: unhash it from its tunnel, drop the socket and
+ * tunnel references taken for it in l2tp_session_create() and free it.
+ */
+void l2tp_session_free(struct l2tp_session *session)
+{
+	struct l2tp_tunnel *parent;
+
+	BUG_ON(atomic_read(&session->ref_count) != 0);
+
+	parent = session->tunnel;
+	if (parent) {
+		BUG_ON(parent->magic != L2TP_TUNNEL_MAGIC);
+
+		/* Unlink the session from the tunnel's hash table */
+		write_lock_bh(&parent->hlist_lock);
+		hlist_del_init(&session->hlist);
+		write_unlock_bh(&parent->hlist_lock);
+
+		/* Session id 0 was never counted, so do not uncount it */
+		if (session->session_id)
+			atomic_dec(&l2tp_session_count);
+
+		sock_put(parent->sock);
+
+		/* Dropping the tunnel reference frees the tunnel context
+		 * if this was the last reference to it.
+		 */
+		session->tunnel = NULL;
+		l2tp_tunnel_dec_refcount(parent);
+	}
+
+	kfree(session);
+}
+EXPORT_SYMBOL_GPL(l2tp_session_free);
+
+/* Allocate a session with priv_size bytes of private data and hash it on
+ * tunnel; a non-NULL cfg overrides the defaults. NULL on allocation failure.
+ */
+struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+	struct l2tp_session *session;
+
+	session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL);
+	if (session == NULL)
+		return NULL;
+
+	session->magic = L2TP_SESSION_MAGIC;
+	session->tunnel = tunnel;
+	session->session_id = session_id;
+	session->peer_session_id = peer_session_id;
+
+	sprintf(&session->name[0], "sess %u/%u",
+		tunnel->tunnel_id, session->session_id);
+
+	skb_queue_head_init(&session->reorder_q);
+	INIT_HLIST_NODE(&session->hlist);
+
+	/* Start from the tunnel's debug mask; cfg may override it below */
+	session->debug = tunnel->debug;
+
+	if (cfg) {
+		session->debug = cfg->debug;
+		session->hdr_len = cfg->hdr_len;
+		session->mtu = cfg->mtu;
+		session->mru = cfg->mru;
+		session->send_seq = cfg->send_seq;
+		session->recv_seq = cfg->recv_seq;
+		session->lns_mode = cfg->lns_mode;
+	}
+
+	/* Initial session reference, plus a tunnel reference for the session */
+	l2tp_session_inc_refcount(session);
+	l2tp_tunnel_inc_refcount(tunnel);
+
+	/* Keep the tunnel socket alive for as long as the session exists */
+	sock_hold(tunnel->sock);
+
+	/* Hash the session on the tunnel by its id */
+	write_lock_bh(&tunnel->hlist_lock);
+	hlist_add_head(&session->hlist,
+		       l2tp_session_id_hash(tunnel, session_id));
+	write_unlock_bh(&tunnel->hlist_lock);
+
+	/* The management session (id 0) is not counted */
+	if (session->session_id != 0)
+		atomic_inc(&l2tp_session_count);
+
+	return session;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_create);
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+/* Per-namespace initialisation hook, run by the pernet core for each
+ * network namespace (see l2tp_net_ops below).
+ *
+ * Because l2tp_net_ops sets both .id and .size, the pernet core allocates
+ * a zeroed struct l2tp_net itself, installs it in the namespace's generic
+ * pointer slot before calling this hook and frees it again once the
+ * namespace's exit processing is done (see ops_init() and ops_free() in
+ * net/core/net_namespace.c).  All that is left to do here is to
+ * initialise that storage.
+ *
+ * Do not allocate a struct l2tp_net here and install it with
+ * net_assign_generic(): that overwrites the slot and leaks the
+ * core-owned allocation for every namespace.
+ *
+ * Returns 0; nothing in here can fail.
+ */
+static __net_init int l2tp_init_net(struct net *net)
+{
+	struct l2tp_net *pn = net_generic(net, l2tp_net_id);
+
+	/* List of all tunnels in this namespace and the lock guarding it */
+	INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
+	rwlock_init(&pn->l2tp_tunnel_list_lock);
+
+	return 0;
+}
+
+/* Pernet registration data for the L2TP core.
+ *
+ * .id receives the generic-pointer index used with net_generic() and
+ * .size asks the pernet core to allocate and free one struct l2tp_net
+ * per namespace.
+ *
+ * There is deliberately no .exit hook: the struct l2tp_net is the only
+ * per-namespace state set up by l2tp_init_net() and its memory belongs to
+ * the pernet core, so there is nothing for this module to release.
+ */
+static struct pernet_operations l2tp_net_ops = {
+	.init = l2tp_init_net,
+	.id   = &l2tp_net_id,
+	.size = sizeof(struct l2tp_net),
+};
+
+/* Module init: register the per-namespace operations, then announce the
+ * driver version. Returns the registration error, or 0 on success.
+ */
+static int __init l2tp_init(void)
+{
+	int rc = register_pernet_device(&l2tp_net_ops);
+
+	if (rc)
+		return rc;
+
+	printk(KERN_INFO "L2TP core driver, %s\n", L2TP_DRV_VERSION);
+	return 0;
+}
+
+static void __exit l2tp_exit(void)
+{
+	unregister_pernet_device(&l2tp_net_ops);	/* undoes l2tp_init() */
+}
+
+module_init(l2tp_init);
+module_exit(l2tp_exit);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP core");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(L2TP_DRV_VERSION);
+
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
new file mode 100644
index 000000000000..2efe1a3ada98
--- /dev/null
+++ b/net/l2tp/l2tp_core.h
@@ -0,0 +1,254 @@
+/*
+ * L2TP internal definitions.
+ *
+ * Copyright (c) 2008,2009 Katalix Systems Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _L2TP_CORE_H_
+#define _L2TP_CORE_H_
+
+/* Just some random numbers */
+#define L2TP_TUNNEL_MAGIC	0x42114DDA
+#define L2TP_SESSION_MAGIC	0x0C04EB7D
+
+#define L2TP_HASH_BITS	4
+#define L2TP_HASH_SIZE	(1 << L2TP_HASH_BITS)
+
+/* Debug categories for the DEBUG socket option; one bit each (a bitmask) */
+enum {
+	L2TP_MSG_DEBUG		= (1 << 0),	/* verbose debug (if
+						 * compiled in) */
+	L2TP_MSG_CONTROL	= (1 << 1),	/* userspace - kernel
+						 * interface */
+	L2TP_MSG_SEQ		= (1 << 2),	/* sequence numbers */
+	L2TP_MSG_DATA		= (1 << 3),	/* data packets */
+};
+
+struct sk_buff;
+
+struct l2tp_stats {		/* embedded in both l2tp_session and l2tp_tunnel */
+	u64			tx_packets;
+	u64			tx_bytes;
+	u64			tx_errors;
+	u64			rx_packets;
+	u64			rx_bytes;
+	u64			rx_seq_discards;
+	u64			rx_oos_packets;	/* presumably out-of-sequence - confirm */
+	u64			rx_errors;	/* e.g. frame for an unbound socket */
+};
+
+struct l2tp_tunnel;
+
+/* Describes a session: what is needed to match incoming packets and send
+ * outgoing ones. l2tp_session_cfg is the creation-time configuration; see
+ * l2tp_session_create() for the fields copied into struct l2tp_session. */
+struct l2tp_session_cfg {
+	unsigned		data_seq:2;	/* data sequencing level
+						 * 0 => none, 1 => IP only,
+						 * 2 => all
+						 */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			offset;		/* offset to payload */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	int			mtu;
+	int			mru;
+	int			hdr_len;
+};
+
+struct l2tp_session {
+	int			magic;		/* should be
+						 * L2TP_SESSION_MAGIC */
+
+	struct l2tp_tunnel	*tunnel;	/* back pointer to tunnel
+						 * context */
+	u32			session_id;
+	u32			peer_session_id;
+	u16			nr;		/* session NR state (receive) */
+	u16			ns;		/* session NS state (send) */
+	struct sk_buff_head	reorder_q;	/* receive reorder queue */
+	struct hlist_node	hlist;		/* Hash list node */
+	atomic_t		ref_count;	/* session freed when this hits 0 */
+
+	char			name[32];	/* for logging */
+	unsigned		data_seq:2;	/* data sequencing level
+						 * 0 => none, 1 => IP only,
+						 * 2 => all
+						 */
+	unsigned		recv_seq:1;	/* expect receive packets with
+						 * sequence numbers? */
+	unsigned		send_seq:1;	/* send packets with sequence
+						 * numbers? */
+	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
+						 * sequence numbers under
+						 * control of LNS. */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			reorder_timeout; /* configured reorder timeout
+						  * (in jiffies) */
+	int			mtu;
+	int			mru;
+	int			hdr_len;
+	struct l2tp_stats	stats;
+
+	void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len);
+	void (*session_close)(struct l2tp_session *session);
+	void (*ref)(struct l2tp_session *session);
+	void (*deref)(struct l2tp_session *session);
+
+	uint8_t			priv[0];	/* private data, see l2tp_session_priv() */
+};
+
+/* Describes the tunnel. It contains info to track all the associated
+ * sessions so incoming packets can be sorted out. l2tp_tunnel_cfg is the
+ * optional configuration that l2tp_tunnel_create() takes for a new tunnel. */
+struct l2tp_tunnel_cfg {
+	int			debug;		/* bitmask of debug message
+						 * categories */
+};
+
+struct l2tp_tunnel {
+	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
+	rwlock_t		hlist_lock;	/* protect session_hlist */
+	struct hlist_head	session_hlist[L2TP_HASH_SIZE];
+						/* hashed list of sessions,
+						 * hashed by id */
+	u32			tunnel_id;
+	u32			peer_tunnel_id;
+	int			version;	/* 2=>L2TPv2, 3=>L2TPv3 */
+
+	char			name[20];	/* for logging */
+	int			debug;		/* bitmask of debug message
+						 * categories */
+	int			hdr_len;
+	struct l2tp_stats	stats;
+
+	struct list_head	list;		/* Keep a list of all tunnels */
+	struct net		*l2tp_net;	/* the net we belong to */
+
+	atomic_t		ref_count;	/* tunnel freed when this hits 0 */
+
+	int (*recv_payload_hook)(struct sk_buff *skb);
+	void (*old_sk_destruct)(struct sock *);	/* saved by l2tp_tunnel_create() */
+	struct sock		*sock;		/* Parent socket */
+	int			fd;
+
+	uint8_t			priv[0];	/* private data, see l2tp_tunnel_priv() */
+};
+
+static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
+{
+	return tunnel->priv;	/* start of the trailing private area */
+}
+
+static inline void *l2tp_session_priv(struct l2tp_session *session)
+{
+	return session->priv;	/* start of the trailing private area */
+}
+
+/* Return the tunnel attached to sk through sk_user_data, or NULL if sk is
+ * NULL or has none. On success sk is held; callers drop it with sock_put().
+ */
+static inline struct l2tp_tunnel *l2tp_sock_to_tunnel(struct sock *sk)
+{
+	struct l2tp_tunnel *tunnel;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	tunnel = sk->sk_user_data;
+	if (tunnel != NULL)
+		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
+	else
+		sock_put(sk);
+
+	return tunnel;
+}
+
+extern struct l2tp_session *l2tp_session_find(struct l2tp_tunnel *tunnel, u32 session_id);
+extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
+extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
+
+extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
+extern void l2tp_session_free(struct l2tp_session *session);
+extern int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, int (*payload_hook)(struct sk_buff *skb));
+extern int l2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb);
+
+extern void l2tp_build_l2tp_header(struct l2tp_session *session, void *buf);
+extern int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t data_len);
+extern int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len);
+extern void l2tp_tunnel_destruct(struct sock *sk);
+extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
+
+/* Tunnel reference counts. Incremented per session that is added to
+ * the tunnel (and once at creation); the last put frees the tunnel.
+ */
+static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
+{
+	atomic_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
+{
+	if (atomic_dec_and_test(&tunnel->ref_count))
+		l2tp_tunnel_free(tunnel);
+}
+#ifdef L2TP_REFCNT_DEBUG	/* trace each get/put; _t is evaluated more than once */
+#define l2tp_tunnel_inc_refcount(_t) do { \
+		printk(KERN_DEBUG "l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&(_t)->ref_count)); \
+		l2tp_tunnel_inc_refcount_1(_t);				\
+	} while (0)
+#define l2tp_tunnel_dec_refcount(_t) do { \
+		printk(KERN_DEBUG "l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_t)->name, atomic_read(&(_t)->ref_count)); \
+		l2tp_tunnel_dec_refcount_1(_t);				\
+	} while (0)
+#else
+#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
+#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
+#endif
+
+/* Session reference counts. Incremented when code obtains a reference
+ * to a session; the last put frees the session via l2tp_session_free().
+ */
+static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session)
+{
+	atomic_inc(&session->ref_count);
+}
+
+static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session)
+{
+	if (atomic_dec_and_test(&session->ref_count))
+		l2tp_session_free(session);
+}
+
+#ifdef L2TP_REFCNT_DEBUG	/* trace each get/put; _s is evaluated more than once */
+#define l2tp_session_inc_refcount(_s) do { \
+		printk(KERN_DEBUG "l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&(_s)->ref_count)); \
+		l2tp_session_inc_refcount_1(_s);				\
+	} while (0)
+#define l2tp_session_dec_refcount(_s) do { \
+		printk(KERN_DEBUG "l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", __func__, __LINE__, (_s)->name, atomic_read(&(_s)->ref_count)); \
+		l2tp_session_dec_refcount_1(_s);				\
+	} while (0)
+#else
+#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s)
+#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s)
+#endif
+
+#endif /* _L2TP_CORE_H_ */
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
new file mode 100644
index 000000000000..baac072761aa
--- /dev/null
+++ b/net/l2tp/l2tp_ppp.c
@@ -0,0 +1,1658 @@
+/*****************************************************************************
+ * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
+ *
+ * PPPoX    --- Generic PPP encapsulation socket family
+ * PPPoL2TP --- PPP over L2TP (RFC 2661)
+ *
+ * Version:	2.0.0
+ *
+ * Authors:	James Chapman (jchapman@katalix.com)
+ *
+ * Based on original work by Martijn van Oosterhout <kleptog@svana.org>
+ *
+ * License:
+ *		This program is free software; you can redistribute it and/or
+ *		modify it under the terms of the GNU General Public License
+ *		as published by the Free Software Foundation; either version
+ *		2 of the License, or (at your option) any later version.
+ *
+ */
+
+/* This driver handles only L2TP data frames; control frames are handled by a
+ * userspace application.
+ *
+ * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
+ * attaches it to a bound UDP socket with local tunnel_id / session_id and
+ * peer tunnel_id / session_id set. Data can then be sent or received using
+ * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
+ * can be read or modified using ioctl() or [gs]etsockopt() calls.
+ *
+ * When a PPPoL2TP socket is connected with local and peer session_id values
+ * zero, the socket is treated as a special tunnel management socket.
+ *
+ * Here's example userspace code to create a socket for sending/receiving data
+ * over an L2TP session:-
+ *
+ *	struct sockaddr_pppol2tp sax;
+ *	int fd;
+ *	int session_fd;
+ *
+ *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+ *
+ *	sax.sa_family = AF_PPPOX;
+ *	sax.sa_protocol = PX_PROTO_OL2TP;
+ *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
+ *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+ *	sax.pppol2tp.addr.sin_port = addr->sin_port;
+ *	sax.pppol2tp.addr.sin_family = AF_INET;
+ *	sax.pppol2tp.s_tunnel  = tunnel_id;
+ *	sax.pppol2tp.s_session = session_id;
+ *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+ *	sax.pppol2tp.d_session = peer_session_id;
+ *
+ *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
+ *
+ * A pppd plugin that allows PPP traffic to be carried over L2TP using
+ * this driver is available from the OpenL2TP project at
+ * http://openl2tp.sourceforge.net.
+ */
+
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/list.h>
+#include <linux/uaccess.h>
+
+#include <linux/kernel.h>
+#include <linux/spinlock.h>
+#include <linux/kthread.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/jiffies.h>
+
+#include <linux/netdevice.h>
+#include <linux/net.h>
+#include <linux/inetdevice.h>
+#include <linux/skbuff.h>
+#include <linux/init.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <linux/if_pppox.h>
+#include <linux/if_pppol2tp.h>
+#include <net/sock.h>
+#include <linux/ppp_channel.h>
+#include <linux/ppp_defs.h>
+#include <linux/if_ppp.h>
+#include <linux/file.h>
+#include <linux/hash.h>
+#include <linux/sort.h>
+#include <linux/proc_fs.h>
+#include <linux/nsproxy.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/dst.h>
+#include <net/ip.h>
+#include <net/udp.h>
+#include <net/xfrm.h>
+
+#include <asm/byteorder.h>
+#include <asm/atomic.h>
+
+#include "l2tp_core.h"
+
+#define PPPOL2TP_DRV_VERSION	"V2.0"
+
+/* Space for UDP, L2TP and PPP headers */
+#define PPPOL2TP_HEADER_OVERHEAD	40
+
+#define PRINTK(_mask, _type, _lvl, _fmt, ...)				\
+	do {								\
+		if ((_mask) & (_type))					\
+			printk(_lvl "PPPOL2TP: " _fmt, ##__VA_ARGS__);	\
+	} while (0)
+
+/* Number of bytes to build transmit L2TP headers.
+ * Unfortunately the size is different depending on whether sequence numbers
+ * are enabled.
+ */
+#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
+#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
+
+/* Private data of each session. This data lives at the end of struct
+ * l2tp_session, referenced via session->priv[] (l2tp_session_priv()).
+ */
+struct pppol2tp_session {
+	int			owner;		/* current->pid at connect() */
+
+	struct sock		*sock;		/* Pointer to the session
+						 * PPPoX socket */
+	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
+						 * socket */
+	int			flags;		/* accessed by PPPIOCGFLAGS.
+						 * Unused. */
+};
+
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
+
+static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
+static const struct proto_ops pppol2tp_ops;
+
+/* Helpers to obtain tunnel/session contexts from sockets.
+ * Returns the session attached to a PPPoX socket through sk_user_data, or
+ * NULL if sk is NULL or has no session. When a session is returned a
+ * reference is held on sk; the caller drops it with sock_put().
+ */
+static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk)
+{
+	struct l2tp_session *session;
+
+	if (sk == NULL)
+		return NULL;
+
+	sock_hold(sk);
+	session = sk->sk_user_data;
+	if (session != NULL)
+		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+	else
+		sock_put(sk);
+
+	return session;
+}
+
+/*****************************************************************************
+ * Receive data handling
+ *****************************************************************************/
+
+/* Payload hook: strip the PPP header (FF03) if present. Returns 1 if the
+ * first two bytes cannot be pulled, otherwise 0. */
+static int pppol2tp_recv_payload_hook(struct sk_buff *skb)
+{
+	/* In testing, Microsoft L2TP clients omit the PPP header (PPP header
+	 * compression enabled) while other clients may send it, so cope
+	 * with both. The header is always FF03 when using L2TP. Bytes are
+	 * compared singly, not via a u16 pointer: the field may be unaligned.
+	 */
+	if (!pskb_may_pull(skb, 2))
+		return 1;
+
+	if (skb->data[0] != 0xff || skb->data[1] != 0x03)
+		return 0;
+
+	skb_pull(skb, 2);
+	return 0;
+}
+
+/* recvmsg() handler for the PPPoL2TP socket. Returns -EIO while the socket
+ * is in PPPOX_BOUND state. Otherwise one datagram is dequeued, at most len
+ * bytes of it are copied to msg (MSG_TRUNC is set if it was longer) and the
+ * number of bytes copied is returned, or the error from the helpers used.
+ */
+static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
+			    struct msghdr *msg, size_t len,
+			    int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int err = 0;
+
+	if (sk->sk_state & PPPOX_BOUND)
+		return -EIO;
+
+	msg->msg_namelen = 0;
+
+	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
+				flags & MSG_DONTWAIT, &err);
+	if (!skb)
+		return err;
+
+	if (len > skb->len)
+		len = skb->len;
+	else if (len < skb->len)
+		msg->msg_flags |= MSG_TRUNC;
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
+	if (likely(err == 0))
+		err = len;
+
+	kfree_skb(skb);
+	return err;
+}
+
+/* Session recv_skb handler (installed in pppol2tp_connect()).
+ *
+ * session  - session the frame was received on
+ * skb      - the received frame
+ * data_len - frame length; only used in the debug message here
+ *
+ * A frame for a socket in PPPOX_BOUND state is handed to ppp_input().
+ * Without a session socket, or with one that is not bound, the frame is
+ * freed; the not-bound case is also counted in stats.rx_errors.
+ */
+static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct sock *sk = ps->sock;
+
+	/* No PPPoX socket attached to the session: drop the frame */
+	if (sk == NULL) {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: no socket\n", session->name);
+		kfree_skb(skb);
+		return;
+	}
+
+	if (!(sk->sk_state & PPPOX_BOUND)) {
+		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
+		       "%s: socket not bound\n", session->name);
+
+		/* Not bound. Nothing we can do, so discard. */
+		session->stats.rx_errors++;
+		kfree_skb(skb);
+		return;
+	}
+
+	PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
+	       "%s: recv %d byte data frame, passing to ppp\n",
+	       session->name, data_len);
+
+	/* The skb is reused for the inner packet, so everything that
+	 * was recorded on it for the outer L2TP packet has to go:
+	 * - the xfrm (IPSec) state, which applies to the outer L2TP
+	 *   packet and not to the inner one
+	 * - the dst, to force a route lookup on the inner IP packet;
+	 *   skb->dst currently points to the dst of the UDP tunnel
+	 * - the netfilter state, which does not apply to the inner
+	 *   packet either
+	 */
+	secpath_reset(skb);
+	skb_dst_drop(skb);
+	nf_reset(skb);
+
+	ppp_input(&pppox_sk(sk)->chan, skb);
+}
+
+static void pppol2tp_session_sock_hold(struct l2tp_session *session)
+{
+	struct pppol2tp_session *priv = l2tp_session_priv(session);
+
+	if (priv->sock != NULL)	/* session->ref hook; no socket, no-op */
+		sock_hold(priv->sock);
+}
+
+static void pppol2tp_session_sock_put(struct l2tp_session *session)
+{
+	struct pppol2tp_session *priv = l2tp_session_priv(session);
+
+	if (priv->sock != NULL)	/* session->deref hook; no socket, no-op */
+		sock_put(priv->sock);
+}
+
+/************************************************************************
+ * Transmit handling
+ ***********************************************************************/
+
+/* Size of the L2TP header for this session: the longer form when send
+ * sequence numbers are enabled, the shorter one otherwise.
+ */
+static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session)
+{
+	int seq_on = session->send_seq;
+
+	return seq_on ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+			PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+}
+
+/* sendmsg() handler for the PPPoL2TP session socket. The PPP header is
+ * put in front of the user's data and the frame goes to l2tp_xmit_skb().
+ *
+ * Returns a negative errno on failure and the memcpy_fromiovec() result
+ * (not total_len) on success. NOTE(review): callers of sendmsg() normally
+ * expect a byte count - confirm whether that is intended.
+ */
+static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
+			    size_t total_len)
+{
+	static const unsigned char ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = sock->sk;
+	struct sk_buff *skb;
+	int error;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+
+	error = -ENOTCONN;
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto error;
+
+	/* Get session and tunnel contexts (each takes a socket reference) */
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto error;
+
+	ps = l2tp_session_priv(session);
+	tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+	if (tunnel == NULL)
+		goto put_sess;
+
+	/* Allocate a socket buffer */
+	error = -ENOMEM;
+	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
+			   sizeof(struct udphdr) + session->hdr_len +
+			   sizeof(ppph) + total_len,
+			   0, GFP_KERNEL);
+	if (!skb)
+		goto put_sess_tun;
+
+	/* Reserve space for headers. */
+	skb_reserve(skb, NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+	skb_reserve(skb, sizeof(struct udphdr));
+
+	/* Add PPP header */
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+	skb_put(skb, 2);
+
+	/* Copy user data behind the PPP header (skb_put() returns the old tail) */
+	error = memcpy_fromiovec(skb_put(skb, total_len), m->msg_iov,
+				 total_len);
+	if (error < 0) {
+		kfree_skb(skb);
+		goto put_sess_tun;
+	}
+
+	l2tp_xmit_skb(session, skb, session->hdr_len);
+
+	/* Success also falls through: both socket refs must be dropped */
+put_sess_tun:
+	sock_put(ps->tunnel_sock);
+put_sess:
+	sock_put(sk);
+error:
+	return error;
+}
+
+/* Transmit function called by generic PPP driver.  Sends PPP frame
+ * over PPPoL2TP socket.
+ *
+ * This is almost the same as pppol2tp_sendmsg(), but rather than
+ * being called with a msghdr from userspace, it is called with a skb
+ * from the kernel.
+ *
+ * Headroom for the PPP header is obtained with skb_cow_head() and
+ * skb->truesize is adjusted by the headroom gained before the frame is
+ * handed to l2tp_xmit_skb().
+ *
+ * The function always returns 1. On every failure path the skb that
+ * was passed in is freed here instead of being sent.
+ */
+static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
+{
+	static const u8 ppph[2] = { 0xff, 0x03 };
+	struct sock *sk = (struct sock *) chan->private;
+	struct l2tp_session *session;
+	struct pppol2tp_session *ps;
+	struct l2tp_tunnel *tunnel;
+	struct sock *tunnel_sk;
+	int headroom_before;
+	int headroom_after;
+	int hdr_len;
+
+	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
+		goto abort;
+
+	/* Get session and tunnel contexts; each lookup pins its socket */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto abort;
+
+	ps = l2tp_session_priv(session);
+	tunnel_sk = ps->tunnel_sock;
+	if (tunnel_sk == NULL)
+		goto abort_put_sess;
+	tunnel = l2tp_sock_to_tunnel(tunnel_sk);
+	if (tunnel == NULL)
+		goto abort_put_sess;
+
+	/* L2TP header length depends on whether sequencing is enabled */
+	hdr_len = pppol2tp_l2tp_header_len(session);
+
+	headroom_before = skb_headroom(skb);
+	if (skb_cow_head(skb, sizeof(ppph)))
+		goto abort_put_sess_tun;
+
+	headroom_after = skb_headroom(skb);
+	skb->truesize += headroom_after - headroom_before;
+
+	/* Prepend the PPP header */
+	__skb_push(skb, sizeof(ppph));
+	skb->data[0] = ppph[0];
+	skb->data[1] = ppph[1];
+
+	l2tp_xmit_skb(session, skb, hdr_len);
+
+	sock_put(tunnel_sk);
+	sock_put(sk);
+	return 1;
+
+abort_put_sess_tun:
+	sock_put(tunnel_sk);
+abort_put_sess:
+	sock_put(sk);
+abort:
+	/* Drop the frame: free the skb we were given */
+	kfree_skb(skb);
+	return 1;
+}
+
+/*****************************************************************************
+ * Session (and tunnel control) socket create/destroy.
+ *****************************************************************************/
+
+/* session_close callback, called by l2tp_core when a session socket is
+ * being closed. Nothing is done for session id 0 or when no PPPoX socket
+ * is set. Otherwise a connected or bound socket is unbound and moved to
+ * PPPOX_DEAD, and all queued data (reorder queue included) is purged.
+ */
+static void pppol2tp_session_close(struct l2tp_session *session)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct sock *sk = ps->sock;
+	struct sk_buff *skb;
+
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+
+	if (session->session_id == 0 || sk == NULL)
+		return;
+
+	lock_sock(sk);
+
+	if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
+		pppox_unbind_sock(sk);
+		sk->sk_state = PPPOX_DEAD;
+		sk->sk_state_change(sk);
+	}
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+
+	/* One socket reference is dropped per skb still on the reorder queue */
+	while ((skb = skb_dequeue(&session->reorder_q)) != NULL) {
+		kfree_skb(skb);
+		sock_put(sk);
+	}
+
+	release_sock(sk);
+}
+
+/* Really kill the session socket. (Called from sock_put() if
+ * refcnt == 0.)
+ *
+ * Installed as sk->sk_destruct by pppol2tp_create(). If a session context
+ * is still attached through sk_user_data it is detached and one session
+ * reference is dropped, which frees the session once the count is zero.
+ */
+static void pppol2tp_session_destruct(struct sock *sk)
+{
+	struct l2tp_session *session = sk->sk_user_data;
+
+	/* No session context attached: nothing to do */
+	if (session == NULL)
+		return;
+
+	/* Clear the back pointer before dropping the reference */
+	sk->sk_user_data = NULL;
+	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
+	l2tp_session_dec_refcount(session);
+}
+
+/* Called when the PPPoX socket (session) is closed. Returns 0 (also when
+ * sock->sk is NULL), or -EBADF if the socket is already flagged SOCK_DEAD. */
+static int pppol2tp_release(struct socket *sock)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	int error;
+
+	if (!sk)
+		return 0;
+
+	error = -EBADF;
+	lock_sock(sk);
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		goto error;
+
+	pppox_unbind_sock(sk);
+
+	/* Signal the death of the socket. */
+	sk->sk_state = PPPOX_DEAD;
+	sock_orphan(sk);
+	sock->sk = NULL;
+
+	session = pppol2tp_sock_to_session(sk);	/* holds sk if a session is attached */
+
+	/* Purge any queued data */
+	skb_queue_purge(&sk->sk_receive_queue);
+	skb_queue_purge(&sk->sk_write_queue);
+	if (session != NULL) {
+		struct sk_buff *skb;
+		while ((skb = skb_dequeue(&session->reorder_q))) {
+			kfree_skb(skb);
+			sock_put(sk);
+		}
+		sock_put(sk);	/* reference from pppol2tp_sock_to_session() */
+	}
+
+	release_sock(sk);
+
+	/* This will delete the session context via
+	 * pppol2tp_session_destruct() if the socket's refcnt drops to
+	 * zero.
+	 */
+	sock_put(sk);
+
+	return 0;
+
+error:
+	release_sock(sk);
+	return error;
+}
+
+static struct proto pppol2tp_sk_proto = {	/* handed to sk_alloc() in pppol2tp_create() */
+	.name	  = "PPPOL2TP",
+	.owner	  = THIS_MODULE,
+	.obj_size = sizeof(struct pppox_sock),
+};
+
+/* sk_backlog_rcv handler: feed the skb to l2tp_udp_encap_recv() and free
+ * it here if that returns non-zero. Always reports NET_RX_SUCCESS.
+ */
+static int pppol2tp_backlog_recv(struct sock *sk, struct sk_buff *skb)
+{
+	if (l2tp_udp_encap_recv(sk, skb) != 0)
+		kfree_skb(skb);
+
+	return NET_RX_SUCCESS;
+}
+
+/* socket() handler. Allocates a struct sock with sk_alloc(), binds it to
+ * the struct socket and fills in the PPPoL2TP specific fields.
+ * Returns 0 on success or -ENOMEM if sk_alloc() fails.
+ */
+static int pppol2tp_create(struct net *net, struct socket *sock)
+{
+	struct sock *sk;
+
+	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
+	if (sk == NULL)
+		return -ENOMEM;
+
+	sock_init_data(sock, sk);
+
+	/* struct socket side */
+	sock->state  = SS_UNCONNECTED;
+	sock->ops    = &pppol2tp_ops;
+
+	/* struct sock side */
+	sk->sk_backlog_rcv = pppol2tp_backlog_recv;
+	sk->sk_protocol	   = PX_PROTO_OL2TP;
+	sk->sk_family	   = PF_PPPOX;
+	sk->sk_state	   = PPPOX_NONE;
+	sk->sk_type	   = SOCK_STREAM;
+	sk->sk_destruct	   = pppol2tp_session_destruct;
+
+	return 0;
+}
+
+/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket.
+ * With s_session == d_session == 0 the tunnel context is created on
+ * sp->pppol2tp.fd first; otherwise the tunnel is looked up by s_tunnel.
+ * A session is then created on the tunnel and, unless it is the tunnel
+ * management session, registered as a PPP channel. Returns 0 or -errno.
+ */
+static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+			    int sockaddr_len, int flags)
+{
+	struct sock *sk = sock->sk;
+	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct pppox_sock *po = pppox_sk(sk);
+	struct l2tp_session *session = NULL;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	struct dst_entry *dst;
+	struct l2tp_session_cfg cfg = { 0, };
+	int error = 0;
+
+	lock_sock(sk);
+
+	error = -EINVAL;
+	if (sp->sa_protocol != PX_PROTO_OL2TP)
+		goto end;
+
+	/* Check for already bound sockets */
+	error = -EBUSY;
+	if (sk->sk_state & PPPOX_CONNECTED)
+		goto end;
+
+	/* We don't support rebinding anyway */
+	error = -EALREADY;
+	if (sk->sk_user_data)
+		goto end; /* socket is already attached */
+
+	/* Don't bind if s_tunnel is 0 */
+	error = -EINVAL;
+	if (sp->pppol2tp.s_tunnel == 0)
+		goto end;
+
+	/* Special case: create tunnel context if s_session and d_session
+	 * are both 0. Otherwise look up tunnel using supplied tunnel id.
+	 */
+	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
+		error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel);
+		if (error < 0)
+			goto end;
+	} else {
+		tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+
+		/* Error if we can't find the tunnel */
+		error = -ENOENT;
+		if (tunnel == NULL)
+			goto end;
+
+		/* Error if socket is not prepped */
+		if (tunnel->sock == NULL)
+			goto end;
+	}
+
+	if (tunnel->recv_payload_hook == NULL)
+		tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
+
+	/* Check that this session doesn't already exist */
+	error = -EEXIST;
+	session = l2tp_session_find(tunnel, sp->pppol2tp.s_session);
+	if (session != NULL)
+		goto end;
+
+	/* Default MTU must allow space for UDP/L2TP/PPP headers. */
+	cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	cfg.hdr_len = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+	cfg.debug = tunnel->debug;
+
+	/* Allocate and initialize a new session context. */
+	error = -ENOMEM;
+	session = l2tp_session_create(sizeof(struct pppol2tp_session),
+				      tunnel, sp->pppol2tp.s_session,
+				      sp->pppol2tp.d_session, &cfg);
+	if (session == NULL)
+		goto end;
+
+	ps = l2tp_session_priv(session);
+	ps->owner	     = current->pid;
+	ps->sock	     = sk;
+	ps->tunnel_sock = tunnel->sock;
+
+	session->recv_skb	= pppol2tp_recv;
+	session->session_close	= pppol2tp_session_close;
+
+	/* We need to know each time a skb is dropped from the reorder queue. */
+	session->ref = pppol2tp_session_sock_hold;
+	session->deref = pppol2tp_session_sock_put;
+
+	/* If PMTU discovery was enabled, use the MTU that was discovered */
+	dst = sk_dst_get(sk);
+	if (dst != NULL) {
+		u32 pmtu = dst_mtu(dst);
+		if (pmtu != 0)
+			session->mtu = session->mru = pmtu -
+				PPPOL2TP_HEADER_OVERHEAD;
+		dst_release(dst);
+	}
+
+	/* Special case: if source & dest session_id == 0x0000, this socket
+	 * is being created to manage the tunnel. Just set up the internal
+	 * context for use by ioctl() and sockopt() handlers.
+	 */
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		error = 0;
+		goto out_no_ppp;
+	}
+
+	/* The only header we need to worry about is the L2TP
+	 * header. This size is different depending on whether
+	 * sequence numbers are enabled for the data channel.
+	 */
+	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+
+	po->chan.private = sk;
+	po->chan.ops	 = &pppol2tp_chan_ops;
+	po->chan.mtu	 = session->mtu;
+
+	error = ppp_register_net_channel(sock_net(sk), &po->chan);
+	if (error) {
+		/* Undo l2tp_session_create(): unhash and free the session */
+		l2tp_session_dec_refcount(session);
+		goto end;
+	}
+
+out_no_ppp:
+	/* This is how we get the session context from the socket. */
+	sk->sk_user_data = session;
+	sk->sk_state = PPPOX_CONNECTED;
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+
+end:
+	release_sock(sk);
+
+	return error;
+}
+
+/* getname() support: report the ids of a connected socket's tunnel and
+ * session plus the peer address held by the tunnel socket. Returns 0,
+ * -ENOTCONN if sk is not connected, or -EBADF if a lookup fails.
+ */
+static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
+			    int *usockaddr_len, int peer)
+{
+	int len = sizeof(struct sockaddr_pppol2tp);
+	struct sockaddr_pppol2tp sp;
+	int error = 0;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct sock *sk = sock->sk;
+	struct inet_sock *inet;
+	struct pppol2tp_session *pls;
+
+	error = -ENOTCONN;
+	if (sk == NULL)
+		goto end;
+	if (sk->sk_state != PPPOX_CONNECTED)
+		goto end;
+
+	error = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	pls = l2tp_session_priv(session);
+	tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock);
+	if (tunnel == NULL)	/* error is still -EBADF and must be returned */
+		goto end_put_sess;
+
+	memset(&sp, 0, len);
+	sp.sa_family	= AF_PPPOX;
+	sp.sa_protocol	= PX_PROTO_OL2TP;
+	sp.pppol2tp.fd  = tunnel->fd;
+	sp.pppol2tp.pid = pls->owner;
+	sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+	sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+	sp.pppol2tp.s_session = session->session_id;
+	sp.pppol2tp.d_session = session->peer_session_id;
+	inet = inet_sk(pls->tunnel_sock); /* sk is a PPPoX sock, not inet */
+	sp.pppol2tp.addr.sin_family = AF_INET;
+	sp.pppol2tp.addr.sin_port = inet->inet_dport;
+	sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+
+	memcpy(uaddr, &sp, len);
+
+	*usockaddr_len = len;
+	error = 0;
+
+	sock_put(pls->tunnel_sock);
+end_put_sess:
+	sock_put(sk);
+
+end:
+	return error;
+}
+
+/****************************************************************************
+ * ioctl() handlers.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. However, in order to control kernel tunnel features, we allow
+ * userspace to create a special "tunnel" PPPoX socket which is used for
+ * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
+ * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
+ * calls.
+ ****************************************************************************/
+
+static void pppol2tp_copy_stats(struct pppol2tp_ioc_stats *dest,
+				struct l2tp_stats *stats)
+{
+	dest->rx_packets = stats->rx_packets;	/* receive-side counters */
+	dest->rx_bytes = stats->rx_bytes;
+	dest->rx_errors = stats->rx_errors;
+	dest->rx_oos_packets = stats->rx_oos_packets;
+	dest->rx_seq_discards = stats->rx_seq_discards;
+	dest->tx_packets = stats->tx_packets;	/* transmit-side counters */
+	dest->tx_bytes = stats->tx_bytes;
+	dest->tx_errors = stats->tx_errors;
+}
+
+/* Session ioctl helper: MTU, MRU and flags get/set plus session stats.
+ * Returns 0, -ENXIO (not connected), -EFAULT (bad user memory) or -ENOSYS.
+ */
+static int pppol2tp_session_ioctl(struct l2tp_session *session,
+				  unsigned int cmd, unsigned long arg)
+{
+	struct ifreq ifr;
+	int err = 0;
+	struct sock *sk;
+	int val = (int) arg;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct pppol2tp_ioc_stats stats;
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
+	       session->name, cmd, arg);
+
+	sk = ps->sock;
+	sock_hold(sk);
+
+	switch (cmd) {
+	case SIOCGIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+		ifr.ifr_mtu = session->mtu;
+		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case SIOCSIFMTU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
+			break;
+
+		session->mtu = ifr.ifr_mtu;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mtu=%d\n", session->name, session->mtu);
+		err = 0;
+		break;
+
+	case PPPIOCGMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (put_user(session->mru, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCSMRU:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+
+		session->mru = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set mru=%d\n", session->name, session->mru);
+		err = 0;
+		break;
+
+	case PPPIOCGFLAGS:
+		err = -EFAULT;
+		if (put_user(ps->flags, (int __user *) arg))
+			break;
+
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get flags=%d\n", session->name, ps->flags);
+		err = 0;
+		break;
+
+	case PPPIOCSFLAGS:
+		err = -EFAULT;
+		if (get_user(val, (int __user *) arg))
+			break;
+		ps->flags = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set flags=%d\n", session->name, ps->flags);
+		err = 0;
+		break;
+
+	case PPPIOCGL2TPSTATS:
+		err = -ENXIO;
+		if (!(sk->sk_state & PPPOX_CONNECTED))
+			break;
+		memset(&stats, 0, sizeof(stats));
+		stats.tunnel_id = tunnel->tunnel_id;
+		stats.session_id = session->session_id;
+		pppol2tp_copy_stats(&stats, &session->stats);
+		err = -EFAULT;	/* a failed copy-out must not report -ENXIO */
+		if (copy_to_user((void __user *) arg, &stats, sizeof(stats)))
+			break;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get L2TP stats\n", session->name);
+		err = 0;
+		break;
+
+	default:
+		err = -ENOSYS;
+		break;
+	}
+
+	sock_put(sk);
+
+	return err;
+}
+
+/* Tunnel ioctl helper.
+ *
+ * Only PPPIOCGL2TPSTATS is handled. When the ioctl data names a non-zero
+ * session_id the request is passed on to the session ioctl handler, which
+ * lets an application read session stats through a tunnel socket.
+ */
+static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel,
+				 unsigned int cmd, unsigned long arg)
+{
+	struct pppol2tp_ioc_stats ioc;
+	struct l2tp_session *session;
+	struct sock *tsk;
+	int rc;
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
+	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n",
+	       tunnel->name, cmd, arg);
+
+	tsk = tunnel->sock;
+	sock_hold(tsk);
+
+	if (cmd != PPPIOCGL2TPSTATS) {
+		rc = -ENOSYS;
+		goto out;
+	}
+
+	if (!(tsk->sk_state & PPPOX_CONNECTED)) {
+		rc = -ENXIO;
+		goto out;
+	}
+
+	if (copy_from_user(&ioc, (void __user *) arg, sizeof(ioc))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	if (ioc.session_id != 0) {
+		/* resend to session ioctl handler */
+		session = l2tp_session_find(tunnel, ioc.session_id);
+		if (session == NULL)
+			rc = -EBADR;
+		else
+			rc = pppol2tp_session_ioctl(session, cmd, arg);
+		goto out;
+	}
+
+#ifdef CONFIG_XFRM
+	ioc.using_ipsec = (tsk->sk_policy[0] || tsk->sk_policy[1]) ? 1 : 0;
+#endif
+	pppol2tp_copy_stats(&ioc, &tunnel->stats);
+	if (copy_to_user((void __user *) arg, &ioc, sizeof(ioc))) {
+		rc = -EFAULT;
+		goto out;
+	}
+
+	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: get L2TP stats\n", tunnel->name);
+	rc = 0;
+
+out:
+	sock_put(tsk);
+	return rc;
+}
+
+/* Main ioctl() entry point.
+ * A socket whose session has both session ids zero is the special tunnel
+ * management socket and is served by the tunnel helper; every other socket
+ * is served by the session helper.
+ */
+static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
+			  unsigned long arg)
+{
+	struct sock *sk = sock->sk;
+	struct pppol2tp_session *priv;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	int rc;
+
+	if (sk == NULL)
+		return 0;
+
+	if (sock_flag(sk, SOCK_DEAD) != 0)
+		return -EBADF;
+
+	/* Needs an attached session and a connected or bound socket */
+	if (sk->sk_user_data == NULL)
+		return -ENOTCONN;
+	if ((sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) == 0)
+		return -ENOTCONN;
+
+	/* Get session context from the socket */
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		return -EBADF;
+
+	priv = l2tp_session_priv(session);
+	if ((session->session_id != 0) || (session->peer_session_id != 0)) {
+		/* Regular session socket */
+		rc = pppol2tp_session_ioctl(session, cmd, arg);
+		goto put_sk;
+	}
+
+	/* Both ids are zero: treat ioctl as a tunnel ioctl */
+	tunnel = l2tp_sock_to_tunnel(priv->tunnel_sock);
+	if (tunnel == NULL) {
+		rc = -EBADF;
+		goto put_sk;
+	}
+
+	rc = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
+	sock_put(priv->tunnel_sock);
+
+put_sk:
+	sock_put(sk);
+
+	return rc;
+}
+
+/*****************************************************************************
+ * setsockopt() / getsockopt() support.
+ *
+ * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
+ * sockets. In order to control kernel tunnel features, we allow userspace to
+ * create a special "tunnel" PPPoX socket which is used for control only.
+ * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
+ * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
+ *****************************************************************************/
+
+/* Tunnel setsockopt() helper.
+ *
+ * The only option a tunnel socket accepts is PPPOL2TP_SO_DEBUG, which
+ * replaces the tunnel's debug mask. Anything else gets -ENOPROTOOPT.
+ * sk is not used here.
+ */
+static int pppol2tp_tunnel_setsockopt(struct sock *sk,
+				      struct l2tp_tunnel *tunnel,
+				      int optname, int val)
+{
+	int rc = -ENOPROTOOPT;
+
+	if (optname == PPPOL2TP_SO_DEBUG) {
+		/* Store first: PRINTK below is handed the new mask */
+		tunnel->debug = val;
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/* Session setsockopt helper.
+ *
+ * RECVSEQ, SENDSEQ and LNSMODE are on/off options: val must be 0 or 1
+ * (else -EINVAL) and "on" is stored as -1. DEBUG and REORDERTO take val
+ * as given, the latter in milliseconds. Unknown options get -ENOPROTOOPT.
+ */
+static int pppol2tp_session_setsockopt(struct sock *sk,
+				       struct l2tp_session *session,
+				       int optname, int val)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct pppox_sock *po;
+	int on = val ? -1 : 0;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+	case PPPOL2TP_SO_SENDSEQ:
+	case PPPOL2TP_SO_LNSMODE:
+		/* on/off options take nothing but 0 or 1 */
+		if ((val != 0) && (val != 1))
+			return -EINVAL;
+		break;
+	}
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		session->recv_seq = on;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set recv_seq=%d\n", session->name, session->recv_seq);
+		break;
+
+	case PPPOL2TP_SO_SENDSEQ:
+		session->send_seq = on;
+		/* The channel header length follows the sequencing choice */
+		po = pppox_sk(ps->sock);
+		po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
+			PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set send_seq=%d\n", session->name, session->send_seq);
+		break;
+
+	case PPPOL2TP_SO_LNSMODE:
+		session->lns_mode = on;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set lns_mode=%d\n", session->name, session->lns_mode);
+		break;
+
+	case PPPOL2TP_SO_DEBUG:
+		session->debug = val;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set debug=%x\n", session->name, session->debug);
+		break;
+
+	case PPPOL2TP_SO_REORDERTO:
+		session->reorder_timeout = msecs_to_jiffies(val);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: set reorder_timeout=%d\n", session->name, session->reorder_timeout);
+		break;
+
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	return 0;
+}
+
+/* Main setsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session setsockopt
+ * handler, according to whether the PPPoL2TP socket is for a regular
+ * session or the special tunnel type. Options at other levels are passed
+ * to udp_prot.setsockopt().
+ */
+static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
+			       char __user *optval, unsigned int optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	struct pppol2tp_session *ps;
+	int val;
+	int err;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
+
+	if (optlen < sizeof(int))
+		return -EINVAL;
+
+	if (get_user(val, (int __user *)optval))
+		return -EFAULT;
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get session context from the socket */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel.
+	 * Either way the helper's result is returned to the caller as-is.
+	 */
+	ps = l2tp_session_priv(session);
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		err = -EBADF;
+		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
+		sock_put(ps->tunnel_sock);
+	} else
+		err = pppol2tp_session_setsockopt(sk, session, optname, val);
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/* Tunnel getsockopt helper.
+ *
+ * Only PPPOL2TP_SO_DEBUG can be read from a tunnel socket; the tunnel's
+ * debug mask is stored in *val. Other options get -ENOPROTOOPT and leave
+ * *val untouched. sk is not used here.
+ */
+static int pppol2tp_tunnel_getsockopt(struct sock *sk,
+				      struct l2tp_tunnel *tunnel,
+				      int optname, int *val)
+{
+	int rc = -ENOPROTOOPT;
+
+	if (optname == PPPOL2TP_SO_DEBUG) {
+		/* hand back the current mask */
+		*val = tunnel->debug;
+		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
+		rc = 0;
+	}
+
+	return rc;
+}
+
+/* Session getsockopt helper.
+ *
+ * Stores the requested session setting in *val and returns 0. REORDERTO
+ * is reported in milliseconds. Unknown options get -ENOPROTOOPT and leave
+ * *val untouched.
+ */
+static int pppol2tp_session_getsockopt(struct sock *sk,
+				       struct l2tp_session *session,
+				       int optname, int *val)
+{
+	int cur;
+
+	switch (optname) {
+	case PPPOL2TP_SO_RECVSEQ:
+		cur = session->recv_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get recv_seq=%d\n", session->name, cur);
+		break;
+	case PPPOL2TP_SO_SENDSEQ:
+		cur = session->send_seq;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get send_seq=%d\n", session->name, cur);
+		break;
+	case PPPOL2TP_SO_LNSMODE:
+		cur = session->lns_mode;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get lns_mode=%d\n", session->name, cur);
+		break;
+	case PPPOL2TP_SO_DEBUG:
+		cur = session->debug;
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get debug=%d\n", session->name, cur);
+		break;
+	case PPPOL2TP_SO_REORDERTO:
+		cur = (int) jiffies_to_msecs(session->reorder_timeout);
+		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+		       "%s: get reorder_timeout=%d\n", session->name, cur);
+		break;
+	default:
+		return -ENOPROTOOPT;
+	}
+
+	*val = cur;
+	return 0;
+}
+
+/* Main getsockopt() entry point.
+ * Does API checks, then calls either the tunnel or session getsockopt
+ * handler, according to whether the PPPoX socket is for a regular session
+ * or the special tunnel type.
+ */
+static int pppol2tp_getsockopt(struct socket *sock, int level,
+			       int optname, char __user *optval, int __user *optlen)
+{
+	struct sock *sk = sock->sk;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel;
+	int val, len, err;
+	struct pppol2tp_session *ps;
+
+	if (level != SOL_PPPOL2TP)
+		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
+
+	if (get_user(len, (int __user *) optlen))
+		return -EFAULT;
+
+	/* Must be tested before min_t() folds len through unsigned int */
+	if (len < 0)
+		return -EINVAL;
+	len = min_t(unsigned int, len, sizeof(int));
+
+	err = -ENOTCONN;
+	if (sk->sk_user_data == NULL)
+		goto end;
+
+	/* Get the session context */
+	err = -EBADF;
+	session = pppol2tp_sock_to_session(sk);
+	if (session == NULL)
+		goto end;
+
+	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
+	ps = l2tp_session_priv(session);
+	if ((session->session_id == 0) &&
+	    (session->peer_session_id == 0)) {
+		err = -EBADF;
+		tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock);
+		if (tunnel == NULL)
+			goto end_put_sess;
+
+		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
+		sock_put(ps->tunnel_sock);
+	} else
+		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
+
+	/* On helper failure val is unset: return the error, copy nothing */
+	if (err)
+		goto end_put_sess;
+	err = -EFAULT;
+	if (put_user(len, (int __user *) optlen) ||
+	    copy_to_user((void __user *) optval, &val, len))
+		goto end_put_sess;
+
+	err = 0;
+
+end_put_sess:
+	sock_put(sk);
+end:
+	return err;
+}
+
+/*****************************************************************************
+ * /proc filesystem for debug
+ *****************************************************************************/
+
+static unsigned int pppol2tp_net_id;	/* referenced by pppol2tp_net_ops.id */
+
+#ifdef CONFIG_PROC_FS
+
+struct pppol2tp_seq_data {
+	struct seq_net_private p;
+	int tunnel_idx;			/* index used for the next tunnel lookup */
+	int session_idx;		/* index of session within current tunnel */
+	struct l2tp_tunnel *tunnel;	/* NULL until the first tunnel lookup */
+	struct l2tp_session *session;	/* NULL means the cursor is on a tunnel */
+};
+
+static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd)
+{
+	/* Fetch the tunnel at the current index, then step the index */
+	pd->tunnel = l2tp_tunnel_find_nth(net, pd->tunnel_idx++);
+}
+
+static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd)
+{
+	pd->session = l2tp_session_find_nth(pd->tunnel, pd->session_idx++);
+	if (pd->session != NULL)
+		return;
+	/* No more sessions in this tunnel: restart at the next tunnel */
+	pd->session_idx = 0;
+	pppol2tp_next_tunnel(net, pd);
+}
+
+static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
+{
+	struct pppol2tp_seq_data *pd;
+	struct net *net;
+
+	/* Offset 0 yields the token that makes show() print the header */
+	if (*offs == 0)
+		return SEQ_START_TOKEN;
+
+	BUG_ON(m->private == NULL);
+	pd = m->private;
+	net = seq_file_net(m);
+
+	/* Step the cursor: first tunnel, else next session of the tunnel */
+	if (pd->tunnel != NULL)
+		pppol2tp_next_session(net, pd);
+	else
+		pppol2tp_next_tunnel(net, pd);
+
+	/* NULL tunnel and session indicates end of list */
+	if ((pd->tunnel == NULL) && (pd->session == NULL))
+		return NULL;
+
+	return pd;
+}
+
+static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
+{
+	*pos += 1;	/* pppol2tp_seq_start() does the actual stepping */
+	return NULL;
+}
+
+static void pppol2tp_seq_stop(struct seq_file *p, void *v)
+{
+	/* nothing to do: this callback is intentionally empty */
+}
+
+static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
+{
+	struct l2tp_tunnel *tunnel = v;
+	struct l2tp_stats *st = &tunnel->stats;
+	/* 'Y' when the tunnel socket's user data points back at the tunnel */
+	char ud_ok = (tunnel == tunnel->sock->sk_user_data) ? 'Y' : 'N';
+
+	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
+		   tunnel->name, ud_ok, atomic_read(&tunnel->ref_count) - 1);
+	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
+		   tunnel->debug, (unsigned long long)st->tx_packets,
+		   (unsigned long long)st->tx_bytes,
+		   (unsigned long long)st->tx_errors,
+		   (unsigned long long)st->rx_packets,
+		   (unsigned long long)st->rx_bytes,
+		   (unsigned long long)st->rx_errors);
+}
+
+static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
+{
+	struct l2tp_session *session = v;
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct l2tp_stats *st = &session->stats;
+	struct sock *psk = ps->sock;
+	struct inet_sock *inet;
+	u32 ip = 0;
+	u16 port = 0;
+
+	/* Source address/port come from the tunnel socket when it is set */
+	if (tunnel->sock) {
+		inet = inet_sk(tunnel->sock);
+		ip = ntohl(inet->inet_saddr);
+		port = ntohs(inet->inet_sport);
+	}
+
+	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
+		   "%04X/%04X %d %c\n",
+		   session->name, ip, port,
+		   tunnel->tunnel_id, session->session_id,
+		   tunnel->peer_tunnel_id, session->peer_session_id,
+		   psk->sk_state,
+		   (session == psk->sk_user_data) ? 'Y' : 'N');
+	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
+		   session->mtu, session->mru,
+		   session->recv_seq ? 'R' : '-', session->send_seq ? 'S' : '-',
+		   session->lns_mode ? "LNS" : "LAC",
+		   session->debug,
+		   jiffies_to_msecs(session->reorder_timeout));
+	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
+		   session->nr, session->ns,
+		   (unsigned long long)st->tx_packets,
+		   (unsigned long long)st->tx_bytes,
+		   (unsigned long long)st->tx_errors,
+		   (unsigned long long)st->rx_packets,
+		   (unsigned long long)st->rx_bytes,
+		   (unsigned long long)st->rx_errors);
+}
+
+static int pppol2tp_seq_show(struct seq_file *m, void *v)
+{
+	struct pppol2tp_seq_data *pd = v;
+
+	if (v != SEQ_START_TOKEN) {
+		/* Show the tunnel or session context: a NULL session means
+		 * the cursor is sitting on a tunnel.
+		 */
+		if (pd->session != NULL)
+			pppol2tp_seq_session_show(m, pd->session);
+		else
+			pppol2tp_seq_tunnel_show(m, pd->tunnel);
+		return 0;
+	}
+
+	/* display header on line 1 */
+	seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
+	seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
+	seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+	seq_puts(m, "  SESSION name, addr/port src-tid/sid "
+		 "dest-tid/sid state user-data-ok\n");
+	seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
+	seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
+
+	return 0;
+}
+
+static const struct seq_operations pppol2tp_seq_ops = {
+	.start		= pppol2tp_seq_start,	/* also advances the cursor */
+	.next		= pppol2tp_seq_next,	/* always returns NULL */
+	.stop		= pppol2tp_seq_stop,	/* empty */
+	.show		= pppol2tp_seq_show,	/* header, tunnel or session */
+};
+
+/* open() handler for our /proc file: seq_open_net() is asked for private
+ * data sized for struct pppol2tp_seq_data, the iterator's cursor.
+ */
+static int pppol2tp_proc_open(struct inode *inode, struct file *file)
+{
+	const int priv_size = sizeof(struct pppol2tp_seq_data);
+
+	return seq_open_net(inode, file, &pppol2tp_seq_ops, priv_size);
+}
+
+static const struct file_operations pppol2tp_proc_fops = {
+	.owner		= THIS_MODULE,
+	.open		= pppol2tp_proc_open,	/* sets up the seq_file */
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= seq_release_net,	/* open uses seq_open_net() */
+};
+
+#endif /* CONFIG_PROC_FS */
+
+/*****************************************************************************
+ * Network namespace
+ *****************************************************************************/
+
+/* Per-namespace init: create the read-only "pppol2tp" proc entry.
+ * Returns 0, or -ENOMEM if the entry could not be created.
+ */
+static __net_init int pppol2tp_init_net(struct net *net)
+{
+	struct proc_dir_entry *pde;
+
+	pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO,
+				   &pppol2tp_proc_fops);
+	if (pde == NULL)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static __net_exit void pppol2tp_exit_net(struct net *net)
+{
+	proc_net_remove(net, "pppol2tp");	/* entry made by pppol2tp_init_net() */
+}
+
+static struct pernet_operations pppol2tp_net_ops = {
+	.init = pppol2tp_init_net,	/* creates the "pppol2tp" proc entry */
+	.exit = pppol2tp_exit_net,	/* removes it again */
+	.id   = &pppol2tp_net_id,
+};
+
+/*****************************************************************************
+ * Init and cleanup
+ *****************************************************************************/
+
+static const struct proto_ops pppol2tp_ops = {
+	.family		= AF_PPPOX,
+	.owner		= THIS_MODULE,
+	.release	= pppol2tp_release,
+	.bind		= sock_no_bind,		/* sessions attach via connect() */
+	.connect	= pppol2tp_connect,
+	.socketpair	= sock_no_socketpair,
+	.accept		= sock_no_accept,
+	.getname	= pppol2tp_getname,
+	.poll		= datagram_poll,
+	.listen		= sock_no_listen,
+	.shutdown	= sock_no_shutdown,
+	.setsockopt	= pppol2tp_setsockopt,
+	.getsockopt	= pppol2tp_getsockopt,
+	.sendmsg	= pppol2tp_sendmsg,
+	.recvmsg	= pppol2tp_recvmsg,
+	.mmap		= sock_no_mmap,
+	.ioctl		= pppox_ioctl,		/* pppol2tp_ioctl is in pppol2tp_proto */
+};
+
+static struct pppox_proto pppol2tp_proto = {
+	.create		= pppol2tp_create,
+	.ioctl		= pppol2tp_ioctl	/* tunnel/session ioctl dispatcher */
+};
+
+static int __init pppol2tp_init(void)
+{
+	int rc;
+
+	rc = register_pernet_device(&pppol2tp_net_ops);
+	if (rc)
+		return rc;
+
+	rc = proto_register(&pppol2tp_sk_proto, 0);
+	if (rc)
+		goto undo_pernet;
+
+	rc = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
+	if (rc)
+		goto undo_proto;
+
+	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
+	       PPPOL2TP_DRV_VERSION);
+	return 0;
+
+	/* Unwind in reverse order of registration */
+undo_proto:
+	proto_unregister(&pppol2tp_sk_proto);
+undo_pernet:
+	unregister_pernet_device(&pppol2tp_net_ops);
+	return rc;
+}
+
+static void __exit pppol2tp_exit(void)
+{
+	unregister_pppox_proto(PX_PROTO_OL2TP);	/* reverse of pppol2tp_init() */
+	proto_unregister(&pppol2tp_sk_proto);
+	unregister_pernet_device(&pppol2tp_net_ops);
+}
+
+module_init(pppol2tp_init);
+module_exit(pppol2tp_exit);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("PPP over L2TP over UDP");
+MODULE_LICENSE("GPL");
+MODULE_VERSION(PPPOL2TP_DRV_VERSION);
diff --git a/net/l2tp/pppol2tp.c b/net/l2tp/pppol2tp.c
deleted file mode 100644
index 449a9825200d..000000000000
--- a/net/l2tp/pppol2tp.c
+++ /dev/null
@@ -1,2680 +0,0 @@
-/*****************************************************************************
- * Linux PPP over L2TP (PPPoX/PPPoL2TP) Sockets
- *
- * PPPoX    --- Generic PPP encapsulation socket family
- * PPPoL2TP --- PPP over L2TP (RFC 2661)
- *
- * Version:	1.0.0
- *
- * Authors:	Martijn van Oosterhout <kleptog@svana.org>
- *		James Chapman (jchapman@katalix.com)
- * Contributors:
- *		Michal Ostrowski <mostrows@speakeasy.net>
- *		Arnaldo Carvalho de Melo <acme@xconectiva.com.br>
- *		David S. Miller (davem@redhat.com)
- *
- * License:
- *		This program is free software; you can redistribute it and/or
- *		modify it under the terms of the GNU General Public License
- *		as published by the Free Software Foundation; either version
- *		2 of the License, or (at your option) any later version.
- *
- */
-
-/* This driver handles only L2TP data frames; control frames are handled by a
- * userspace application.
- *
- * To send data in an L2TP session, userspace opens a PPPoL2TP socket and
- * attaches it to a bound UDP socket with local tunnel_id / session_id and
- * peer tunnel_id / session_id set. Data can then be sent or received using
- * regular socket sendmsg() / recvmsg() calls. Kernel parameters of the socket
- * can be read or modified using ioctl() or [gs]etsockopt() calls.
- *
- * When a PPPoL2TP socket is connected with local and peer session_id values
- * zero, the socket is treated as a special tunnel management socket.
- *
- * Here's example userspace code to create a socket for sending/receiving data
- * over an L2TP session:-
- *
- *	struct sockaddr_pppol2tp sax;
- *	int fd;
- *	int session_fd;
- *
- *	fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
- *
- *	sax.sa_family = AF_PPPOX;
- *	sax.sa_protocol = PX_PROTO_OL2TP;
- *	sax.pppol2tp.fd = tunnel_fd;	// bound UDP socket
- *	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
- *	sax.pppol2tp.addr.sin_port = addr->sin_port;
- *	sax.pppol2tp.addr.sin_family = AF_INET;
- *	sax.pppol2tp.s_tunnel  = tunnel_id;
- *	sax.pppol2tp.s_session = session_id;
- *	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
- *	sax.pppol2tp.d_session = peer_session_id;
- *
- *	session_fd = connect(fd, (struct sockaddr *)&sax, sizeof(sax));
- *
- * A pppd plugin that allows PPP traffic to be carried over L2TP using
- * this driver is available from the OpenL2TP project at
- * http://openl2tp.sourceforge.net.
- */
-
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/list.h>
-#include <asm/uaccess.h>
-
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/kthread.h>
-#include <linux/sched.h>
-#include <linux/slab.h>
-#include <linux/errno.h>
-#include <linux/jiffies.h>
-
-#include <linux/netdevice.h>
-#include <linux/net.h>
-#include <linux/inetdevice.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if_pppox.h>
-#include <linux/if_pppol2tp.h>
-#include <net/sock.h>
-#include <linux/ppp_channel.h>
-#include <linux/ppp_defs.h>
-#include <linux/if_ppp.h>
-#include <linux/file.h>
-#include <linux/hash.h>
-#include <linux/sort.h>
-#include <linux/proc_fs.h>
-#include <linux/nsproxy.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/dst.h>
-#include <net/ip.h>
-#include <net/udp.h>
-#include <net/xfrm.h>
-
-#include <asm/byteorder.h>
-#include <asm/atomic.h>
-
-
-#define PPPOL2TP_DRV_VERSION	"V1.0"
-
-/* L2TP header constants */
-#define L2TP_HDRFLAG_T	   0x8000
-#define L2TP_HDRFLAG_L	   0x4000
-#define L2TP_HDRFLAG_S	   0x0800
-#define L2TP_HDRFLAG_O	   0x0200
-#define L2TP_HDRFLAG_P	   0x0100
-
-#define L2TP_HDR_VER_MASK  0x000F
-#define L2TP_HDR_VER	   0x0002
-
-/* Space for UDP, L2TP and PPP headers */
-#define PPPOL2TP_HEADER_OVERHEAD	40
-
-/* Just some random numbers */
-#define L2TP_TUNNEL_MAGIC	0x42114DDA
-#define L2TP_SESSION_MAGIC	0x0C04EB7D
-
-#define PPPOL2TP_HASH_BITS	4
-#define PPPOL2TP_HASH_SIZE	(1 << PPPOL2TP_HASH_BITS)
-
-/* Default trace flags */
-#define PPPOL2TP_DEFAULT_DEBUG_FLAGS	0
-
-#define PRINTK(_mask, _type, _lvl, _fmt, args...)			\
-	do {								\
-		if ((_mask) & (_type))					\
-			printk(_lvl "PPPOL2TP: " _fmt, ##args);		\
-	} while(0)
-
-/* Number of bytes to build transmit L2TP headers.
- * Unfortunately the size is different depending on whether sequence numbers
- * are enabled.
- */
-#define PPPOL2TP_L2TP_HDR_SIZE_SEQ		10
-#define PPPOL2TP_L2TP_HDR_SIZE_NOSEQ		6
-
-struct pppol2tp_tunnel;
-
-/* Describes a session. It is the sk_user_data field in the PPPoL2TP
- * socket. Contains information to determine incoming packets and transmit
- * outgoing ones.
- */
-struct pppol2tp_session
-{
-	int			magic;		/* should be
-						 * L2TP_SESSION_MAGIC */
-	int			owner;		/* pid that opened the socket */
-
-	struct sock		*sock;		/* Pointer to the session
-						 * PPPoX socket */
-	struct sock		*tunnel_sock;	/* Pointer to the tunnel UDP
-						 * socket */
-
-	struct pppol2tp_addr	tunnel_addr;	/* Description of tunnel */
-
-	struct pppol2tp_tunnel	*tunnel;	/* back pointer to tunnel
-						 * context */
-
-	char			name[20];	/* "sess xxxxx/yyyyy", where
-						 * x=tunnel_id, y=session_id */
-	int			mtu;
-	int			mru;
-	int			flags;		/* accessed by PPPIOCGFLAGS.
-						 * Unused. */
-	unsigned		recv_seq:1;	/* expect receive packets with
-						 * sequence numbers? */
-	unsigned		send_seq:1;	/* send packets with sequence
-						 * numbers? */
-	unsigned		lns_mode:1;	/* behave as LNS? LAC enables
-						 * sequence numbers under
-						 * control of LNS. */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	int			reorder_timeout; /* configured reorder timeout
-						  * (in jiffies) */
-	u16			nr;		/* session NR state (receive) */
-	u16			ns;		/* session NR state (send) */
-	struct sk_buff_head	reorder_q;	/* receive reorder queue */
-	struct pppol2tp_ioc_stats stats;
-	struct hlist_node	hlist;		/* Hash list node */
-};
-
-/* The sk_user_data field of the tunnel's UDP socket. It contains info to track
- * all the associated sessions so incoming packets can be sorted out
- */
-struct pppol2tp_tunnel
-{
-	int			magic;		/* Should be L2TP_TUNNEL_MAGIC */
-	rwlock_t		hlist_lock;	/* protect session_hlist */
-	struct hlist_head	session_hlist[PPPOL2TP_HASH_SIZE];
-						/* hashed list of sessions,
-						 * hashed by id */
-	int			debug;		/* bitmask of debug message
-						 * categories */
-	char			name[12];	/* "tunl xxxxx" */
-	struct pppol2tp_ioc_stats stats;
-
-	void (*old_sk_destruct)(struct sock *);
-
-	struct sock		*sock;		/* Parent socket */
-	struct list_head	list;		/* Keep a list of all open
-						 * prepared sockets */
-	struct net		*pppol2tp_net;	/* the net we belong to */
-
-	atomic_t		ref_count;
-};
-
-/* Private data stored for received packets in the skb.
- */
-struct pppol2tp_skb_cb {
-	u16			ns;
-	u16			nr;
-	u16			has_seq;
-	u16			length;
-	unsigned long		expires;
-};
-
-#define PPPOL2TP_SKB_CB(skb)	((struct pppol2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)])
-
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb);
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel);
-
-static atomic_t pppol2tp_tunnel_count;
-static atomic_t pppol2tp_session_count;
-static struct ppp_channel_ops pppol2tp_chan_ops = { pppol2tp_xmit , NULL };
-static const struct proto_ops pppol2tp_ops;
-
-/* per-net private data for this module */
-static int pppol2tp_net_id __read_mostly;
-struct pppol2tp_net {
-	struct list_head pppol2tp_tunnel_list;
-	rwlock_t pppol2tp_tunnel_list_lock;
-};
-
-static inline struct pppol2tp_net *pppol2tp_pernet(struct net *net)
-{
-	BUG_ON(!net);
-
-	return net_generic(net, pppol2tp_net_id);
-}
-
-/* Helpers to obtain tunnel/session contexts from sockets.
- */
-static inline struct pppol2tp_session *pppol2tp_sock_to_session(struct sock *sk)
-{
-	struct pppol2tp_session *session;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	session = (struct pppol2tp_session *)(sk->sk_user_data);
-	if (session == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-out:
-	return session;
-}
-
-static inline struct pppol2tp_tunnel *pppol2tp_sock_to_tunnel(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	if (sk == NULL)
-		return NULL;
-
-	sock_hold(sk);
-	tunnel = (struct pppol2tp_tunnel *)(sk->sk_user_data);
-	if (tunnel == NULL) {
-		sock_put(sk);
-		goto out;
-	}
-
-	BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-out:
-	return tunnel;
-}
-
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void pppol2tp_tunnel_inc_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	atomic_inc(&tunnel->ref_count);
-}
-
-static inline void pppol2tp_tunnel_dec_refcount(struct pppol2tp_tunnel *tunnel)
-{
-	if (atomic_dec_and_test(&tunnel->ref_count))
-		pppol2tp_tunnel_free(tunnel);
-}
-
-/* Session hash list.
- * The session_id SHOULD be random according to RFC2661, but several
- * L2TP implementations (Cisco and Microsoft) use incrementing
- * session_ids.  So we do a real hash on the session_id, rather than a
- * simple bitmask.
- */
-static inline struct hlist_head *
-pppol2tp_session_id_hash(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	unsigned long hash_val = (unsigned long) session_id;
-	return &tunnel->session_hlist[hash_long(hash_val, PPPOL2TP_HASH_BITS)];
-}
-
-/* Lookup a session by id
- */
-static struct pppol2tp_session *
-pppol2tp_session_find(struct pppol2tp_tunnel *tunnel, u16 session_id)
-{
-	struct hlist_head *session_list =
-		pppol2tp_session_id_hash(tunnel, session_id);
-	struct pppol2tp_session *session;
-	struct hlist_node *walk;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	hlist_for_each_entry(session, walk, session_list, hlist) {
-		if (session->tunnel_addr.s_session == session_id) {
-			read_unlock_bh(&tunnel->hlist_lock);
-			return session;
-		}
-	}
-	read_unlock_bh(&tunnel->hlist_lock);
-
-	return NULL;
-}
-
-/* Lookup a tunnel by id
- */
-static struct pppol2tp_tunnel *pppol2tp_tunnel_find(struct net *net, u16 tunnel_id)
-{
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_for_each_entry(tunnel, &pn->pppol2tp_tunnel_list, list) {
-		if (tunnel->stats.tunnel_id == tunnel_id) {
-			read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-			return tunnel;
-		}
-	}
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return NULL;
-}
-
-/*****************************************************************************
- * Receive data handling
- *****************************************************************************/
-
-/* Queue a skb in order. We come here only if the skb has an L2TP sequence
- * number.
- */
-static void pppol2tp_recv_queue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct sk_buff *skbp;
-	struct sk_buff *tmp;
-	u16 ns = PPPOL2TP_SKB_CB(skb)->ns;
-
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skbp, tmp) {
-		if (PPPOL2TP_SKB_CB(skbp)->ns > ns) {
-			__skb_queue_before(&session->reorder_q, skbp, skb);
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: pkt %hu, inserted before %hu, reorder_q len=%d\n",
-			       session->name, ns, PPPOL2TP_SKB_CB(skbp)->ns,
-			       skb_queue_len(&session->reorder_q));
-			session->stats.rx_oos_packets++;
-			goto out;
-		}
-	}
-
-	__skb_queue_tail(&session->reorder_q, skb);
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-/* Dequeue a single skb.
- */
-static void pppol2tp_recv_dequeue_skb(struct pppol2tp_session *session, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel = session->tunnel;
-	int length = PPPOL2TP_SKB_CB(skb)->length;
-	struct sock *session_sock = NULL;
-
-	/* We're about to requeue the skb, so return resources
-	 * to its current owner (a socket receive buffer).
-	 */
-	skb_orphan(skb);
-
-	tunnel->stats.rx_packets++;
-	tunnel->stats.rx_bytes += length;
-	session->stats.rx_packets++;
-	session->stats.rx_bytes += length;
-
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		/* Bump our Nr */
-		session->nr++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated nr to %hu\n", session->name, session->nr);
-	}
-
-	/* If the socket is bound, send it in to PPP's input queue. Otherwise
-	 * queue it on the session socket.
-	 */
-	session_sock = session->sock;
-	if (session_sock->sk_state & PPPOX_BOUND) {
-		struct pppox_sock *po;
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv %d byte data frame, passing to ppp\n",
-		       session->name, length);
-
-		/* We need to forget all info related to the L2TP packet
-		 * gathered in the skb as we are going to reuse the same
-		 * skb for the inner packet.
-		 * Namely we need to:
-		 * - reset xfrm (IPSec) information as it applies to
-		 *   the outer L2TP packet and not to the inner one
-		 * - release the dst to force a route lookup on the inner
-		 *   IP packet since skb->dst currently points to the dst
-		 *   of the UDP tunnel
-		 * - reset netfilter information as it doesn't apply
-		 *   to the inner packet either
-		 */
-		secpath_reset(skb);
-		skb_dst_drop(skb);
-		nf_reset(skb);
-
-		po = pppox_sk(session_sock);
-		ppp_input(&po->chan, skb);
-	} else {
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: socket not bound\n", session->name);
-
-		/* Not bound. Nothing we can do, so discard. */
-		session->stats.rx_errors++;
-		kfree_skb(skb);
-	}
-
-	sock_put(session->sock);
-}
-
-/* Dequeue skbs from the session's reorder_q, subject to packet order.
- * Skbs that have been in the queue for too long are simply discarded.
- */
-static void pppol2tp_recv_dequeue(struct pppol2tp_session *session)
-{
-	struct sk_buff *skb;
-	struct sk_buff *tmp;
-
-	/* If the pkt at the head of the queue has the nr that we
-	 * expect to send up next, dequeue it and any other
-	 * in-sequence packets behind it.
-	 */
-	spin_lock_bh(&session->reorder_q.lock);
-	skb_queue_walk_safe(&session->reorder_q, skb, tmp) {
-		if (time_after(jiffies, PPPOL2TP_SKB_CB(skb)->expires)) {
-			session->stats.rx_seq_discards++;
-			session->stats.rx_errors++;
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-			       "%s: oos pkt %hu len %d discarded (too old), "
-			       "waiting for %hu, reorder_q_len=%d\n",
-			       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-			       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-			       skb_queue_len(&session->reorder_q));
-			__skb_unlink(skb, &session->reorder_q);
-			kfree_skb(skb);
-			sock_put(session->sock);
-			continue;
-		}
-
-		if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: holding oos pkt %hu len %d, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto out;
-			}
-		}
-		__skb_unlink(skb, &session->reorder_q);
-
-		/* Process the skb. We release the queue lock while we
-		 * do so to let other contexts process the queue.
-		 */
-		spin_unlock_bh(&session->reorder_q.lock);
-		pppol2tp_recv_dequeue_skb(session, skb);
-		spin_lock_bh(&session->reorder_q.lock);
-	}
-
-out:
-	spin_unlock_bh(&session->reorder_q.lock);
-}
-
-static inline int pppol2tp_verify_udp_checksum(struct sock *sk,
-					       struct sk_buff *skb)
-{
-	struct udphdr *uh = udp_hdr(skb);
-	u16 ulen = ntohs(uh->len);
-	struct inet_sock *inet;
-	__wsum psum;
-
-	if (sk->sk_no_check || skb_csum_unnecessary(skb) || !uh->check)
-		return 0;
-
-	inet = inet_sk(sk);
-	psum = csum_tcpudp_nofold(inet->inet_saddr, inet->inet_daddr, ulen,
-				  IPPROTO_UDP, 0);
-
-	if ((skb->ip_summed == CHECKSUM_COMPLETE) &&
-	    !csum_fold(csum_add(psum, skb->csum)))
-		return 0;
-
-	skb->csum = psum;
-
-	return __skb_checksum_complete(skb);
-}
-
-/* Internal receive frame. Do the real work of receiving an L2TP data frame
- * here. The skb is not on a list when we get here.
- * Returns 0 if the packet was a data packet and was successfully passed on.
- * Returns 1 if the packet was not a good data packet and could not be
- * forwarded.  All such packets are passed up to userspace to deal with.
- */
-static int pppol2tp_recv_core(struct sock *sock, struct sk_buff *skb)
-{
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	unsigned char *ptr, *optr;
-	u16 hdrflags;
-	u16 tunnel_id, session_id;
-	int length;
-	int offset;
-
-	tunnel = pppol2tp_sock_to_tunnel(sock);
-	if (tunnel == NULL)
-		goto no_tunnel;
-
-	if (tunnel->sock && pppol2tp_verify_udp_checksum(tunnel->sock, skb))
-		goto discard_bad_csum;
-
-	/* UDP always verifies the packet length. */
-	__skb_pull(skb, sizeof(struct udphdr));
-
-	/* Short packet? */
-	if (!pskb_may_pull(skb, 12)) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: recv short packet (len=%d)\n", tunnel->name, skb->len);
-		goto error;
-	}
-
-	/* Point to L2TP header */
-	optr = ptr = skb->data;
-
-	/* Get L2TP header flags */
-	hdrflags = ntohs(*(__be16*)ptr);
-
-	/* Trace packet contents, if enabled */
-	if (tunnel->debug & PPPOL2TP_MSG_DATA) {
-		length = min(16u, skb->len);
-		if (!pskb_may_pull(skb, length))
-			goto error;
-
-		printk(KERN_DEBUG "%s: recv: ", tunnel->name);
-
-		offset = 0;
-		do {
-			printk(" %02X", ptr[offset]);
-		} while (++offset < length);
-
-		printk("\n");
-	}
-
-	/* Get length of L2TP packet */
-	length = skb->len;
-
-	/* If type is control packet, it is handled by userspace. */
-	if (hdrflags & L2TP_HDRFLAG_T) {
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: recv control packet, len=%d\n", tunnel->name, length);
-		goto error;
-	}
-
-	/* Skip flags */
-	ptr += 2;
-
-	/* If length is present, skip it */
-	if (hdrflags & L2TP_HDRFLAG_L)
-		ptr += 2;
-
-	/* Extract tunnel and session ID */
-	tunnel_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-	session_id = ntohs(*(__be16 *) ptr);
-	ptr += 2;
-
-	/* Find the session context */
-	session = pppol2tp_session_find(tunnel, session_id);
-	if (!session) {
-		/* Not found? Pass to userspace to deal with */
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_INFO,
-		       "%s: no socket found (%hu/%hu). Passing up.\n",
-		       tunnel->name, tunnel_id, session_id);
-		goto error;
-	}
-	sock_hold(session->sock);
-
-	/* The ref count on the socket was increased by the above call since
-	 * we now hold a pointer to the session. Take care to do sock_put()
-	 * when exiting this function from now on...
-	 */
-
-	/* Handle the optional sequence numbers.  If we are the LAC,
-	 * enable/disable sequence numbers under the control of the LNS.  If
-	 * no sequence numbers present but we were expecting them, discard
-	 * frame.
-	 */
-	if (hdrflags & L2TP_HDRFLAG_S) {
-		u16 ns, nr;
-		ns = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-		nr = ntohs(*(__be16 *) ptr);
-		ptr += 2;
-
-		/* Received a packet with sequence numbers. If we're the LNS,
-		 * check if we sre sending sequence numbers and if not,
-		 * configure it so.
-		 */
-		if ((!session->lns_mode) && (!session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to enable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = -1;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->ns = ns;
-		PPPOL2TP_SKB_CB(skb)->nr = nr;
-		PPPOL2TP_SKB_CB(skb)->has_seq = 1;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: recv data ns=%hu, nr=%hu, session nr=%hu\n",
-		       session->name, ns, nr, session->nr);
-	} else {
-		/* No sequence numbers.
-		 * If user has configured mandatory sequence numbers, discard.
-		 */
-		if (session->recv_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* If we're the LAC and we're sending sequence numbers, the
-		 * LNS has requested that we no longer send sequence numbers.
-		 * If we're the LNS and we're sending sequence numbers, the
-		 * LAC is broken. Discard the frame.
-		 */
-		if ((!session->lns_mode) && (session->send_seq)) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_INFO,
-			       "%s: requested to disable seq numbers by LNS\n",
-			       session->name);
-			session->send_seq = 0;
-		} else if (session->send_seq) {
-			PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_WARNING,
-			       "%s: recv data has no seq numbers when required. "
-			       "Discarding\n", session->name);
-			session->stats.rx_seq_discards++;
-			goto discard;
-		}
-
-		/* Store L2TP info in the skb */
-		PPPOL2TP_SKB_CB(skb)->has_seq = 0;
-	}
-
-	/* If offset bit set, skip it. */
-	if (hdrflags & L2TP_HDRFLAG_O) {
-		offset = ntohs(*(__be16 *)ptr);
-		ptr += 2 + offset;
-	}
-
-	offset = ptr - optr;
-	if (!pskb_may_pull(skb, offset))
-		goto discard;
-
-	__skb_pull(skb, offset);
-
-	/* Skip PPP header, if present.	 In testing, Microsoft L2TP clients
-	 * don't send the PPP header (PPP header compression enabled), but
-	 * other clients can include the header. So we cope with both cases
-	 * here. The PPP header is always FF03 when using L2TP.
-	 *
-	 * Note that skb->data[] isn't dereferenced from a u16 ptr here since
-	 * the field may be unaligned.
-	 */
-	if (!pskb_may_pull(skb, 2))
-		goto discard;
-
-	if ((skb->data[0] == 0xff) && (skb->data[1] == 0x03))
-		skb_pull(skb, 2);
-
-	/* Prepare skb for adding to the session's reorder_q.  Hold
-	 * packets for max reorder_timeout or 1 second if not
-	 * reordering.
-	 */
-	PPPOL2TP_SKB_CB(skb)->length = length;
-	PPPOL2TP_SKB_CB(skb)->expires = jiffies +
-		(session->reorder_timeout ? session->reorder_timeout : HZ);
-
-	/* Add packet to the session's receive queue. Reordering is done here, if
-	 * enabled. Saved L2TP protocol info is stored in skb->sb[].
-	 */
-	if (PPPOL2TP_SKB_CB(skb)->has_seq) {
-		if (session->reorder_timeout != 0) {
-			/* Packet reordering enabled. Add skb to session's
-			 * reorder queue, in order of ns.
-			 */
-			pppol2tp_recv_queue_skb(session, skb);
-		} else {
-			/* Packet reordering disabled. Discard out-of-sequence
-			 * packets
-			 */
-			if (PPPOL2TP_SKB_CB(skb)->ns != session->nr) {
-				session->stats.rx_seq_discards++;
-				PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-				       "%s: oos pkt %hu len %d discarded, "
-				       "waiting for %hu, reorder_q_len=%d\n",
-				       session->name, PPPOL2TP_SKB_CB(skb)->ns,
-				       PPPOL2TP_SKB_CB(skb)->length, session->nr,
-				       skb_queue_len(&session->reorder_q));
-				goto discard;
-			}
-			skb_queue_tail(&session->reorder_q, skb);
-		}
-	} else {
-		/* No sequence numbers. Add the skb to the tail of the
-		 * reorder queue. This ensures that it will be
-		 * delivered after all previous sequenced skbs.
-		 */
-		skb_queue_tail(&session->reorder_q, skb);
-	}
-
-	/* Try to dequeue as many skbs from reorder_q as we can. */
-	pppol2tp_recv_dequeue(session);
-	sock_put(sock);
-
-	return 0;
-
-discard:
-	session->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(session->sock);
-	sock_put(sock);
-
-	return 0;
-
-discard_bad_csum:
-	LIMIT_NETDEBUG("%s: UDP: bad checksum\n", tunnel->name);
-	UDP_INC_STATS_USER(&init_net, UDP_MIB_INERRORS, 0);
-	tunnel->stats.rx_errors++;
-	kfree_skb(skb);
-	sock_put(sock);
-
-	return 0;
-
-error:
-	/* Put UDP header back */
-	__skb_push(skb, sizeof(struct udphdr));
-	sock_put(sock);
-
-no_tunnel:
-	return 1;
-}
-
-/* UDP encapsulation receive handler. See net/ipv4/udp.c.
- * Return codes:
- * 0 : success.
- * <0: error
- * >0: skb should be passed up to userspace as UDP.
- */
-static int pppol2tp_udp_encap_recv(struct sock *sk, struct sk_buff *skb)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = pppol2tp_sock_to_tunnel(sk);
-	if (tunnel == NULL)
-		goto pass_up;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-	       "%s: received %d bytes\n", tunnel->name, skb->len);
-
-	if (pppol2tp_recv_core(sk, skb))
-		goto pass_up_put;
-
-	sock_put(sk);
-	return 0;
-
-pass_up_put:
-	sock_put(sk);
-pass_up:
-	return 1;
-}
-
-/* Receive message. This is the recvmsg for the PPPoL2TP socket.
- */
-static int pppol2tp_recvmsg(struct kiocb *iocb, struct socket *sock,
-			    struct msghdr *msg, size_t len,
-			    int flags)
-{
-	int err;
-	struct sk_buff *skb;
-	struct sock *sk = sock->sk;
-
-	err = -EIO;
-	if (sk->sk_state & PPPOX_BOUND)
-		goto end;
-
-	msg->msg_namelen = 0;
-
-	err = 0;
-	skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT,
-				flags & MSG_DONTWAIT, &err);
-	if (!skb)
-		goto end;
-
-	if (len > skb->len)
-		len = skb->len;
-	else if (len < skb->len)
-		msg->msg_flags |= MSG_TRUNC;
-
-	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len);
-	if (likely(err == 0))
-		err = len;
-
-	kfree_skb(skb);
-end:
-	return err;
-}
-
-/************************************************************************
- * Transmit handling
- ***********************************************************************/
-
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct pppol2tp_session *session)
-{
-	if (session->send_seq)
-		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
-	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
-/* Build an L2TP header for the session into the buffer provided.
- */
-static void pppol2tp_build_l2tp_header(struct pppol2tp_session *session,
-				       void *buf)
-{
-	__be16 *bufp = buf;
-	u16 flags = L2TP_HDR_VER;
-
-	if (session->send_seq)
-		flags |= L2TP_HDRFLAG_S;
-
-	/* Setup L2TP header.
-	 * FIXME: Can this ever be unaligned? Is direct dereferencing of
-	 * 16-bit header fields safe here for all architectures?
-	 */
-	*bufp++ = htons(flags);
-	*bufp++ = htons(session->tunnel_addr.d_tunnel);
-	*bufp++ = htons(session->tunnel_addr.d_session);
-	if (session->send_seq) {
-		*bufp++ = htons(session->ns);
-		*bufp++ = 0;
-		session->ns++;
-		PRINTK(session->debug, PPPOL2TP_MSG_SEQ, KERN_DEBUG,
-		       "%s: updated ns to %hu\n", session->name, session->ns);
-	}
-}
-
-/* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
- * when a user application does a sendmsg() on the session socket. L2TP and
- * PPP headers must be inserted into the user's data.
- */
-static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *m,
-			    size_t total_len)
-{
-	static const unsigned char ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = sock->sk;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct sk_buff *skb;
-	int error;
-	int hdr_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	struct udphdr *uh;
-	unsigned int len;
-	struct sock *sk_tun;
-	u16 udp_len;
-
-	error = -ENOTCONN;
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto error;
-
-	/* Get session and tunnel contexts */
-	error = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto error;
-
-	sk_tun = session->tunnel_sock;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto error_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Allocate a socket buffer */
-	error = -ENOMEM;
-	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
-			   sizeof(struct udphdr) + hdr_len +
-			   sizeof(ppph) + total_len,
-			   0, GFP_KERNEL);
-	if (!skb)
-		goto error_put_sess_tun;
-
-	/* Reserve space for headers. */
-	skb_reserve(skb, NET_SKB_PAD);
-	skb_reset_network_header(skb);
-	skb_reserve(skb, sizeof(struct iphdr));
-	skb_reset_transport_header(skb);
-
-	/* Build UDP header */
-	inet = inet_sk(sk_tun);
-	udp_len = hdr_len + sizeof(ppph) + total_len;
-	uh = (struct udphdr *) skb->data;
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-	skb_put(skb, sizeof(struct udphdr));
-
-	/* Build L2TP header */
-	pppol2tp_build_l2tp_header(session, skb->data);
-	skb_put(skb, hdr_len);
-
-	/* Add PPP header */
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-	skb_put(skb, 2);
-
-	/* Copy user data into skb */
-	error = memcpy_fromiovec(skb->data, m->msg_iov, total_len);
-	if (error < 0) {
-		kfree_skb(skb);
-		goto error_put_sess_tun;
-	}
-	skb_put(skb, total_len);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM)) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes, ns=%hu\n", session->name,
-		       total_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %Zd bytes\n", session->name, total_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < total_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 15) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	error = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (error >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	return error;
-
-error_put_sess_tun:
-	sock_put(session->tunnel_sock);
-error_put_sess:
-	sock_put(sk);
-error:
-	return error;
-}
-
-/* Automatically called when the skb is freed.
- */
-static void pppol2tp_sock_wfree(struct sk_buff *skb)
-{
-	sock_put(skb->sk);
-}
-
-/* For data skbs that we transmit, we associate with the tunnel socket
- * but don't do accounting.
- */
-static inline void pppol2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
-{
-	sock_hold(sk);
-	skb->sk = sk;
-	skb->destructor = pppol2tp_sock_wfree;
-}
-
-/* Transmit function called by generic PPP driver.  Sends PPP frame
- * over PPPoL2TP socket.
- *
- * This is almost the same as pppol2tp_sendmsg(), but rather than
- * being called with a msghdr from userspace, it is called with a skb
- * from the kernel.
- *
- * The supplied skb from ppp doesn't have enough headroom for the
- * insertion of L2TP, UDP and IP headers so we need to allocate more
- * headroom in the skb. This will create a cloned skb. But we must be
- * careful in the error case because the caller will expect to free
- * the skb it supplied, not our cloned skb. So we take care to always
- * leave the original skb unfreed if we return an error.
- */
-static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
-{
-	static const u8 ppph[2] = { 0xff, 0x03 };
-	struct sock *sk = (struct sock *) chan->private;
-	struct sock *sk_tun;
-	int hdr_len;
-	u16 udp_len;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int rc;
-	int headroom;
-	int data_len = skb->len;
-	struct inet_sock *inet;
-	__wsum csum;
-	struct udphdr *uh;
-	unsigned int len;
-	int old_headroom;
-	int new_headroom;
-
-	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
-		goto abort;
-
-	/* Get session and tunnel contexts from the socket */
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto abort;
-
-	sk_tun = session->tunnel_sock;
-	if (sk_tun == NULL)
-		goto abort_put_sess;
-	tunnel = pppol2tp_sock_to_tunnel(sk_tun);
-	if (tunnel == NULL)
-		goto abort_put_sess;
-
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
-	/* Check that there's enough headroom in the skb to insert IP,
-	 * UDP and L2TP and PPP headers. If not enough, expand it to
-	 * make room. Adjust truesize.
-	 */
-	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
-		sizeof(struct udphdr) + hdr_len + sizeof(ppph);
-	old_headroom = skb_headroom(skb);
-	if (skb_cow_head(skb, headroom))
-		goto abort_put_sess_tun;
-
-	new_headroom = skb_headroom(skb);
-	skb_orphan(skb);
-	skb->truesize += new_headroom - old_headroom;
-
-	/* Setup PPP header */
-	__skb_push(skb, sizeof(ppph));
-	skb->data[0] = ppph[0];
-	skb->data[1] = ppph[1];
-
-	/* Setup L2TP header */
-	pppol2tp_build_l2tp_header(session, __skb_push(skb, hdr_len));
-
-	udp_len = sizeof(struct udphdr) + hdr_len + sizeof(ppph) + data_len;
-
-	/* Setup UDP header */
-	inet = inet_sk(sk_tun);
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
-
-	/* Debug */
-	if (session->send_seq)
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes, ns=%hu\n", session->name,
-		       data_len, session->ns - 1);
-	else
-		PRINTK(session->debug, PPPOL2TP_MSG_DATA, KERN_DEBUG,
-		       "%s: send %d bytes\n", session->name, data_len);
-
-	if (session->debug & PPPOL2TP_MSG_DATA) {
-		int i;
-		unsigned char *datap = skb->data;
-
-		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < data_len; i++) {
-			printk(" %02X", *datap++);
-			if (i == 31) {
-				printk(" ...");
-				break;
-			}
-		}
-		printk("\n");
-	}
-
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
-			      IPSKB_REROUTED);
-	nf_reset(skb);
-
-	/* Get routing info from the tunnel socket */
-	skb_dst_drop(skb);
-	skb_dst_set(skb, dst_clone(__sk_dst_get(sk_tun)));
-	pppol2tp_skb_set_owner_w(skb, sk_tun);
-
-	/* Calculate UDP checksum if configured to do so */
-	if (sk_tun->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
-	}
-
-	/* Queue the packet to IP for output */
-	len = skb->len;
-	rc = ip_queue_xmit(skb, 1);
-
-	/* Update stats */
-	if (rc >= 0) {
-		tunnel->stats.tx_packets++;
-		tunnel->stats.tx_bytes += len;
-		session->stats.tx_packets++;
-		session->stats.tx_bytes += len;
-	} else {
-		tunnel->stats.tx_errors++;
-		session->stats.tx_errors++;
-	}
-
-	sock_put(sk_tun);
-	sock_put(sk);
-	return 1;
-
-abort_put_sess_tun:
-	sock_put(sk_tun);
-abort_put_sess:
-	sock_put(sk);
-abort:
-	/* Free the original skb */
-	kfree_skb(skb);
-	return 1;
-}
-
-/*****************************************************************************
- * Session (and tunnel control) socket create/destroy.
- *****************************************************************************/
-
-/* When the tunnel UDP socket is closed, all the attached sockets need to go
- * too.
- */
-static void pppol2tp_tunnel_closeall(struct pppol2tp_tunnel *tunnel)
-{
-	int hash;
-	struct hlist_node *walk;
-	struct hlist_node *tmp;
-	struct pppol2tp_session *session;
-	struct sock *sk;
-
-	BUG_ON(tunnel == NULL);
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing all sessions...\n", tunnel->name);
-
-	write_lock_bh(&tunnel->hlist_lock);
-	for (hash = 0; hash < PPPOL2TP_HASH_SIZE; hash++) {
-again:
-		hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) {
-			struct sk_buff *skb;
-
-			session = hlist_entry(walk, struct pppol2tp_session, hlist);
-
-			sk = session->sock;
-
-			PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-			       "%s: closing session\n", session->name);
-
-			hlist_del_init(&session->hlist);
-
-			/* Since we should hold the sock lock while
-			 * doing any unbinding, we need to release the
-			 * lock we're holding before taking that lock.
-			 * Hold a reference to the sock so it doesn't
-			 * disappear as we're jumping between locks.
-			 */
-			sock_hold(sk);
-			write_unlock_bh(&tunnel->hlist_lock);
-			lock_sock(sk);
-
-			if (sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND)) {
-				pppox_unbind_sock(sk);
-				sk->sk_state = PPPOX_DEAD;
-				sk->sk_state_change(sk);
-			}
-
-			/* Purge any queued data */
-			skb_queue_purge(&sk->sk_receive_queue);
-			skb_queue_purge(&sk->sk_write_queue);
-			while ((skb = skb_dequeue(&session->reorder_q))) {
-				kfree_skb(skb);
-				sock_put(sk);
-			}
-
-			release_sock(sk);
-			sock_put(sk);
-
-			/* Now restart from the beginning of this hash
-			 * chain.  We always remove a session from the
-			 * list so we are guaranteed to make forward
-			 * progress.
-			 */
-			write_lock_bh(&tunnel->hlist_lock);
-			goto again;
-		}
-	}
-	write_unlock_bh(&tunnel->hlist_lock);
-}
-
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void pppol2tp_tunnel_free(struct pppol2tp_tunnel *tunnel)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(tunnel->pppol2tp_net);
-
-	/* Remove from socket list */
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_del_init(&tunnel->list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	atomic_dec(&pppol2tp_tunnel_count);
-	kfree(tunnel);
-}
-
-/* Tunnel UDP socket destruct hook.
- * The tunnel context is deleted only when all session sockets have been
- * closed.
- */
-static void pppol2tp_tunnel_destruct(struct sock *sk)
-{
-	struct pppol2tp_tunnel *tunnel;
-
-	tunnel = sk->sk_user_data;
-	if (tunnel == NULL)
-		goto end;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: closing...\n", tunnel->name);
-
-	/* Close all sessions */
-	pppol2tp_tunnel_closeall(tunnel);
-
-	/* No longer an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = 0;
-	(udp_sk(sk))->encap_rcv = NULL;
-
-	/* Remove hooks into tunnel socket */
-	tunnel->sock = NULL;
-	sk->sk_destruct = tunnel->old_sk_destruct;
-	sk->sk_user_data = NULL;
-
-	/* Call original (UDP) socket descructor */
-	if (sk->sk_destruct != NULL)
-		(*sk->sk_destruct)(sk);
-
-	pppol2tp_tunnel_dec_refcount(tunnel);
-
-end:
-	return;
-}
-
-/* Really kill the session socket. (Called from sock_put() if
- * refcnt == 0.)
- */
-static void pppol2tp_session_destruct(struct sock *sk)
-{
-	struct pppol2tp_session *session = NULL;
-
-	if (sk->sk_user_data != NULL) {
-		struct pppol2tp_tunnel *tunnel;
-
-		session = sk->sk_user_data;
-		if (session == NULL)
-			goto out;
-
-		BUG_ON(session->magic != L2TP_SESSION_MAGIC);
-
-		/* Don't use pppol2tp_sock_to_tunnel() here to
-		 * get the tunnel context because the tunnel
-		 * socket might have already been closed (its
-		 * sk->sk_user_data will be NULL) so use the
-		 * session's private tunnel ptr instead.
-		 */
-		tunnel = session->tunnel;
-		if (tunnel != NULL) {
-			BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-			/* If session_id is zero, this is a null
-			 * session context, which was created for a
-			 * socket that is being used only to manage
-			 * tunnels.
-			 */
-			if (session->tunnel_addr.s_session != 0) {
-				/* Delete the session socket from the
-				 * hash
-				 */
-				write_lock_bh(&tunnel->hlist_lock);
-				hlist_del_init(&session->hlist);
-				write_unlock_bh(&tunnel->hlist_lock);
-
-				atomic_dec(&pppol2tp_session_count);
-			}
-
-			/* This will delete the tunnel context if this
-			 * is the last session on the tunnel.
-			 */
-			session->tunnel = NULL;
-			session->tunnel_sock = NULL;
-			pppol2tp_tunnel_dec_refcount(tunnel);
-		}
-	}
-
-	kfree(session);
-out:
-	return;
-}
-
-/* Called when the PPPoX socket (session) is closed.
- */
-static int pppol2tp_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	int error;
-
-	if (!sk)
-		return 0;
-
-	error = -EBADF;
-	lock_sock(sk);
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto error;
-
-	pppox_unbind_sock(sk);
-
-	/* Signal the death of the socket. */
-	sk->sk_state = PPPOX_DEAD;
-	sock_orphan(sk);
-	sock->sk = NULL;
-
-	session = pppol2tp_sock_to_session(sk);
-
-	/* Purge any queued data */
-	skb_queue_purge(&sk->sk_receive_queue);
-	skb_queue_purge(&sk->sk_write_queue);
-	if (session != NULL) {
-		struct sk_buff *skb;
-		while ((skb = skb_dequeue(&session->reorder_q))) {
-			kfree_skb(skb);
-			sock_put(sk);
-		}
-		sock_put(sk);
-	}
-
-	release_sock(sk);
-
-	/* This will delete the session context via
-	 * pppol2tp_session_destruct() if the socket's refcnt drops to
-	 * zero.
-	 */
-	sock_put(sk);
-
-	return 0;
-
-error:
-	release_sock(sk);
-	return error;
-}
-
-/* Internal function to prepare a tunnel (UDP) socket to have PPPoX
- * sockets attached to it.
- */
-static struct sock *pppol2tp_prepare_tunnel_socket(struct net *net,
-					int fd, u16 tunnel_id, int *error)
-{
-	int err;
-	struct socket *sock = NULL;
-	struct sock *sk;
-	struct pppol2tp_tunnel *tunnel;
-	struct pppol2tp_net *pn;
-	struct sock *ret = NULL;
-
-	/* Get the tunnel UDP socket from the fd, which was opened by
-	 * the userspace L2TP daemon.
-	 */
-	err = -EBADF;
-	sock = sockfd_lookup(fd, &err);
-	if (!sock) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: sockfd_lookup(fd=%d) returned %d\n",
-		       tunnel_id, fd, err);
-		goto err;
-	}
-
-	sk = sock->sk;
-
-	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_protocol != IPPROTO_UDP) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-		goto err;
-	}
-	err = -EAFNOSUPPORT;
-	if (sock->ops->family != AF_INET) {
-		PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_ERR,
-		       "tunl %hu: fd %d wrong family, got %d, expected %d\n",
-		       tunnel_id, fd, sock->ops->family, AF_INET);
-		goto err;
-	}
-
-	err = -ENOTCONN;
-
-	/* Check if this socket has already been prepped */
-	tunnel = (struct pppol2tp_tunnel *)sk->sk_user_data;
-	if (tunnel != NULL) {
-		/* User-data field already set */
-		err = -EBUSY;
-		BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC);
-
-		/* This socket has already been prepped */
-		ret = tunnel->sock;
-		goto out;
-	}
-
-	/* This socket is available and needs prepping. Create a new tunnel
-	 * context and init it.
-	 */
-	sk->sk_user_data = tunnel = kzalloc(sizeof(struct pppol2tp_tunnel), GFP_KERNEL);
-	if (sk->sk_user_data == NULL) {
-		err = -ENOMEM;
-		goto err;
-	}
-
-	tunnel->magic = L2TP_TUNNEL_MAGIC;
-	sprintf(&tunnel->name[0], "tunl %hu", tunnel_id);
-
-	tunnel->stats.tunnel_id = tunnel_id;
-	tunnel->debug = PPPOL2TP_DEFAULT_DEBUG_FLAGS;
-
-	/* Hook on the tunnel socket destructor so that we can cleanup
-	 * if the tunnel socket goes away.
-	 */
-	tunnel->old_sk_destruct = sk->sk_destruct;
-	sk->sk_destruct = pppol2tp_tunnel_destruct;
-
-	tunnel->sock = sk;
-	sk->sk_allocation = GFP_ATOMIC;
-
-	/* Misc init */
-	rwlock_init(&tunnel->hlist_lock);
-
-	/* The net we belong to */
-	tunnel->pppol2tp_net = net;
-	pn = pppol2tp_pernet(net);
-
-	/* Add tunnel to our list */
-	INIT_LIST_HEAD(&tunnel->list);
-	write_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	list_add(&tunnel->list, &pn->pppol2tp_tunnel_list);
-	write_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-	atomic_inc(&pppol2tp_tunnel_count);
-
-	/* Bump the reference count. The tunnel context is deleted
-	 * only when this drops to zero.
-	 */
-	pppol2tp_tunnel_inc_refcount(tunnel);
-
-	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = UDP_ENCAP_L2TPINUDP;
-	(udp_sk(sk))->encap_rcv = pppol2tp_udp_encap_recv;
-
-	ret = tunnel->sock;
-
-	*error = 0;
-out:
-	if (sock)
-		sockfd_put(sock);
-
-	return ret;
-
-err:
-	*error = err;
-	goto out;
-}
-
-static struct proto pppol2tp_sk_proto = {
-	.name	  = "PPPOL2TP",
-	.owner	  = THIS_MODULE,
-	.obj_size = sizeof(struct pppox_sock),
-};
-
-/* socket() handler. Initialize a new struct sock.
- */
-static int pppol2tp_create(struct net *net, struct socket *sock)
-{
-	int error = -ENOMEM;
-	struct sock *sk;
-
-	sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pppol2tp_sk_proto);
-	if (!sk)
-		goto out;
-
-	sock_init_data(sock, sk);
-
-	sock->state  = SS_UNCONNECTED;
-	sock->ops    = &pppol2tp_ops;
-
-	sk->sk_backlog_rcv = pppol2tp_recv_core;
-	sk->sk_protocol	   = PX_PROTO_OL2TP;
-	sk->sk_family	   = PF_PPPOX;
-	sk->sk_state	   = PPPOX_NONE;
-	sk->sk_type	   = SOCK_STREAM;
-	sk->sk_destruct	   = pppol2tp_session_destruct;
-
-	error = 0;
-
-out:
-	return error;
-}
-
-/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
- */
-static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
-			    int sockaddr_len, int flags)
-{
-	struct sock *sk = sock->sk;
-	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
-	struct pppox_sock *po = pppox_sk(sk);
-	struct sock *tunnel_sock = NULL;
-	struct pppol2tp_session *session = NULL;
-	struct pppol2tp_tunnel *tunnel;
-	struct dst_entry *dst;
-	int error = 0;
-
-	lock_sock(sk);
-
-	error = -EINVAL;
-	if (sp->sa_protocol != PX_PROTO_OL2TP)
-		goto end;
-
-	/* Check for already bound sockets */
-	error = -EBUSY;
-	if (sk->sk_state & PPPOX_CONNECTED)
-		goto end;
-
-	/* We don't supporting rebinding anyway */
-	error = -EALREADY;
-	if (sk->sk_user_data)
-		goto end; /* socket is already attached */
-
-	/* Don't bind if s_tunnel is 0 */
-	error = -EINVAL;
-	if (sp->pppol2tp.s_tunnel == 0)
-		goto end;
-
-	/* Special case: prepare tunnel socket if s_session and
-	 * d_session is 0. Otherwise look up tunnel using supplied
-	 * tunnel id.
-	 */
-	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
-		tunnel_sock = pppol2tp_prepare_tunnel_socket(sock_net(sk),
-							     sp->pppol2tp.fd,
-							     sp->pppol2tp.s_tunnel,
-							     &error);
-		if (tunnel_sock == NULL)
-			goto end;
-
-		sock_hold(tunnel_sock);
-		tunnel = tunnel_sock->sk_user_data;
-	} else {
-		tunnel = pppol2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
-
-		/* Error if we can't find the tunnel */
-		error = -ENOENT;
-		if (tunnel == NULL)
-			goto end;
-
-		tunnel_sock = tunnel->sock;
-	}
-
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = pppol2tp_session_find(tunnel, sp->pppol2tp.s_session);
-	if (session != NULL)
-		goto end;
-
-	/* Allocate and initialize a new session context. */
-	session = kzalloc(sizeof(struct pppol2tp_session), GFP_KERNEL);
-	if (session == NULL) {
-		error = -ENOMEM;
-		goto end;
-	}
-
-	skb_queue_head_init(&session->reorder_q);
-
-	session->magic	     = L2TP_SESSION_MAGIC;
-	session->owner	     = current->pid;
-	session->sock	     = sk;
-	session->tunnel	     = tunnel;
-	session->tunnel_sock = tunnel_sock;
-	session->tunnel_addr = sp->pppol2tp;
-	sprintf(&session->name[0], "sess %hu/%hu",
-		session->tunnel_addr.s_tunnel,
-		session->tunnel_addr.s_session);
-
-	session->stats.tunnel_id  = session->tunnel_addr.s_tunnel;
-	session->stats.session_id = session->tunnel_addr.s_session;
-
-	INIT_HLIST_NODE(&session->hlist);
-
-	/* Inherit debug options from tunnel */
-	session->debug = tunnel->debug;
-
-	/* Default MTU must allow space for UDP/L2TP/PPP
-	 * headers.
-	 */
-	session->mtu = session->mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-
-	/* If PMTU discovery was enabled, use the MTU that was discovered */
-	dst = sk_dst_get(sk);
-	if (dst != NULL) {
-		u32 pmtu = dst_mtu(__sk_dst_get(sk));
-		if (pmtu != 0)
-			session->mtu = session->mru = pmtu -
-				PPPOL2TP_HEADER_OVERHEAD;
-		dst_release(dst);
-	}
-
-	/* Special case: if source & dest session_id == 0x0000, this socket is
-	 * being created to manage the tunnel. Don't add the session to the
-	 * session hash list, just set up the internal context for use by
-	 * ioctl() and sockopt() handlers.
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		error = 0;
-		sk->sk_user_data = session;
-		goto out_no_ppp;
-	}
-
-	/* Get tunnel context from the tunnel socket */
-	tunnel = pppol2tp_sock_to_tunnel(tunnel_sock);
-	if (tunnel == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	/* Right now, because we don't have a way to push the incoming skb's
-	 * straight through the UDP layer, the only header we need to worry
-	 * about is the L2TP header. This size is different depending on
-	 * whether sequence numbers are enabled for the data channel.
-	 */
-	po->chan.hdrlen = PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-
-	po->chan.private = sk;
-	po->chan.ops	 = &pppol2tp_chan_ops;
-	po->chan.mtu	 = session->mtu;
-
-	error = ppp_register_net_channel(sock_net(sk), &po->chan);
-	if (error)
-		goto end_put_tun;
-
-	/* This is how we get the session context from the socket. */
-	sk->sk_user_data = session;
-
-	/* Add session to the tunnel's hash list */
-	write_lock_bh(&tunnel->hlist_lock);
-	hlist_add_head(&session->hlist,
-		       pppol2tp_session_id_hash(tunnel,
-						session->tunnel_addr.s_session));
-	write_unlock_bh(&tunnel->hlist_lock);
-
-	atomic_inc(&pppol2tp_session_count);
-
-out_no_ppp:
-	pppol2tp_tunnel_inc_refcount(tunnel);
-	sk->sk_state = PPPOX_CONNECTED;
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-	       "%s: created\n", session->name);
-
-end_put_tun:
-	sock_put(tunnel_sock);
-end:
-	release_sock(sk);
-
-	if (error != 0) {
-		if (session)
-			PRINTK(session->debug,
-				PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"%s: connect failed: %d\n",
-				session->name, error);
-		else
-			PRINTK(-1, PPPOL2TP_MSG_CONTROL, KERN_WARNING,
-				"connect failed: %d\n", error);
-	}
-
-	return error;
-}
-
-/* getname() support.
- */
-static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
-			    int *usockaddr_len, int peer)
-{
-	int len = sizeof(struct sockaddr_pppol2tp);
-	struct sockaddr_pppol2tp sp;
-	int error = 0;
-	struct pppol2tp_session *session;
-
-	error = -ENOTCONN;
-	if (sock->sk->sk_state != PPPOX_CONNECTED)
-		goto end;
-
-	session = pppol2tp_sock_to_session(sock->sk);
-	if (session == NULL) {
-		error = -EBADF;
-		goto end;
-	}
-
-	sp.sa_family	= AF_PPPOX;
-	sp.sa_protocol	= PX_PROTO_OL2TP;
-	memcpy(&sp.pppol2tp, &session->tunnel_addr,
-	       sizeof(struct pppol2tp_addr));
-
-	memcpy(uaddr, &sp, len);
-
-	*usockaddr_len = len;
-
-	error = 0;
-	sock_put(sock->sk);
-
-end:
-	return error;
-}
-
-/****************************************************************************
- * ioctl() handlers.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. However, in order to control kernel tunnel features, we allow
- * userspace to create a special "tunnel" PPPoX socket which is used for
- * control only.  Tunnel PPPoX sockets have session_id == 0 and simply allow
- * the user application to issue L2TP setsockopt(), getsockopt() and ioctl()
- * calls.
- ****************************************************************************/
-
-/* Session ioctl helper.
- */
-static int pppol2tp_session_ioctl(struct pppol2tp_session *session,
-				  unsigned int cmd, unsigned long arg)
-{
-	struct ifreq ifr;
-	int err = 0;
-	struct sock *sk = session->sock;
-	int val = (int) arg;
-
-	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n",
-	       session->name, cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case SIOCGIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-		ifr.ifr_mtu = session->mtu;
-		if (copy_to_user((void __user *) arg, &ifr, sizeof(struct ifreq)))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case SIOCSIFMTU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (copy_from_user(&ifr, (void __user *) arg, sizeof(struct ifreq)))
-			break;
-
-		session->mtu = ifr.ifr_mtu;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mtu=%d\n", session->name, session->mtu);
-		err = 0;
-		break;
-
-	case PPPIOCGMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (put_user(session->mru, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCSMRU:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		err = -EFAULT;
-		if (get_user(val,(int __user *) arg))
-			break;
-
-		session->mru = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set mru=%d\n", session->name, session->mru);
-		err = 0;
-		break;
-
-	case PPPIOCGFLAGS:
-		err = -EFAULT;
-		if (put_user(session->flags, (int __user *) arg))
-			break;
-
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCSFLAGS:
-		err = -EFAULT;
-		if (get_user(val, (int __user *) arg))
-			break;
-		session->flags = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set flags=%d\n", session->name, session->flags);
-		err = 0;
-		break;
-
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_to_user((void __user *) arg, &session->stats,
-				 sizeof(session->stats)))
-			break;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", session->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Tunnel ioctl helper.
- *
- * Note the special handling for PPPIOCGL2TPSTATS below. If the ioctl data
- * specifies a session_id, the session ioctl handler is called. This allows an
- * application to retrieve session stats via a tunnel socket.
- */
-static int pppol2tp_tunnel_ioctl(struct pppol2tp_tunnel *tunnel,
-				 unsigned int cmd, unsigned long arg)
-{
-	int err = 0;
-	struct sock *sk = tunnel->sock;
-	struct pppol2tp_ioc_stats stats_req;
-
-	PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_DEBUG,
-	       "%s: pppol2tp_tunnel_ioctl(cmd=%#x, arg=%#lx)\n", tunnel->name,
-	       cmd, arg);
-
-	sock_hold(sk);
-
-	switch (cmd) {
-	case PPPIOCGL2TPSTATS:
-		err = -ENXIO;
-		if (!(sk->sk_state & PPPOX_CONNECTED))
-			break;
-
-		if (copy_from_user(&stats_req, (void __user *) arg,
-				   sizeof(stats_req))) {
-			err = -EFAULT;
-			break;
-		}
-		if (stats_req.session_id != 0) {
-			/* resend to session ioctl handler */
-			struct pppol2tp_session *session =
-				pppol2tp_session_find(tunnel, stats_req.session_id);
-			if (session != NULL)
-				err = pppol2tp_session_ioctl(session, cmd, arg);
-			else
-				err = -EBADR;
-			break;
-		}
-#ifdef CONFIG_XFRM
-		tunnel->stats.using_ipsec = (sk->sk_policy[0] || sk->sk_policy[1]) ? 1 : 0;
-#endif
-		if (copy_to_user((void __user *) arg, &tunnel->stats,
-				 sizeof(tunnel->stats))) {
-			err = -EFAULT;
-			break;
-		}
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get L2TP stats\n", tunnel->name);
-		err = 0;
-		break;
-
-	default:
-		err = -ENOSYS;
-		break;
-	}
-
-	sock_put(sk);
-
-	return err;
-}
-
-/* Main ioctl() handler.
- * Dispatch to tunnel or session helpers depending on the socket.
- */
-static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd,
-			  unsigned long arg)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session;
-	struct pppol2tp_tunnel *tunnel;
-	int err;
-
-	if (!sk)
-		return 0;
-
-	err = -EBADF;
-	if (sock_flag(sk, SOCK_DEAD) != 0)
-		goto end;
-
-	err = -ENOTCONN;
-	if ((sk->sk_user_data == NULL) ||
-	    (!(sk->sk_state & (PPPOX_CONNECTED | PPPOX_BOUND))))
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session's session_id is zero, treat ioctl as a
-	 * tunnel ioctl
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg);
-		sock_put(session->tunnel_sock);
-		goto end_put_sess;
-	}
-
-	err = pppol2tp_session_ioctl(session, cmd, arg);
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * setsockopt() / getsockopt() support.
- *
- * The PPPoX socket is created for L2TP sessions: tunnels have their own UDP
- * sockets. In order to control kernel tunnel features, we allow userspace to
- * create a special "tunnel" PPPoX socket which is used for control only.
- * Tunnel PPPoX sockets have session_id == 0 and simply allow the user
- * application to issue L2TP setsockopt(), getsockopt() and ioctl() calls.
- *****************************************************************************/
-
-/* Tunnel setsockopt() helper.
- */
-static int pppol2tp_tunnel_setsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		tunnel->debug = val;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session setsockopt helper.
- */
-static int pppol2tp_session_setsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->recv_seq = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set recv_seq=%d\n", session->name,
-		       session->recv_seq);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->send_seq = val ? -1 : 0;
-		{
-			struct sock *ssk      = session->sock;
-			struct pppox_sock *po = pppox_sk(ssk);
-			po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ :
-				PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-		}
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set send_seq=%d\n", session->name, session->send_seq);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		if ((val != 0) && (val != 1)) {
-			err = -EINVAL;
-			break;
-		}
-		session->lns_mode = val ? -1 : 0;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set lns_mode=%d\n", session->name,
-		       session->lns_mode);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		session->debug = val;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set debug=%x\n", session->name, session->debug);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		session->reorder_timeout = msecs_to_jiffies(val);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: set reorder_timeout=%d\n", session->name,
-		       session->reorder_timeout);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Main setsockopt() entry point.
- * Does API checks, then calls either the tunnel or session setsockopt
- * handler, according to whether the PPPoL2TP socket is a for a regular
- * session or the special tunnel type.
- */
-static int pppol2tp_setsockopt(struct socket *sock, int level, int optname,
-			       char __user *optval, unsigned int optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.setsockopt(sk, level, optname, optval, optlen);
-
-	if (optlen < sizeof(int))
-		return -EINVAL;
-
-	if (get_user(val, (int __user *)optval))
-		return -EFAULT;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get session context from the socket */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel
-	 */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_setsockopt(sk, session, optname, val);
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/* Tunnel getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_tunnel_getsockopt(struct sock *sk,
-				      struct pppol2tp_tunnel *tunnel,
-				      int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_DEBUG:
-		*val = tunnel->debug;
-		PRINTK(tunnel->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%x\n", tunnel->name, tunnel->debug);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-		break;
-	}
-
-	return err;
-}
-
-/* Session getsockopt helper. Called with sock locked.
- */
-static int pppol2tp_session_getsockopt(struct sock *sk,
-				       struct pppol2tp_session *session,
-				       int optname, int *val)
-{
-	int err = 0;
-
-	switch (optname) {
-	case PPPOL2TP_SO_RECVSEQ:
-		*val = session->recv_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get recv_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_SENDSEQ:
-		*val = session->send_seq;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get send_seq=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_LNSMODE:
-		*val = session->lns_mode;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get lns_mode=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_DEBUG:
-		*val = session->debug;
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get debug=%d\n", session->name, *val);
-		break;
-
-	case PPPOL2TP_SO_REORDERTO:
-		*val = (int) jiffies_to_msecs(session->reorder_timeout);
-		PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
-		       "%s: get reorder_timeout=%d\n", session->name, *val);
-		break;
-
-	default:
-		err = -ENOPROTOOPT;
-	}
-
-	return err;
-}
-
-/* Main getsockopt() entry point.
- * Does API checks, then calls either the tunnel or session getsockopt
- * handler, according to whether the PPPoX socket is a for a regular session
- * or the special tunnel type.
- */
-static int pppol2tp_getsockopt(struct socket *sock, int level,
-			       int optname, char __user *optval, int __user *optlen)
-{
-	struct sock *sk = sock->sk;
-	struct pppol2tp_session *session = sk->sk_user_data;
-	struct pppol2tp_tunnel *tunnel;
-	int val, len;
-	int err;
-
-	if (level != SOL_PPPOL2TP)
-		return udp_prot.getsockopt(sk, level, optname, optval, optlen);
-
-	if (get_user(len, (int __user *) optlen))
-		return -EFAULT;
-
-	len = min_t(unsigned int, len, sizeof(int));
-
-	if (len < 0)
-		return -EINVAL;
-
-	err = -ENOTCONN;
-	if (sk->sk_user_data == NULL)
-		goto end;
-
-	/* Get the session context */
-	err = -EBADF;
-	session = pppol2tp_sock_to_session(sk);
-	if (session == NULL)
-		goto end;
-
-	/* Special case: if session_id == 0x0000, treat as operation on tunnel */
-	if ((session->tunnel_addr.s_session == 0) &&
-	    (session->tunnel_addr.d_session == 0)) {
-		err = -EBADF;
-		tunnel = pppol2tp_sock_to_tunnel(session->tunnel_sock);
-		if (tunnel == NULL)
-			goto end_put_sess;
-
-		err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val);
-		sock_put(session->tunnel_sock);
-	} else
-		err = pppol2tp_session_getsockopt(sk, session, optname, &val);
-
-	err = -EFAULT;
-	if (put_user(len, (int __user *) optlen))
-		goto end_put_sess;
-
-	if (copy_to_user((void __user *) optval, &val, len))
-		goto end_put_sess;
-
-	err = 0;
-
-end_put_sess:
-	sock_put(sk);
-end:
-	return err;
-}
-
-/*****************************************************************************
- * /proc filesystem for debug
- *****************************************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-#include <linux/seq_file.h>
-
-struct pppol2tp_seq_data {
-	struct seq_net_private p;
-	struct pppol2tp_tunnel *tunnel;		/* current tunnel */
-	struct pppol2tp_session *session;	/* NULL means get first session in tunnel */
-};
-
-static struct pppol2tp_session *next_session(struct pppol2tp_tunnel *tunnel, struct pppol2tp_session *curr)
-{
-	struct pppol2tp_session *session = NULL;
-	struct hlist_node *walk;
-	int found = 0;
-	int next = 0;
-	int i;
-
-	read_lock_bh(&tunnel->hlist_lock);
-	for (i = 0; i < PPPOL2TP_HASH_SIZE; i++) {
-		hlist_for_each_entry(session, walk, &tunnel->session_hlist[i], hlist) {
-			if (curr == NULL) {
-				found = 1;
-				goto out;
-			}
-			if (session == curr) {
-				next = 1;
-				continue;
-			}
-			if (next) {
-				found = 1;
-				goto out;
-			}
-		}
-	}
-out:
-	read_unlock_bh(&tunnel->hlist_lock);
-	if (!found)
-		session = NULL;
-
-	return session;
-}
-
-static struct pppol2tp_tunnel *next_tunnel(struct pppol2tp_net *pn,
-					   struct pppol2tp_tunnel *curr)
-{
-	struct pppol2tp_tunnel *tunnel = NULL;
-
-	read_lock_bh(&pn->pppol2tp_tunnel_list_lock);
-	if (list_is_last(&curr->list, &pn->pppol2tp_tunnel_list)) {
-		goto out;
-	}
-	tunnel = list_entry(curr->list.next, struct pppol2tp_tunnel, list);
-out:
-	read_unlock_bh(&pn->pppol2tp_tunnel_list_lock);
-
-	return tunnel;
-}
-
-static void *pppol2tp_seq_start(struct seq_file *m, loff_t *offs)
-{
-	struct pppol2tp_seq_data *pd = SEQ_START_TOKEN;
-	struct pppol2tp_net *pn;
-	loff_t pos = *offs;
-
-	if (!pos)
-		goto out;
-
-	BUG_ON(m->private == NULL);
-	pd = m->private;
-	pn = pppol2tp_pernet(seq_file_net(m));
-
-	if (pd->tunnel == NULL) {
-		if (!list_empty(&pn->pppol2tp_tunnel_list))
-			pd->tunnel = list_entry(pn->pppol2tp_tunnel_list.next, struct pppol2tp_tunnel, list);
-	} else {
-		pd->session = next_session(pd->tunnel, pd->session);
-		if (pd->session == NULL) {
-			pd->tunnel = next_tunnel(pn, pd->tunnel);
-		}
-	}
-
-	/* NULL tunnel and session indicates end of list */
-	if ((pd->tunnel == NULL) && (pd->session == NULL))
-		pd = NULL;
-
-out:
-	return pd;
-}
-
-static void *pppol2tp_seq_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	(*pos)++;
-	return NULL;
-}
-
-static void pppol2tp_seq_stop(struct seq_file *p, void *v)
-{
-	/* nothing to do */
-}
-
-static void pppol2tp_seq_tunnel_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_tunnel *tunnel = v;
-
-	seq_printf(m, "\nTUNNEL '%s', %c %d\n",
-		   tunnel->name,
-		   (tunnel == tunnel->sock->sk_user_data) ? 'Y':'N',
-		   atomic_read(&tunnel->ref_count) - 1);
-	seq_printf(m, " %08x %llu/%llu/%llu %llu/%llu/%llu\n",
-		   tunnel->debug,
-		   (unsigned long long)tunnel->stats.tx_packets,
-		   (unsigned long long)tunnel->stats.tx_bytes,
-		   (unsigned long long)tunnel->stats.tx_errors,
-		   (unsigned long long)tunnel->stats.rx_packets,
-		   (unsigned long long)tunnel->stats.rx_bytes,
-		   (unsigned long long)tunnel->stats.rx_errors);
-}
-
-static void pppol2tp_seq_session_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_session *session = v;
-
-	seq_printf(m, "  SESSION '%s' %08X/%d %04X/%04X -> "
-		   "%04X/%04X %d %c\n",
-		   session->name,
-		   ntohl(session->tunnel_addr.addr.sin_addr.s_addr),
-		   ntohs(session->tunnel_addr.addr.sin_port),
-		   session->tunnel_addr.s_tunnel,
-		   session->tunnel_addr.s_session,
-		   session->tunnel_addr.d_tunnel,
-		   session->tunnel_addr.d_session,
-		   session->sock->sk_state,
-		   (session == session->sock->sk_user_data) ?
-		   'Y' : 'N');
-	seq_printf(m, "   %d/%d/%c/%c/%s %08x %u\n",
-		   session->mtu, session->mru,
-		   session->recv_seq ? 'R' : '-',
-		   session->send_seq ? 'S' : '-',
-		   session->lns_mode ? "LNS" : "LAC",
-		   session->debug,
-		   jiffies_to_msecs(session->reorder_timeout));
-	seq_printf(m, "   %hu/%hu %llu/%llu/%llu %llu/%llu/%llu\n",
-		   session->nr, session->ns,
-		   (unsigned long long)session->stats.tx_packets,
-		   (unsigned long long)session->stats.tx_bytes,
-		   (unsigned long long)session->stats.tx_errors,
-		   (unsigned long long)session->stats.rx_packets,
-		   (unsigned long long)session->stats.rx_bytes,
-		   (unsigned long long)session->stats.rx_errors);
-}
-
-static int pppol2tp_seq_show(struct seq_file *m, void *v)
-{
-	struct pppol2tp_seq_data *pd = v;
-
-	/* display header on line 1 */
-	if (v == SEQ_START_TOKEN) {
-		seq_puts(m, "PPPoL2TP driver info, " PPPOL2TP_DRV_VERSION "\n");
-		seq_puts(m, "TUNNEL name, user-data-ok session-count\n");
-		seq_puts(m, " debug tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		seq_puts(m, "  SESSION name, addr/port src-tid/sid "
-			 "dest-tid/sid state user-data-ok\n");
-		seq_puts(m, "   mtu/mru/rcvseq/sendseq/lns debug reorderto\n");
-		seq_puts(m, "   nr/ns tx-pkts/bytes/errs rx-pkts/bytes/errs\n");
-		goto out;
-	}
-
-	/* Show the tunnel or session context.
-	 */
-	if (pd->session == NULL)
-		pppol2tp_seq_tunnel_show(m, pd->tunnel);
-	else
-		pppol2tp_seq_session_show(m, pd->session);
-
-out:
-	return 0;
-}
-
-static const struct seq_operations pppol2tp_seq_ops = {
-	.start		= pppol2tp_seq_start,
-	.next		= pppol2tp_seq_next,
-	.stop		= pppol2tp_seq_stop,
-	.show		= pppol2tp_seq_show,
-};
-
-/* Called when our /proc file is opened. We allocate data for use when
- * iterating our tunnel / session contexts and store it in the private
- * data of the seq_file.
- */
-static int pppol2tp_proc_open(struct inode *inode, struct file *file)
-{
-	return seq_open_net(inode, file, &pppol2tp_seq_ops,
-			    sizeof(struct pppol2tp_seq_data));
-}
-
-static const struct file_operations pppol2tp_proc_fops = {
-	.owner		= THIS_MODULE,
-	.open		= pppol2tp_proc_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= seq_release_net,
-};
-
-#endif /* CONFIG_PROC_FS */
-
-/*****************************************************************************
- * Init and cleanup
- *****************************************************************************/
-
-static const struct proto_ops pppol2tp_ops = {
-	.family		= AF_PPPOX,
-	.owner		= THIS_MODULE,
-	.release	= pppol2tp_release,
-	.bind		= sock_no_bind,
-	.connect	= pppol2tp_connect,
-	.socketpair	= sock_no_socketpair,
-	.accept		= sock_no_accept,
-	.getname	= pppol2tp_getname,
-	.poll		= datagram_poll,
-	.listen		= sock_no_listen,
-	.shutdown	= sock_no_shutdown,
-	.setsockopt	= pppol2tp_setsockopt,
-	.getsockopt	= pppol2tp_getsockopt,
-	.sendmsg	= pppol2tp_sendmsg,
-	.recvmsg	= pppol2tp_recvmsg,
-	.mmap		= sock_no_mmap,
-	.ioctl		= pppox_ioctl,
-};
-
-static struct pppox_proto pppol2tp_proto = {
-	.create		= pppol2tp_create,
-	.ioctl		= pppol2tp_ioctl
-};
-
-static __net_init int pppol2tp_init_net(struct net *net)
-{
-	struct pppol2tp_net *pn = pppol2tp_pernet(net);
-	struct proc_dir_entry *pde;
-
-	INIT_LIST_HEAD(&pn->pppol2tp_tunnel_list);
-	rwlock_init(&pn->pppol2tp_tunnel_list_lock);
-
-	pde = proc_net_fops_create(net, "pppol2tp", S_IRUGO, &pppol2tp_proc_fops);
-#ifdef CONFIG_PROC_FS
-	if (!pde)
-		return -ENOMEM;
-#endif
-
-	return 0;
-}
-
-static __net_exit void pppol2tp_exit_net(struct net *net)
-{
-	proc_net_remove(net, "pppol2tp");
-}
-
-static struct pernet_operations pppol2tp_net_ops = {
-	.init = pppol2tp_init_net,
-	.exit = pppol2tp_exit_net,
-	.id   = &pppol2tp_net_id,
-	.size = sizeof(struct pppol2tp_net),
-};
-
-static int __init pppol2tp_init(void)
-{
-	int err;
-
-	err = proto_register(&pppol2tp_sk_proto, 0);
-	if (err)
-		goto out;
-	err = register_pppox_proto(PX_PROTO_OL2TP, &pppol2tp_proto);
-	if (err)
-		goto out_unregister_pppol2tp_proto;
-
-	err = register_pernet_device(&pppol2tp_net_ops);
-	if (err)
-		goto out_unregister_pppox_proto;
-
-	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
-	       PPPOL2TP_DRV_VERSION);
-
-out:
-	return err;
-out_unregister_pppox_proto:
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-out_unregister_pppol2tp_proto:
-	proto_unregister(&pppol2tp_sk_proto);
-	goto out;
-}
-
-static void __exit pppol2tp_exit(void)
-{
-	unregister_pppox_proto(PX_PROTO_OL2TP);
-	unregister_pernet_device(&pppol2tp_net_ops);
-	proto_unregister(&pppol2tp_sk_proto);
-}
-
-module_init(pppol2tp_init);
-module_exit(pppol2tp_exit);
-
-MODULE_AUTHOR("Martijn van Oosterhout <kleptog@svana.org>, "
-	      "James Chapman <jchapman@katalix.com>");
-MODULE_DESCRIPTION("PPP over L2TP over UDP");
-MODULE_LICENSE("GPL");
-MODULE_VERSION(PPPOL2TP_DRV_VERSION);
-- 
cgit v1.2.3


From 63f96072f9ba430aa348bc987c3d5a4f48bae301 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:39 +0000
Subject: ppp: Add ppp_dev_name() exported function

ppp_dev_name() gives PPP users visibility of a ppp channel's device
name. This can be used by L2TP drivers to dump the assigned PPP
interface name.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ppp_generic.c   | 19 +++++++++++++++++++
 include/linux/ppp_channel.h |  3 +++
 2 files changed, 22 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
index 6d61602208c1..8a0dd8a843a9 100644
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -2162,6 +2162,24 @@ int ppp_unit_number(struct ppp_channel *chan)
 	return unit;
 }
 
+/*
+ * Return the PPP device interface name of a channel.
+ */
+char *ppp_dev_name(struct ppp_channel *chan)
+{
+	struct channel *pch = chan->ppp;
+	char *name = NULL;
+
+	if (pch) {
+		read_lock_bh(&pch->upl);
+		if (pch->ppp && pch->ppp->dev)
+			name = pch->ppp->dev->name;
+		read_unlock_bh(&pch->upl);
+	}
+	return name;
+}
+
+
 /*
  * Disconnect a channel from the generic layer.
  * This must be called in process context.
@@ -2890,6 +2908,7 @@ EXPORT_SYMBOL(ppp_register_channel);
 EXPORT_SYMBOL(ppp_unregister_channel);
 EXPORT_SYMBOL(ppp_channel_index);
 EXPORT_SYMBOL(ppp_unit_number);
+EXPORT_SYMBOL(ppp_dev_name);
 EXPORT_SYMBOL(ppp_input);
 EXPORT_SYMBOL(ppp_input_error);
 EXPORT_SYMBOL(ppp_output_wakeup);
diff --git a/include/linux/ppp_channel.h b/include/linux/ppp_channel.h
index 0d3fa63e90ea..bff98ec1bfed 100644
--- a/include/linux/ppp_channel.h
+++ b/include/linux/ppp_channel.h
@@ -72,6 +72,9 @@ extern int ppp_channel_index(struct ppp_channel *);
 /* Get the unit number associated with a channel, or -1 if none */
 extern int ppp_unit_number(struct ppp_channel *);
 
+/* Get the device name associated with a channel, or NULL if none */
+extern char *ppp_dev_name(struct ppp_channel *);
+
 /*
  * SMP locking notes:
  * The channel code must ensure that when it calls ppp_unregister_channel,
-- 
cgit v1.2.3


From e0d4435f93905f517003cfa7328a36ea19788147 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:18:54 +0000
Subject: l2tp: Update PPP-over-L2TP driver to work over L2TPv3

This patch makes changes to the L2TP PPP code for L2TPv3.

The existing code has some assumptions about the L2TP header which are
broken by L2TPv3. Also the sockaddr_pppol2tp structure of the original
code is too small to support the increased size of the L2TPv3 tunnel
and session id, so a new sockaddr_pppol2tpv3 structure is needed. In
the socket calls, the size of this structure is used to tell if the
operation is for L2TPv2 or L2TPv3.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_pppol2tp.h |  14 ++++++
 include/linux/if_pppox.h    |   9 ++++
 net/l2tp/l2tp_ppp.c         | 120 +++++++++++++++++++++++++++-----------------
 3 files changed, 97 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_pppol2tp.h b/include/linux/if_pppol2tp.h
index 1a1fb6e5d933..184bc5566207 100644
--- a/include/linux/if_pppol2tp.h
+++ b/include/linux/if_pppol2tp.h
@@ -35,6 +35,20 @@ struct pppol2tp_addr {
 	__u16 d_tunnel, d_session;	/* For sending outgoing packets */
 };
 
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct pppol2tpv3_addr {
+	pid_t	pid;			/* pid that owns the fd.
+					 * 0 => current */
+	int	fd;			/* FD of UDP or IP socket to use */
+
+	struct sockaddr_in addr;	/* IP address and port to send to */
+
+	__u32 s_tunnel, s_session;	/* For matching incoming packets */
+	__u32 d_tunnel, d_session;	/* For sending outgoing packets */
+};
+
 /* Socket options:
  * DEBUG	- bitmask of debug message categories
  * SENDSEQ	- 0 => don't send packets with sequence numbers
diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h
index 90b5fae5d714..a6577af0c4e6 100644
--- a/include/linux/if_pppox.h
+++ b/include/linux/if_pppox.h
@@ -72,6 +72,15 @@ struct sockaddr_pppol2tp {
 	struct pppol2tp_addr pppol2tp;
 }__attribute__ ((packed));
 
+/* The L2TPv3 protocol changes tunnel and session ids from 16 to 32
+ * bits. So we need a different sockaddr structure.
+ */
+struct sockaddr_pppol2tpv3 {
+	sa_family_t     sa_family;      /* address family, AF_PPPOX */
+	unsigned int    sa_protocol;    /* protocol identifier */
+	struct pppol2tpv3_addr pppol2tp;
+} __attribute__ ((packed));
+
 /*********************************************************************
  *
  * ioctl interface for defining forwarding of connections
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index bee5b1413ec0..e5b531266541 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -291,17 +291,6 @@ static void pppol2tp_session_sock_put(struct l2tp_session *session)
  * Transmit handling
  ***********************************************************************/
 
-/* Tell how big L2TP headers are for a particular session. This
- * depends on whether sequence numbers are being used.
- */
-static inline int pppol2tp_l2tp_header_len(struct l2tp_session *session)
-{
-	if (session->send_seq)
-		return PPPOL2TP_L2TP_HDR_SIZE_SEQ;
-
-	return PPPOL2TP_L2TP_HDR_SIZE_NOSEQ;
-}
-
 /* This is the sendmsg for the PPPoL2TP pppol2tp_session socket.  We come here
  * when a user application does a sendmsg() on the session socket. L2TP and
  * PPP headers must be inserted into the user's data.
@@ -394,7 +383,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	static const u8 ppph[2] = { 0xff, 0x03 };
 	struct sock *sk = (struct sock *) chan->private;
 	struct sock *sk_tun;
-	int hdr_len;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
@@ -417,9 +405,6 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	if (tunnel == NULL)
 		goto abort_put_sess;
 
-	/* What header length is configured for this session? */
-	hdr_len = pppol2tp_l2tp_header_len(session);
-
 	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, sizeof(ppph)))
 		goto abort_put_sess_tun;
@@ -432,7 +417,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb)
 	skb->data[0] = ppph[0];
 	skb->data[1] = ppph[1];
 
-	l2tp_xmit_skb(session, skb, hdr_len);
+	l2tp_xmit_skb(session, skb, session->hdr_len);
 
 	sock_put(sk_tun);
 	sock_put(sk);
@@ -615,6 +600,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 {
 	struct sock *sk = sock->sk;
 	struct sockaddr_pppol2tp *sp = (struct sockaddr_pppol2tp *) uservaddr;
+	struct sockaddr_pppol2tpv3 *sp3 = (struct sockaddr_pppol2tpv3 *) uservaddr;
 	struct pppox_sock *po = pppox_sk(sk);
 	struct l2tp_session *session = NULL;
 	struct l2tp_tunnel *tunnel;
@@ -622,6 +608,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	struct dst_entry *dst;
 	struct l2tp_session_cfg cfg = { 0, };
 	int error = 0;
+	u32 tunnel_id, peer_tunnel_id;
+	u32 session_id, peer_session_id;
+	int ver = 2;
+	int fd;
 
 	lock_sock(sk);
 
@@ -639,21 +629,40 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (sk->sk_user_data)
 		goto end; /* socket is already attached */
 
-	/* Don't bind if s_tunnel is 0 */
+	/* Get params from socket address. Handle L2TPv2 and L2TPv3 */
+	if (sockaddr_len == sizeof(struct sockaddr_pppol2tp)) {
+		fd = sp->pppol2tp.fd;
+		tunnel_id = sp->pppol2tp.s_tunnel;
+		peer_tunnel_id = sp->pppol2tp.d_tunnel;
+		session_id = sp->pppol2tp.s_session;
+		peer_session_id = sp->pppol2tp.d_session;
+	} else if (sockaddr_len == sizeof(struct sockaddr_pppol2tpv3)) {
+		ver = 3;
+		fd = sp3->pppol2tp.fd;
+		tunnel_id = sp3->pppol2tp.s_tunnel;
+		peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+		session_id = sp3->pppol2tp.s_session;
+		peer_session_id = sp3->pppol2tp.d_session;
+	} else {
+		error = -EINVAL;
+		goto end; /* bad socket address */
+	}
+
+	/* Don't bind if tunnel_id is 0 */
 	error = -EINVAL;
-	if (sp->pppol2tp.s_tunnel == 0)
+	if (tunnel_id == 0)
 		goto end;
 
-	/* Special case: create tunnel context if s_session and
-	 * d_session is 0. Otherwise look up tunnel using supplied
+	/* Special case: create tunnel context if session_id and
+	 * peer_session_id is 0. Otherwise look up tunnel using supplied
 	 * tunnel id.
 	 */
-	if ((sp->pppol2tp.s_session == 0) && (sp->pppol2tp.d_session == 0)) {
-		error = l2tp_tunnel_create(sock_net(sk), sp->pppol2tp.fd, 2, sp->pppol2tp.s_tunnel, sp->pppol2tp.d_tunnel, NULL, &tunnel);
+	if ((session_id == 0) && (peer_session_id == 0)) {
+		error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
 		if (error < 0)
 			goto end;
 	} else {
-		tunnel = l2tp_tunnel_find(sock_net(sk), sp->pppol2tp.s_tunnel);
+		tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
 
 		/* Error if we can't find the tunnel */
 		error = -ENOENT;
@@ -670,20 +679,21 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 
 	/* Check that this session doesn't already exist */
 	error = -EEXIST;
-	session = l2tp_session_find(sock_net(sk), tunnel, sp->pppol2tp.s_session);
+	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
 	if (session != NULL)
 		goto end;
 
-	/* Default MTU must allow space for UDP/L2TP/PPP
-	 * headers.
-	 */
-	cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	/* Default MTU values. */
+	if (cfg.mtu == 0)
+		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	if (cfg.mru == 0)
+		cfg.mru = cfg.mtu;
 	cfg.debug = tunnel->debug;
 
 	/* Allocate and initialize a new session context. */
 	session = l2tp_session_create(sizeof(struct pppol2tp_session),
-				      tunnel, sp->pppol2tp.s_session,
-				      sp->pppol2tp.d_session, &cfg);
+				      tunnel, session_id,
+				      peer_session_id, &cfg);
 	if (session == NULL) {
 		error = -ENOMEM;
 		goto end;
@@ -756,8 +766,7 @@ end:
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 			    int *usockaddr_len, int peer)
 {
-	int len = sizeof(struct sockaddr_pppol2tp);
-	struct sockaddr_pppol2tp sp;
+	int len = 0;
 	int error = 0;
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
@@ -783,21 +792,40 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
 		goto end_put_sess;
 	}
 
-	memset(&sp, 0, len);
-	sp.sa_family	= AF_PPPOX;
-	sp.sa_protocol	= PX_PROTO_OL2TP;
-	sp.pppol2tp.fd  = tunnel->fd;
-	sp.pppol2tp.pid = pls->owner;
-	sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
-	sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
-	sp.pppol2tp.s_session = session->session_id;
-	sp.pppol2tp.d_session = session->peer_session_id;
 	inet = inet_sk(sk);
-	sp.pppol2tp.addr.sin_family = AF_INET;
-	sp.pppol2tp.addr.sin_port = inet->inet_dport;
-	sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
-
-	memcpy(uaddr, &sp, len);
+	if (tunnel->version == 2) {
+		struct sockaddr_pppol2tp sp;
+		len = sizeof(sp);
+		memset(&sp, 0, len);
+		sp.sa_family	= AF_PPPOX;
+		sp.sa_protocol	= PX_PROTO_OL2TP;
+		sp.pppol2tp.fd  = tunnel->fd;
+		sp.pppol2tp.pid = pls->owner;
+		sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+		sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+		sp.pppol2tp.s_session = session->session_id;
+		sp.pppol2tp.d_session = session->peer_session_id;
+		sp.pppol2tp.addr.sin_family = AF_INET;
+		sp.pppol2tp.addr.sin_port = inet->inet_dport;
+		sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+		memcpy(uaddr, &sp, len);
+	} else if (tunnel->version == 3) {
+		struct sockaddr_pppol2tpv3 sp;
+		len = sizeof(sp);
+		memset(&sp, 0, len);
+		sp.sa_family	= AF_PPPOX;
+		sp.sa_protocol	= PX_PROTO_OL2TP;
+		sp.pppol2tp.fd  = tunnel->fd;
+		sp.pppol2tp.pid = pls->owner;
+		sp.pppol2tp.s_tunnel = tunnel->tunnel_id;
+		sp.pppol2tp.d_tunnel = tunnel->peer_tunnel_id;
+		sp.pppol2tp.s_session = session->session_id;
+		sp.pppol2tp.d_session = session->peer_session_id;
+		sp.pppol2tp.addr.sin_family = AF_INET;
+		sp.pppol2tp.addr.sin_port = inet->inet_dport;
+		sp.pppol2tp.addr.sin_addr.s_addr = inet->inet_daddr;
+		memcpy(uaddr, &sp, len);
+	}
 
 	*usockaddr_len = len;
 
-- 
cgit v1.2.3


From 0d76751fad7739014485ba5bd388d4f1b4fd4143 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:00 +0000
Subject: l2tp: Add L2TPv3 IP encapsulation (no UDP) support

This patch adds a new L2TPIP socket family and modifies the core to
handle the case where there is no UDP header in the L2TP
packet. L2TP/IP uses IP protocol 115. Since L2TP/UDP and L2TP/IP
packets differ in layout, the datapath packet handling code needs
changes too. Userspace uses an L2TPIP socket instead of a UDP socket
when IP encapsulation is required.

We can't use raw sockets for this because the semantics of raw sockets
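don't lend themselves to the socket-per-tunnel model - we need to
associate each tunnel socket with an L2TP connection ID (see
l2tp_conn_id in struct sockaddr_l2tpip).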

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/l2tp.h |  38 +++
 net/l2tp/Kconfig     |  17 ++
 net/l2tp/Makefile    |   1 +
 net/l2tp/l2tp_core.c | 163 ++++++++-----
 net/l2tp/l2tp_core.h |   7 +
 net/l2tp/l2tp_ip.c   | 679 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/l2tp/l2tp_ppp.c  |   7 +-
 7 files changed, 850 insertions(+), 62 deletions(-)
 create mode 100644 include/linux/l2tp.h
 create mode 100644 net/l2tp/l2tp_ip.c

(limited to 'include/linux')

diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
new file mode 100644
index 000000000000..deff7bca4e05
--- /dev/null
+++ b/include/linux/l2tp.h
@@ -0,0 +1,38 @@
+/*
+ * L2TP-over-IP socket for L2TPv3.
+ *
+ * Author: James Chapman <jchapman@katalix.com>
+ */
+
+#ifndef _LINUX_L2TP_H_
+#define _LINUX_L2TP_H_
+
+#include <linux/types.h>
+#ifdef __KERNEL__
+#include <linux/socket.h>
+#include <linux/in.h>
+#endif
+
+#define IPPROTO_L2TP		115
+
+/**
+ * struct sockaddr_l2tpip - the sockaddr structure for L2TP-over-IP sockets
+ * @l2tp_family:  address family number AF_L2TPIP.
+ * @l2tp_addr:    protocol specific address information
+ * @l2tp_conn_id: connection id of tunnel
+ */
+struct sockaddr_l2tpip {
+	/* The first fields must match struct sockaddr_in */
+	sa_family_t	l2tp_family;	/* AF_INET */
+	__be16		l2tp_unused;	/* INET port number (unused) */
+	struct in_addr	l2tp_addr;	/* Internet address */
+
+	__u32		l2tp_conn_id;	/* Connection ID of tunnel */
+
+	/* Pad to size of `struct sockaddr'. */
+	unsigned char	__pad[sizeof(struct sockaddr) - sizeof(sa_family_t) -
+			      sizeof(__be16) - sizeof(struct in_addr) -
+			      sizeof(__u32)];
+};
+
+#endif
diff --git a/net/l2tp/Kconfig b/net/l2tp/Kconfig
index d60758d60478..0a11ccf2d95b 100644
--- a/net/l2tp/Kconfig
+++ b/net/l2tp/Kconfig
@@ -51,3 +51,20 @@ config L2TP_V3
 	  If you are connecting to L2TPv3 equipment, or you want to
 	  tunnel raw ethernet frames using L2TP, say Y here. If
 	  unsure, say N.
+
+config L2TP_IP
+	tristate "L2TP IP encapsulation for L2TPv3"
+	depends on L2TP_V3
+	help
+	  Support for L2TP-over-IP socket family.
+
+	  The L2TPv3 protocol defines two possible encapsulations for
+	  L2TP frames, namely UDP and plain IP (without UDP). This
+	  driver provides a new L2TPIP socket family with which
+	  userspace L2TPv3 daemons may create L2TP/IP tunnel sockets
+	  when UDP encapsulation is not required. When L2TP is carried
+	  in IP packets, it uses IP protocol number 115, so this protocol
+	  must be enabled in firewalls.
+
+	  To compile this driver as a module, choose M here. The module
+	  will be called l2tp_ip.
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index c91f208b1693..ef28b16f7d6a 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -6,3 +6,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
 
 # Build l2tp as modules if L2TP is M
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 0eee1a65f1b1..1739d04367e4 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -36,8 +36,10 @@
 #include <linux/inetdevice.h>
 #include <linux/skbuff.h>
 #include <linux/init.h>
+#include <linux/in.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
+#include <linux/l2tp.h>
 #include <linux/hash.h>
 #include <linux/sort.h>
 #include <linux/file.h>
@@ -48,6 +50,7 @@
 #include <net/ip.h>
 #include <net/udp.h>
 #include <net/xfrm.h>
+#include <net/protocol.h>
 
 #include <asm/byteorder.h>
 #include <asm/atomic.h>
@@ -849,15 +852,21 @@ static int l2tp_build_l2tpv2_header(struct l2tp_session *session, void *buf)
 
 static int l2tp_build_l2tpv3_header(struct l2tp_session *session, void *buf)
 {
+	struct l2tp_tunnel *tunnel = session->tunnel;
 	char *bufp = buf;
 	char *optr = bufp;
-	u16 flags = L2TP_HDR_VER_3;
 
-	/* Setup L2TP header. */
-	*((__be16 *) bufp) = htons(flags);
-	bufp += 2;
-	*((__be16 *) bufp) = 0;
-	bufp += 2;
+	/* Setup L2TP header. The header differs slightly for UDP and
+	 * IP encapsulations. For UDP, there are 4 bytes of flags.
+	 */
+	if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
+		u16 flags = L2TP_HDR_VER_3;
+		*((__be16 *) bufp) = htons(flags);
+		bufp += 2;
+		*((__be16 *) bufp) = 0;
+		bufp += 2;
+	}
+
 	*((__be32 *) bufp) = htonl(session->peer_session_id);
 	bufp += 4;
 	if (session->cookie_len) {
@@ -902,10 +911,11 @@ int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, size_t dat
 
 	if (session->debug & L2TP_MSG_DATA) {
 		int i;
-		unsigned char *datap = skb->data + sizeof(struct udphdr);
+		int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+		unsigned char *datap = skb->data + uhlen;
 
 		printk(KERN_DEBUG "%s: xmit:", session->name);
-		for (i = 0; i < (len - sizeof(struct udphdr)); i++) {
+		for (i = 0; i < (len - uhlen); i++) {
 			printk(" %02X", *datap++);
 			if (i == 31) {
 				printk(" ...");
@@ -956,21 +966,23 @@ static inline void l2tp_skb_set_owner_w(struct sk_buff *skb, struct sock *sk)
 int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len)
 {
 	int data_len = skb->len;
-	struct sock *sk = session->tunnel->sock;
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct sock *sk = tunnel->sock;
 	struct udphdr *uh;
-	unsigned int udp_len;
 	struct inet_sock *inet;
 	__wsum csum;
 	int old_headroom;
 	int new_headroom;
 	int headroom;
+	int uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+	int udp_len;
 
 	/* Check that there's enough headroom in the skb to insert IP,
 	 * UDP and L2TP headers. If not enough, expand it to
 	 * make room. Adjust truesize.
 	 */
 	headroom = NET_SKB_PAD + sizeof(struct iphdr) +
-		sizeof(struct udphdr) + hdr_len;
+		uhlen + hdr_len;
 	old_headroom = skb_headroom(skb);
 	if (skb_cow_head(skb, headroom))
 		goto abort;
@@ -981,18 +993,8 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 
 	/* Setup L2TP header */
 	session->build_header(session, __skb_push(skb, hdr_len));
-	udp_len = sizeof(struct udphdr) + hdr_len + data_len;
-
-	/* Setup UDP header */
-	inet = inet_sk(sk);
-	__skb_push(skb, sizeof(*uh));
-	skb_reset_transport_header(skb);
-	uh = udp_hdr(skb);
-	uh->source = inet->inet_sport;
-	uh->dest = inet->inet_dport;
-	uh->len = htons(udp_len);
-	uh->check = 0;
 
+	/* Reset skb netfilter state */
 	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
 	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
 			      IPSKB_REROUTED);
@@ -1001,29 +1003,48 @@ int l2tp_xmit_skb(struct l2tp_session *session, struct sk_buff *skb, int hdr_len
 	/* Get routing info from the tunnel socket */
 	skb_dst_drop(skb);
 	skb_dst_set(skb, dst_clone(__sk_dst_get(sk)));
-	l2tp_skb_set_owner_w(skb, sk);
 
-	/* Calculate UDP checksum if configured to do so */
-	if (sk->sk_no_check == UDP_CSUM_NOXMIT)
-		skb->ip_summed = CHECKSUM_NONE;
-	else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
-		 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
-		skb->ip_summed = CHECKSUM_COMPLETE;
-		csum = skb_checksum(skb, 0, udp_len, 0);
-		uh->check = csum_tcpudp_magic(inet->inet_saddr,
-					      inet->inet_daddr,
-					      udp_len, IPPROTO_UDP, csum);
-		if (uh->check == 0)
-			uh->check = CSUM_MANGLED_0;
-	} else {
-		skb->ip_summed = CHECKSUM_PARTIAL;
-		skb->csum_start = skb_transport_header(skb) - skb->head;
-		skb->csum_offset = offsetof(struct udphdr, check);
-		uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
-					       inet->inet_daddr,
-					       udp_len, IPPROTO_UDP, 0);
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		/* Setup UDP header */
+		inet = inet_sk(sk);
+		__skb_push(skb, sizeof(*uh));
+		skb_reset_transport_header(skb);
+		uh = udp_hdr(skb);
+		uh->source = inet->inet_sport;
+		uh->dest = inet->inet_dport;
+		udp_len = uhlen + hdr_len + data_len;
+		uh->len = htons(udp_len);
+		uh->check = 0;
+
+		/* Calculate UDP checksum if configured to do so */
+		if (sk->sk_no_check == UDP_CSUM_NOXMIT)
+			skb->ip_summed = CHECKSUM_NONE;
+		else if ((skb_dst(skb) && skb_dst(skb)->dev) &&
+			 (!(skb_dst(skb)->dev->features & NETIF_F_V4_CSUM))) {
+			skb->ip_summed = CHECKSUM_COMPLETE;
+			csum = skb_checksum(skb, 0, udp_len, 0);
+			uh->check = csum_tcpudp_magic(inet->inet_saddr,
+						      inet->inet_daddr,
+						      udp_len, IPPROTO_UDP, csum);
+			if (uh->check == 0)
+				uh->check = CSUM_MANGLED_0;
+		} else {
+			skb->ip_summed = CHECKSUM_PARTIAL;
+			skb->csum_start = skb_transport_header(skb) - skb->head;
+			skb->csum_offset = offsetof(struct udphdr, check);
+			uh->check = ~csum_tcpudp_magic(inet->inet_saddr,
+						       inet->inet_daddr,
+						       udp_len, IPPROTO_UDP, 0);
+		}
+		break;
+
+	case L2TP_ENCAPTYPE_IP:
+		break;
 	}
 
+	l2tp_skb_set_owner_w(skb, sk);
+
 	l2tp_xmit_core(session, skb, data_len);
 
 abort:
@@ -1053,9 +1074,15 @@ void l2tp_tunnel_destruct(struct sock *sk)
 	/* Close all sessions */
 	l2tp_tunnel_closeall(tunnel);
 
-	/* No longer an encapsulation socket. See net/ipv4/udp.c */
-	(udp_sk(sk))->encap_type = 0;
-	(udp_sk(sk))->encap_rcv = NULL;
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		/* No longer an encapsulation socket. See net/ipv4/udp.c */
+		(udp_sk(sk))->encap_type = 0;
+		(udp_sk(sk))->encap_rcv = NULL;
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		break;
+	}
 
 	/* Remove hooks into tunnel socket */
 	tunnel->sock = NULL;
@@ -1168,6 +1195,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	struct socket *sock = NULL;
 	struct sock *sk = NULL;
 	struct l2tp_net *pn;
+	enum l2tp_encap_type encap = L2TP_ENCAPTYPE_UDP;
 
 	/* Get the tunnel socket from the fd, which was opened by
 	 * the userspace L2TP daemon.
@@ -1182,18 +1210,27 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 
 	sk = sock->sk;
 
+	if (cfg != NULL)
+		encap = cfg->encap;
+
 	/* Quick sanity checks */
-	err = -EPROTONOSUPPORT;
-	if (sk->sk_protocol != IPPROTO_UDP) {
-		printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
-		       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
-		goto err;
-	}
-	err = -EAFNOSUPPORT;
-	if (sock->ops->family != AF_INET) {
-		printk(KERN_ERR "tunl %hu: fd %d wrong family, got %d, expected %d\n",
-		       tunnel_id, fd, sock->ops->family, AF_INET);
-		goto err;
+	switch (encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		err = -EPROTONOSUPPORT;
+		if (sk->sk_protocol != IPPROTO_UDP) {
+			printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+			       tunnel_id, fd, sk->sk_protocol, IPPROTO_UDP);
+			goto err;
+		}
+		break;
+	case L2TP_ENCAPTYPE_IP:
+		err = -EPROTONOSUPPORT;
+		if (sk->sk_protocol != IPPROTO_L2TP) {
+			printk(KERN_ERR "tunl %hu: fd %d wrong protocol, got %d, expected %d\n",
+			       tunnel_id, fd, sk->sk_protocol, IPPROTO_L2TP);
+			goto err;
+		}
+		break;
 	}
 
 	/* Check if this socket has already been prepped */
@@ -1223,12 +1260,16 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32
 	tunnel->l2tp_net = net;
 	pn = l2tp_pernet(net);
 
-	if (cfg)
+	if (cfg != NULL)
 		tunnel->debug = cfg->debug;
 
 	/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
-	udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
-	udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+	tunnel->encap = encap;
+	if (encap == L2TP_ENCAPTYPE_UDP) {
+		/* Mark socket as an encapsulation socket. See net/ipv4/udp.c */
+		udp_sk(sk)->encap_type = UDP_ENCAP_L2TPINUDP;
+		udp_sk(sk)->encap_rcv = l2tp_udp_encap_recv;
+	}
 
 	sk->sk_user_data = tunnel;
 
@@ -1318,7 +1359,9 @@ void l2tp_session_set_header_len(struct l2tp_session *session, int version)
 		if (session->send_seq)
 			session->hdr_len += 4;
 	} else {
-		session->hdr_len = 8 + session->cookie_len + session->l2specific_len + session->offset;
+		session->hdr_len = 4 + session->cookie_len + session->l2specific_len + session->offset;
+		if (session->tunnel->encap == L2TP_ENCAPTYPE_UDP)
+			session->hdr_len += 4;
 	}
 
 }
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index 5c53eb2a8ad9..d2395984645e 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -48,6 +48,11 @@ enum l2tp_l2spec_type {
 	L2TP_L2SPECTYPE_DEFAULT,
 };
 
+enum l2tp_encap_type {
+	L2TP_ENCAPTYPE_UDP,
+	L2TP_ENCAPTYPE_IP,
+};
+
 struct sk_buff;
 
 struct l2tp_stats {
@@ -155,6 +160,7 @@ struct l2tp_session {
 struct l2tp_tunnel_cfg {
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	enum l2tp_encap_type	encap;
 };
 
 struct l2tp_tunnel {
@@ -170,6 +176,7 @@ struct l2tp_tunnel {
 	char			name[20];	/* for logging */
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	enum l2tp_encap_type	encap;
 	struct l2tp_stats	stats;
 
 	struct list_head	list;		/* Keep a list of all tunnels */
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
new file mode 100644
index 000000000000..75bf784ba18d
--- /dev/null
+++ b/net/l2tp/l2tp_ip.c
@@ -0,0 +1,679 @@
+/*
+ * L2TPv3 IP encapsulation support
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	as published by the Free Software Foundation; either version
+ *	2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+#include <linux/socket.h>
+#include <linux/l2tp.h>
+#include <linux/in.h>
+#include <net/sock.h>
+#include <net/ip.h>
+#include <net/icmp.h>
+#include <net/udp.h>
+#include <net/inet_common.h>
+#include <net/inet_hashtables.h>
+#include <net/tcp_states.h>
+#include <net/protocol.h>
+#include <net/xfrm.h>
+
+#include "l2tp_core.h"
+
+struct l2tp_ip_sock {
+	/* inet_sock has to be the first member of l2tp_ip_sock */
+	struct inet_sock	inet;
+
+	__u32			conn_id;
+	__u32			peer_conn_id;
+
+	__u64			tx_packets;
+	__u64			tx_bytes;
+	__u64			tx_errors;
+	__u64			rx_packets;
+	__u64			rx_bytes;
+	__u64			rx_errors;
+};
+
+static DEFINE_RWLOCK(l2tp_ip_lock);
+static struct hlist_head l2tp_ip_table;
+static struct hlist_head l2tp_ip_bind_table;
+
+static inline struct l2tp_ip_sock *l2tp_ip_sk(const struct sock *sk)
+{
+	return (struct l2tp_ip_sock *)sk;
+}
+
+static struct sock *__l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+	struct hlist_node *node;
+	struct sock *sk;
+
+	sk_for_each_bound(sk, node, &l2tp_ip_bind_table) {
+		struct inet_sock *inet = inet_sk(sk);
+		struct l2tp_ip_sock *l2tp = l2tp_ip_sk(sk);
+
+		/* Match on connection id and namespace. A socket with
+		 * no bound address or device matches any laddr or dif.
+		 * net_eq() is always true without CONFIG_NET_NS, so no
+		 * #ifdef is needed here.
+		 */
+		if ((l2tp->conn_id == tunnel_id) &&
+		    net_eq(sock_net(sk), net) &&
+		    !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) &&
+		    !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+			goto found;
+	}
+
+	sk = NULL;
+found:
+	return sk;
+}
+
+static inline struct sock *l2tp_ip_bind_lookup(struct net *net, __be32 laddr, int dif, u32 tunnel_id)
+{
+	struct sock *sk = __l2tp_ip_bind_lookup(net, laddr, dif, tunnel_id);
+	if (sk)
+		sock_hold(sk);
+
+	return sk;
+}
+
+/* When processing receive frames, there are two cases to
+ * consider. Data frames consist of a non-zero session-id and an
+ * optional cookie. Control frames consist of a regular L2TP header
+ * preceded by 32-bits of zeros.
+ *
+ * L2TPv3 Session Header Over IP
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                           Session ID                          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |               Cookie (optional, maximum 64 bits)...
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *                                                                 |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * L2TPv3 Control Message Header Over IP
+ *
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                      (32 bits of zeros)                       |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |T|L|x|x|S|x|x|x|x|x|x|x|  Ver  |             Length            |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                     Control Connection ID                     |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |               Ns              |               Nr              |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ * All control frames are passed to userspace.
+ */
+static int l2tp_ip_recv(struct sk_buff *skb)
+{
+	struct sock *sk;
+	u32 session_id;
+	u32 tunnel_id;
+	unsigned char *ptr, *optr;
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel = NULL;
+	int length;
+	int offset;
+
+	if (!pskb_may_pull(skb, 4))
+		goto discard;
+
+	/* Point to L2TP header; must follow the pull, which may move it */
+	optr = ptr = skb->data;
+
+	session_id = ntohl(*((__be32 *) ptr));
+	ptr += 4;
+
+	/* RFC3931: L2TP/IP packets have the first 4 bytes containing
+	 * the session_id. If it is 0, the packet is a L2TP control
+	 * frame and the session_id value can be discarded.
+	 */
+	if (session_id == 0) {
+		__skb_pull(skb, 4);
+		goto pass_up;
+	}
+
+	/* Ok, this is a data packet. Lookup the session. */
+	session = l2tp_session_find(&init_net, NULL, session_id);
+	if (session == NULL)
+		goto discard;
+
+	tunnel = session->tunnel;
+	if (tunnel == NULL)
+		goto discard;
+
+	/* Trace packet contents, if enabled */
+	if (tunnel->debug & L2TP_MSG_DATA) {
+		/* Dump at most 32 bytes following the session id */
+		length = min(32u, skb->len - 4);
+		if (!pskb_may_pull(skb, length + 4))
+			goto discard;
+
+		/* The pull may have moved the header; re-point at it */
+		optr = skb->data;
+		ptr = optr + 4;
+		printk(KERN_DEBUG "%s: ip recv: ", tunnel->name);
+		for (offset = 0; offset < length; offset++)
+			printk(" %02X", ptr[offset]);
+		printk("\n");
+	}
+
+	l2tp_recv_common(session, skb, ptr, optr, 0, skb->len, tunnel->recv_payload_hook);
+
+	return 0;
+
+pass_up:
+	/* Get the tunnel_id from the L2TP header */
+	if (!pskb_may_pull(skb, 12))
+		goto discard;
+
+	if ((skb->data[0] & 0xc0) != 0xc0)
+		goto discard;
+
+	tunnel_id = ntohl(*(__be32 *) &skb->data[4]);
+	tunnel = l2tp_tunnel_find(&init_net, tunnel_id);
+	if (tunnel != NULL)
+		sk = tunnel->sock;
+	else {
+		struct iphdr *iph = (struct iphdr *) skb_network_header(skb);
+
+		read_lock_bh(&l2tp_ip_lock);
+		sk = __l2tp_ip_bind_lookup(&init_net, iph->daddr, 0, tunnel_id);
+		read_unlock_bh(&l2tp_ip_lock);
+	}
+
+	if (sk == NULL)
+		goto discard;
+
+	sock_hold(sk);
+
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto discard_put;
+
+	nf_reset(skb);
+
+	return sk_receive_skb(sk, skb, 1);
+
+discard_put:
+	sock_put(sk);
+
+discard:
+	kfree_skb(skb);
+	return 0;
+}
+
+static int l2tp_ip_open(struct sock *sk)
+{
+	/* Prevent autobind. We don't have ports. */
+	inet_sk(sk)->inet_num = IPPROTO_L2TP;
+
+	write_lock_bh(&l2tp_ip_lock);
+	sk_add_node(sk, &l2tp_ip_table);
+	write_unlock_bh(&l2tp_ip_lock);
+
+	return 0;
+}
+
+static void l2tp_ip_close(struct sock *sk, long timeout)
+{
+	write_lock_bh(&l2tp_ip_lock);
+	hlist_del_init(&sk->sk_bind_node);
+	hlist_del_init(&sk->sk_node);
+	write_unlock_bh(&l2tp_ip_lock);
+	sk_common_release(sk);
+}
+
+static void l2tp_ip_destroy_sock(struct sock *sk)
+{
+	struct sk_buff *skb;
+
+	while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL)
+		kfree_skb(skb);
+
+	sk_refcnt_debug_dec(sk);
+}
+
+static int l2tp_ip_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_l2tpip *addr = (struct sockaddr_l2tpip *) uaddr;
+	int ret;
+	int chk_addr_ret;
+
+	/* Check the length before any field of addr is read */
+	if (addr_len < sizeof(struct sockaddr_l2tpip))
+		return -EINVAL;
+
+	read_lock_bh(&l2tp_ip_lock);
+	if (__l2tp_ip_bind_lookup(&init_net, addr->l2tp_addr.s_addr, sk->sk_bound_dev_if, addr->l2tp_conn_id)) {
+		read_unlock_bh(&l2tp_ip_lock);
+		return -EADDRINUSE;
+	}
+	read_unlock_bh(&l2tp_ip_lock);
+
+	lock_sock(sk);
+	ret = -EINVAL;	/* only a closed socket may be bound */
+	if (sk->sk_state != TCP_CLOSE)
+		goto out;
+
+	chk_addr_ret = inet_addr_type(&init_net, addr->l2tp_addr.s_addr);
+	ret = -EADDRNOTAVAIL;
+	if (addr->l2tp_addr.s_addr && chk_addr_ret != RTN_LOCAL &&
+	    chk_addr_ret != RTN_MULTICAST && chk_addr_ret != RTN_BROADCAST)
+		goto out;
+
+	inet->inet_rcv_saddr = inet->inet_saddr = addr->l2tp_addr.s_addr;
+	if (chk_addr_ret == RTN_MULTICAST || chk_addr_ret == RTN_BROADCAST)
+		inet->inet_saddr = 0;  /* Use device */
+	sk_dst_reset(sk);
+
+	l2tp_ip_sk(sk)->conn_id = addr->l2tp_conn_id;
+
+	write_lock_bh(&l2tp_ip_lock);
+	sk_add_bind_node(sk, &l2tp_ip_bind_table);
+	sk_del_node_init(sk);
+	write_unlock_bh(&l2tp_ip_lock);
+	ret = 0;
+out:
+	release_sock(sk);
+
+	return ret;
+}
+
+static int l2tp_ip_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+{
+	int rc;
+	struct inet_sock *inet = inet_sk(sk);
+	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *) uaddr;
+	struct rtable *rt;
+	__be32 saddr;
+	int oif;
+
+	rc = -EINVAL;
+	if (addr_len < sizeof(*lsa))
+		goto out;
+
+	rc = -EAFNOSUPPORT;
+	if (lsa->l2tp_family != AF_INET)
+		goto out;
+
+	sk_dst_reset(sk);
+
+	oif = sk->sk_bound_dev_if;
+	saddr = inet->inet_saddr;
+
+	rc = -EINVAL;
+	if (ipv4_is_multicast(lsa->l2tp_addr.s_addr))
+		goto out;
+
+	rc = ip_route_connect(&rt, lsa->l2tp_addr.s_addr, saddr,
+			      RT_CONN_FLAGS(sk), oif,
+			      IPPROTO_L2TP,
+			      0, 0, sk, 1);
+	if (rc) {
+		if (rc == -ENETUNREACH)
+			IP_INC_STATS_BH(&init_net, IPSTATS_MIB_OUTNOROUTES);
+		goto out;
+	}
+
+	rc = -ENETUNREACH;
+	if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+		ip_rt_put(rt);
+		goto out;
+	}
+
+	l2tp_ip_sk(sk)->peer_conn_id = lsa->l2tp_conn_id;
+
+	if (!inet->inet_saddr)
+		inet->inet_saddr = rt->rt_src;
+	if (!inet->inet_rcv_saddr)
+		inet->inet_rcv_saddr = rt->rt_src;
+	inet->inet_daddr = rt->rt_dst;
+	sk->sk_state = TCP_ESTABLISHED;
+	inet->inet_id = jiffies;
+
+	sk_dst_set(sk, &rt->u.dst);
+
+	write_lock_bh(&l2tp_ip_lock);
+	hlist_del_init(&sk->sk_bind_node);
+	sk_add_bind_node(sk, &l2tp_ip_bind_table);
+	write_unlock_bh(&l2tp_ip_lock);
+
+	rc = 0;
+out:
+	return rc;
+}
+
+static int l2tp_ip_getname(struct socket *sock, struct sockaddr *uaddr,
+			   int *uaddr_len, int peer)
+{
+	struct sock *sk		= sock->sk;
+	struct inet_sock *inet	= inet_sk(sk);
+	struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+	struct sockaddr_l2tpip *lsa = (struct sockaddr_l2tpip *)uaddr;
+
+	memset(lsa, 0, sizeof(*lsa));
+	lsa->l2tp_family = AF_INET;
+	if (peer) {
+		if (!inet->inet_dport)
+			return -ENOTCONN;
+		lsa->l2tp_conn_id = lsk->peer_conn_id;
+		lsa->l2tp_addr.s_addr = inet->inet_daddr;
+	} else {
+		__be32 addr = inet->inet_rcv_saddr;
+		if (!addr)
+			addr = inet->inet_saddr;
+		lsa->l2tp_conn_id = lsk->conn_id;
+		lsa->l2tp_addr.s_addr = addr;
+	}
+	*uaddr_len = sizeof(*lsa);
+	return 0;
+}
+
+static int l2tp_ip_backlog_recv(struct sock *sk, struct sk_buff *skb)
+{
+	int rc;
+
+	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+		goto drop;
+
+	nf_reset(skb);
+
+	/* Charge it to the socket, dropping if the queue is full. */
+	rc = sock_queue_rcv_skb(sk, skb);
+	if (rc < 0)
+		goto drop;
+
+	return 0;
+
+drop:
+	IP_INC_STATS(&init_net, IPSTATS_MIB_INDISCARDS);
+	kfree_skb(skb);
+	return -1;
+}
+
+/* Userspace will call sendmsg() on the tunnel socket to send L2TP
+ * control frames.
+ */
+static int l2tp_ip_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len)
+{
+	struct sk_buff *skb;
+	int rc;
+	struct l2tp_ip_sock *lsa = l2tp_ip_sk(sk);
+	struct inet_sock *inet = inet_sk(sk);
+	struct ip_options *opt = inet->opt;
+	struct rtable *rt = NULL;
+	int connected = 0;
+	__be32 daddr;
+
+	if (sock_flag(sk, SOCK_DEAD))
+		return -ENOTCONN;
+
+	/* Get and verify the address. */
+	if (msg->msg_name) {
+		struct sockaddr_l2tpip *lip = (struct sockaddr_l2tpip *) msg->msg_name;
+		if (msg->msg_namelen < sizeof(*lip))
+			return -EINVAL;
+
+		if (lip->l2tp_family != AF_INET) {
+			if (lip->l2tp_family != AF_UNSPEC)
+				return -EAFNOSUPPORT;
+		}
+
+		daddr = lip->l2tp_addr.s_addr;
+	} else {
+		if (sk->sk_state != TCP_ESTABLISHED)
+			return -EDESTADDRREQ;
+
+		daddr = inet->inet_daddr;
+		connected = 1;
+	}
+
+	/* Allocate a socket buffer */
+	rc = -ENOMEM;
+	skb = sock_wmalloc(sk, 2 + NET_SKB_PAD + sizeof(struct iphdr) +
+			   4 + len, 0, GFP_KERNEL);
+	if (!skb)
+		goto error;
+
+	/* Reserve space for headers, putting IP header on 4-byte boundary. */
+	skb_reserve(skb, 2 + NET_SKB_PAD);
+	skb_reset_network_header(skb);
+	skb_reserve(skb, sizeof(struct iphdr));
+	skb_reset_transport_header(skb);
+
+	/* Insert 0 session_id */
+	*((__be32 *) skb_put(skb, 4)) = 0;
+
+	/* Copy user data into skb */
+	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+	if (rc < 0) {
+		kfree_skb(skb);
+		goto error;
+	}
+
+	if (connected)
+		rt = (struct rtable *) __sk_dst_check(sk, 0);
+
+	if (rt == NULL) {
+		/* Use correct destination address if we have options. */
+		if (opt && opt->srr)
+			daddr = opt->faddr;
+
+		{
+			struct flowi fl = { .oif = sk->sk_bound_dev_if,
+					    .nl_u = { .ip4_u = {
+							.daddr = daddr,
+							.saddr = inet->inet_saddr,
+							.tos = RT_CONN_FLAGS(sk) } },
+					    .proto = sk->sk_protocol,
+					    .flags = inet_sk_flowi_flags(sk),
+					    .uli_u = { .ports = {
+							 .sport = inet->inet_sport,
+							 .dport = inet->inet_dport } } };
+
+			/* If this fails, retransmit mechanism of transport layer will
+			 * keep trying until route appears or the connection times
+			 * itself out.
+			 */
+			security_sk_classify_flow(sk, &fl);
+			if (ip_route_output_flow(sock_net(sk), &rt, &fl, sk, 0))
+				goto no_route;
+		}
+		sk_setup_caps(sk, &rt->u.dst);
+	}
+	skb_dst_set(skb, dst_clone(&rt->u.dst));
+
+	/* Queue the packet to IP for output */
+	rc = ip_queue_xmit(skb, 0);
+
+error:
+	/* Update stats */
+	if (rc >= 0) {
+		lsa->tx_packets++;
+		lsa->tx_bytes += len;
+		rc = len;
+	} else {
+		lsa->tx_errors++;
+	}
+
+	return rc;
+
+no_route:
+	IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
+	kfree_skb(skb);
+	return -EHOSTUNREACH;
+}
+
+static int l2tp_ip_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+			   size_t len, int noblock, int flags, int *addr_len)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct l2tp_ip_sock *lsk = l2tp_ip_sk(sk);
+	size_t copied = 0;
+	int err = -EOPNOTSUPP;
+	struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
+	struct sk_buff *skb;
+
+	if (flags & MSG_OOB)
+		goto out;
+
+	if (addr_len)
+		*addr_len = sizeof(*sin);
+
+	skb = skb_recv_datagram(sk, flags, noblock, &err);
+	if (!skb)
+		goto out;
+
+	copied = skb->len;
+	if (len < copied) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto done;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	/* Copy the address. */
+	if (sin) {
+		sin->sin_family = AF_INET;
+		sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
+		sin->sin_port = 0;
+		memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
+	}
+	if (inet->cmsg_flags)
+		ip_cmsg_recv(msg, skb);
+	if (flags & MSG_TRUNC)
+		copied = skb->len;
+done:
+	skb_free_datagram(sk, skb);
+out:
+	if (err) {
+		lsk->rx_errors++;
+		return err;
+	}
+
+	lsk->rx_packets++;
+	lsk->rx_bytes += copied;
+
+	return copied;
+}
+
+struct proto l2tp_ip_prot = {
+	.name		   = "L2TP/IP",
+	.owner		   = THIS_MODULE,
+	.init		   = l2tp_ip_open,
+	.close		   = l2tp_ip_close,
+	.bind		   = l2tp_ip_bind,
+	.connect	   = l2tp_ip_connect,
+	.disconnect	   = udp_disconnect,
+	.ioctl		   = udp_ioctl,
+	.destroy	   = l2tp_ip_destroy_sock,
+	.setsockopt	   = ip_setsockopt,
+	.getsockopt	   = ip_getsockopt,
+	.sendmsg	   = l2tp_ip_sendmsg,
+	.recvmsg	   = l2tp_ip_recvmsg,
+	.backlog_rcv	   = l2tp_ip_backlog_recv,
+	.hash		   = inet_hash,
+	.unhash		   = inet_unhash,
+	.obj_size	   = sizeof(struct l2tp_ip_sock),
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_ip_setsockopt,
+	.compat_getsockopt = compat_ip_getsockopt,
+#endif
+};
+
+static const struct proto_ops l2tp_ip_ops = {
+	.family		   = PF_INET,
+	.owner		   = THIS_MODULE,
+	.release	   = inet_release,
+	.bind		   = inet_bind,
+	.connect	   = inet_dgram_connect,
+	.socketpair	   = sock_no_socketpair,
+	.accept		   = sock_no_accept,
+	.getname	   = l2tp_ip_getname,
+	.poll		   = datagram_poll,
+	.ioctl		   = inet_ioctl,
+	.listen		   = sock_no_listen,
+	.shutdown	   = inet_shutdown,
+	.setsockopt	   = sock_common_setsockopt,
+	.getsockopt	   = sock_common_getsockopt,
+	.sendmsg	   = inet_sendmsg,
+	.recvmsg	   = sock_common_recvmsg,
+	.mmap		   = sock_no_mmap,
+	.sendpage	   = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+	.compat_setsockopt = compat_sock_common_setsockopt,
+	.compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw l2tp_ip_protosw = {
+	.type		= SOCK_DGRAM,
+	.protocol	= IPPROTO_L2TP,
+	.prot		= &l2tp_ip_prot,
+	.ops		= &l2tp_ip_ops,
+	.no_check	= 0,
+};
+
+static struct net_protocol l2tp_ip_protocol __read_mostly = {
+	.handler	= l2tp_ip_recv,
+};
+
+static int __init l2tp_ip_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "L2TP IP encapsulation support (L2TPv3)\n");
+
+	err = proto_register(&l2tp_ip_prot, 1);
+	if (err != 0)
+		goto out;
+
+	err = inet_add_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+	if (err)
+		goto out1;
+
+	inet_register_protosw(&l2tp_ip_protosw);
+	return 0;
+
+out1:
+	proto_unregister(&l2tp_ip_prot);
+out:
+	return err;
+}
+
+static void __exit l2tp_ip_exit(void)
+{
+	inet_unregister_protosw(&l2tp_ip_protosw);
+	inet_del_protocol(&l2tp_ip_protocol, IPPROTO_L2TP);
+	proto_unregister(&l2tp_ip_prot);
+}
+
+module_init(l2tp_ip_init);
+module_exit(l2tp_ip_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP over IP");
+MODULE_VERSION("1.0");
+MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, SOCK_DGRAM, IPPROTO_L2TP);
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index e5b531266541..63fc62baeeb9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -305,6 +305,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	struct l2tp_session *session;
 	struct l2tp_tunnel *tunnel;
 	struct pppol2tp_session *ps;
+	int uhlen;
 
 	error = -ENOTCONN;
 	if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED))
@@ -321,10 +322,12 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	if (tunnel == NULL)
 		goto error_put_sess;
 
+	uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0;
+
 	/* Allocate a socket buffer */
 	error = -ENOMEM;
 	skb = sock_wmalloc(sk, NET_SKB_PAD + sizeof(struct iphdr) +
-			   sizeof(struct udphdr) + session->hdr_len +
+			   uhlen + session->hdr_len +
 			   sizeof(ppph) + total_len,
 			   0, GFP_KERNEL);
 	if (!skb)
@@ -335,7 +338,7 @@ static int pppol2tp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msgh
 	skb_reset_network_header(skb);
 	skb_reserve(skb, sizeof(struct iphdr));
 	skb_reset_transport_header(skb);
-	skb_reserve(skb, sizeof(struct udphdr));
+	skb_reserve(skb, uhlen);
 
 	/* Add PPP header */
 	skb->data[0] = ppph[0];
-- 
cgit v1.2.3


From f408e0ce40270559ef80f231843c93baa9947bc5 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:05 +0000
Subject: netlink: Export genl_lock() API for use by modules

This lets kernel modules which use genl netlink APIs serialize netlink
processing.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/genetlink.h | 8 ++++++++
 net/netlink/genetlink.c   | 6 ++++--
 2 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h
index b834ef6d59fa..61549b26ad6f 100644
--- a/include/linux/genetlink.h
+++ b/include/linux/genetlink.h
@@ -80,4 +80,12 @@ enum {
 
 #define CTRL_ATTR_MCAST_GRP_MAX (__CTRL_ATTR_MCAST_GRP_MAX - 1)
 
+#ifdef __KERNEL__
+
+/* All generic netlink requests are serialized by a global lock.  */
+extern void genl_lock(void);
+extern void genl_unlock(void);
+
+#endif /* __KERNEL__ */
+
 #endif	/* __LINUX_GENERIC_NETLINK_H */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index a4b6e148c5de..a28fda7420d9 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -20,15 +20,17 @@
 
 static DEFINE_MUTEX(genl_mutex); /* serialization of message processing */
 
-static inline void genl_lock(void)
+void genl_lock(void)
 {
 	mutex_lock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_lock);
 
-static inline void genl_unlock(void)
+void genl_unlock(void)
 {
 	mutex_unlock(&genl_mutex);
 }
+EXPORT_SYMBOL(genl_unlock);
 
 #define GENL_FAM_TAB_SIZE	16
 #define GENL_FAM_TAB_MASK	(GENL_FAM_TAB_SIZE - 1)
-- 
cgit v1.2.3


From 309795f4bec2d69cd507a631f82065c2198a0825 Mon Sep 17 00:00:00 2001
From: James Chapman <jchapman@katalix.com>
Date: Fri, 2 Apr 2010 06:19:10 +0000
Subject: l2tp: Add netlink control API for L2TP

In L2TPv3, we need to create/delete/modify/query L2TP tunnel and
session contexts. The number of parameters is significant. So let's
use netlink. Userspace uses this API to control L2TP tunnel/session
contexts in the kernel.

The previous pppol2tp driver was managed using [gs]etsockopt(). This
API is retained for backwards compatibility. Unlike L2TPv2 which
carries only PPP frames, L2TPv3 can carry raw ethernet frames or other
frame types and these do not always have an associated socket
family. Therefore, we need a way to use L2TP sessions that doesn't
require a socket type for each supported frame type. Hence netlink is
used.

Signed-off-by: James Chapman <jchapman@katalix.com>
Reviewed-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/l2tp.h    | 125 ++++++++
 net/l2tp/Makefile       |   1 +
 net/l2tp/l2tp_core.c    |  61 +++-
 net/l2tp/l2tp_core.h    |  34 +-
 net/l2tp/l2tp_netlink.c | 830 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/l2tp/l2tp_ppp.c     | 162 ++++++++--
 6 files changed, 1169 insertions(+), 44 deletions(-)
 create mode 100644 net/l2tp/l2tp_netlink.c

(limited to 'include/linux')

diff --git a/include/linux/l2tp.h b/include/linux/l2tp.h
index deff7bca4e05..4bdb31df8e72 100644
--- a/include/linux/l2tp.h
+++ b/include/linux/l2tp.h
@@ -11,6 +11,8 @@
 #ifdef __KERNEL__
 #include <linux/socket.h>
 #include <linux/in.h>
+#else
+#include <netinet/in.h>
 #endif
 
 #define IPPROTO_L2TP		115
@@ -21,6 +23,7 @@
  * @l2tp_addr:    protocol specific address information
  * @l2tp_conn_id: connection id of tunnel
  */
+#define __SOCK_SIZE__	16		/* sizeof(struct sockaddr)	*/
 struct sockaddr_l2tpip {
 	/* The first fields must match struct sockaddr_in */
 	sa_family_t	l2tp_family;	/* AF_INET */
@@ -35,4 +38,126 @@ struct sockaddr_l2tpip {
 			      sizeof(__u32)];
 };
 
+/*****************************************************************************
+ *  NETLINK_GENERIC netlink family.
+ *****************************************************************************/
+
+/*
+ * Commands.
+ * Valid TLVs of each command are:-
+ * TUNNEL_CREATE	- CONN_ID, pw_type, netns, ifname, ipinfo, udpinfo, udpcsum, vlanid
+ * TUNNEL_DELETE	- CONN_ID
+ * TUNNEL_MODIFY	- CONN_ID, udpcsum
+ * TUNNEL_GETSTATS	- CONN_ID, (stats)
+ * TUNNEL_GET		- CONN_ID, (...)
+ * SESSION_CREATE	- SESSION_ID, PW_TYPE, offset, data_seq, cookie, peer_cookie, l2spec
+ * SESSION_DELETE	- SESSION_ID
+ * SESSION_MODIFY	- SESSION_ID, data_seq
+ * SESSION_GET		- SESSION_ID, (...)
+ * SESSION_GETSTATS	- SESSION_ID, (stats)
+ *
+ */
+enum {
+	L2TP_CMD_NOOP,
+	L2TP_CMD_TUNNEL_CREATE,
+	L2TP_CMD_TUNNEL_DELETE,
+	L2TP_CMD_TUNNEL_MODIFY,
+	L2TP_CMD_TUNNEL_GET,
+	L2TP_CMD_SESSION_CREATE,
+	L2TP_CMD_SESSION_DELETE,
+	L2TP_CMD_SESSION_MODIFY,
+	L2TP_CMD_SESSION_GET,
+	__L2TP_CMD_MAX,
+};
+
+#define L2TP_CMD_MAX			(__L2TP_CMD_MAX - 1)
+
+/*
+ * ATTR types defined for L2TP
+ */
+enum {
+	L2TP_ATTR_NONE,			/* no data */
+	L2TP_ATTR_PW_TYPE,		/* u16, enum l2tp_pwtype */
+	L2TP_ATTR_ENCAP_TYPE,		/* u16, enum l2tp_encap_type */
+	L2TP_ATTR_OFFSET,		/* u16 */
+	L2TP_ATTR_DATA_SEQ,		/* u16 */
+	L2TP_ATTR_L2SPEC_TYPE,		/* u8, enum l2tp_l2spec_type */
+	L2TP_ATTR_L2SPEC_LEN,		/* u8 */
+	L2TP_ATTR_PROTO_VERSION,	/* u8 */
+	L2TP_ATTR_IFNAME,		/* string */
+	L2TP_ATTR_CONN_ID,		/* u32 */
+	L2TP_ATTR_PEER_CONN_ID,		/* u32 */
+	L2TP_ATTR_SESSION_ID,		/* u32 */
+	L2TP_ATTR_PEER_SESSION_ID,	/* u32 */
+	L2TP_ATTR_UDP_CSUM,		/* u8 */
+	L2TP_ATTR_VLAN_ID,		/* u16 */
+	L2TP_ATTR_COOKIE,		/* 0, 4 or 8 bytes */
+	L2TP_ATTR_PEER_COOKIE,		/* 0, 4 or 8 bytes */
+	L2TP_ATTR_DEBUG,		/* u32 */
+	L2TP_ATTR_RECV_SEQ,		/* u8 */
+	L2TP_ATTR_SEND_SEQ,		/* u8 */
+	L2TP_ATTR_LNS_MODE,		/* u8 */
+	L2TP_ATTR_USING_IPSEC,		/* u8 */
+	L2TP_ATTR_RECV_TIMEOUT,		/* msec */
+	L2TP_ATTR_FD,			/* int */
+	L2TP_ATTR_IP_SADDR,		/* u32 */
+	L2TP_ATTR_IP_DADDR,		/* u32 */
+	L2TP_ATTR_UDP_SPORT,		/* u16 */
+	L2TP_ATTR_UDP_DPORT,		/* u16 */
+	L2TP_ATTR_MTU,			/* u16 */
+	L2TP_ATTR_MRU,			/* u16 */
+	L2TP_ATTR_STATS,		/* nested */
+	__L2TP_ATTR_MAX,
+};
+
+#define L2TP_ATTR_MAX			(__L2TP_ATTR_MAX - 1)
+
+/* Nested in L2TP_ATTR_STATS */
+enum {
+	L2TP_ATTR_STATS_NONE,		/* no data */
+	L2TP_ATTR_TX_PACKETS,		/* u64 */
+	L2TP_ATTR_TX_BYTES,		/* u64 */
+	L2TP_ATTR_TX_ERRORS,		/* u64 */
+	L2TP_ATTR_RX_PACKETS,		/* u64 */
+	L2TP_ATTR_RX_BYTES,		/* u64 */
+	L2TP_ATTR_RX_SEQ_DISCARDS,	/* u64 */
+	L2TP_ATTR_RX_OOS_PACKETS,	/* u64 */
+	L2TP_ATTR_RX_ERRORS,		/* u64 */
+	__L2TP_ATTR_STATS_MAX,
+};
+
+#define L2TP_ATTR_STATS_MAX		(__L2TP_ATTR_STATS_MAX - 1)
+
+enum l2tp_pwtype {
+	L2TP_PWTYPE_NONE = 0x0000,
+	L2TP_PWTYPE_ETH_VLAN = 0x0004,
+	L2TP_PWTYPE_ETH = 0x0005,
+	L2TP_PWTYPE_PPP = 0x0007,
+	L2TP_PWTYPE_PPP_AC = 0x0008,
+	L2TP_PWTYPE_IP = 0x000b,
+	__L2TP_PWTYPE_MAX
+};
+
+enum l2tp_l2spec_type {
+	L2TP_L2SPECTYPE_NONE,
+	L2TP_L2SPECTYPE_DEFAULT,
+};
+
+enum l2tp_encap_type {
+	L2TP_ENCAPTYPE_UDP,
+	L2TP_ENCAPTYPE_IP,
+};
+
+enum l2tp_seqmode {
+	L2TP_SEQ_NONE = 0,
+	L2TP_SEQ_IP = 1,
+	L2TP_SEQ_ALL = 2,
+};
+
+/*
+ * NETLINK_GENERIC related info
+ */
+#define L2TP_GENL_NAME		"l2tp"
+#define L2TP_GENL_VERSION	0x1
+
 #endif
diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile
index ef28b16f7d6a..2c4a14b673ab 100644
--- a/net/l2tp/Makefile
+++ b/net/l2tp/Makefile
@@ -7,3 +7,4 @@ obj-$(CONFIG_L2TP) += l2tp_core.o
 # Build l2tp as modules if L2TP is M
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_PPPOL2TP)) += l2tp_ppp.o
 obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_IP)) += l2tp_ip.o
+obj-$(subst y,$(CONFIG_L2TP),$(CONFIG_L2TP_V3)) += l2tp_netlink.o
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 1739d04367e4..fbd1f2119fe9 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -49,6 +49,7 @@
 #include <net/dst.h>
 #include <net/ip.h>
 #include <net/udp.h>
+#include <net/inet_common.h>
 #include <net/xfrm.h>
 #include <net/protocol.h>
 
@@ -214,6 +215,32 @@ struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_find_nth);
 
+/* Lookup a session by interface name.
+ * This is very inefficient but is only used by management interfaces.
+ */
+struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname)
+{
+	struct l2tp_net *pn = l2tp_pernet(net);
+	int hash;
+	struct hlist_node *walk;
+	struct l2tp_session *session;
+
+	read_lock_bh(&pn->l2tp_session_hlist_lock);
+	for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) {
+		hlist_for_each_entry(session, walk, &pn->l2tp_session_hlist[hash], global_hlist) {
+			if (!strcmp(session->ifname, ifname)) {
+				read_unlock_bh(&pn->l2tp_session_hlist_lock);
+				return session;
+			}
+		}
+	}
+
+	read_unlock_bh(&pn->l2tp_session_hlist_lock);
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_find_by_ifname);
+
 /* Lookup a tunnel by id
  */
 struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id)
@@ -758,7 +785,7 @@ int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb,
 
 	/* Find the session context */
 	session = l2tp_session_find(tunnel->l2tp_net, tunnel, session_id);
-	if (!session) {
+	if (!session || !session->recv_skb) {
 		/* Not found? Pass to userspace to deal with */
 		PRINTK(tunnel->debug, L2TP_MSG_DATA, KERN_INFO,
 		       "%s: no session found (%u/%u). Passing up.\n",
@@ -1305,6 +1332,23 @@ err:
 }
 EXPORT_SYMBOL_GPL(l2tp_tunnel_create);
 
+/* Used by the netlink TUNNEL_DELETE command. Returns inet_shutdown()'s result, or 0 if no socket.
+ */
+int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel)
+{
+	int err = 0;
+
+	/* Force the tunnel socket to close. This will eventually
+	 * cause the tunnel to be deleted via the normal socket close
+	 * mechanisms when userspace closes the tunnel socket.
+	 */
+	if ((tunnel->sock != NULL) && (tunnel->sock->sk_socket != NULL))
+		err = inet_shutdown(tunnel->sock->sk_socket, 2);	/* NOTE(review): 2 looks like SHUT_RDWR - confirm */
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_delete);
+
 /* Really kill the session.
  */
 void l2tp_session_free(struct l2tp_session *session)
@@ -1349,6 +1393,21 @@ void l2tp_session_free(struct l2tp_session *session)
 }
 EXPORT_SYMBOL_GPL(l2tp_session_free);
 
+/* This function is used by the netlink SESSION_DELETE command and by
+ * pseudowire modules. Always returns 0.
+ */
+int l2tp_session_delete(struct l2tp_session *session)
+{
+	if (session->session_close != NULL)	/* optional per-session close hook */
+		(*session->session_close)(session);
+
+	l2tp_session_dec_refcount(session);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(l2tp_session_delete);
+
+
 /* We come here whenever a session's send_seq, cookie_len or
  * l2specific_len parameters are set.
  */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index d2395984645e..2974d9ade167 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -33,26 +33,6 @@ enum {
 	L2TP_MSG_DATA		= (1 << 3),	/* data packets */
 };
 
-enum l2tp_pwtype {
-	L2TP_PWTYPE_NONE = 0x0000,
-	L2TP_PWTYPE_ETH_VLAN = 0x0004,
-	L2TP_PWTYPE_ETH = 0x0005,
-	L2TP_PWTYPE_PPP = 0x0007,
-	L2TP_PWTYPE_PPP_AC = 0x0008,
-	L2TP_PWTYPE_IP = 0x000b,
-	__L2TP_PWTYPE_MAX
-};
-
-enum l2tp_l2spec_type {
-	L2TP_L2SPECTYPE_NONE,
-	L2TP_L2SPECTYPE_DEFAULT,
-};
-
-enum l2tp_encap_type {
-	L2TP_ENCAPTYPE_UDP,
-	L2TP_ENCAPTYPE_IP,
-};
-
 struct sk_buff;
 
 struct l2tp_stats {
@@ -87,6 +67,7 @@ struct l2tp_session_cfg {
 						 * control of LNS. */
 	int			debug;		/* bitmask of debug message
 						 * categories */
+	u16			vlan_id;	/* VLAN pseudowire only */
 	u16			offset;		/* offset to payload */
 	u16			l2specific_len;	/* Layer 2 specific length */
 	u16			l2specific_type; /* Layer 2 specific type */
@@ -98,6 +79,7 @@ struct l2tp_session_cfg {
 						  * (in jiffies) */
 	int			mtu;
 	int			mru;
+	char			*ifname;
 };
 
 struct l2tp_session {
@@ -124,6 +106,7 @@ struct l2tp_session {
 	atomic_t		ref_count;
 
 	char			name[32];	/* for logging */
+	char			ifname[IFNAMSIZ];
 	unsigned		data_seq:2;	/* data sequencing level
 						 * 0 => none, 1 => IP only,
 						 * 2 => all
@@ -192,6 +175,11 @@ struct l2tp_tunnel {
 	uint8_t			priv[0];	/* private data */
 };
 
+struct l2tp_nl_cmd_ops {	/* netlink session handlers supplied per pseudowire type */
+	int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+	int (*session_delete)(struct l2tp_session *session);
+};
+
 static inline void *l2tp_tunnel_priv(struct l2tp_tunnel *tunnel)
 {
 	return &tunnel->priv[0];
@@ -224,11 +212,14 @@ out:
 
 extern struct l2tp_session *l2tp_session_find(struct net *net, struct l2tp_tunnel *tunnel, u32 session_id);
 extern struct l2tp_session *l2tp_session_find_nth(struct l2tp_tunnel *tunnel, int nth);
+extern struct l2tp_session *l2tp_session_find_by_ifname(struct net *net, char *ifname);
 extern struct l2tp_tunnel *l2tp_tunnel_find(struct net *net, u32 tunnel_id);
 extern struct l2tp_tunnel *l2tp_tunnel_find_nth(struct net *net, int nth);
 
 extern int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp);
+extern int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel);
 extern struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg);
+extern int l2tp_session_delete(struct l2tp_session *session);
 extern void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
 extern void l2tp_session_free(struct l2tp_session *session);
 extern void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, int length, int (*payload_hook)(struct sk_buff *skb));
@@ -241,6 +232,9 @@ extern void l2tp_tunnel_destruct(struct sock *sk);
 extern void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel);
 extern void l2tp_session_set_header_len(struct l2tp_session *session, int version);
 
+extern int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops);
+extern void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
+
 /* Tunnel reference counts. Incremented per session that is added to
  * the tunnel.
  */
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
new file mode 100644
index 000000000000..3d0f7f6f7488
--- /dev/null
+++ b/net/l2tp/l2tp_netlink.c
@@ -0,0 +1,830 @@
+/*
+ * L2TP netlink layer, for management
+ *
+ * Copyright (c) 2008,2009,2010 Katalix Systems Ltd
+ *
+ * Partly based on the IrDA netlink implementation
+ * (see net/irda/irnetlink.c) which is:
+ * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org>
+ * which is in turn partly based on the wireless netlink code:
+ * Copyright 2006 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <net/genetlink.h>
+#include <net/udp.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/socket.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <net/net_namespace.h>
+
+#include <linux/l2tp.h>
+
+#include "l2tp_core.h"
+
+
+static struct genl_family l2tp_nl_family = {	/* registered in l2tp_nl_init() */
+	.id		= GENL_ID_GENERATE,
+	.name		= L2TP_GENL_NAME,
+	.version	= L2TP_GENL_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= L2TP_ATTR_MAX,
+};
+
+/* Session handlers per pseudowire type, indexed by enum l2tp_pwtype. Accessed under genl lock */
+static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX];
+
+static struct l2tp_session *l2tp_nl_session_find(struct genl_info *info)
+{
+	u32 tunnel_id;
+	u32 session_id;
+	char *ifname;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session = NULL;
+	struct net *net = genl_info_net(info);
+
+	if (info->attrs[L2TP_ATTR_IFNAME]) {	/* an interface name, when given, wins over the ids */
+		ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);
+		session = l2tp_session_find_by_ifname(net, ifname);
+	} else if ((info->attrs[L2TP_ATTR_SESSION_ID]) &&
+		   (info->attrs[L2TP_ATTR_CONN_ID])) {
+		tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+		session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+		tunnel = l2tp_tunnel_find(net, tunnel_id);
+		if (tunnel)
+			session = l2tp_session_find(net, tunnel, session_id);
+	}
+
+	return session;	/* NULL if the request identified no session or nothing matched */
+}
+
+static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info)
+{
+	struct sk_buff *msg;
+	void *hdr;
+	int ret = -ENOBUFS;
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq,
+			  &l2tp_nl_family, 0, L2TP_CMD_NOOP);
+	if (!hdr) {	/* genlmsg_put() reports failure with NULL, not an ERR_PTR */
+		ret = -EMSGSIZE;
+		goto err_out;
+	}
+
+	genlmsg_end(msg, hdr);
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);	/* attribute-less reply to the sender */
+
+err_out:
+	nlmsg_free(msg);	/* reply could not be built; drop it */
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info)
+{
+	u32 tunnel_id;
+	u32 peer_tunnel_id;
+	int proto_version;
+	int fd;
+	int ret = 0;
+	struct l2tp_tunnel_cfg cfg = { 0, };
+	struct l2tp_tunnel *tunnel;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* mandatory, as are the next four attributes */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PEER_CONN_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	peer_tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_CONN_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PROTO_VERSION]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	proto_version = nla_get_u8(info->attrs[L2TP_ATTR_PROTO_VERSION]);
+
+	if (!info->attrs[L2TP_ATTR_ENCAP_TYPE]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	cfg.encap = nla_get_u16(info->attrs[L2TP_ATTR_ENCAP_TYPE]);
+
+	if (!info->attrs[L2TP_ATTR_FD]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	fd = nla_get_u32(info->attrs[L2TP_ATTR_FD]);
+
+	if (info->attrs[L2TP_ATTR_DEBUG])	/* optional */
+		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel != NULL) {	/* refuse to create a tunnel id that already exists */
+		ret = -EEXIST;
+		goto out;
+	}
+
+	ret = -EINVAL;	/* returned for any encap type not handled by the switch */
+	switch (cfg.encap) {
+	case L2TP_ENCAPTYPE_UDP:
+	case L2TP_ENCAPTYPE_IP:
+		ret = l2tp_tunnel_create(net, fd, proto_version, tunnel_id,
+					 peer_tunnel_id, &cfg, &tunnel);
+		break;
+	}
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	u32 tunnel_id;
+	int ret = 0;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	(void) l2tp_tunnel_delete(tunnel);	/* result is discarded; ret stays 0 */
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	u32 tunnel_id;
+	int ret = 0;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (info->attrs[L2TP_ATTR_DEBUG])	/* the debug mask is the only tunnel setting changed here */
+		tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+			       struct l2tp_tunnel *tunnel)
+{
+	void *hdr;
+	struct nlattr *nest;
+	struct sock *sk = NULL;
+	struct inet_sock *inet;
+
+	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags,
+			  L2TP_CMD_TUNNEL_GET);
+	if (!hdr)	/* genlmsg_put() reports failure with NULL, not an ERR_PTR */
+		return -EMSGSIZE;
+
+	NLA_PUT_U8(skb, L2TP_ATTR_PROTO_VERSION, tunnel->version);
+	NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, tunnel->debug);
+	NLA_PUT_U16(skb, L2TP_ATTR_ENCAP_TYPE, tunnel->encap);
+
+	nest = nla_nest_start(skb, L2TP_ATTR_STATS);	/* counters go in a nested attribute */
+	if (nest == NULL)
+		goto nla_put_failure;
+
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, tunnel->stats.tx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, tunnel->stats.tx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, tunnel->stats.tx_errors);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, tunnel->stats.rx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, tunnel->stats.rx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, tunnel->stats.rx_seq_discards);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, tunnel->stats.rx_oos_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, tunnel->stats.rx_errors);
+	nla_nest_end(skb, nest);
+
+	sk = tunnel->sock;
+	if (!sk)	/* no socket: omit the address/port attributes */
+		goto out;
+
+	inet = inet_sk(sk);
+
+	switch (tunnel->encap) {
+	case L2TP_ENCAPTYPE_UDP:
+		NLA_PUT_U16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport));
+		NLA_PUT_U16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport));
+		NLA_PUT_U8(skb, L2TP_ATTR_UDP_CSUM, (sk->sk_no_check != UDP_CSUM_NOXMIT));
+		/* NOBREAK */
+	case L2TP_ENCAPTYPE_IP:
+		NLA_PUT_BE32(skb, L2TP_ATTR_IP_SADDR, inet->inet_saddr);
+		NLA_PUT_BE32(skb, L2TP_ATTR_IP_DADDR, inet->inet_daddr);
+		break;
+	}
+
+out:
+	return genlmsg_end(skb, hdr);
+
+nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -1;
+}
+
+static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_tunnel *tunnel;
+	struct sk_buff *msg;
+	u32 tunnel_id;
+	int ret = -ENOBUFS;
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* the tunnel id is mandatory */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (tunnel == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq,
+				  NLM_F_ACK, tunnel);
+	if (ret < 0)
+		goto err_out;
+
+	return genlmsg_unicast(net, msg, info->snd_pid);	/* reply goes to the requester only */
+
+err_out:
+	nlmsg_free(msg);	/* reply could not be built; drop it */
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	int ti = cb->args[0];	/* index of the next tunnel to emit, saved back on exit */
+	struct l2tp_tunnel *tunnel;
+	struct net *net = sock_net(skb->sk);
+
+	for (;;) {
+		tunnel = l2tp_tunnel_find_nth(net, ti);
+		if (tunnel == NULL)	/* ran past the last tunnel */
+			goto out;
+
+		if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid,
+					cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					tunnel) <= 0)
+			goto out;	/* entry not added; ti is left pointing at it */
+
+		ti++;
+	}
+
+out:
+	cb->args[0] = ti;
+
+	return skb->len;
+}
+
+static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *info)
+{
+	u32 tunnel_id = 0;
+	u32 session_id;
+	u32 peer_session_id;
+	int ret = 0;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;
+	struct l2tp_session_cfg cfg = { 0, };
+	struct net *net = genl_info_net(info);
+
+	if (!info->attrs[L2TP_ATTR_CONN_ID]) {	/* mandatory: the parent tunnel must be named... */
+		ret = -EINVAL;
+		goto out;
+	}
+	tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+	if (!tunnel) {	/* ...and must already exist */
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
+	session = l2tp_session_find(net, tunnel, session_id);
+	if (session) {	/* refuse a session id already present in this tunnel */
+		ret = -EEXIST;
+		goto out;
+	}
+
+	if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
+
+	if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
+		ret = -EINVAL;
+		goto out;
+	}
+	cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
+	if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {	/* pw_type indexes l2tp_nl_cmd_ops[] below */
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (tunnel->version > 2) {	/* these options are read only for tunnels newer than version 2 */
+		if (info->attrs[L2TP_ATTR_OFFSET])
+			cfg.offset = nla_get_u16(info->attrs[L2TP_ATTR_OFFSET]);
+
+		if (info->attrs[L2TP_ATTR_DATA_SEQ])
+			cfg.data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+
+		cfg.l2specific_type = L2TP_L2SPECTYPE_DEFAULT;
+		if (info->attrs[L2TP_ATTR_L2SPEC_TYPE])
+			cfg.l2specific_type = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_TYPE]);
+
+		cfg.l2specific_len = 4;
+		if (info->attrs[L2TP_ATTR_L2SPEC_LEN])
+			cfg.l2specific_len = nla_get_u8(info->attrs[L2TP_ATTR_L2SPEC_LEN]);
+
+		if (info->attrs[L2TP_ATTR_COOKIE]) {
+			u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
+			if (len > 8) {	/* cookies longer than 8 bytes are rejected */
+				ret = -EINVAL;
+				goto out;
+			}
+			cfg.cookie_len = len;
+			memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
+		}
+		if (info->attrs[L2TP_ATTR_PEER_COOKIE]) {
+			u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
+			if (len > 8) {
+				ret = -EINVAL;
+				goto out;
+			}
+			cfg.peer_cookie_len = len;
+			memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
+		}
+		if (info->attrs[L2TP_ATTR_IFNAME])
+			cfg.ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]);	/* points into the request, not copied */
+
+		if (info->attrs[L2TP_ATTR_VLAN_ID])
+			cfg.vlan_id = nla_get_u16(info->attrs[L2TP_ATTR_VLAN_ID]);
+	}
+
+	if (info->attrs[L2TP_ATTR_DEBUG])
+		cfg.debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+	if (info->attrs[L2TP_ATTR_RECV_SEQ])
+		cfg.recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_SEND_SEQ])
+		cfg.send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_LNS_MODE])
+		cfg.lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+
+	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+		cfg.reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+
+	if (info->attrs[L2TP_ATTR_MTU])
+		cfg.mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+
+	if (info->attrs[L2TP_ATTR_MRU])
+		cfg.mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+	if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
+	    (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
+		ret = -EPROTONOSUPPORT;	/* no handler registered for this pseudowire type */
+		goto out;
+	}
+
+	/* Check that pseudowire-specific params are present */
+	switch (cfg.pw_type) {
+	case L2TP_PWTYPE_NONE:
+		break;
+	case L2TP_PWTYPE_ETH_VLAN:
+		if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
+			ret = -EINVAL;
+			goto out;
+		}
+		break;
+	case L2TP_PWTYPE_ETH:
+		break;
+	case L2TP_PWTYPE_PPP:
+	case L2TP_PWTYPE_PPP_AC:
+		break;
+	case L2TP_PWTYPE_IP:
+	default:
+		ret = -EPROTONOSUPPORT;	/* NOTE(review): dead store, ret is reassigned right after the switch */
+		break;
+	}
+
+	ret = -EPROTONOSUPPORT;
+	if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create)	/* NOTE(review): already verified non-NULL above */
+		ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id,
+			session_id, peer_session_id, &cfg);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret = 0;
+	struct l2tp_session *session;
+	u16 pw_type;
+
+	session = l2tp_nl_session_find(info);
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	pw_type = session->pwtype;
+	if (pw_type < __L2TP_PWTYPE_MAX)	/* ret stays 0 when no session_delete handler is registered */
+		if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete)
+			ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *info)
+{
+	int ret = 0;
+	struct l2tp_session *session;
+
+	session = l2tp_nl_session_find(info);
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	if (info->attrs[L2TP_ATTR_DEBUG])	/* only attributes present in the request are applied */
+		session->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]);
+
+	if (info->attrs[L2TP_ATTR_DATA_SEQ])
+		session->data_seq = nla_get_u8(info->attrs[L2TP_ATTR_DATA_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_RECV_SEQ])
+		session->recv_seq = nla_get_u8(info->attrs[L2TP_ATTR_RECV_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_SEND_SEQ])
+		session->send_seq = nla_get_u8(info->attrs[L2TP_ATTR_SEND_SEQ]);
+
+	if (info->attrs[L2TP_ATTR_LNS_MODE])
+		session->lns_mode = nla_get_u8(info->attrs[L2TP_ATTR_LNS_MODE]);
+
+	if (info->attrs[L2TP_ATTR_RECV_TIMEOUT])
+		session->reorder_timeout = nla_get_msecs(info->attrs[L2TP_ATTR_RECV_TIMEOUT]);
+
+	if (info->attrs[L2TP_ATTR_MTU])
+		session->mtu = nla_get_u16(info->attrs[L2TP_ATTR_MTU]);
+
+	if (info->attrs[L2TP_ATTR_MRU])
+		session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]);
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags,
+				struct l2tp_session *session)
+{
+	void *hdr;
+	struct nlattr *nest;
+	struct l2tp_tunnel *tunnel = session->tunnel;
+	struct sock *sk = NULL;
+
+	sk = tunnel->sock;
+
+	hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET);
+	if (!hdr)	/* genlmsg_put() reports failure with NULL, not an ERR_PTR */
+		return -EMSGSIZE;
+
+	NLA_PUT_U32(skb, L2TP_ATTR_CONN_ID, tunnel->tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_SESSION_ID, session->session_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_CONN_ID, tunnel->peer_tunnel_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_PEER_SESSION_ID, session->peer_session_id);
+	NLA_PUT_U32(skb, L2TP_ATTR_DEBUG, session->debug);
+	NLA_PUT_U16(skb, L2TP_ATTR_PW_TYPE, session->pwtype);
+	NLA_PUT_U16(skb, L2TP_ATTR_MTU, session->mtu);
+	if (session->mru)
+		NLA_PUT_U16(skb, L2TP_ATTR_MRU, session->mru);
+
+	if (session->ifname[0])	/* ifname is an array member, so only emptiness needs testing */
+		NLA_PUT_STRING(skb, L2TP_ATTR_IFNAME, session->ifname);
+	if (session->cookie_len)
+		NLA_PUT(skb, L2TP_ATTR_COOKIE, session->cookie_len, &session->cookie[0]);
+	if (session->peer_cookie_len)
+		NLA_PUT(skb, L2TP_ATTR_PEER_COOKIE, session->peer_cookie_len, &session->peer_cookie[0]);
+	NLA_PUT_U8(skb, L2TP_ATTR_RECV_SEQ, session->recv_seq);
+	NLA_PUT_U8(skb, L2TP_ATTR_SEND_SEQ, session->send_seq);
+	NLA_PUT_U8(skb, L2TP_ATTR_LNS_MODE, session->lns_mode);
+#ifdef CONFIG_XFRM
+	if ((sk) && (sk->sk_policy[0] || sk->sk_policy[1]))
+		NLA_PUT_U8(skb, L2TP_ATTR_USING_IPSEC, 1);
+#endif
+	if (session->reorder_timeout)
+		NLA_PUT_MSECS(skb, L2TP_ATTR_RECV_TIMEOUT, session->reorder_timeout);
+
+	nest = nla_nest_start(skb, L2TP_ATTR_STATS);	/* counters go in a nested attribute */
+	if (nest == NULL)
+		goto nla_put_failure;
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_PACKETS, session->stats.tx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_BYTES, session->stats.tx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_TX_ERRORS, session->stats.tx_errors);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_PACKETS, session->stats.rx_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_BYTES, session->stats.rx_bytes);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_SEQ_DISCARDS, session->stats.rx_seq_discards);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_OOS_PACKETS, session->stats.rx_oos_packets);
+	NLA_PUT_U64(skb, L2TP_ATTR_RX_ERRORS, session->stats.rx_errors);
+	nla_nest_end(skb, nest);
+
+	return genlmsg_end(skb, hdr);
+
+ nla_put_failure:
+	genlmsg_cancel(skb, hdr);
+	return -1;
+}
+
+static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info)
+{
+	struct l2tp_session *session;
+	struct sk_buff *msg;
+	int ret;
+
+	session = l2tp_nl_session_find(info);
+	if (session == NULL) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	msg = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!msg) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq,
+				   0, session);
+	if (ret < 0)
+		goto err_out;
+
+	return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid);	/* reply goes to the requester only */
+
+err_out:
+	nlmsg_free(msg);	/* reply could not be built; drop it */
+
+out:
+	return ret;
+}
+
+static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	struct net *net = sock_net(skb->sk);
+	struct l2tp_session *session;
+	struct l2tp_tunnel *tunnel = NULL;
+	int ti = cb->args[0];	/* tunnel index to resume from, saved back on exit */
+	int si = cb->args[1];	/* session index within that tunnel, saved back on exit */
+
+	for (;;) {
+		if (tunnel == NULL) {
+			tunnel = l2tp_tunnel_find_nth(net, ti);
+			if (tunnel == NULL)	/* ran past the last tunnel */
+				goto out;
+		}
+
+		session = l2tp_session_find_nth(tunnel, si);
+		if (session == NULL) {	/* tunnel exhausted: move on to the next one */
+			ti++;
+			tunnel = NULL;
+			si = 0;
+			continue;
+		}
+
+		if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid,
+					 cb->nlh->nlmsg_seq, NLM_F_MULTI,
+					 session) <= 0)
+			break;	/* entry not added; ti/si are left pointing at it */
+
+		si++;
+	}
+
+out:
+	cb->args[0] = ti;
+	cb->args[1] = si;
+
+	return skb->len;
+}
+
+static struct nla_policy l2tp_nl_policy[L2TP_ATTR_MAX + 1] = {	/* shared by every command in l2tp_nl_ops[] */
+	[L2TP_ATTR_NONE]		= { .type = NLA_UNSPEC, },
+	[L2TP_ATTR_PW_TYPE]		= { .type = NLA_U16, },
+	[L2TP_ATTR_ENCAP_TYPE]		= { .type = NLA_U16, },
+	[L2TP_ATTR_OFFSET]		= { .type = NLA_U16, },
+	[L2TP_ATTR_DATA_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_L2SPEC_TYPE]		= { .type = NLA_U8, },
+	[L2TP_ATTR_L2SPEC_LEN]		= { .type = NLA_U8, },
+	[L2TP_ATTR_PROTO_VERSION]	= { .type = NLA_U8, },
+	[L2TP_ATTR_CONN_ID]		= { .type = NLA_U32, },
+	[L2TP_ATTR_PEER_CONN_ID]	= { .type = NLA_U32, },
+	[L2TP_ATTR_SESSION_ID]		= { .type = NLA_U32, },
+	[L2TP_ATTR_PEER_SESSION_ID]	= { .type = NLA_U32, },
+	[L2TP_ATTR_UDP_CSUM]		= { .type = NLA_U8, },
+	[L2TP_ATTR_VLAN_ID]		= { .type = NLA_U16, },
+	[L2TP_ATTR_DEBUG]		= { .type = NLA_U32, },
+	[L2TP_ATTR_RECV_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_SEND_SEQ]		= { .type = NLA_U8, },
+	[L2TP_ATTR_LNS_MODE]		= { .type = NLA_U8, },
+	[L2TP_ATTR_USING_IPSEC]		= { .type = NLA_U8, },
+	[L2TP_ATTR_RECV_TIMEOUT]	= { .type = NLA_MSECS, },
+	[L2TP_ATTR_FD]			= { .type = NLA_U32, },
+	[L2TP_ATTR_IP_SADDR]		= { .type = NLA_U32, },
+	[L2TP_ATTR_IP_DADDR]		= { .type = NLA_U32, },
+	[L2TP_ATTR_UDP_SPORT]		= { .type = NLA_U16, },
+	[L2TP_ATTR_UDP_DPORT]		= { .type = NLA_U16, },
+	[L2TP_ATTR_MTU]			= { .type = NLA_U16, },
+	[L2TP_ATTR_MRU]			= { .type = NLA_U16, },
+	[L2TP_ATTR_STATS]		= { .type = NLA_NESTED, },
+	[L2TP_ATTR_IFNAME] = {
+		.type = NLA_NUL_STRING,
+		.len = IFNAMSIZ - 1,	/* one byte less than the ifname[IFNAMSIZ] buffers */
+	},
+	[L2TP_ATTR_COOKIE] = {
+		.type = NLA_BINARY,
+		.len = 8,	/* same 8-byte limit session_create enforces */
+	},
+	[L2TP_ATTR_PEER_COOKIE] = {
+		.type = NLA_BINARY,
+		.len = 8,
+	},
+};
+
+static struct genl_ops l2tp_nl_ops[] = {	/* registered with l2tp_nl_family in l2tp_nl_init() */
+	{
+		.cmd = L2TP_CMD_NOOP,
+		.doit = l2tp_nl_cmd_noop,
+		.policy = l2tp_nl_policy,
+		/* can be retrieved by unprivileged users */
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_CREATE,
+		.doit = l2tp_nl_cmd_tunnel_create,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_DELETE,
+		.doit = l2tp_nl_cmd_tunnel_delete,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_MODIFY,
+		.doit = l2tp_nl_cmd_tunnel_modify,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_TUNNEL_GET,
+		.doit = l2tp_nl_cmd_tunnel_get,
+		.dumpit = l2tp_nl_cmd_tunnel_dump,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_CREATE,
+		.doit = l2tp_nl_cmd_session_create,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_DELETE,
+		.doit = l2tp_nl_cmd_session_delete,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_MODIFY,
+		.doit = l2tp_nl_cmd_session_modify,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+	{
+		.cmd = L2TP_CMD_SESSION_GET,
+		.doit = l2tp_nl_cmd_session_get,
+		.dumpit = l2tp_nl_cmd_session_dump,
+		.policy = l2tp_nl_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
+};
+
+int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, const struct l2tp_nl_cmd_ops *ops)
+{
+	int ret = -EINVAL;	/* pw_type outside the handler table */
+
+	if (pw_type >= __L2TP_PWTYPE_MAX)
+		goto err;
+
+	genl_lock();
+	ret = -EBUSY;	/* handlers are already registered for this pw_type */
+	if (l2tp_nl_cmd_ops[pw_type])
+		goto out;
+
+	l2tp_nl_cmd_ops[pw_type] = ops;
+	ret = 0;
+
+out:
+	genl_unlock();
+err:
+	return ret;	/* 0 on success, -EINVAL or -EBUSY otherwise */
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_register_ops);
+
+void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type)
+{
+	if (pw_type < __L2TP_PWTYPE_MAX) {	/* out-of-range types are silently ignored */
+		genl_lock();
+		l2tp_nl_cmd_ops[pw_type] = NULL;	/* clears the slot whoever registered it */
+		genl_unlock();
+	}
+}
+EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops);
+
+static int l2tp_nl_init(void)
+{
+	int err;
+
+	printk(KERN_INFO "L2TP netlink interface\n");
+	err = genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops,
+					    ARRAY_SIZE(l2tp_nl_ops));	/* family plus every command in one call */
+
+	return err;
+}
+
+static void l2tp_nl_cleanup(void)
+{
+	genl_unregister_family(&l2tp_nl_family);	/* undoes the registration made in l2tp_nl_init() */
+}
+
+module_init(l2tp_nl_init);
+module_exit(l2tp_nl_cleanup);
+
+MODULE_AUTHOR("James Chapman <jchapman@katalix.com>");
+MODULE_DESCRIPTION("L2TP netlink");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1.0");
+MODULE_ALIAS("net-pf-" __stringify(PF_NETLINK) "-proto-" \
+	     __stringify(NETLINK_GENERIC) "-type-" "l2tp");
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 63fc62baeeb9..d64f081f2b1c 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -87,6 +87,7 @@
 #include <linux/hash.h>
 #include <linux/sort.h>
 #include <linux/proc_fs.h>
+#include <linux/l2tp.h>
 #include <linux/nsproxy.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
@@ -656,17 +657,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel_id == 0)
 		goto end;
 
+	tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
+
 	/* Special case: create tunnel context if session_id and
 	 * peer_session_id is 0. Otherwise look up tunnel using supplied
 	 * tunnel id.
 	 */
 	if ((session_id == 0) && (peer_session_id == 0)) {
-		error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, NULL, &tunnel);
-		if (error < 0)
-			goto end;
+		if (tunnel == NULL) {
+			struct l2tp_tunnel_cfg tcfg = {
+				.encap = L2TP_ENCAPTYPE_UDP,
+				.debug = 0,
+			};
+			error = l2tp_tunnel_create(sock_net(sk), fd, ver, tunnel_id, peer_tunnel_id, &tcfg, &tunnel);
+			if (error < 0)
+				goto end;
+		}
 	} else {
-		tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id);
-
 		/* Error if we can't find the tunnel */
 		error = -ENOENT;
 		if (tunnel == NULL)
@@ -680,28 +687,46 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
 	if (tunnel->recv_payload_hook == NULL)
 		tunnel->recv_payload_hook = pppol2tp_recv_payload_hook;
 
-	/* Check that this session doesn't already exist */
-	error = -EEXIST;
-	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
-	if (session != NULL)
-		goto end;
-
-	/* Default MTU values. */
-	if (cfg.mtu == 0)
-		cfg.mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
-	if (cfg.mru == 0)
-		cfg.mru = cfg.mtu;
-	cfg.debug = tunnel->debug;
+	if (tunnel->peer_tunnel_id == 0) {
+		if (ver == 2)
+			tunnel->peer_tunnel_id = sp->pppol2tp.d_tunnel;
+		else
+			tunnel->peer_tunnel_id = sp3->pppol2tp.d_tunnel;
+	}
 
-	/* Allocate and initialize a new session context. */
-	session = l2tp_session_create(sizeof(struct pppol2tp_session),
-				      tunnel, session_id,
-				      peer_session_id, &cfg);
+	/* Create session if it doesn't already exist. We handle the
+	 * case where a session was previously created by the netlink
+	 * interface by checking that the session doesn't already have
+	 * a socket and its tunnel socket are what we expect. If any
+	 * of those checks fail, return EEXIST to the caller.
+	 */
+	session = l2tp_session_find(sock_net(sk), tunnel, session_id);
 	if (session == NULL) {
-		error = -ENOMEM;
-		goto end;
+		/* Default MTU must allow space for UDP/L2TP/PPP
+		 * headers.
+		 */
+		cfg.mtu = cfg.mru = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+
+		/* Allocate and initialize a new session context. */
+		session = l2tp_session_create(sizeof(struct pppol2tp_session),
+					      tunnel, session_id,
+					      peer_session_id, &cfg);
+		if (session == NULL) {
+			error = -ENOMEM;
+			goto end;
+		}
+	} else {
+		ps = l2tp_session_priv(session);
+		error = -EEXIST;
+		if (ps->sock != NULL)
+			goto end;
+
+		/* consistency checks */
+		if (ps->tunnel_sock != tunnel->sock)
+			goto end;
 	}
 
+	/* Associate session with its PPPoL2TP socket */
 	ps = l2tp_session_priv(session);
 	ps->owner	     = current->pid;
 	ps->sock	     = sk;
@@ -764,6 +789,74 @@ end:
 	return error;
 }
 
+#ifdef CONFIG_L2TP_V3
+
+/* Called when creating sessions via the netlink interface. Returns 0, -ENOENT, -EEXIST or -ENOMEM.
+ */
+static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg)
+{
+	int error;
+	struct l2tp_tunnel *tunnel;
+	struct l2tp_session *session;
+	struct pppol2tp_session *ps;
+
+	tunnel = l2tp_tunnel_find(net, tunnel_id);
+
+	/* Error if we can't find the tunnel */
+	error = -ENOENT;
+	if (tunnel == NULL)
+		goto out;
+
+	/* Error if tunnel socket is not prepped */
+	if (tunnel->sock == NULL)	/* error is still -ENOENT here */
+		goto out;
+
+	/* Check that this session doesn't already exist */
+	error = -EEXIST;
+	session = l2tp_session_find(net, tunnel, session_id);
+	if (session != NULL)
+		goto out;
+
+	/* Default MTU values. */
+	if (cfg->mtu == 0)
+		cfg->mtu = 1500 - PPPOL2TP_HEADER_OVERHEAD;
+	if (cfg->mru == 0)	/* MRU defaults to whatever MTU ended up being */
+		cfg->mru = cfg->mtu;
+
+	/* Allocate and initialize a new session context. */
+	error = -ENOMEM;
+	session = l2tp_session_create(sizeof(struct pppol2tp_session),
+				      tunnel, session_id,
+				      peer_session_id, cfg);
+	if (session == NULL)
+		goto out;
+
+	ps = l2tp_session_priv(session);
+	ps->tunnel_sock = tunnel->sock;	/* ps->sock is not set by this function */
+
+	PRINTK(session->debug, PPPOL2TP_MSG_CONTROL, KERN_INFO,
+	       "%s: created\n", session->name);
+
+	error = 0;
+
+out:
+	return error;
+}
+
+/* Called when deleting sessions via the netlink interface. Always returns 0.
+ */
+static int pppol2tp_session_delete(struct l2tp_session *session)
+{
+	struct pppol2tp_session *ps = l2tp_session_priv(session);
+
+	if (ps->sock == NULL)	/* the session ref is dropped only when no PPPoL2TP socket is attached */
+		l2tp_session_dec_refcount(session);
+
+	return 0;
+}
+
+#endif /* CONFIG_L2TP_V3 */
+
 /* getname() support.
  */
 static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr,
@@ -1660,6 +1753,15 @@ static struct pppox_proto pppol2tp_proto = {
 	.ioctl		= pppol2tp_ioctl
 };
 
+#ifdef CONFIG_L2TP_V3
+
+static const struct l2tp_nl_cmd_ops pppol2tp_nl_cmd_ops = {	/* registered for L2TP_PWTYPE_PPP in pppol2tp_init() */
+	.session_create	= pppol2tp_session_create,
+	.session_delete	= pppol2tp_session_delete,
+};
+
+#endif /* CONFIG_L2TP_V3 */
+
 static int __init pppol2tp_init(void)
 {
 	int err;
@@ -1676,11 +1778,22 @@ static int __init pppol2tp_init(void)
 	if (err)
 		goto out_unregister_pppol2tp_proto;
 
+#ifdef CONFIG_L2TP_V3
+	err = l2tp_nl_register_ops(L2TP_PWTYPE_PPP, &pppol2tp_nl_cmd_ops);
+	if (err)
+		goto out_unregister_pppox;
+#endif
+
 	printk(KERN_INFO "PPPoL2TP kernel driver, %s\n",
 	       PPPOL2TP_DRV_VERSION);
 
 out:
 	return err;
+
+#ifdef CONFIG_L2TP_V3
+out_unregister_pppox:
+	unregister_pppox_proto(PX_PROTO_OL2TP);
+#endif
 out_unregister_pppol2tp_proto:
 	proto_unregister(&pppol2tp_sk_proto);
 out_unregister_pppol2tp_pernet:
@@ -1690,6 +1803,9 @@ out_unregister_pppol2tp_pernet:
 
 static void __exit pppol2tp_exit(void)
 {
+#ifdef CONFIG_L2TP_V3
+	l2tp_nl_unregister_ops(L2TP_PWTYPE_PPP);
+#endif
 	unregister_pppox_proto(PX_PROTO_OL2TP);
 	proto_unregister(&pppol2tp_sk_proto);
 	unregister_pernet_device(&pppol2tp_net_ops);
-- 
cgit v1.2.3


From 1f8438a853667d48055ad38384c63e94b32c6578 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 3 Apr 2010 15:09:04 -0700
Subject: icmp: Account for ICMP out errors

When ip_append_data() fails because of socket limit or memory shortage,
increment ICMP_MIB_OUTERRORS counter, so that "netstat -s" can report
these errors.

LANG=C netstat -s | grep "ICMP messages failed"
    0 ICMP messages failed

For IPV6, implement ICMP6_MIB_OUTERRORS counter as well.

# grep Icmp6OutErrors /proc/net/dev_snmp6/*
/proc/net/dev_snmp6/eth0:Icmp6OutErrors                   	0
/proc/net/dev_snmp6/lo:Icmp6OutErrors                   	0

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/snmp.h | 1 +
 net/ipv4/icmp.c      | 5 +++--
 net/ipv6/icmp.c      | 2 ++
 net/ipv6/proc.c      | 1 +
 4 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/snmp.h b/include/linux/snmp.h
index d2a9aa3c6c88..52797714ade7 100644
--- a/include/linux/snmp.h
+++ b/include/linux/snmp.h
@@ -100,6 +100,7 @@ enum
 	ICMP6_MIB_INMSGS,			/* InMsgs */
 	ICMP6_MIB_INERRORS,			/* InErrors */
 	ICMP6_MIB_OUTMSGS,			/* OutMsgs */
+	ICMP6_MIB_OUTERRORS,			/* OutErrors */
 	__ICMP6_MIB_MAX
 };
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 4b4c2bcd15db..d2aa7438c523 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -330,9 +330,10 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 	if (ip_append_data(sk, icmp_glue_bits, icmp_param,
 			   icmp_param->data_len+icmp_param->head_len,
 			   icmp_param->head_len,
-			   ipc, rt, MSG_DONTWAIT) < 0)
+			   ipc, rt, MSG_DONTWAIT) < 0) {
+		ICMP_INC_STATS_BH(sock_net(sk), ICMP_MIB_OUTERRORS);
 		ip_flush_pending_frames(sk);
-	else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
+	} else if ((skb = skb_peek(&sk->sk_write_queue)) != NULL) {
 		struct icmphdr *icmph = icmp_hdr(skb);
 		__wsum csum = 0;
 		struct sk_buff *skb1;
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index eb9abe24bdf0..a00c18aa6c8d 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -482,6 +482,7 @@ route_done:
 			      np->tclass, NULL, &fl, (struct rt6_info*)dst,
 			      MSG_DONTWAIT);
 	if (err) {
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
@@ -562,6 +563,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
 				(struct rt6_info*)dst, MSG_DONTWAIT);
 
 	if (err) {
+		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS);
 		ip6_flush_pending_frames(sk);
 		goto out_put;
 	}
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index 58344c0fbd13..458eabfbe130 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -97,6 +97,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
 	SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS),
 	SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS),
 	SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
+	SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
 	SNMP_MIB_SENTINEL
 };
 
-- 
cgit v1.2.3


From c9fbf7e070bbf9cc7adc1420df87706c62cb04ed Mon Sep 17 00:00:00 2001
From: Mark Brown <broonie@opensource.wolfsonmicro.com>
Date: Fri, 26 Mar 2010 16:49:15 +0000
Subject: mfd: Add WM8994 interrupt controller support

The WM8994 has an interrupt controller which supports interrupts for
both CODEC and GPIO portions of the chip. Support this using genirq,
while allowing for systems that do not have an interrupt hooked up.

Wrapper functions are provided for the IRQ request and free to simplify
the code in consumer drivers when handling cases where IRQs are not
set up.

Signed-off-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Samuel Ortiz <sameo@linux.intel.com>
---
 drivers/mfd/Kconfig              |   4 +-
 drivers/mfd/Makefile             |   2 +-
 drivers/mfd/wm8994-core.c        |  43 +++++-
 drivers/mfd/wm8994-irq.c         | 310 +++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/wm8994/core.h  |  53 ++++++-
 include/linux/mfd/wm8994/pdata.h |   1 +
 6 files changed, 406 insertions(+), 7 deletions(-)
 create mode 100644 drivers/mfd/wm8994-irq.c

(limited to 'include/linux')

diff --git a/drivers/mfd/Kconfig b/drivers/mfd/Kconfig
index b2073e0266f5..de3e74cde51c 100644
--- a/drivers/mfd/Kconfig
+++ b/drivers/mfd/Kconfig
@@ -301,9 +301,9 @@ config MFD_WM8350_I2C
 	  selected to enable support for the functionality of the chip.
 
 config MFD_WM8994
-	tristate "Support Wolfson Microelectronics WM8994"
+	bool "Support Wolfson Microelectronics WM8994"
 	select MFD_CORE
-	depends on I2C
+	depends on I2C=y && GENERIC_HARDIRQS
 	help
 	  The WM8994 is a highly integrated hi-fi CODEC designed for
 	  smartphone applicatiosn.  As well as audio functionality it
diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile
index 4fbf8f89a49b..87935f967aa0 100644
--- a/drivers/mfd/Makefile
+++ b/drivers/mfd/Makefile
@@ -26,7 +26,7 @@ wm8350-objs			:= wm8350-core.o wm8350-regmap.o wm8350-gpio.o
 wm8350-objs			+= wm8350-irq.o
 obj-$(CONFIG_MFD_WM8350)	+= wm8350.o
 obj-$(CONFIG_MFD_WM8350_I2C)	+= wm8350-i2c.o
-obj-$(CONFIG_MFD_WM8994)	+= wm8994-core.o
+obj-$(CONFIG_MFD_WM8994)	+= wm8994-core.o wm8994-irq.o
 
 obj-$(CONFIG_TPS65010)		+= tps65010.o
 obj-$(CONFIG_MENELAUS)		+= menelaus.o
diff --git a/drivers/mfd/wm8994-core.c b/drivers/mfd/wm8994-core.c
index 844e1c1b7d90..39cde82afb48 100644
--- a/drivers/mfd/wm8994-core.c
+++ b/drivers/mfd/wm8994-core.c
@@ -172,9 +172,34 @@ static struct mfd_cell wm8994_regulator_devs[] = {
 	{ .name = "wm8994-ldo", .id = 2 },
 };
 
+static struct resource wm8994_codec_resources[] = {
+	{
+		.start = WM8994_IRQ_TEMP_SHUT,
+		.end   = WM8994_IRQ_TEMP_WARN,
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
+static struct resource wm8994_gpio_resources[] = {
+	{
+		.start = WM8994_IRQ_GPIO(1),
+		.end   = WM8994_IRQ_GPIO(11),
+		.flags = IORESOURCE_IRQ,
+	},
+};
+
 static struct mfd_cell wm8994_devs[] = {
-	{ .name = "wm8994-codec" },
-	{ .name = "wm8994-gpio" },
+	{
+		.name = "wm8994-codec",
+		.num_resources = ARRAY_SIZE(wm8994_codec_resources),
+		.resources = wm8994_codec_resources,
+	},
+
+	{
+		.name = "wm8994-gpio",
+		.num_resources = ARRAY_SIZE(wm8994_gpio_resources),
+		.resources = wm8994_gpio_resources,
+	},
 };
 
 /*
@@ -235,6 +260,11 @@ static int wm8994_device_resume(struct device *dev)
 		return ret;
 	}
 
+	ret = wm8994_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK,
+			   WM8994_NUM_IRQ_REGS * 2, &wm8994->irq_masks_cur);
+	if (ret < 0)
+		dev_err(dev, "Failed to restore interrupt masks: %d\n", ret);
+
 	ret = wm8994_write(wm8994, WM8994_LDO_1, WM8994_NUM_LDO_REGS * 2,
 			   &wm8994->ldo_regs);
 	if (ret < 0)
@@ -347,6 +377,7 @@ static int wm8994_device_init(struct wm8994 *wm8994, unsigned long id, int irq)
 
 
 	if (pdata) {
+		wm8994->irq_base = pdata->irq_base;
 		wm8994->gpio_base = pdata->gpio_base;
 
 		/* GPIO configuration is only applied if it's non-zero */
@@ -374,16 +405,20 @@ static int wm8994_device_init(struct wm8994 *wm8994, unsigned long id, int irq)
 					WM8994_LDO1_DISCH, 0);
 	}
 
+	wm8994_irq_init(wm8994);
+
 	ret = mfd_add_devices(wm8994->dev, -1,
 			      wm8994_devs, ARRAY_SIZE(wm8994_devs),
 			      NULL, 0);
 	if (ret != 0) {
 		dev_err(wm8994->dev, "Failed to add children: %d\n", ret);
-		goto err_enable;
+		goto err_irq;
 	}
 
 	return 0;
 
+err_irq:
+	wm8994_irq_exit(wm8994);
 err_enable:
 	regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies),
 			       wm8994->supplies);
@@ -400,6 +435,7 @@ err:
 static void wm8994_device_exit(struct wm8994 *wm8994)
 {
 	mfd_remove_devices(wm8994->dev);
+	wm8994_irq_exit(wm8994);
 	regulator_bulk_disable(ARRAY_SIZE(wm8994_main_supplies),
 			       wm8994->supplies);
 	regulator_bulk_free(ARRAY_SIZE(wm8994_main_supplies), wm8994->supplies);
@@ -468,6 +504,7 @@ static int wm8994_i2c_probe(struct i2c_client *i2c,
 	wm8994->control_data = i2c;
 	wm8994->read_dev = wm8994_i2c_read_device;
 	wm8994->write_dev = wm8994_i2c_write_device;
+	wm8994->irq = i2c->irq;
 
 	return wm8994_device_init(wm8994, id->driver_data, i2c->irq);
 }
diff --git a/drivers/mfd/wm8994-irq.c b/drivers/mfd/wm8994-irq.c
new file mode 100644
index 000000000000..8400eb1ee5db
--- /dev/null
+++ b/drivers/mfd/wm8994-irq.c
@@ -0,0 +1,310 @@
+/*
+ * wm8994-irq.c  --  Interrupt controller support for Wolfson WM8994
+ *
+ * Copyright 2010 Wolfson Microelectronics PLC.
+ *
+ * Author: Mark Brown <broonie@opensource.wolfsonmicro.com>
+ *
+ *  This program is free software; you can redistribute  it and/or modify it
+ *  under  the terms of  the GNU General  Public License as published by the
+ *  Free Software Foundation;  either version 2 of the  License, or (at your
+ *  option) any later version.
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/i2c.h>
+#include <linux/irq.h>
+#include <linux/mfd/core.h>
+#include <linux/interrupt.h>
+
+#include <linux/mfd/wm8994/core.h>
+#include <linux/mfd/wm8994/registers.h>
+
+#include <linux/delay.h>
+
+struct wm8994_irq_data {
+	int reg;
+	int mask;
+};
+
+static struct wm8994_irq_data wm8994_irqs[] = { /* .reg is 1-based */
+	[WM8994_IRQ_TEMP_SHUT] = {
+		.reg = 2,
+		.mask = WM8994_TEMP_SHUT_EINT,
+	},
+	[WM8994_IRQ_MIC1_DET] = {
+		.reg = 2,
+		.mask = WM8994_MIC1_DET_EINT,
+	},
+	[WM8994_IRQ_MIC1_SHRT] = {
+		.reg = 2,
+		.mask = WM8994_MIC1_SHRT_EINT,
+	},
+	[WM8994_IRQ_MIC2_DET] = {
+		.reg = 2,
+		.mask = WM8994_MIC2_DET_EINT,
+	},
+	[WM8994_IRQ_MIC2_SHRT] = {
+		.reg = 2,
+		.mask = WM8994_MIC2_SHRT_EINT,
+	},
+	[WM8994_IRQ_FLL1_LOCK] = {
+		.reg = 2,
+		.mask = WM8994_FLL1_LOCK_EINT,
+	},
+	[WM8994_IRQ_FLL2_LOCK] = {
+		.reg = 2,
+		.mask = WM8994_FLL2_LOCK_EINT,
+	},
+	[WM8994_IRQ_SRC1_LOCK] = {
+		.reg = 2,
+		.mask = WM8994_SRC1_LOCK_EINT,
+	},
+	[WM8994_IRQ_SRC2_LOCK] = {
+		.reg = 2,
+		.mask = WM8994_SRC2_LOCK_EINT,
+	},
+	[WM8994_IRQ_AIF1DRC1_SIG_DET] = {
+		.reg = 2,
+		.mask = WM8994_AIF1DRC1_SIG_DET_EINT,
+	},
+	[WM8994_IRQ_AIF1DRC2_SIG_DET] = {
+		.reg = 2,
+		.mask = WM8994_AIF1DRC2_SIG_DET_EINT,
+	},
+	[WM8994_IRQ_AIF2DRC_SIG_DET] = {
+		.reg = 2,
+		.mask = WM8994_AIF2DRC_SIG_DET_EINT,
+	},
+	[WM8994_IRQ_FIFOS_ERR] = {
+		.reg = 2,
+		.mask = WM8994_FIFOS_ERR_EINT,
+	},
+	[WM8994_IRQ_WSEQ_DONE] = {
+		.reg = 2,
+		.mask = WM8994_WSEQ_DONE_EINT,
+	},
+	[WM8994_IRQ_DCS_DONE] = {
+		.reg = 2,
+		.mask = WM8994_DCS_DONE_EINT,
+	},
+	[WM8994_IRQ_TEMP_WARN] = {
+		.reg = 2,
+		.mask = WM8994_TEMP_WARN_EINT,
+	},
+	[WM8994_IRQ_GPIO(1)] = {
+		.reg = 1,
+		.mask = WM8994_GP1_EINT,
+	},
+	[WM8994_IRQ_GPIO(2)] = {
+		.reg = 1,
+		.mask = WM8994_GP2_EINT,
+	},
+	[WM8994_IRQ_GPIO(3)] = {
+		.reg = 1,
+		.mask = WM8994_GP3_EINT,
+	},
+	[WM8994_IRQ_GPIO(4)] = {
+		.reg = 1,
+		.mask = WM8994_GP4_EINT,
+	},
+	[WM8994_IRQ_GPIO(5)] = {
+		.reg = 1,
+		.mask = WM8994_GP5_EINT,
+	},
+	[WM8994_IRQ_GPIO(6)] = {
+		.reg = 1,
+		.mask = WM8994_GP6_EINT,
+	},
+	[WM8994_IRQ_GPIO(7)] = {
+		.reg = 1,
+		.mask = WM8994_GP7_EINT,
+	},
+	[WM8994_IRQ_GPIO(8)] = {
+		.reg = 1,
+		.mask = WM8994_GP8_EINT,
+	},
+	[WM8994_IRQ_GPIO(9)] = {
+		.reg = 1,
+		.mask = WM8994_GP9_EINT,
+	},
+	[WM8994_IRQ_GPIO(10)] = {
+		.reg = 1,
+		.mask = WM8994_GP10_EINT,
+	},
+	[WM8994_IRQ_GPIO(11)] = {
+		.reg = 1,
+		.mask = WM8994_GP11_EINT,
+	},
+};
+
+static inline int irq_data_to_status_reg(struct wm8994_irq_data *irq_data)
+{
+	return WM8994_INTERRUPT_STATUS_1 - 1 + irq_data->reg;
+}
+
+static inline int irq_data_to_mask_reg(struct wm8994_irq_data *irq_data)
+{
+	return WM8994_INTERRUPT_STATUS_1_MASK - 1 + irq_data->reg;
+}
+
+static inline struct wm8994_irq_data *irq_to_wm8994_irq(struct wm8994 *wm8994,
+							int irq)
+{
+	return &wm8994_irqs[irq - wm8994->irq_base];
+}
+
+static void wm8994_irq_lock(unsigned int irq)
+{
+	struct wm8994 *wm8994 = get_irq_chip_data(irq);
+
+	mutex_lock(&wm8994->irq_lock);
+}
+
+static void wm8994_irq_sync_unlock(unsigned int irq)
+{
+	struct wm8994 *wm8994 = get_irq_chip_data(irq);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
+		/* If there's been a change in the mask write it back
+		 * to the hardware. */
+		if (wm8994->irq_masks_cur[i] != wm8994->irq_masks_cache[i]) {
+			wm8994->irq_masks_cache[i] = wm8994->irq_masks_cur[i];
+			wm8994_reg_write(wm8994,
+					 WM8994_INTERRUPT_STATUS_1_MASK + i,
+					 wm8994->irq_masks_cur[i]);
+		}
+	}
+
+	mutex_unlock(&wm8994->irq_lock);
+}
+
+static void wm8994_irq_unmask(unsigned int irq)
+{
+	struct wm8994 *wm8994 = get_irq_chip_data(irq);
+	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994, irq);
+
+	wm8994->irq_masks_cur[irq_data->reg - 1] &= ~irq_data->mask;
+}
+
+static void wm8994_irq_mask(unsigned int irq)
+{
+	struct wm8994 *wm8994 = get_irq_chip_data(irq);
+	struct wm8994_irq_data *irq_data = irq_to_wm8994_irq(wm8994, irq);
+
+	wm8994->irq_masks_cur[irq_data->reg - 1] |= irq_data->mask;
+}
+
+static struct irq_chip wm8994_irq_chip = {
+	.name = "wm8994",
+	.bus_lock = wm8994_irq_lock,
+	.bus_sync_unlock = wm8994_irq_sync_unlock,
+	.mask = wm8994_irq_mask,
+	.unmask = wm8994_irq_unmask,
+};
+
+/* The processing of the primary interrupt occurs in a thread so that
+ * we can interact with the device over I2C or SPI. */
+static irqreturn_t wm8994_irq_thread(int irq, void *data)
+{
+	struct wm8994 *wm8994 = data;
+	unsigned int i;
+	u16 status[WM8994_NUM_IRQ_REGS];
+	int ret;
+
+	ret = wm8994_bulk_read(wm8994, WM8994_INTERRUPT_STATUS_1,
+			       WM8994_NUM_IRQ_REGS, status);
+	if (ret < 0) {
+		dev_err(wm8994->dev, "Failed to read interrupt status: %d\n",
+			ret);
+		return IRQ_NONE;
+	}
+
+	/* Apply masking */
+	for (i = 0; i < WM8994_NUM_IRQ_REGS; i++)
+		status[i] &= ~wm8994->irq_masks_cur[i];
+
+	/* Report */
+	for (i = 0; i < ARRAY_SIZE(wm8994_irqs); i++) {
+		if (status[wm8994_irqs[i].reg - 1] & wm8994_irqs[i].mask)
+			handle_nested_irq(wm8994->irq_base + i);
+	}
+
+	/* Ack any unmasked IRQs */
+	for (i = 0; i < ARRAY_SIZE(status); i++) {
+		if (status[i])
+			wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1 + i,
+					 status[i]);
+	}
+
+	return IRQ_HANDLED;
+}
+
+int wm8994_irq_init(struct wm8994 *wm8994)
+{
+	int i, cur_irq, ret;
+
+	mutex_init(&wm8994->irq_lock);
+
+	/* Mask the individual interrupt sources */
+	for (i = 0; i < ARRAY_SIZE(wm8994->irq_masks_cur); i++) {
+		wm8994->irq_masks_cur[i] = 0xffff;
+		wm8994->irq_masks_cache[i] = 0xffff;
+		wm8994_reg_write(wm8994, WM8994_INTERRUPT_STATUS_1_MASK + i,
+				 0xffff);
+	}
+
+	if (!wm8994->irq) {
+		dev_warn(wm8994->dev,
+			 "No interrupt specified, no interrupts\n");
+		wm8994->irq_base = 0;
+		return 0;
+	}
+
+	if (!wm8994->irq_base) {
+		dev_err(wm8994->dev,
+			"No interrupt base specified, no interrupts\n");
+		return 0;
+	}
+
+	/* Register them with genirq */
+	for (cur_irq = wm8994->irq_base;
+	     cur_irq < ARRAY_SIZE(wm8994_irqs) + wm8994->irq_base;
+	     cur_irq++) {
+		set_irq_chip_data(cur_irq, wm8994);
+		set_irq_chip_and_handler(cur_irq, &wm8994_irq_chip,
+					 handle_edge_irq);
+		set_irq_nested_thread(cur_irq, 1);
+
+		/* ARM needs us to explicitly flag the IRQ as valid
+		 * and will set them noprobe when we do so. */
+#ifdef CONFIG_ARM
+		set_irq_flags(cur_irq, IRQF_VALID);
+#else
+		set_irq_noprobe(cur_irq);
+#endif
+	}
+
+	ret = request_threaded_irq(wm8994->irq, NULL, wm8994_irq_thread,
+				   IRQF_TRIGGER_HIGH | IRQF_ONESHOT,
+				   "wm8994", wm8994);
+	if (ret != 0) {
+		dev_err(wm8994->dev, "Failed to request IRQ %d: %d\n",
+			wm8994->irq, ret);
+		return ret;
+	}
+
+	/* Enable top level interrupt if it was masked */
+	wm8994_reg_write(wm8994, WM8994_INTERRUPT_CONTROL, 0);
+
+	return 0;
+}
+
+void wm8994_irq_exit(struct wm8994 *wm8994)
+{
+	if (wm8994->irq && wm8994->irq_base) /* init only requests if both set */
+		free_irq(wm8994->irq, wm8994);
+}
diff --git a/include/linux/mfd/wm8994/core.h b/include/linux/mfd/wm8994/core.h
index b06ff2846748..de79baee4925 100644
--- a/include/linux/mfd/wm8994/core.h
+++ b/include/linux/mfd/wm8994/core.h
@@ -15,14 +15,38 @@
 #ifndef __MFD_WM8994_CORE_H__
 #define __MFD_WM8994_CORE_H__
 
+#include <linux/interrupt.h>
+
 struct regulator_dev;
 struct regulator_bulk_data;
 
 #define WM8994_NUM_GPIO_REGS 11
-#define WM8994_NUM_LDO_REGS 2
+#define WM8994_NUM_LDO_REGS   2
+#define WM8994_NUM_IRQ_REGS   2
+
+#define WM8994_IRQ_TEMP_SHUT		0
+#define WM8994_IRQ_MIC1_DET		1
+#define WM8994_IRQ_MIC1_SHRT		2
+#define WM8994_IRQ_MIC2_DET		3
+#define WM8994_IRQ_MIC2_SHRT		4
+#define WM8994_IRQ_FLL1_LOCK		5
+#define WM8994_IRQ_FLL2_LOCK		6
+#define WM8994_IRQ_SRC1_LOCK		7
+#define WM8994_IRQ_SRC2_LOCK		8
+#define WM8994_IRQ_AIF1DRC1_SIG_DET	9
+#define WM8994_IRQ_AIF1DRC2_SIG_DET	10
+#define WM8994_IRQ_AIF2DRC_SIG_DET	11
+#define WM8994_IRQ_FIFOS_ERR		12
+#define WM8994_IRQ_WSEQ_DONE		13
+#define WM8994_IRQ_DCS_DONE		14
+#define WM8994_IRQ_TEMP_WARN		15
+
+/* GPIOs in the chip are numbered from 1-11 */
+#define WM8994_IRQ_GPIO(x) ((x) + WM8994_IRQ_TEMP_WARN)
 
 struct wm8994 {
 	struct mutex io_lock;
+	struct mutex irq_lock;
 
 	struct device *dev;
 	int (*read_dev)(struct wm8994 *wm8994, unsigned short reg,
@@ -33,6 +57,11 @@ struct wm8994 {
 	void *control_data;
 
 	int gpio_base;
+	int irq_base;
+
+	int irq;
+	u16 irq_masks_cur[WM8994_NUM_IRQ_REGS];
+	u16 irq_masks_cache[WM8994_NUM_IRQ_REGS];
 
 	/* Used over suspend/resume */
 	u16 ldo_regs[WM8994_NUM_LDO_REGS];
@@ -51,4 +80,26 @@ int wm8994_set_bits(struct wm8994 *wm8994, unsigned short reg,
 int wm8994_bulk_read(struct wm8994 *wm8994, unsigned short reg,
 		     int count, u16 *buf);
 
+
+/* Helper to save on boilerplate */
+static inline int wm8994_request_irq(struct wm8994 *wm8994, int irq,
+				     irq_handler_t handler, const char *name,
+				     void *data)
+{
+	if (!wm8994->irq_base)
+		return -EINVAL;
+	return request_threaded_irq(wm8994->irq_base + irq, NULL, handler,
+				    IRQF_TRIGGER_RISING, name,
+				    data);
+}
+static inline void wm8994_free_irq(struct wm8994 *wm8994, int irq, void *data)
+{
+	if (!wm8994->irq_base)
+		return;
+	free_irq(wm8994->irq_base + irq, data);
+}
+
+int wm8994_irq_init(struct wm8994 *wm8994);
+void wm8994_irq_exit(struct wm8994 *wm8994);
+
 #endif
diff --git a/include/linux/mfd/wm8994/pdata.h b/include/linux/mfd/wm8994/pdata.h
index 70d6a8687dc5..5c51f367c061 100644
--- a/include/linux/mfd/wm8994/pdata.h
+++ b/include/linux/mfd/wm8994/pdata.h
@@ -70,6 +70,7 @@ struct wm8994_pdata {
 
 	struct wm8994_ldo_pdata ldo[WM8994_NUM_LDO];
 
+	int irq_base;  /** Base IRQ number for WM8994, required for IRQs */
 
         int num_drc_cfgs;
         struct wm8994_drc_cfg *drc_cfgs;
-- 
cgit v1.2.3


From 3bbb9ec946428b96657126768f65487a48dd090c Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 11 Mar 2010 14:04:36 -0800
Subject: timers: Introduce the concept of timer slack for legacy timers

While HR timers have had the concept of timer slack for quite some time
now, the legacy timers lacked this concept, and had to make do with
round_jiffies() and friends.

Timer slack is important for power management; grouping timers reduces the
number of wakeups which in turn reduces power consumption.

This patch introduces timer slack to the legacy timers using the following
pieces:
* A slack field in the timer struct
* An api (set_timer_slack) that callers can use to set explicit timer slack
* A default slack of 0.4% of the requested delay for callers that do not set
  any explicit slack
* Rounding code that is part of mod_timer() that tries to
  group timers around jiffies values every 'power of two'
  (so quick timers will group around every 2, but longer timers
  will group around every 4, 8, 16, 32 etc)

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Cc: johnstul@us.ibm.com
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/timer.h | 10 ++++++++-
 kernel/timer.c        | 56 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 65 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/timer.h b/include/linux/timer.h
index a2d1eb6cb3f0..ea965b857a50 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -10,13 +10,19 @@
 struct tvec_base;
 
 struct timer_list {
+	/*
+	 * All fields that change during normal runtime grouped to the
+	 * same cacheline
+	 */
 	struct list_head entry;
 	unsigned long expires;
+	struct tvec_base *base;
 
 	void (*function)(unsigned long);
 	unsigned long data;
 
-	struct tvec_base *base;
+	int slack;
+
 #ifdef CONFIG_TIMER_STATS
 	void *start_site;
 	char start_comm[16];
@@ -165,6 +171,8 @@ extern int mod_timer(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pending(struct timer_list *timer, unsigned long expires);
 extern int mod_timer_pinned(struct timer_list *timer, unsigned long expires);
 
+extern void set_timer_slack(struct timer_list *timer, int slack_hz);
+
 #define TIMER_NOT_PINNED	0
 #define TIMER_PINNED		1
 /*
diff --git a/kernel/timer.c b/kernel/timer.c
index 7e12e7bc7ce6..49773f38c9bc 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -318,6 +318,24 @@ unsigned long round_jiffies_up_relative(unsigned long j)
 }
 EXPORT_SYMBOL_GPL(round_jiffies_up_relative);
 
+/**
+ * set_timer_slack - set the allowed slack for a timer
+ * @slack_hz: the amount of time (in jiffies) allowed for rounding
+ *
+ * Set the amount of time, in jiffies, that a certain timer has
+ * in terms of slack. By setting this value, the timer subsystem
+ * will schedule the actual timer somewhere between
+ * the time mod_timer() asks for, and that time plus the slack.
+ *
+ * By setting the slack to -1, a percentage of the delay is used
+ * instead.
+ */
+void set_timer_slack(struct timer_list *timer, int slack_hz)
+{
+	timer->slack = slack_hz;
+}
+EXPORT_SYMBOL_GPL(set_timer_slack);
+
 
 static inline void set_running_timer(struct tvec_base *base,
 					struct timer_list *timer)
@@ -549,6 +567,7 @@ static void __init_timer(struct timer_list *timer,
 {
 	timer->entry.next = NULL;
 	timer->base = __raw_get_cpu_var(tvec_bases);
+	timer->slack = -1;
 #ifdef CONFIG_TIMER_STATS
 	timer->start_site = NULL;
 	timer->start_pid = -1;
@@ -714,6 +733,41 @@ int mod_timer_pending(struct timer_list *timer, unsigned long expires)
 }
 EXPORT_SYMBOL(mod_timer_pending);
 
+/*
+ * Decide where to put the timer while taking the slack into account
+ *
+ * Algorithm:
+ *   1) calculate the maximum (absolute) time
+ *   2) calculate the highest bit where the expires and new max are different
+ *   3) use this bit to make a mask
+ *   4) use the bitmask to round down the maximum time, so that all last
+ *      bits are zeros
+ */
+static inline
+unsigned long apply_slack(struct timer_list *timer, unsigned long expires)
+{
+	unsigned long expires_limit, mask;
+	int bit;
+
+	expires_limit = expires + timer->slack;
+
+	if (timer->slack < 0) /* auto slack: use 0.4% */
+		expires_limit = expires + (expires - jiffies)/256;
+
+	mask = expires ^ expires_limit;
+
+	if (mask == 0)
+		return expires;
+
+	bit = find_last_bit(&mask, BITS_PER_LONG);
+
+	mask = (1UL << bit) - 1; /* 1UL: bit can exceed 30 on 64-bit longs */
+
+	expires_limit = expires_limit & ~(mask);
+
+	return expires_limit;
+}
+
 /**
  * mod_timer - modify a timer's timeout
  * @timer: the timer to be modified
@@ -744,6 +798,8 @@ int mod_timer(struct timer_list *timer, unsigned long expires)
 	if (timer_pending(timer) && timer->expires == expires)
 		return 1;
 
+	expires = apply_slack(timer, expires);
+
 	return __mod_timer(timer, expires, false, TIMER_NOT_PINNED);
 }
 EXPORT_SYMBOL(mod_timer);
-- 
cgit v1.2.3


From 351b3f7a21e413a9b14d0393171497d2373bd702 Mon Sep 17 00:00:00 2001
From: Carsten Emde <C.Emde@osadl.org>
Date: Fri, 2 Apr 2010 22:40:19 +0200
Subject: hrtimers: Provide schedule_hrtimeout for CLOCK_REALTIME

The current version of schedule_hrtimeout() always uses the
monotonic clock. Some system calls such as mq_timedsend()
and mq_timedreceive(), however, require the use of the wall
clock due to the definition of the system call.

This patch provides the infrastructure to use schedule_hrtimeout()
with a CLOCK_REALTIME timer.

Signed-off-by: Carsten Emde <C.Emde@osadl.org>
Tested-by: Pradyumna Sampath <pradysam@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arjan van de Ven <arjan@infradead.org>
LKML-Reference: <20100402204331.167439615@osadl.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/hrtimer.h |  2 ++
 kernel/hrtimer.c        | 67 ++++++++++++++++++++++++++++++-------------------
 2 files changed, 43 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 5d86fb2309d2..fd0c1b857d3d 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -422,6 +422,8 @@ extern void hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
 
 extern int schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 						const enum hrtimer_mode mode);
+extern int schedule_hrtimeout_range_clock(ktime_t *expires,
+		unsigned long delta, const enum hrtimer_mode mode, int clock);
 extern int schedule_hrtimeout(ktime_t *expires, const enum hrtimer_mode mode);
 
 /* Soft interrupt function to run the hrtimer queues: */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 0086628b6e97..b9b134b35088 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -1749,35 +1749,15 @@ void __init hrtimers_init(void)
 }
 
 /**
- * schedule_hrtimeout_range - sleep until timeout
+ * schedule_hrtimeout_range_clock - sleep until timeout
  * @expires:	timeout value (ktime_t)
  * @delta:	slack in expires timeout (ktime_t)
  * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
- *
- * Make the current task sleep until the given expiry time has
- * elapsed. The routine will return immediately unless
- * the current task state has been set (see set_current_state()).
- *
- * The @delta argument gives the kernel the freedom to schedule the
- * actual wakeup to a time that is both power and performance friendly.
- * The kernel give the normal best effort behavior for "@expires+@delta",
- * but may decide to fire the timer earlier, but no earlier than @expires.
- *
- * You can set the task state as follows -
- *
- * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
- * pass before the routine returns.
- *
- * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
- * delivered to the current task.
- *
- * The current task state is guaranteed to be TASK_RUNNING when this
- * routine returns.
- *
- * Returns 0 when the timer has expired otherwise -EINTR
+ * @clock:	timer clock, CLOCK_MONOTONIC or CLOCK_REALTIME
  */
-int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
-			       const enum hrtimer_mode mode)
+int __sched
+schedule_hrtimeout_range_clock(ktime_t *expires, unsigned long delta,
+			       const enum hrtimer_mode mode, int clock)
 {
 	struct hrtimer_sleeper t;
 
@@ -1799,7 +1779,7 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, mode);
+	hrtimer_init_on_stack(&t.timer, clock, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
 
 	hrtimer_init_sleeper(&t, current);
@@ -1818,6 +1798,41 @@ int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
 
 	return !t.task ? 0 : -EINTR;
 }
+
+/**
+ * schedule_hrtimeout_range - sleep until timeout
+ * @expires:	timeout value (ktime_t)
+ * @delta:	slack in expires timeout (ktime_t)
+ * @mode:	timer mode, HRTIMER_MODE_ABS or HRTIMER_MODE_REL
+ *
+ * Make the current task sleep until the given expiry time has
+ * elapsed. The routine will return immediately unless
+ * the current task state has been set (see set_current_state()).
+ *
+ * The @delta argument gives the kernel the freedom to schedule the
+ * actual wakeup to a time that is both power and performance friendly.
+ * The kernel give the normal best effort behavior for "@expires+@delta",
+ * but may decide to fire the timer earlier, but no earlier than @expires.
+ *
+ * You can set the task state as follows -
+ *
+ * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
+ * pass before the routine returns.
+ *
+ * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
+ * delivered to the current task.
+ *
+ * The current task state is guaranteed to be TASK_RUNNING when this
+ * routine returns.
+ *
+ * Returns 0 when the timer has expired otherwise -EINTR
+ */
+int __sched schedule_hrtimeout_range(ktime_t *expires, unsigned long delta,
+				     const enum hrtimer_mode mode)
+{
+	return schedule_hrtimeout_range_clock(expires, delta, mode,
+					      CLOCK_MONOTONIC);
+}
 EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);
 
 /**
-- 
cgit v1.2.3


From 8a64c0f6b7ec7f758c4ef445e49f479e27fa2236 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 6 Apr 2010 10:52:44 +0200
Subject: libertas/sdio: 8686: set ECSI bit for 1-bit transfers

When operating in 1-bit mode, SDAT1 is used as dedicated interrupt line.
However, the 8686 will only drive this line when the ECSI bit is set in
the CCCR_IF register.

Thanks to Alagu Sankar for pointing me in the right direction.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Alagu Sankar <alagusankar@embwise.com>
Cc: Volker Ernst <volker.ernst@txtr.com>
Cc: Dan Williams <dcbw@redhat.com>
Cc: John W. Linville <linville@tuxdriver.com>
Cc: Holger Schurig <hs4233@mail.mn-solutions.de>
Cc: Bing Zhao <bzhao@marvell.com>
Cc: libertas-dev@lists.infradead.org
Cc: linux-wireless@vger.kernel.org
Cc: linux-mmc@vger.kernel.org
Acked-by: Dan Williams <dcbw@redhat.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/libertas/if_sdio.c | 22 ++++++++++++++++++++++
 include/linux/mmc/sdio.h                |  2 ++
 2 files changed, 24 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/libertas/if_sdio.c b/drivers/net/wireless/libertas/if_sdio.c
index 7a73f625273b..33206a98a572 100644
--- a/drivers/net/wireless/libertas/if_sdio.c
+++ b/drivers/net/wireless/libertas/if_sdio.c
@@ -34,6 +34,8 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/sdio_ids.h>
+#include <linux/mmc/sdio.h>
+#include <linux/mmc/host.h>
 
 #include "host.h"
 #include "decl.h"
@@ -942,6 +944,7 @@ static int if_sdio_probe(struct sdio_func *func,
 	int ret, i;
 	unsigned int model;
 	struct if_sdio_packet *packet;
+	struct mmc_host *host = func->card->host;
 
 	lbs_deb_enter(LBS_DEB_SDIO);
 
@@ -1022,6 +1025,25 @@ static int if_sdio_probe(struct sdio_func *func,
 	if (ret)
 		goto disable;
 
+	/* For 1-bit transfers to the 8686 model, we need to enable the
+	 * interrupt flag in the CCCR register. Set the MMC_QUIRK_LENIENT_FN0
+	 * bit to allow access to non-vendor registers. */
+	if ((card->model == IF_SDIO_MODEL_8686) &&
+	    (host->caps & MMC_CAP_SDIO_IRQ) &&
+	    (host->ios.bus_width == MMC_BUS_WIDTH_1)) {
+		u8 reg;
+
+		func->card->quirks |= MMC_QUIRK_LENIENT_FN0;
+		reg = sdio_f0_readb(func, SDIO_CCCR_IF, &ret);
+		if (ret)
+			goto release_int;
+
+		reg |= SDIO_BUS_ECSI;
+		sdio_f0_writeb(func, reg, SDIO_CCCR_IF, &ret);
+		if (ret)
+			goto release_int;
+	}
+
 	card->ioport = sdio_readb(func, IF_SDIO_IOPORT, &ret);
 	if (ret)
 		goto release_int;
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index 47ba464f5170..118f0295a575 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -94,6 +94,8 @@
 
 #define  SDIO_BUS_WIDTH_1BIT	0x00
 #define  SDIO_BUS_WIDTH_4BIT	0x02
+#define  SDIO_BUS_ECSI		0x20	/* Enable continuous SPI interrupt */
+#define  SDIO_BUS_SCSI		0x40	/* Support continuous SPI interrupt */
 
 #define  SDIO_BUS_CD_DISABLE     0x80	/* disable pull-up on DAT3 (pin 1) */
 
-- 
cgit v1.2.3


From 1527bc8b928dd1399c3d3467dd47d9ede210978a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Mon, 1 Feb 2010 15:03:07 +0100
Subject: bitops: Optimize hweight() by making use of compile-time evaluation

Rename the existing runtime hweight() implementations to
__arch_hweight(), rename the compile-time versions to __const_hweight()
and then have hweight() pick between them.

Suggested-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20100318111929.GB11152@aftab>
Acked-by: H. Peter Anvin <hpa@zytor.com>
LKML-Reference: <1265028224.24455.154.camel@laptop>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/alpha/include/asm/bitops.h            | 18 +++++++------
 arch/ia64/include/asm/bitops.h             | 11 ++++----
 arch/sparc/include/asm/bitops_64.h         | 11 ++++----
 include/asm-generic/bitops/arch_hweight.h  | 11 ++++++++
 include/asm-generic/bitops/const_hweight.h | 42 ++++++++++++++++++++++++++++++
 include/asm-generic/bitops/hweight.h       |  8 ++----
 include/linux/bitops.h                     | 25 ------------------
 lib/hweight.c                              | 19 +++++++-------
 8 files changed, 87 insertions(+), 58 deletions(-)
 create mode 100644 include/asm-generic/bitops/arch_hweight.h
 create mode 100644 include/asm-generic/bitops/const_hweight.h

(limited to 'include/linux')

diff --git a/arch/alpha/include/asm/bitops.h b/arch/alpha/include/asm/bitops.h
index 15f3ae25c511..296da1d5ed57 100644
--- a/arch/alpha/include/asm/bitops.h
+++ b/arch/alpha/include/asm/bitops.h
@@ -405,29 +405,31 @@ static inline int fls(int x)
 
 #if defined(CONFIG_ALPHA_EV6) && defined(CONFIG_ALPHA_EV67)
 /* Whee.  EV67 can calculate it directly.  */
-static inline unsigned long hweight64(unsigned long w)
+static inline unsigned long __arch_hweight64(unsigned long w)
 {
 	return __kernel_ctpop(w);
 }
 
-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
 {
-	return hweight64(w);
+	return __arch_hweight64(w);
 }
 
-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
 {
-	return hweight64(w & 0xffff);
+	return __arch_hweight64(w & 0xffff);
 }
 
-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
 {
-	return hweight64(w & 0xff);
+	return __arch_hweight64(w & 0xff);
 }
 #else
-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>
 #endif
 
+#include <asm-generic/bitops/const_hweight.h>
+
 #endif /* __KERNEL__ */
 
 #include <asm-generic/bitops/find.h>
diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h
index 6ebc229a1c51..9da3df6f1a52 100644
--- a/arch/ia64/include/asm/bitops.h
+++ b/arch/ia64/include/asm/bitops.h
@@ -437,17 +437,18 @@ __fls (unsigned long x)
  * hweightN: returns the hamming weight (i.e. the number
  * of bits set) of a N-bit word
  */
-static __inline__ unsigned long
-hweight64 (unsigned long x)
+static __inline__ unsigned long __arch_hweight64(unsigned long x)
 {
 	unsigned long result;
 	result = ia64_popcnt(x);
 	return result;
 }
 
-#define hweight32(x)	(unsigned int) hweight64((x) & 0xfffffffful)
-#define hweight16(x)	(unsigned int) hweight64((x) & 0xfffful)
-#define hweight8(x)	(unsigned int) hweight64((x) & 0xfful)
+#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful))
+#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful))
+#define __arch_hweight8(x)  ((unsigned int) __arch_hweight64((x) & 0xfful))
+
+#include <asm-generic/bitops/const_hweight.h>
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/sparc/include/asm/bitops_64.h b/arch/sparc/include/asm/bitops_64.h
index e72ac9cdfb98..766121a67a24 100644
--- a/arch/sparc/include/asm/bitops_64.h
+++ b/arch/sparc/include/asm/bitops_64.h
@@ -44,7 +44,7 @@ extern void change_bit(unsigned long nr, volatile unsigned long *addr);
 
 #ifdef ULTRA_HAS_POPULATION_COUNT
 
-static inline unsigned int hweight64(unsigned long w)
+static inline unsigned int __arch_hweight64(unsigned long w)
 {
 	unsigned int res;
 
@@ -52,7 +52,7 @@ static inline unsigned int hweight64(unsigned long w)
 	return res;
 }
 
-static inline unsigned int hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	unsigned int res;
 
@@ -60,7 +60,7 @@ static inline unsigned int hweight32(unsigned int w)
 	return res;
 }
 
-static inline unsigned int hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
 {
 	unsigned int res;
 
@@ -68,7 +68,7 @@ static inline unsigned int hweight16(unsigned int w)
 	return res;
 }
 
-static inline unsigned int hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
 {
 	unsigned int res;
 
@@ -78,9 +78,10 @@ static inline unsigned int hweight8(unsigned int w)
 
 #else
 
-#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/arch_hweight.h>
 
 #endif
+#include <asm-generic/bitops/const_hweight.h>
 #include <asm-generic/bitops/lock.h>
 #endif /* __KERNEL__ */
 
diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h
new file mode 100644
index 000000000000..3a7be842cdce
--- /dev/null
+++ b/include/asm-generic/bitops/arch_hweight.h
@@ -0,0 +1,11 @@
+#ifndef _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_
+
+#include <asm/types.h>
+
+extern unsigned int __arch_hweight32(unsigned int w);
+extern unsigned int __arch_hweight16(unsigned int w);
+extern unsigned int __arch_hweight8(unsigned int w);
+extern unsigned long __arch_hweight64(__u64 w);
+
+#endif /* _ASM_GENERIC_BITOPS_ARCH_HWEIGHT_H_ */
diff --git a/include/asm-generic/bitops/const_hweight.h b/include/asm-generic/bitops/const_hweight.h
new file mode 100644
index 000000000000..fa2a50b7ee66
--- /dev/null
+++ b/include/asm-generic/bitops/const_hweight.h
@@ -0,0 +1,42 @@
+#ifndef _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+#define _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_
+
+/*
+ * Compile time versions of __arch_hweightN()
+ */
+#define __const_hweight8(w)		\
+      (	(!!((w) & (1ULL << 0))) +	\
+	(!!((w) & (1ULL << 1))) +	\
+	(!!((w) & (1ULL << 2))) +	\
+	(!!((w) & (1ULL << 3))) +	\
+	(!!((w) & (1ULL << 4))) +	\
+	(!!((w) & (1ULL << 5))) +	\
+	(!!((w) & (1ULL << 6))) +	\
+	(!!((w) & (1ULL << 7)))	)
+
+#define __const_hweight16(w) (__const_hweight8(w)  + __const_hweight8((w)  >> 8 ))
+#define __const_hweight32(w) (__const_hweight16(w) + __const_hweight16((w) >> 16))
+#define __const_hweight64(w) (__const_hweight32(w) + __const_hweight32((w) >> 32))
+
+/*
+ * Generic interface.
+ */
+#define hweight8(w)  (__builtin_constant_p(w) ? __const_hweight8(w)  : __arch_hweight8(w))
+#define hweight16(w) (__builtin_constant_p(w) ? __const_hweight16(w) : __arch_hweight16(w))
+#define hweight32(w) (__builtin_constant_p(w) ? __const_hweight32(w) : __arch_hweight32(w))
+#define hweight64(w) (__builtin_constant_p(w) ? __const_hweight64(w) : __arch_hweight64(w))
+
+/*
+ * Interface for known constant arguments
+ */
+#define HWEIGHT8(w)  (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight8(w))
+#define HWEIGHT16(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight16(w))
+#define HWEIGHT32(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight32(w))
+#define HWEIGHT64(w) (BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) + __const_hweight64(w))
+
+/*
+ * Type invariant interface to the compile time constant hweight functions.
+ */
+#define HWEIGHT(w)   HWEIGHT64((u64)(w))
+
+#endif /* _ASM_GENERIC_BITOPS_CONST_HWEIGHT_H_ */
diff --git a/include/asm-generic/bitops/hweight.h b/include/asm-generic/bitops/hweight.h
index fbbc383771da..a94d6519c7ed 100644
--- a/include/asm-generic/bitops/hweight.h
+++ b/include/asm-generic/bitops/hweight.h
@@ -1,11 +1,7 @@
 #ifndef _ASM_GENERIC_BITOPS_HWEIGHT_H_
 #define _ASM_GENERIC_BITOPS_HWEIGHT_H_
 
-#include <asm/types.h>
-
-extern unsigned int hweight32(unsigned int w);
-extern unsigned int hweight16(unsigned int w);
-extern unsigned int hweight8(unsigned int w);
-extern unsigned long hweight64(__u64 w);
+#include <asm-generic/bitops/arch_hweight.h>
+#include <asm-generic/bitops/const_hweight.h>
 
 #endif /* _ASM_GENERIC_BITOPS_HWEIGHT_H_ */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index b79389879238..c55d5bc4ee58 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -47,31 +47,6 @@ static inline unsigned long hweight_long(unsigned long w)
 	return sizeof(w) == 4 ? hweight32(w) : hweight64(w);
 }
 
-/*
- * Clearly slow versions of the hweightN() functions, their benefit is
- * of course compile time evaluation of constant arguments.
- */
-#define HWEIGHT8(w)					\
-      (	BUILD_BUG_ON_ZERO(!__builtin_constant_p(w)) +	\
-	(!!((w) & (1ULL << 0))) +			\
-	(!!((w) & (1ULL << 1))) +			\
-	(!!((w) & (1ULL << 2))) +			\
-	(!!((w) & (1ULL << 3))) +			\
-	(!!((w) & (1ULL << 4))) +			\
-	(!!((w) & (1ULL << 5))) +			\
-	(!!((w) & (1ULL << 6))) +			\
-	(!!((w) & (1ULL << 7)))	)
-
-#define HWEIGHT16(w) (HWEIGHT8(w)  + HWEIGHT8((w) >> 8))
-#define HWEIGHT32(w) (HWEIGHT16(w) + HWEIGHT16((w) >> 16))
-#define HWEIGHT64(w) (HWEIGHT32(w) + HWEIGHT32((w) >> 32))
-
-/*
- * Type invariant version that simply casts things to the
- * largest type.
- */
-#define HWEIGHT(w)   HWEIGHT64((u64)(w))
-
 /**
  * rol32 - rotate a 32-bit value left
  * @word: value to rotate
diff --git a/lib/hweight.c b/lib/hweight.c
index 63ee4eb1228d..a6927e76840f 100644
--- a/lib/hweight.c
+++ b/lib/hweight.c
@@ -9,7 +9,7 @@
  * The Hamming Weight of a number is the total number of bits set in it.
  */
 
-unsigned int hweight32(unsigned int w)
+unsigned int __arch_hweight32(unsigned int w)
 {
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x55555555;
@@ -24,29 +24,30 @@ unsigned int hweight32(unsigned int w)
 	return (res + (res >> 16)) & 0x000000FF;
 #endif
 }
-EXPORT_SYMBOL(hweight32);
+EXPORT_SYMBOL(__arch_hweight32);
 
-unsigned int hweight16(unsigned int w)
+unsigned int __arch_hweight16(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x5555);
 	res = (res & 0x3333) + ((res >> 2) & 0x3333);
 	res = (res + (res >> 4)) & 0x0F0F;
 	return (res + (res >> 8)) & 0x00FF;
 }
-EXPORT_SYMBOL(hweight16);
+EXPORT_SYMBOL(__arch_hweight16);
 
-unsigned int hweight8(unsigned int w)
+unsigned int __arch_hweight8(unsigned int w)
 {
 	unsigned int res = w - ((w >> 1) & 0x55);
 	res = (res & 0x33) + ((res >> 2) & 0x33);
 	return (res + (res >> 4)) & 0x0F;
 }
-EXPORT_SYMBOL(hweight8);
+EXPORT_SYMBOL(__arch_hweight8);
 
-unsigned long hweight64(__u64 w)
+unsigned long __arch_hweight64(__u64 w)
 {
 #if BITS_PER_LONG == 32
-	return hweight32((unsigned int)(w >> 32)) + hweight32((unsigned int)w);
+	return __arch_hweight32((unsigned int)(w >> 32)) +
+	       __arch_hweight32((unsigned int)w);
 #elif BITS_PER_LONG == 64
 #ifdef ARCH_HAS_FAST_MULTIPLIER
 	w -= (w >> 1) & 0x5555555555555555ul;
@@ -63,4 +64,4 @@ unsigned long hweight64(__u64 w)
 #endif
 #endif
 }
-EXPORT_SYMBOL(hweight64);
+EXPORT_SYMBOL(__arch_hweight64);
-- 
cgit v1.2.3


From a244b25217978ffd54d2cd87013b3cd564689462 Mon Sep 17 00:00:00 2001
From: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Date: Wed, 7 Apr 2010 10:08:49 +1000
Subject: Remove unused HDPU driver

This driver seems to be specific to a "Sky CPU" board for which we
don't appear to have upstream support (or not any more). No Kconfig
file in the kernel ever enables it. So remove it.

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
---
 drivers/misc/Makefile                 |   1 -
 drivers/misc/hdpuftrs/Makefile        |   1 -
 drivers/misc/hdpuftrs/hdpu_cpustate.c | 256 ----------------------------------
 drivers/misc/hdpuftrs/hdpu_nexus.c    | 149 --------------------
 include/linux/hdpu_features.h         |  26 ----
 5 files changed, 433 deletions(-)
 delete mode 100644 drivers/misc/hdpuftrs/Makefile
 delete mode 100644 drivers/misc/hdpuftrs/hdpu_cpustate.c
 delete mode 100644 drivers/misc/hdpuftrs/hdpu_nexus.c
 delete mode 100644 include/linux/hdpu_features.h

(limited to 'include/linux')

diff --git a/drivers/misc/Makefile b/drivers/misc/Makefile
index 27c484355414..208ae3091a4e 100644
--- a/drivers/misc/Makefile
+++ b/drivers/misc/Makefile
@@ -3,7 +3,6 @@
 #
 
 obj-$(CONFIG_IBM_ASM)		+= ibmasm/
-obj-$(CONFIG_HDPU_FEATURES)	+= hdpuftrs/
 obj-$(CONFIG_AD525X_DPOT)	+= ad525x_dpot.o
 obj-$(CONFIG_ATMEL_PWM)		+= atmel_pwm.o
 obj-$(CONFIG_ATMEL_SSC)		+= atmel-ssc.o
diff --git a/drivers/misc/hdpuftrs/Makefile b/drivers/misc/hdpuftrs/Makefile
deleted file mode 100644
index ac74ae679230..000000000000
--- a/drivers/misc/hdpuftrs/Makefile
+++ /dev/null
@@ -1 +0,0 @@
-obj-$(CONFIG_HDPU_FEATURES) := hdpu_cpustate.o hdpu_nexus.o
diff --git a/drivers/misc/hdpuftrs/hdpu_cpustate.c b/drivers/misc/hdpuftrs/hdpu_cpustate.c
deleted file mode 100644
index 176fe4e09d3f..000000000000
--- a/drivers/misc/hdpuftrs/hdpu_cpustate.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- *	Sky CPU State Driver
- *
- *	Copyright (C) 2002 Brian Waite
- *
- *	This driver allows use of the CPU state bits
- *	It exports the /dev/sky_cpustate and also
- *	/proc/sky_cpustate pseudo-file for status information.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/smp_lock.h>
-#include <linux/miscdevice.h>
-#include <linux/proc_fs.h>
-#include <linux/hdpu_features.h>
-#include <linux/platform_device.h>
-#include <asm/uaccess.h>
-#include <linux/seq_file.h>
-#include <asm/io.h>
-
-#define SKY_CPUSTATE_VERSION		"1.1"
-
-static int hdpu_cpustate_probe(struct platform_device *pdev);
-static int hdpu_cpustate_remove(struct platform_device *pdev);
-
-static unsigned char cpustate_get_state(void);
-static int cpustate_proc_open(struct inode *inode, struct file *file);
-static int cpustate_proc_read(struct seq_file *seq, void *offset);
-
-static struct cpustate_t cpustate;
-
-static const struct file_operations proc_cpustate = {
-	.open = cpustate_proc_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
-
-static int cpustate_proc_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, cpustate_proc_read, NULL);
-}
-
-static int cpustate_proc_read(struct seq_file *seq, void *offset)
-{
-	seq_printf(seq, "CPU State: %04x\n", cpustate_get_state());
-	return 0;
-}
-
-static int cpustate_get_ref(int excl)
-{
-
-	int retval = -EBUSY;
-
-	spin_lock(&cpustate.lock);
-
-	if (cpustate.excl)
-		goto out_busy;
-
-	if (excl) {
-		if (cpustate.open_count)
-			goto out_busy;
-		cpustate.excl = 1;
-	}
-
-	cpustate.open_count++;
-	retval = 0;
-
-      out_busy:
-	spin_unlock(&cpustate.lock);
-	return retval;
-}
-
-static int cpustate_free_ref(void)
-{
-
-	spin_lock(&cpustate.lock);
-
-	cpustate.excl = 0;
-	cpustate.open_count--;
-
-	spin_unlock(&cpustate.lock);
-	return 0;
-}
-
-static unsigned char cpustate_get_state(void)
-{
-
-	return cpustate.cached_val;
-}
-
-static void cpustate_set_state(unsigned char new_state)
-{
-	unsigned int state = (new_state << 21);
-
-#ifdef DEBUG_CPUSTATE
-	printk("CPUSTATE -> 0x%x\n", new_state);
-#endif
-	spin_lock(&cpustate.lock);
-	cpustate.cached_val = new_state;
-	writel((0xff << 21), cpustate.clr_addr);
-	writel(state, cpustate.set_addr);
-	spin_unlock(&cpustate.lock);
-}
-
-/*
- *	Now all the various file operations that we export.
- */
-
-static ssize_t cpustate_read(struct file *file, char *buf,
-			     size_t count, loff_t * ppos)
-{
-	unsigned char data;
-
-	if (count < 0)
-		return -EFAULT;
-	if (count == 0)
-		return 0;
-
-	data = cpustate_get_state();
-	if (copy_to_user(buf, &data, sizeof(unsigned char)))
-		return -EFAULT;
-	return sizeof(unsigned char);
-}
-
-static ssize_t cpustate_write(struct file *file, const char *buf,
-			      size_t count, loff_t * ppos)
-{
-	unsigned char data;
-
-	if (count < 0)
-		return -EFAULT;
-
-	if (count == 0)
-		return 0;
-
-	if (copy_from_user((unsigned char *)&data, buf, sizeof(unsigned char)))
-		return -EFAULT;
-
-	cpustate_set_state(data);
-	return sizeof(unsigned char);
-}
-
-static int cpustate_open(struct inode *inode, struct file *file)
-{
-	int ret;
-
-	lock_kernel();
-	ret = cpustate_get_ref((file->f_flags & O_EXCL));
-	unlock_kernel();
-
-	return ret;
-}
-
-static int cpustate_release(struct inode *inode, struct file *file)
-{
-	return cpustate_free_ref();
-}
-
-static struct platform_driver hdpu_cpustate_driver = {
-	.probe = hdpu_cpustate_probe,
-	.remove = hdpu_cpustate_remove,
-	.driver = {
-		.name = HDPU_CPUSTATE_NAME,
-		.owner = THIS_MODULE,
-	},
-};
-
-/*
- *	The various file operations we support.
- */
-static const struct file_operations cpustate_fops = {
-      .owner	= THIS_MODULE,
-      .open	= cpustate_open,
-      .release	= cpustate_release,
-      .read	= cpustate_read,
-      .write	= cpustate_write,
-      .llseek	= no_llseek,
-};
-
-static struct miscdevice cpustate_dev = {
-	.minor	= MISC_DYNAMIC_MINOR,
-	.name	= "sky_cpustate",
-	.fops	= &cpustate_fops,
-};
-
-static int hdpu_cpustate_probe(struct platform_device *pdev)
-{
-	struct resource *res;
-	struct proc_dir_entry *proc_de;
-	int ret;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		printk(KERN_ERR "sky_cpustate: "
-		       "Invalid memory resource.\n");
-		return -EINVAL;
-	}
-	cpustate.set_addr = (unsigned long *)res->start;
-	cpustate.clr_addr = (unsigned long *)res->end - 1;
-
-	ret = misc_register(&cpustate_dev);
-	if (ret) {
-		printk(KERN_WARNING "sky_cpustate: "
-		       "Unable to register misc device.\n");
-		cpustate.set_addr = NULL;
-		cpustate.clr_addr = NULL;
-		return ret;
-	}
-
-	proc_de = proc_create("sky_cpustate", 0666, NULL, &proc_cpustate);
-	if (!proc_de) {
-		printk(KERN_WARNING "sky_cpustate: "
-		       "Unable to create proc entry\n");
-	}
-
-	printk(KERN_INFO "Sky CPU State Driver v" SKY_CPUSTATE_VERSION "\n");
-	return 0;
-}
-
-static int hdpu_cpustate_remove(struct platform_device *pdev)
-{
-	cpustate.set_addr = NULL;
-	cpustate.clr_addr = NULL;
-
-	remove_proc_entry("sky_cpustate", NULL);
-	misc_deregister(&cpustate_dev);
-
-	return 0;
-}
-
-static int __init cpustate_init(void)
-{
-	return platform_driver_register(&hdpu_cpustate_driver);
-}
-
-static void __exit cpustate_exit(void)
-{
-	platform_driver_unregister(&hdpu_cpustate_driver);
-}
-
-module_init(cpustate_init);
-module_exit(cpustate_exit);
-
-MODULE_AUTHOR("Brian Waite");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" HDPU_CPUSTATE_NAME);
diff --git a/drivers/misc/hdpuftrs/hdpu_nexus.c b/drivers/misc/hdpuftrs/hdpu_nexus.c
deleted file mode 100644
index ce39fa54949b..000000000000
--- a/drivers/misc/hdpuftrs/hdpu_nexus.c
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- *	Sky Nexus Register Driver
- *
- *	Copyright (C) 2002 Brian Waite
- *
- *	This driver allows reading the Nexus register
- *	It exports the /proc/sky_chassis_id and also
- *	/proc/sky_slot_id pseudo-file for status information.
- *
- *	This program is free software; you can redistribute it and/or
- *	modify it under the terms of the GNU General Public License
- *	as published by the Free Software Foundation; either version
- *	2 of the License, or (at your option) any later version.
- *
- */
-
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/proc_fs.h>
-#include <linux/hdpu_features.h>
-#include <linux/platform_device.h>
-#include <linux/seq_file.h>
-#include <asm/io.h>
-
-static int hdpu_nexus_probe(struct platform_device *pdev);
-static int hdpu_nexus_remove(struct platform_device *pdev);
-static int hdpu_slot_id_open(struct inode *inode, struct file *file);
-static int hdpu_slot_id_read(struct seq_file *seq, void *offset);
-static int hdpu_chassis_id_open(struct inode *inode, struct file *file);
-static int hdpu_chassis_id_read(struct seq_file *seq, void *offset);
-
-static struct proc_dir_entry *hdpu_slot_id;
-static struct proc_dir_entry *hdpu_chassis_id;
-static int slot_id = -1;
-static int chassis_id = -1;
-
-static const struct file_operations proc_slot_id = {
-	.open = hdpu_slot_id_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
-
-static const struct file_operations proc_chassis_id = {
-	.open = hdpu_chassis_id_open,
-	.read = seq_read,
-	.llseek	= seq_lseek,
-	.release = single_release,
-	.owner = THIS_MODULE,
-};
-
-static struct platform_driver hdpu_nexus_driver = {
-	.probe = hdpu_nexus_probe,
-	.remove = hdpu_nexus_remove,
-	.driver = {
-		.name = HDPU_NEXUS_NAME,
-		.owner = THIS_MODULE,
-	},
-};
-
-static int hdpu_slot_id_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hdpu_slot_id_read, NULL);
-}
-
-static int hdpu_slot_id_read(struct seq_file *seq, void *offset)
-{
-	seq_printf(seq, "%d\n", slot_id);
-	return 0;
-}
-
-static int hdpu_chassis_id_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, hdpu_chassis_id_read, NULL);
-}
-
-static int hdpu_chassis_id_read(struct seq_file *seq, void *offset)
-{
-	seq_printf(seq, "%d\n", chassis_id);
-	return 0;
-}
-
-static int hdpu_nexus_probe(struct platform_device *pdev)
-{
-	struct resource *res;
-	int *nexus_id_addr;
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		printk(KERN_ERR "sky_nexus: "
-		       "Invalid memory resource.\n");
-		return -EINVAL;
-	}
-	nexus_id_addr = ioremap(res->start,
-				(unsigned long)(res->end - res->start));
-	if (nexus_id_addr) {
-		slot_id = (*nexus_id_addr >> 8) & 0x1f;
-		chassis_id = *nexus_id_addr & 0xff;
-		iounmap(nexus_id_addr);
-	} else {
-		printk(KERN_ERR "sky_nexus: Could not map slot id\n");
-	}
-
-	hdpu_slot_id = proc_create("sky_slot_id", 0666, NULL, &proc_slot_id);
-	if (!hdpu_slot_id) {
-		printk(KERN_WARNING "sky_nexus: "
-		       "Unable to create proc dir entry: sky_slot_id\n");
-	}
-
-	hdpu_chassis_id = proc_create("sky_chassis_id", 0666, NULL,
-				      &proc_chassis_id);
-	if (!hdpu_chassis_id)
-		printk(KERN_WARNING "sky_nexus: "
-		       "Unable to create proc dir entry: sky_chassis_id\n");
-
-	return 0;
-}
-
-static int hdpu_nexus_remove(struct platform_device *pdev)
-{
-	slot_id = -1;
-	chassis_id = -1;
-
-	remove_proc_entry("sky_slot_id", NULL);
-	remove_proc_entry("sky_chassis_id", NULL);
-
-	hdpu_slot_id = 0;
-	hdpu_chassis_id = 0;
-
-	return 0;
-}
-
-static int __init nexus_init(void)
-{
-	return platform_driver_register(&hdpu_nexus_driver);
-}
-
-static void __exit nexus_exit(void)
-{
-	platform_driver_unregister(&hdpu_nexus_driver);
-}
-
-module_init(nexus_init);
-module_exit(nexus_exit);
-
-MODULE_AUTHOR("Brian Waite");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" HDPU_NEXUS_NAME);
diff --git a/include/linux/hdpu_features.h b/include/linux/hdpu_features.h
deleted file mode 100644
index 6a8715431ae4..000000000000
--- a/include/linux/hdpu_features.h
+++ /dev/null
@@ -1,26 +0,0 @@
-#include <linux/spinlock.h>
-
-struct cpustate_t {
-	spinlock_t lock;
-	int excl;
-        int open_count;
-	unsigned char cached_val;
-	int inited;
-	unsigned long *set_addr;
-	unsigned long *clr_addr;
-};
-
-
-#define HDPU_CPUSTATE_NAME "hdpu cpustate"
-#define HDPU_NEXUS_NAME "hdpu nexus"
-
-#define CPUSTATE_KERNEL_MAJOR  0x10
-
-#define CPUSTATE_KERNEL_INIT_DRV   0 /* CPU State Driver Initialized */
-#define CPUSTATE_KERNEL_INIT_PCI   1 /* 64360 PCI Busses Init */
-#define CPUSTATE_KERNEL_INIT_REG   2 /* 64360 Bridge Init */
-#define CPUSTATE_KERNEL_CPU1_KICK  3 /* Boot cpu 1 */
-#define CPUSTATE_KERNEL_CPU1_OK    4  /* Cpu 1 has checked in */
-#define CPUSTATE_KERNEL_OK         5 /* Terminal state */
-#define CPUSTATE_KERNEL_RESET   14 /* Board reset via SW*/
-#define CPUSTATE_KERNEL_HALT   15 /* Board halted via SW*/
-- 
cgit v1.2.3


From c6537d6742985da1fbf12ae26cde6a096fd35b5c Mon Sep 17 00:00:00 2001
From: Jon Paul Maloy <jon.maloy@ericsson.com>
Date: Tue, 6 Apr 2010 11:40:52 +0000
Subject: TIPC: Updated topology subscription protocol according to latest spec

This patch makes it explicit in the API that all fields in subscriptions and events exchanged with the Topology Server must be in
network byte order.
It also ensures that all fields of a subscription are compared when cancelling a subscription, in order to avoid inadvertent
cancelling of the wrong subscription.
Finally, the tipc module version is updated to 2.0.0, to reflect the API change.

Signed-off-by: Jon Maloy <jon.maloy@ericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc.h | 30 ++++++++++++------------------
 net/tipc/core.c      |  2 +-
 net/tipc/subscr.c    | 15 ++++++++++-----
 3 files changed, 23 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc.h b/include/linux/tipc.h
index 3d92396639de..9536d8aeadf1 100644
--- a/include/linux/tipc.h
+++ b/include/linux/tipc.h
@@ -127,23 +127,17 @@ static inline unsigned int tipc_node(__u32 addr)
  * TIPC topology subscription service definitions
  */
 
-#define TIPC_SUB_PORTS     	0x01  	/* filter for port availability */
-#define TIPC_SUB_SERVICE     	0x02  	/* filter for service availability */
-#define TIPC_SUB_CANCEL         0x04    /* cancel a subscription */
-#if 0
-/* The following filter options are not currently implemented */
-#define TIPC_SUB_NO_BIND_EVTS	0x04	/* filter out "publish" events */
-#define TIPC_SUB_NO_UNBIND_EVTS	0x08	/* filter out "withdraw" events */
-#define TIPC_SUB_SINGLE_EVT	0x10	/* expire after first event */
-#endif
+#define TIPC_SUB_SERVICE     	0x00  	/* Filter for service availability    */
+#define TIPC_SUB_PORTS     	0x01  	/* Filter for port availability  */
+#define TIPC_SUB_CANCEL         0x04    /* Cancel a subscription         */
 
 #define TIPC_WAIT_FOREVER	~0	/* timeout for permanent subscription */
 
 struct tipc_subscr {
-	struct tipc_name_seq seq;	/* name sequence of interest */
-	__u32 timeout;			/* subscription duration (in ms) */
-        __u32 filter;   		/* bitmask of filter options */
-	char usr_handle[8];		/* available for subscriber use */
+	struct tipc_name_seq seq;	/* NBO. Name sequence of interest */
+	__u32 timeout;			/* NBO. Subscription duration (in ms) */
+        __u32 filter;   		/* NBO. Bitmask of filter options */
+	char usr_handle[8];		/* Opaque. Available for subscriber use */
 };
 
 #define TIPC_PUBLISHED		1	/* publication event */
@@ -151,11 +145,11 @@ struct tipc_subscr {
 #define TIPC_SUBSCR_TIMEOUT	3	/* subscription timeout event */
 
 struct tipc_event {
-	__u32 event;			/* event type */
-	__u32 found_lower;		/* matching name seq instances */
-	__u32 found_upper;		/*    "      "    "     "      */
-	struct tipc_portid port;	/* associated port */
-	struct tipc_subscr s;		/* associated subscription */
+	__u32 event;			/* NBO. Event type, as defined above */
+	__u32 found_lower;		/* NBO. Matching name seq instances  */
+	__u32 found_upper;		/*  "      "       "   "    "        */
+	struct tipc_portid port;	/* NBO. Associated port              */
+	struct tipc_subscr s;		/* Original, associated subscription */
 };
 
 /*
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 52c571fedbe0..4e84c8431f32 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,7 +49,7 @@
 #include "config.h"
 
 
-#define TIPC_MOD_VER "1.6.4"
+#define TIPC_MOD_VER "2.0.0"
 
 #ifndef CONFIG_TIPC_ZONES
 #define CONFIG_TIPC_ZONES 3
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index ff123e56114a..ab6eab4c45e2 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -274,7 +274,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 {
 	struct subscription *sub;
 	struct subscription *sub_temp;
-	__u32 type, lower, upper;
+	__u32 type, lower, upper, timeout, filter;
 	int found = 0;
 
 	/* Find first matching subscription, exit if not found */
@@ -282,12 +282,18 @@ static void subscr_cancel(struct tipc_subscr *s,
 	type = ntohl(s->seq.type);
 	lower = ntohl(s->seq.lower);
 	upper = ntohl(s->seq.upper);
+	timeout = ntohl(s->timeout);
+	filter = ntohl(s->filter) & ~TIPC_SUB_CANCEL;
 
 	list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list,
 				 subscription_list) {
 			if ((type == sub->seq.type) &&
 			    (lower == sub->seq.lower) &&
-			    (upper == sub->seq.upper)) {
+			    (upper == sub->seq.upper) &&
+			    (timeout == sub->timeout) &&
+                            (filter == sub->filter) &&
+                             !memcmp(s->usr_handle,sub->evt.s.usr_handle,
+				     sizeof(s->usr_handle)) ){
 				found = 1;
 				break;
 			}
@@ -304,7 +310,7 @@ static void subscr_cancel(struct tipc_subscr *s,
 		k_term_timer(&sub->timer);
 		spin_lock_bh(subscriber->lock);
 	}
-	dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n",
+	dbg("Cancel: removing sub %u,%u,%u from subscriber %p list\n",
 	    sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber);
 	subscr_del(sub);
 }
@@ -352,8 +358,7 @@ static struct subscription *subscr_subscribe(struct tipc_subscr *s,
 	sub->seq.upper = ntohl(s->seq.upper);
 	sub->timeout = ntohl(s->timeout);
 	sub->filter = ntohl(s->filter);
-	if ((!(sub->filter & TIPC_SUB_PORTS) ==
-	     !(sub->filter & TIPC_SUB_SERVICE)) ||
+	if ((sub->filter && (sub->filter != TIPC_SUB_PORTS)) ||
 	    (sub->seq.lower > sub->seq.upper)) {
 		warn("Subscription rejected, illegal request\n");
 		kfree(sub);
-- 
cgit v1.2.3


From d5cdfacb35ed886271d1ccfffbded98d3447da17 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <j@w1.fi>
Date: Sun, 4 Apr 2010 09:37:19 +0300
Subject: cfg80211: Add local-state-change-only auth/deauth/disassoc

cfg80211 is quite strict on allowing authentication and association
commands only in certain states. In order to meet these requirements,
user space applications may need to clear authentication or
association state in some cases. Currently, this can be done with
deauth/disassoc command, but that ends up sending out Deauthentication
or Disassociation frame unnecessarily. Add a new nl80211 attribute to
allow this sending of the frame be skipped, but with all other
deauth/disassoc operations being completed.

Similar state change is also needed for IEEE 802.11r FT protocol in
the FT-over-DS case which does not use Authentication frame exchange
in a transition to another BSS. For this to work with cfg80211, an
authentication entry needs to be created for the target BSS without
sending out an Authentication frame. The nl80211 authentication
command can be used for this purpose, too, with the new attribute to
indicate that the command is only for changing local state. This
enables wpa_supplicant to complete FT-over-DS transition successfully.

Signed-off-by: Jouni Malinen <j@w1.fi>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h |  8 ++++++++
 include/net/cfg80211.h  | 11 +++++++++++
 net/mac80211/mlme.c     | 23 +++++++++++++++--------
 net/wireless/core.h     | 15 ++++++++++-----
 net/wireless/mlme.c     | 39 ++++++++++++++++++++++++++++-----------
 net/wireless/nl80211.c  | 19 ++++++++++++++++---
 net/wireless/sme.c      | 15 +++++++++------
 7 files changed, 97 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index daf6a3432b92..2ea3edeee7aa 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -703,6 +703,12 @@ enum nl80211_commands {
  * @NL80211_ATTR_CQM: connection quality monitor configuration in a
  *	nested attribute with %NL80211_ATTR_CQM_* sub-attributes.
  *
+ * @NL80211_ATTR_LOCAL_STATE_CHANGE: Flag attribute to indicate that a command
+ *	is requesting a local authentication/association state change without
+ *	invoking actual management frame exchange. This can be used with
+ *	NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
+ *	NL80211_CMD_DISASSOCIATE.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -856,6 +862,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_CQM,
 
+	NL80211_ATTR_LOCAL_STATE_CHANGE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 868cfd3b9724..37cebd3aa0f7 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -704,6 +704,10 @@ struct cfg80211_crypto_settings {
  * @key_len: length of WEP key for shared key authentication
  * @key_idx: index of WEP key for shared key authentication
  * @key: WEP key for shared key authentication
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Authentication frame is to be transmitted and authentication state is
+ *	to be changed without having to wait for a response from the peer STA
+ *	(AP).
  */
 struct cfg80211_auth_request {
 	struct cfg80211_bss *bss;
@@ -712,6 +716,7 @@ struct cfg80211_auth_request {
 	enum nl80211_auth_type auth_type;
 	const u8 *key;
 	u8 key_len, key_idx;
+	bool local_state_change;
 };
 
 /**
@@ -744,12 +749,15 @@ struct cfg80211_assoc_request {
  * @ie: Extra IEs to add to Deauthentication frame or %NULL
  * @ie_len: Length of ie buffer in octets
  * @reason_code: The reason code for the deauthentication
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Deauthentication frame is to be transmitted.
  */
 struct cfg80211_deauth_request {
 	struct cfg80211_bss *bss;
 	const u8 *ie;
 	size_t ie_len;
 	u16 reason_code;
+	bool local_state_change;
 };
 
 /**
@@ -762,12 +770,15 @@ struct cfg80211_deauth_request {
  * @ie: Extra IEs to add to Disassociation frame or %NULL
  * @ie_len: Length of ie buffer in octets
  * @reason_code: The reason code for the disassociation
+ * @local_state_change: This is a request for a local state only, i.e., no
+ *	Disassociation frame is to be transmitted.
  */
 struct cfg80211_disassoc_request {
 	struct cfg80211_bss *bss;
 	const u8 *ie;
 	size_t ie_len;
 	u16 reason_code;
+	bool local_state_change;
 };
 
 /**
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 57a3c62139e2..4c189d0be4a3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -210,7 +210,7 @@ static u32 ieee80211_enable_ht(struct ieee80211_sub_if_data *sdata,
 
 static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 					   const u8 *bssid, u16 stype, u16 reason,
-					   void *cookie)
+					   void *cookie, bool send_frame)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
@@ -247,7 +247,11 @@ static void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 			cfg80211_send_disassoc(sdata->dev, (u8 *)mgmt, skb->len);
 	if (!(ifmgd->flags & IEEE80211_STA_MFP_ENABLED))
 		IEEE80211_SKB_CB(skb)->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
-	ieee80211_tx_skb(sdata, skb);
+
+	if (send_frame)
+		ieee80211_tx_skb(sdata, skb);
+	else
+		kfree_skb(skb);
 }
 
 void ieee80211_send_pspoll(struct ieee80211_local *local,
@@ -980,7 +984,7 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata)
 	ieee80211_send_deauth_disassoc(sdata, bssid,
 				       IEEE80211_STYPE_DEAUTH,
 				       WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-				       NULL);
+				       NULL, true);
 }
 
 void ieee80211_beacon_connection_loss_work(struct work_struct *work)
@@ -1724,7 +1728,7 @@ static void ieee80211_sta_work(struct work_struct *work)
 			ieee80211_send_deauth_disassoc(sdata, bssid,
 					IEEE80211_STYPE_DEAUTH,
 					WLAN_REASON_DISASSOC_DUE_TO_INACTIVITY,
-					NULL);
+					NULL, true);
 			mutex_lock(&ifmgd->mtx);
 		}
 	}
@@ -1908,6 +1912,9 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_work *wk;
 	u16 auth_alg;
 
+	if (req->local_state_change)
+		return 0; /* no need to update mac80211 state */
+
 	switch (req->auth_type) {
 	case NL80211_AUTHTYPE_OPEN_SYSTEM:
 		auth_alg = WLAN_AUTH_OPEN;
@@ -2163,9 +2170,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
 	printk(KERN_DEBUG "%s: deauthenticating from %pM by local choice (reason=%d)\n",
 	       sdata->name, bssid, req->reason_code);
 
-	ieee80211_send_deauth_disassoc(sdata, bssid,
-			IEEE80211_STYPE_DEAUTH, req->reason_code,
-			cookie);
+	ieee80211_send_deauth_disassoc(sdata, bssid, IEEE80211_STYPE_DEAUTH,
+				       req->reason_code, cookie,
+				       !req->local_state_change);
 
 	ieee80211_recalc_idle(sdata->local);
 
@@ -2202,7 +2209,7 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_send_deauth_disassoc(sdata, req->bss->bssid,
 			IEEE80211_STYPE_DISASSOC, req->reason_code,
-			cookie);
+			cookie, !req->local_state_change);
 	sta_info_destroy_addr(sdata, bssid);
 
 	ieee80211_recalc_idle(sdata->local);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index d52da913145a..b2234b436ead 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -293,13 +293,15 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 const u8 *bssid,
 			 const u8 *ssid, int ssid_len,
 			 const u8 *ie, int ie_len,
-			 const u8 *key, int key_len, int key_idx);
+			 const u8 *key, int key_len, int key_idx,
+			 bool local_state_change);
 int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		       struct net_device *dev, struct ieee80211_channel *chan,
 		       enum nl80211_auth_type auth_type, const u8 *bssid,
 		       const u8 *ssid, int ssid_len,
 		       const u8 *ie, int ie_len,
-		       const u8 *key, int key_len, int key_idx);
+		       const u8 *key, int key_len, int key_idx,
+		       bool local_state_change);
 int __cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			  struct net_device *dev,
 			  struct ieee80211_channel *chan,
@@ -315,13 +317,16 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 			struct cfg80211_crypto_settings *crypt);
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason);
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change);
 int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev, const u8 *bssid,
-			 const u8 *ie, int ie_len, u16 reason);
+			 const u8 *ie, int ie_len, u16 reason,
+			 bool local_state_change);
 int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason);
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change);
 void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
 void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 0855f0d32349..387dd2a27d2f 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -377,7 +377,8 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 			 const u8 *bssid,
 			 const u8 *ssid, int ssid_len,
 			 const u8 *ie, int ie_len,
-			 const u8 *key, int key_len, int key_idx)
+			 const u8 *key, int key_len, int key_idx,
+			 bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_auth_request req;
@@ -407,6 +408,7 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	req.auth_type = auth_type;
@@ -433,12 +435,18 @@ int __cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		goto out;
 	}
 
-	wdev->authtry_bsses[slot] = bss;
+	if (local_state_change)
+		wdev->auth_bsses[slot] = bss;
+	else
+		wdev->authtry_bsses[slot] = bss;
 	cfg80211_hold_bss(bss);
 
 	err = rdev->ops->auth(&rdev->wiphy, dev, &req);
 	if (err) {
-		wdev->authtry_bsses[slot] = NULL;
+		if (local_state_change)
+			wdev->auth_bsses[slot] = NULL;
+		else
+			wdev->authtry_bsses[slot] = NULL;
 		cfg80211_unhold_bss(bss);
 	}
 
@@ -453,14 +461,15 @@ int cfg80211_mlme_auth(struct cfg80211_registered_device *rdev,
 		       enum nl80211_auth_type auth_type, const u8 *bssid,
 		       const u8 *ssid, int ssid_len,
 		       const u8 *ie, int ie_len,
-		       const u8 *key, int key_len, int key_idx)
+		       const u8 *key, int key_len, int key_idx,
+		       bool local_state_change)
 {
 	int err;
 
 	wdev_lock(dev->ieee80211_ptr);
 	err = __cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
 				   ssid, ssid_len, ie, ie_len,
-				   key, key_len, key_idx);
+				   key, key_len, key_idx, local_state_change);
 	wdev_unlock(dev->ieee80211_ptr);
 
 	return err;
@@ -554,7 +563,8 @@ int cfg80211_mlme_assoc(struct cfg80211_registered_device *rdev,
 
 int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason)
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_deauth_request req;
@@ -564,6 +574,7 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 	req.reason_code = reason;
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	if (wdev->current_bss &&
@@ -590,13 +601,15 @@ int __cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 			 struct net_device *dev, const u8 *bssid,
-			 const u8 *ie, int ie_len, u16 reason)
+			 const u8 *ie, int ie_len, u16 reason,
+			 bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason);
+	err = __cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason,
+				     local_state_change);
 	wdev_unlock(wdev);
 
 	return err;
@@ -604,7 +617,8 @@ int cfg80211_mlme_deauth(struct cfg80211_registered_device *rdev,
 
 static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 				    struct net_device *dev, const u8 *bssid,
-				    const u8 *ie, int ie_len, u16 reason)
+				    const u8 *ie, int ie_len, u16 reason,
+				    bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	struct cfg80211_disassoc_request req;
@@ -619,6 +633,7 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 
 	memset(&req, 0, sizeof(req));
 	req.reason_code = reason;
+	req.local_state_change = local_state_change;
 	req.ie = ie;
 	req.ie_len = ie_len;
 	if (memcmp(wdev->current_bss->pub.bssid, bssid, ETH_ALEN) == 0)
@@ -631,13 +646,15 @@ static int __cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 
 int cfg80211_mlme_disassoc(struct cfg80211_registered_device *rdev,
 			   struct net_device *dev, const u8 *bssid,
-			   const u8 *ie, int ie_len, u16 reason)
+			   const u8 *ie, int ie_len, u16 reason,
+			   bool local_state_change)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
 	int err;
 
 	wdev_lock(wdev);
-	err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason);
+	err = __cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason,
+				       local_state_change);
 	wdev_unlock(wdev);
 
 	return err;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 95149f303409..df5505b3930c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -150,6 +150,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_FRAME_MATCH] = { .type = NLA_BINARY, },
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
+	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
 };
 
 /* policy for the attributes */
@@ -3393,6 +3394,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	int err, ssid_len, ie_len = 0;
 	enum nl80211_auth_type auth_type;
 	struct key_parse key;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3471,9 +3473,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
 	err = cfg80211_mlme_auth(rdev, dev, chan, auth_type, bssid,
 				 ssid, ssid_len, ie, ie_len,
-				 key.p.key, key.p.key_len, key.idx);
+				 key.p.key, key.p.key_len, key.idx,
+				 local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
@@ -3650,6 +3655,7 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *ie = NULL, *bssid;
 	int err, ie_len = 0;
 	u16 reason_code;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3695,7 +3701,10 @@ static int nl80211_deauthenticate(struct sk_buff *skb, struct genl_info *info)
 		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code);
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
+	err = cfg80211_mlme_deauth(rdev, dev, bssid, ie, ie_len, reason_code,
+				   local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
@@ -3712,6 +3721,7 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
 	const u8 *ie = NULL, *bssid;
 	int err, ie_len = 0;
 	u16 reason_code;
+	bool local_state_change;
 
 	if (!is_valid_ie_attr(info->attrs[NL80211_ATTR_IE]))
 		return -EINVAL;
@@ -3757,7 +3767,10 @@ static int nl80211_disassociate(struct sk_buff *skb, struct genl_info *info)
 		ie_len = nla_len(info->attrs[NL80211_ATTR_IE]);
 	}
 
-	err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code);
+	local_state_change = !!info->attrs[NL80211_ATTR_LOCAL_STATE_CHANGE];
+
+	err = cfg80211_mlme_disassoc(rdev, dev, bssid, ie, ie_len, reason_code,
+				     local_state_change);
 
 out:
 	cfg80211_unlock_rdev(rdev);
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 17fde0da1b08..17465777eb47 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -170,7 +170,7 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 					    params->ssid, params->ssid_len,
 					    NULL, 0,
 					    params->key, params->key_len,
-					    params->key_idx);
+					    params->key_idx, false);
 	case CFG80211_CONN_ASSOCIATE_NEXT:
 		BUG_ON(!rdev->ops->assoc);
 		wdev->conn->state = CFG80211_CONN_ASSOCIATING;
@@ -185,12 +185,13 @@ static int cfg80211_conn_do_work(struct wireless_dev *wdev)
 		if (err)
 			__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 					       NULL, 0,
-					       WLAN_REASON_DEAUTH_LEAVING);
+					       WLAN_REASON_DEAUTH_LEAVING,
+					       false);
 		return err;
 	case CFG80211_CONN_DEAUTH_ASSOC_FAIL:
 		__cfg80211_mlme_deauth(rdev, wdev->netdev, params->bssid,
 				       NULL, 0,
-				       WLAN_REASON_DEAUTH_LEAVING);
+				       WLAN_REASON_DEAUTH_LEAVING, false);
 		/* return an error so that we call __cfg80211_connect_result() */
 		return -EINVAL;
 	default:
@@ -675,7 +676,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 				continue;
 			bssid = wdev->auth_bsses[i]->pub.bssid;
 			ret = __cfg80211_mlme_deauth(rdev, dev, bssid, NULL, 0,
-						WLAN_REASON_DEAUTH_LEAVING);
+						WLAN_REASON_DEAUTH_LEAVING,
+						false);
 			WARN(ret, "deauth failed: %d\n", ret);
 		}
 	}
@@ -934,7 +936,7 @@ int __cfg80211_disconnect(struct cfg80211_registered_device *rdev,
 		/* wdev->conn->params.bssid must be set if > SCANNING */
 		err = __cfg80211_mlme_deauth(rdev, dev,
 					     wdev->conn->params.bssid,
-					     NULL, 0, reason);
+					     NULL, 0, reason, false);
 		if (err)
 			return err;
 	} else {
@@ -990,7 +992,8 @@ void cfg80211_sme_disassoc(struct net_device *dev, int idx)
 
 	memcpy(bssid, wdev->auth_bsses[idx]->pub.bssid, ETH_ALEN);
 	if (__cfg80211_mlme_deauth(rdev, dev, bssid,
-				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING)) {
+				   NULL, 0, WLAN_REASON_DEAUTH_LEAVING,
+				   false)) {
 		/* whatever -- assume gone anyway */
 		cfg80211_unhold_bss(wdev->auth_bsses[idx]);
 		cfg80211_put_bss(&wdev->auth_bsses[idx]->pub);
-- 
cgit v1.2.3


From 098a607091426e79178b9a6c318d993fea131791 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Tue, 6 Apr 2010 11:18:47 +0200
Subject: mac80211: clean up/fix aggregation code

The aggregation code has a number of quirks, like
inventing an unneeded WLAN_BACK_TIMER value and
leaking memory under certain circumstances during
station destruction. Fix these issues by using
the regular aggregation session teardown code and
blocking new aggregation sessions, all before the
station is actually destroyed.

As a side effect, this gets rid of the long code
block to destroy aggregation safely.

Additionally, rename tid_state_rx which can only
have the values IDLE and OPERATIONAL to
tid_active_rx to make it easier to understand
that there is no bitwise stuff going on on the
RX side -- the TX side remains because it needs
to keep track of the driver and peer states.

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h  |  1 -
 net/mac80211/agg-rx.c      | 48 +++++++++++++++++---------------------
 net/mac80211/debugfs_sta.c | 10 ++++----
 net/mac80211/rx.c          |  5 ++--
 net/mac80211/sta_info.c    | 58 ++++++++--------------------------------------
 net/mac80211/sta_info.h    |  6 ++---
 6 files changed, 40 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 19984958ab7b..e9e03b02cb08 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1324,7 +1324,6 @@ enum ieee80211_back_actioncode {
 enum ieee80211_back_parties {
 	WLAN_BACK_RECIPIENT = 0,
 	WLAN_BACK_INITIATOR = 1,
-	WLAN_BACK_TIMER = 2,
 };
 
 /* SA Query action */
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 7d87f446f030..53233ab50f65 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -22,19 +22,20 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 				    u16 initiator, u16 reason)
 {
 	struct ieee80211_local *local = sta->local;
+	struct tid_ampdu_rx *tid_rx;
 	int i;
 
-	/* check if TID is in operational state */
 	spin_lock_bh(&sta->lock);
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL) {
+
+	/* check if TID is in operational state */
+	if (!sta->ampdu_mlme.tid_active_rx[tid]) {
 		spin_unlock_bh(&sta->lock);
 		return;
 	}
 
-	sta->ampdu_mlme.tid_state_rx[tid] =
-		HT_AGG_STATE_REQ_STOP_BA_MSK |
-		(initiator << HT_AGG_STATE_INITIATOR_SHIFT);
-	spin_unlock_bh(&sta->lock);
+	sta->ampdu_mlme.tid_active_rx[tid] = false;
+
+	tid_rx = sta->ampdu_mlme.tid_rx[tid];
 
 #ifdef CONFIG_MAC80211_HT_DEBUG
 	printk(KERN_DEBUG "Rx BA session stop requested for %pM tid %u\n",
@@ -46,37 +47,30 @@ void __ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid,
 		printk(KERN_DEBUG "HW problem - can not stop rx "
 				"aggregation for tid %d\n", tid);
 
-	/* shutdown timer has not expired */
-	if (initiator != WLAN_BACK_TIMER)
-		del_timer_sync(&sta->ampdu_mlme.tid_rx[tid]->session_timer);
-
 	/* check if this is a self generated aggregation halt */
-	if (initiator == WLAN_BACK_RECIPIENT || initiator == WLAN_BACK_TIMER)
+	if (initiator == WLAN_BACK_RECIPIENT)
 		ieee80211_send_delba(sta->sdata, sta->sta.addr,
 				     tid, 0, reason);
 
 	/* free the reordering buffer */
-	for (i = 0; i < sta->ampdu_mlme.tid_rx[tid]->buf_size; i++) {
-		if (sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]) {
+	for (i = 0; i < tid_rx->buf_size; i++) {
+		if (tid_rx->reorder_buf[i]) {
 			/* release the reordered frames */
-			dev_kfree_skb(sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i]);
-			sta->ampdu_mlme.tid_rx[tid]->stored_mpdu_num--;
-			sta->ampdu_mlme.tid_rx[tid]->reorder_buf[i] = NULL;
+			dev_kfree_skb(tid_rx->reorder_buf[i]);
+			tid_rx->stored_mpdu_num--;
+			tid_rx->reorder_buf[i] = NULL;
 		}
 	}
 
-	spin_lock_bh(&sta->lock);
 	/* free resources */
-	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_buf);
-	kfree(sta->ampdu_mlme.tid_rx[tid]->reorder_time);
-
-	if (!sta->ampdu_mlme.tid_rx[tid]->shutdown) {
-		kfree(sta->ampdu_mlme.tid_rx[tid]);
-		sta->ampdu_mlme.tid_rx[tid] = NULL;
-	}
+	kfree(tid_rx->reorder_buf);
+	kfree(tid_rx->reorder_time);
+	sta->ampdu_mlme.tid_rx[tid] = NULL;
 
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_IDLE;
 	spin_unlock_bh(&sta->lock);
+
+	del_timer_sync(&tid_rx->session_timer);
+	kfree(tid_rx);
 }
 
 /*
@@ -211,7 +205,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	/* examine state machine */
 	spin_lock_bh(&sta->lock);
 
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_IDLE) {
+	if (sta->ampdu_mlme.tid_active_rx[tid]) {
 #ifdef CONFIG_MAC80211_HT_DEBUG
 		if (net_ratelimit())
 			printk(KERN_DEBUG "unexpected AddBA Req from "
@@ -273,7 +267,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local,
 	}
 
 	/* change state and send addba resp */
-	sta->ampdu_mlme.tid_state_rx[tid] = HT_AGG_STATE_OPERATIONAL;
+	sta->ampdu_mlme.tid_active_rx[tid] = true;
 	tid_agg_rx->dialog_token = dialog_token;
 	tid_agg_rx->ssn = start_seq_num;
 	tid_agg_rx->head_seq_num = start_seq_num;
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 23e720034577..740ff6c5b92c 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -119,7 +119,7 @@ STA_OPS(last_seq_ctrl);
 static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 					size_t count, loff_t *ppos)
 {
-	char buf[64 + STA_TID_NUM * 40], *p = buf;
+	char buf[71 + STA_TID_NUM * 40], *p = buf;
 	int i;
 	struct sta_info *sta = file->private_data;
 
@@ -127,16 +127,16 @@ static ssize_t sta_agg_status_read(struct file *file, char __user *userbuf,
 	p += scnprintf(p, sizeof(buf) + buf - p, "next dialog_token: %#02x\n",
 			sta->ampdu_mlme.dialog_token_allocator + 1);
 	p += scnprintf(p, sizeof(buf) + buf - p,
-		       "TID\t\tRX\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n");
+		       "TID\t\tRX active\tDTKN\tSSN\t\tTX\tDTKN\tSSN\tpending\n");
 	for (i = 0; i < STA_TID_NUM; i++) {
 		p += scnprintf(p, sizeof(buf) + buf - p, "%02d", i);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
-				sta->ampdu_mlme.tid_state_rx[i]);
+				sta->ampdu_mlme.tid_active_rx[i]);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.2x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_active_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->dialog_token : 0);
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t%#.3x",
-				sta->ampdu_mlme.tid_state_rx[i] ?
+				sta->ampdu_mlme.tid_active_rx[i] ?
 				sta->ampdu_mlme.tid_rx[i]->ssn : 0);
 
 		p += scnprintf(p, sizeof(buf) + buf - p, "\t\t%x",
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c02e43b50ac3..62053fa711f3 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -720,7 +720,7 @@ static void ieee80211_rx_reorder_ampdu(struct ieee80211_rx_data *rx,
 
 	tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
 
-	if (sta->ampdu_mlme.tid_state_rx[tid] != HT_AGG_STATE_OPERATIONAL)
+	if (!sta->ampdu_mlme.tid_active_rx[tid])
 		goto dont_reorder;
 
 	tid_agg_rx = sta->ampdu_mlme.tid_rx[tid];
@@ -1805,8 +1805,7 @@ ieee80211_rx_h_ctrl(struct ieee80211_rx_data *rx, struct sk_buff_head *frames)
 		if (!rx->sta)
 			return RX_DROP_MONITOR;
 		tid = le16_to_cpu(bar->control) >> 12;
-		if (rx->sta->ampdu_mlme.tid_state_rx[tid]
-					!= HT_AGG_STATE_OPERATIONAL)
+		if (!rx->sta->ampdu_mlme.tid_active_rx[tid])
 			return RX_DROP_MONITOR;
 		tid_agg_rx = rx->sta->ampdu_mlme.tid_rx[tid];
 
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bd11753c1525..5bf044b92dca 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -238,9 +238,6 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata,
 		 * enable session_timer's data differentiation. refer to
 		 * sta_rx_agg_session_timer_expired for useage */
 		sta->timer_to_tid[i] = i;
-		/* rx */
-		sta->ampdu_mlme.tid_state_rx[i] = HT_AGG_STATE_IDLE;
-		sta->ampdu_mlme.tid_rx[i] = NULL;
 		/* tx */
 		sta->ampdu_mlme.tid_state_tx[i] = HT_AGG_STATE_IDLE;
 		sta->ampdu_mlme.tid_tx[i] = NULL;
@@ -606,7 +603,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	struct ieee80211_sub_if_data *sdata;
 	struct sk_buff *skb;
 	unsigned long flags;
-	int ret, i;
+	int ret;
 
 	might_sleep();
 
@@ -616,6 +613,15 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	local = sta->local;
 	sdata = sta->sdata;
 
+	/*
+	 * Before removing the station from the driver and
+	 * rate control, it might still start new aggregation
+	 * sessions -- block that to make sure the tear-down
+	 * will be sufficient.
+	 */
+	set_sta_flags(sta, WLAN_STA_BLOCK_BA);
+	ieee80211_sta_tear_down_BA_sessions(sta);
+
 	spin_lock_irqsave(&local->sta_lock, flags);
 	ret = sta_info_hash_del(local, sta);
 	/* this might still be the pending list ... which is fine */
@@ -700,50 +706,6 @@ static int __must_check __sta_info_destroy(struct sta_info *sta)
 	while ((skb = skb_dequeue(&sta->tx_filtered)) != NULL)
 		dev_kfree_skb_any(skb);
 
-	for (i = 0; i <  STA_TID_NUM; i++) {
-		struct tid_ampdu_rx *tid_rx;
-		struct tid_ampdu_tx *tid_tx;
-
-		spin_lock_bh(&sta->lock);
-		tid_rx = sta->ampdu_mlme.tid_rx[i];
-		/* Make sure timer won't free the tid_rx struct, see below */
-		if (tid_rx)
-			tid_rx->shutdown = true;
-
-		spin_unlock_bh(&sta->lock);
-
-		/*
-		 * Outside spinlock - shutdown is true now so that the timer
-		 * won't free tid_rx, we have to do that now. Can't let the
-		 * timer do it because we have to sync the timer outside the
-		 * lock that it takes itself.
-		 */
-		if (tid_rx) {
-			del_timer_sync(&tid_rx->session_timer);
-			kfree(tid_rx);
-		}
-
-		/*
-		 * No need to do such complications for TX agg sessions, the
-		 * path leading to freeing the tid_tx struct goes via a call
-		 * from the driver, and thus needs to look up the sta struct
-		 * again, which cannot be found when we get here. Hence, we
-		 * just need to delete the timer and free the aggregation
-		 * info; we won't be telling the peer about it then but that
-		 * doesn't matter if we're not talking to it again anyway.
-		 */
-		tid_tx = sta->ampdu_mlme.tid_tx[i];
-		if (tid_tx) {
-			del_timer_sync(&tid_tx->addba_resp_timer);
-			/*
-			 * STA removed while aggregation session being
-			 * started? Bit odd, but purge frames anyway.
-			 */
-			skb_queue_purge(&tid_tx->pending);
-			kfree(tid_tx);
-		}
-	}
-
 	__sta_info_free(local, sta);
 
 	return 0;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 57e81758d6f7..48a5e80957f0 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -36,7 +36,7 @@
  *	frame to this station is transmitted.
  * @WLAN_STA_MFP: Management frame protection is used with this STA.
  * @WLAN_STA_BLOCK_BA: Used to deny ADDBA requests (both TX and RX)
- *	during suspend/resume.
+ *	during suspend/resume and station removal.
  * @WLAN_STA_PS_DRIVER: driver requires keeping this station in
  *	power-save mode logically to flush frames that might still
  *	be in the queues
@@ -106,7 +106,6 @@ struct tid_ampdu_tx {
  * @buf_size: buffer size for incoming A-MPDUs
  * @timeout: reset timer value (in TUs).
  * @dialog_token: dialog token for aggregation session
- * @shutdown: this session is being shut down due to STA removal
  */
 struct tid_ampdu_rx {
 	struct sk_buff **reorder_buf;
@@ -118,7 +117,6 @@ struct tid_ampdu_rx {
 	u16 buf_size;
 	u16 timeout;
 	u8 dialog_token;
-	bool shutdown;
 };
 
 /**
@@ -156,7 +154,7 @@ enum plink_state {
  */
 struct sta_ampdu_mlme {
 	/* rx */
-	u8 tid_state_rx[STA_TID_NUM];
+	bool tid_active_rx[STA_TID_NUM];
 	struct tid_ampdu_rx *tid_rx[STA_TID_NUM];
 	/* tx */
 	u8 tid_state_tx[STA_TID_NUM];
-- 
cgit v1.2.3


From 18e225f257663c59ff9d4482f07ffd06361fc2ec Mon Sep 17 00:00:00 2001
From: Pavel Roskin <proski@gnu.org>
Date: Wed, 7 Apr 2010 16:40:09 -0700
Subject: net: fix definition of netdev_for_each_mc_addr()

The first argument should be called ha, not mclist.  All callers use the
name "ha", but if they used a different name, there would be a compile
error.

Signed-off-by: Pavel Roskin <proski@gnu.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a343a21ba8b9..d1a21b576a40 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -260,7 +260,7 @@ struct netdev_hw_addr_list {
 
 #define netdev_mc_count(dev) netdev_hw_addr_list_count(&(dev)->mc)
 #define netdev_mc_empty(dev) netdev_hw_addr_list_empty(&(dev)->mc)
-#define netdev_for_each_mc_addr(mclist, dev) \
+#define netdev_for_each_mc_addr(ha, dev) \
 	netdev_hw_addr_list_for_each(ha, &(dev)->mc)
 
 struct hh_cache {
-- 
cgit v1.2.3


From 97f8aefbbfb5aa5c9944e5fa8149f1fdaf71c7b6 Mon Sep 17 00:00:00 2001
From: chavey <chavey@google.com>
Date: Wed, 7 Apr 2010 21:54:42 -0700
Subject: net: fix ethtool coding style errors and warnings

Fix the coding style errors and warnings reported by checkpatch.pl for
the files net/core/ethtool.c and include/linux/ethtool.h.

Signed-off-by: chavey <chavey@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool.h | 115 +++++++++++++++++++++------------------
 net/core/ethtool.c      | 141 +++++++++++++++++++++++++-----------------------
 2 files changed, 136 insertions(+), 120 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h
index b391969a0dd9..276b40a16835 100644
--- a/include/linux/ethtool.h
+++ b/include/linux/ethtool.h
@@ -491,12 +491,12 @@ void ethtool_ntuple_flush(struct net_device *dev);
  * get_ufo: Report whether UDP fragmentation offload is enabled
  * set_ufo: Turn UDP fragmentation offload on or off
  * self_test: Run specified self-tests
- * get_strings: Return a set of strings that describe the requested objects 
+ * get_strings: Return a set of strings that describe the requested objects
  * phys_id: Identify the device
  * get_stats: Return statistics about the device
  * get_flags: get 32-bit flags bitmap
  * set_flags: set 32-bit flags bitmap
- * 
+ *
  * Description:
  *
  * get_settings:
@@ -532,14 +532,20 @@ struct ethtool_ops {
 	int	(*nway_reset)(struct net_device *);
 	u32	(*get_link)(struct net_device *);
 	int	(*get_eeprom_len)(struct net_device *);
-	int	(*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
-	int	(*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *);
+	int	(*get_eeprom)(struct net_device *,
+			      struct ethtool_eeprom *, u8 *);
+	int	(*set_eeprom)(struct net_device *,
+			      struct ethtool_eeprom *, u8 *);
 	int	(*get_coalesce)(struct net_device *, struct ethtool_coalesce *);
 	int	(*set_coalesce)(struct net_device *, struct ethtool_coalesce *);
-	void	(*get_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	int	(*set_ringparam)(struct net_device *, struct ethtool_ringparam *);
-	void	(*get_pauseparam)(struct net_device *, struct ethtool_pauseparam*);
-	int	(*set_pauseparam)(struct net_device *, struct ethtool_pauseparam*);
+	void	(*get_ringparam)(struct net_device *,
+				 struct ethtool_ringparam *);
+	int	(*set_ringparam)(struct net_device *,
+				 struct ethtool_ringparam *);
+	void	(*get_pauseparam)(struct net_device *,
+				  struct ethtool_pauseparam*);
+	int	(*set_pauseparam)(struct net_device *,
+				  struct ethtool_pauseparam*);
 	u32	(*get_rx_csum)(struct net_device *);
 	int	(*set_rx_csum)(struct net_device *, u32);
 	u32	(*get_tx_csum)(struct net_device *);
@@ -551,21 +557,24 @@ struct ethtool_ops {
 	void	(*self_test)(struct net_device *, struct ethtool_test *, u64 *);
 	void	(*get_strings)(struct net_device *, u32 stringset, u8 *);
 	int	(*phys_id)(struct net_device *, u32);
-	void	(*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, u64 *);
+	void	(*get_ethtool_stats)(struct net_device *,
+				     struct ethtool_stats *, u64 *);
 	int	(*begin)(struct net_device *);
 	void	(*complete)(struct net_device *);
-	u32     (*get_ufo)(struct net_device *);
-	int     (*set_ufo)(struct net_device *, u32);
-	u32     (*get_flags)(struct net_device *);
-	int     (*set_flags)(struct net_device *, u32);
-	u32     (*get_priv_flags)(struct net_device *);
-	int     (*set_priv_flags)(struct net_device *, u32);
+	u32	(*get_ufo)(struct net_device *);
+	int	(*set_ufo)(struct net_device *, u32);
+	u32	(*get_flags)(struct net_device *);
+	int	(*set_flags)(struct net_device *, u32);
+	u32	(*get_priv_flags)(struct net_device *);
+	int	(*set_priv_flags)(struct net_device *, u32);
 	int	(*get_sset_count)(struct net_device *, int);
-	int	(*get_rxnfc)(struct net_device *, struct ethtool_rxnfc *, void *);
+	int	(*get_rxnfc)(struct net_device *,
+			     struct ethtool_rxnfc *, void *);
 	int	(*set_rxnfc)(struct net_device *, struct ethtool_rxnfc *);
-	int     (*flash_device)(struct net_device *, struct ethtool_flash *);
+	int	(*flash_device)(struct net_device *, struct ethtool_flash *);
 	int	(*reset)(struct net_device *, u32 *);
-	int	(*set_rx_ntuple)(struct net_device *, struct ethtool_rx_ntuple *);
+	int	(*set_rx_ntuple)(struct net_device *,
+				 struct ethtool_rx_ntuple *);
 	int	(*get_rx_ntuple)(struct net_device *, u32 stringset, void *);
 };
 #endif /* __KERNEL__ */
@@ -577,29 +586,29 @@ struct ethtool_ops {
 #define ETHTOOL_GREGS		0x00000004 /* Get NIC registers. */
 #define ETHTOOL_GWOL		0x00000005 /* Get wake-on-lan options. */
 #define ETHTOOL_SWOL		0x00000006 /* Set wake-on-lan options. */
-#define ETHTOOL_GMSGLVL		0x00000007 /* Get driver message level */
-#define ETHTOOL_SMSGLVL		0x00000008 /* Set driver msg level. */
+#define ETHTOOL_GMSGLVL	0x00000007 /* Get driver message level */
+#define ETHTOOL_SMSGLVL	0x00000008 /* Set driver msg level. */
 #define ETHTOOL_NWAY_RST	0x00000009 /* Restart autonegotiation. */
 #define ETHTOOL_GLINK		0x0000000a /* Get link status (ethtool_value) */
-#define ETHTOOL_GEEPROM		0x0000000b /* Get EEPROM data */
-#define ETHTOOL_SEEPROM		0x0000000c /* Set EEPROM data. */
+#define ETHTOOL_GEEPROM	0x0000000b /* Get EEPROM data */
+#define ETHTOOL_SEEPROM	0x0000000c /* Set EEPROM data. */
 #define ETHTOOL_GCOALESCE	0x0000000e /* Get coalesce config */
 #define ETHTOOL_SCOALESCE	0x0000000f /* Set coalesce config. */
 #define ETHTOOL_GRINGPARAM	0x00000010 /* Get ring parameters */
 #define ETHTOOL_SRINGPARAM	0x00000011 /* Set ring parameters. */
 #define ETHTOOL_GPAUSEPARAM	0x00000012 /* Get pause parameters */
 #define ETHTOOL_SPAUSEPARAM	0x00000013 /* Set pause parameters. */
-#define ETHTOOL_GRXCSUM		0x00000014 /* Get RX hw csum enable (ethtool_value) */
-#define ETHTOOL_SRXCSUM		0x00000015 /* Set RX hw csum enable (ethtool_value) */
-#define ETHTOOL_GTXCSUM		0x00000016 /* Get TX hw csum enable (ethtool_value) */
-#define ETHTOOL_STXCSUM		0x00000017 /* Set TX hw csum enable (ethtool_value) */
+#define ETHTOOL_GRXCSUM	0x00000014 /* Get RX hw csum enable (ethtool_value) */
+#define ETHTOOL_SRXCSUM	0x00000015 /* Set RX hw csum enable (ethtool_value) */
+#define ETHTOOL_GTXCSUM	0x00000016 /* Get TX hw csum enable (ethtool_value) */
+#define ETHTOOL_STXCSUM	0x00000017 /* Set TX hw csum enable (ethtool_value) */
 #define ETHTOOL_GSG		0x00000018 /* Get scatter-gather enable
 					    * (ethtool_value) */
 #define ETHTOOL_SSG		0x00000019 /* Set scatter-gather enable
 					    * (ethtool_value). */
 #define ETHTOOL_TEST		0x0000001a /* execute NIC self-test. */
 #define ETHTOOL_GSTRINGS	0x0000001b /* get specified string set */
-#define ETHTOOL_PHYS_ID		0x0000001c /* identify the NIC */
+#define ETHTOOL_PHYS_ID	0x0000001c /* identify the NIC */
 #define ETHTOOL_GSTATS		0x0000001d /* get NIC-specific statistics */
 #define ETHTOOL_GTSO		0x0000001e /* Get TSO enable (ethtool_value) */
 #define ETHTOOL_STSO		0x0000001f /* Set TSO enable (ethtool_value) */
@@ -610,24 +619,24 @@ struct ethtool_ops {
 #define ETHTOOL_SGSO		0x00000024 /* Set GSO enable (ethtool_value) */
 #define ETHTOOL_GFLAGS		0x00000025 /* Get flags bitmap(ethtool_value) */
 #define ETHTOOL_SFLAGS		0x00000026 /* Set flags bitmap(ethtool_value) */
-#define ETHTOOL_GPFLAGS		0x00000027 /* Get driver-private flags bitmap */
-#define ETHTOOL_SPFLAGS		0x00000028 /* Set driver-private flags bitmap */
+#define ETHTOOL_GPFLAGS	0x00000027 /* Get driver-private flags bitmap */
+#define ETHTOOL_SPFLAGS	0x00000028 /* Set driver-private flags bitmap */
 
-#define	ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
-#define	ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
+#define ETHTOOL_GRXFH		0x00000029 /* Get RX flow hash configuration */
+#define ETHTOOL_SRXFH		0x0000002a /* Set RX flow hash configuration */
 #define ETHTOOL_GGRO		0x0000002b /* Get GRO enable (ethtool_value) */
 #define ETHTOOL_SGRO		0x0000002c /* Set GRO enable (ethtool_value) */
-#define	ETHTOOL_GRXRINGS	0x0000002d /* Get RX rings available for LB */
-#define	ETHTOOL_GRXCLSRLCNT	0x0000002e /* Get RX class rule count */
-#define	ETHTOOL_GRXCLSRULE	0x0000002f /* Get RX classification rule */
-#define	ETHTOOL_GRXCLSRLALL	0x00000030 /* Get all RX classification rule */
-#define	ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
-#define	ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
-#define	ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
-#define	ETHTOOL_RESET		0x00000034 /* Reset hardware */
-#define	ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
-#define	ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
-#define	ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
+#define ETHTOOL_GRXRINGS	0x0000002d /* Get RX rings available for LB */
+#define ETHTOOL_GRXCLSRLCNT	0x0000002e /* Get RX class rule count */
+#define ETHTOOL_GRXCLSRULE	0x0000002f /* Get RX classification rule */
+#define ETHTOOL_GRXCLSRLALL	0x00000030 /* Get all RX classification rule */
+#define ETHTOOL_SRXCLSRLDEL	0x00000031 /* Delete RX classification rule */
+#define ETHTOOL_SRXCLSRLINS	0x00000032 /* Insert RX classification rule */
+#define ETHTOOL_FLASHDEV	0x00000033 /* Flash firmware to device */
+#define ETHTOOL_RESET		0x00000034 /* Reset hardware */
+#define ETHTOOL_SRXNTUPLE	0x00000035 /* Add an n-tuple filter to device */
+#define ETHTOOL_GRXNTUPLE	0x00000036 /* Get n-tuple filters from device */
+#define ETHTOOL_GSSET_INFO	0x00000037 /* Get string set info */
 
 /* compatibility with older code */
 #define SPARC_ETH_GSET		ETHTOOL_GSET
@@ -636,18 +645,18 @@ struct ethtool_ops {
 /* Indicates what features are supported by the interface. */
 #define SUPPORTED_10baseT_Half		(1 << 0)
 #define SUPPORTED_10baseT_Full		(1 << 1)
-#define SUPPORTED_100baseT_Half		(1 << 2)
-#define SUPPORTED_100baseT_Full		(1 << 3)
+#define SUPPORTED_100baseT_Half	(1 << 2)
+#define SUPPORTED_100baseT_Full	(1 << 3)
 #define SUPPORTED_1000baseT_Half	(1 << 4)
 #define SUPPORTED_1000baseT_Full	(1 << 5)
 #define SUPPORTED_Autoneg		(1 << 6)
 #define SUPPORTED_TP			(1 << 7)
 #define SUPPORTED_AUI			(1 << 8)
 #define SUPPORTED_MII			(1 << 9)
-#define SUPPORTED_FIBRE			(1 << 10)
+#define SUPPORTED_FIBRE		(1 << 10)
 #define SUPPORTED_BNC			(1 << 11)
 #define SUPPORTED_10000baseT_Full	(1 << 12)
-#define SUPPORTED_Pause			(1 << 13)
+#define SUPPORTED_Pause		(1 << 13)
 #define SUPPORTED_Asym_Pause		(1 << 14)
 #define SUPPORTED_2500baseX_Full	(1 << 15)
 #define SUPPORTED_Backplane		(1 << 16)
@@ -657,8 +666,8 @@ struct ethtool_ops {
 #define SUPPORTED_10000baseR_FEC	(1 << 20)
 
 /* Indicates what features are advertised by the interface. */
-#define ADVERTISED_10baseT_Half		(1 << 0)
-#define ADVERTISED_10baseT_Full		(1 << 1)
+#define ADVERTISED_10baseT_Half	(1 << 0)
+#define ADVERTISED_10baseT_Full	(1 << 1)
 #define ADVERTISED_100baseT_Half	(1 << 2)
 #define ADVERTISED_100baseT_Full	(1 << 3)
 #define ADVERTISED_1000baseT_Half	(1 << 4)
@@ -697,12 +706,12 @@ struct ethtool_ops {
 #define DUPLEX_FULL		0x01
 
 /* Which connector port. */
-#define PORT_TP			0x00
+#define PORT_TP		0x00
 #define PORT_AUI		0x01
 #define PORT_MII		0x02
 #define PORT_FIBRE		0x03
 #define PORT_BNC		0x04
-#define PORT_DA			0x05
+#define PORT_DA		0x05
 #define PORT_NONE		0xef
 #define PORT_OTHER		0xff
 
@@ -716,7 +725,7 @@ struct ethtool_ops {
 /* Enable or disable autonegotiation.  If this is set to enable,
  * the forced link modes above are completely ignored.
  */
-#define AUTONEG_DISABLE		0x00
+#define AUTONEG_DISABLE	0x00
 #define AUTONEG_ENABLE		0x01
 
 /* Mode MDI or MDI-X */
@@ -747,8 +756,8 @@ struct ethtool_ops {
 #define	AH_V6_FLOW	0x0b
 #define	ESP_V6_FLOW	0x0c
 #define	IP_USER_FLOW	0x0d
-#define IPV4_FLOW       0x10
-#define IPV6_FLOW       0x11
+#define	IPV4_FLOW	0x10
+#define	IPV6_FLOW	0x11
 
 /* L3-L4 network traffic flow hash options */
 #define	RXH_L2DA	(1 << 1)
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 73c81edde8d9..99e9f850ea07 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -18,7 +18,7 @@
 #include <linux/ethtool.h>
 #include <linux/netdevice.h>
 #include <linux/bitops.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
 
 /*
  * Some useful ethtool_ops methods that're device independent.
@@ -30,6 +30,7 @@ u32 ethtool_op_get_link(struct net_device *dev)
 {
 	return netif_carrier_ok(dev) ? 1 : 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_link);
 
 u32 ethtool_op_get_rx_csum(struct net_device *dev)
 {
@@ -62,6 +63,7 @@ int ethtool_op_set_tx_hw_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
 
 int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 {
@@ -72,11 +74,13 @@ int ethtool_op_set_tx_ipv6_csum(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
 
 u32 ethtool_op_get_sg(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_SG) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_sg);
 
 int ethtool_op_set_sg(struct net_device *dev, u32 data)
 {
@@ -87,11 +91,13 @@ int ethtool_op_set_sg(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_sg);
 
 u32 ethtool_op_get_tso(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_TSO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_tso);
 
 int ethtool_op_set_tso(struct net_device *dev, u32 data)
 {
@@ -102,11 +108,13 @@ int ethtool_op_set_tso(struct net_device *dev, u32 data)
 
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_tso);
 
 u32 ethtool_op_get_ufo(struct net_device *dev)
 {
 	return (dev->features & NETIF_F_UFO) != 0;
 }
+EXPORT_SYMBOL(ethtool_op_get_ufo);
 
 int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 {
@@ -116,6 +124,7 @@ int ethtool_op_set_ufo(struct net_device *dev, u32 data)
 		dev->features &= ~NETIF_F_UFO;
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_ufo);
 
 /* the following list of flags are the same as their associated
  * NETIF_F_xxx values in include/linux/netdevice.h
@@ -132,6 +141,7 @@ u32 ethtool_op_get_flags(struct net_device *dev)
 
 	return dev->features & flags_dup_features;
 }
+EXPORT_SYMBOL(ethtool_op_get_flags);
 
 int ethtool_op_set_flags(struct net_device *dev, u32 data)
 {
@@ -160,6 +170,7 @@ int ethtool_op_set_flags(struct net_device *dev, u32 data)
 	dev->features = features;
 	return 0;
 }
+EXPORT_SYMBOL(ethtool_op_set_flags);
 
 void ethtool_ntuple_flush(struct net_device *dev)
 {
@@ -205,7 +216,8 @@ static int ethtool_set_settings(struct net_device *dev, void __user *useraddr)
 	return dev->ethtool_ops->set_settings(dev, &cmd);
 }
 
-static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev,
+						  void __user *useraddr)
 {
 	struct ethtool_drvinfo info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -245,7 +257,7 @@ static noinline_for_stack int ethtool_get_drvinfo(struct net_device *dev, void _
 }
 
 static noinline_for_stack int ethtool_get_sset_info(struct net_device *dev,
-                                          void __user *useraddr)
+						    void __user *useraddr)
 {
 	struct ethtool_sset_info info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -304,7 +316,8 @@ out:
 	return ret;
 }
 
-static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev,
+						void __user *useraddr)
 {
 	struct ethtool_rxnfc cmd;
 
@@ -317,7 +330,8 @@ static noinline_for_stack int ethtool_set_rxnfc(struct net_device *dev, void __u
 	return dev->ethtool_ops->set_rxnfc(dev, &cmd);
 }
 
-static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_rxnfc(struct net_device *dev,
+						void __user *useraddr)
 {
 	struct ethtool_rxnfc info;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -362,8 +376,8 @@ err_out:
 }
 
 static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
-                              struct ethtool_rx_ntuple_flow_spec *spec,
-                              struct ethtool_rx_ntuple_flow_spec_container *fsc)
+			struct ethtool_rx_ntuple_flow_spec *spec,
+			struct ethtool_rx_ntuple_flow_spec_container *fsc)
 {
 
 	/* don't add filters forever */
@@ -389,7 +403,8 @@ static void __rx_ntuple_filter_add(struct ethtool_rx_ntuple_list *list,
 	list->count++;
 }
 
-static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_rx_ntuple(struct net_device *dev,
+						    void __user *useraddr)
 {
 	struct ethtool_rx_ntuple cmd;
 	const struct ethtool_ops *ops = dev->ethtool_ops;
@@ -514,125 +529,125 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 		case UDP_V4_FLOW:
 		case SCTP_V4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.ip4src);
+				fsc->fs.h_u.tcp_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.tcp_ip4_spec.ip4src);
+				fsc->fs.m_u.tcp_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.ip4dst);
+				fsc->fs.h_u.tcp_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.tcp_ip4_spec.ip4dst);
+				fsc->fs.m_u.tcp_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc Port: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.psrc,
-			        fsc->fs.m_u.tcp_ip4_spec.psrc);
+				fsc->fs.h_u.tcp_ip4_spec.psrc,
+				fsc->fs.m_u.tcp_ip4_spec.psrc);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest Port: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.pdst,
-			        fsc->fs.m_u.tcp_ip4_spec.pdst);
+				fsc->fs.h_u.tcp_ip4_spec.pdst,
+				fsc->fs.m_u.tcp_ip4_spec.pdst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.tcp_ip4_spec.tos,
-			        fsc->fs.m_u.tcp_ip4_spec.tos);
+				fsc->fs.h_u.tcp_ip4_spec.tos,
+				fsc->fs.m_u.tcp_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case AH_ESP_V4_FLOW:
 		case ESP_V4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.ip4src);
+				fsc->fs.h_u.ah_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.ah_ip4_spec.ip4src);
+				fsc->fs.m_u.ah_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.ip4dst);
+				fsc->fs.h_u.ah_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.ah_ip4_spec.ip4dst);
+				fsc->fs.m_u.ah_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSPI: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.spi,
-			        fsc->fs.m_u.ah_ip4_spec.spi);
+				fsc->fs.h_u.ah_ip4_spec.spi,
+				fsc->fs.m_u.ah_ip4_spec.spi);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.ah_ip4_spec.tos,
-			        fsc->fs.m_u.ah_ip4_spec.tos);
+				fsc->fs.h_u.ah_ip4_spec.tos,
+				fsc->fs.m_u.ah_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case IP_USER_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.raw_ip4_spec.ip4src);
+				fsc->fs.h_u.raw_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.raw_ip4_spec.ip4src);
+				fsc->fs.m_u.raw_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.raw_ip4_spec.ip4dst);
+				fsc->fs.h_u.raw_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.raw_ip4_spec.ip4dst);
+				fsc->fs.m_u.raw_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		case IPV4_FLOW:
 			sprintf(p, "\tSrc IP addr: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip4src);
+				fsc->fs.h_u.usr_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tSrc IP mask: 0x%x\n",
-			        fsc->fs.m_u.usr_ip4_spec.ip4src);
+				fsc->fs.m_u.usr_ip4_spec.ip4src);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP addr: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip4dst);
+				fsc->fs.h_u.usr_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tDest IP mask: 0x%x\n",
-			        fsc->fs.m_u.usr_ip4_spec.ip4dst);
+				fsc->fs.m_u.usr_ip4_spec.ip4dst);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tL4 bytes: 0x%x, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
-			        fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
+				fsc->fs.h_u.usr_ip4_spec.l4_4_bytes,
+				fsc->fs.m_u.usr_ip4_spec.l4_4_bytes);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tTOS: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.tos,
-			        fsc->fs.m_u.usr_ip4_spec.tos);
+				fsc->fs.h_u.usr_ip4_spec.tos,
+				fsc->fs.m_u.usr_ip4_spec.tos);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tIP Version: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.ip_ver,
-			        fsc->fs.m_u.usr_ip4_spec.ip_ver);
+				fsc->fs.h_u.usr_ip4_spec.ip_ver,
+				fsc->fs.m_u.usr_ip4_spec.ip_ver);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			sprintf(p, "\tProtocol: %d, mask: 0x%x\n",
-			        fsc->fs.h_u.usr_ip4_spec.proto,
-			        fsc->fs.m_u.usr_ip4_spec.proto);
+				fsc->fs.h_u.usr_ip4_spec.proto,
+				fsc->fs.m_u.usr_ip4_spec.proto);
 			p += ETH_GSTRING_LEN;
 			num_strings++;
 			break;
 		};
 		sprintf(p, "\tVLAN: %d, mask: 0x%x\n",
-		        fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
+			fsc->fs.vlan_tag, fsc->fs.vlan_tag_mask);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 		sprintf(p, "\tUser-defined: 0x%Lx\n", fsc->fs.data);
@@ -645,7 +660,7 @@ static int ethtool_get_rx_ntuple(struct net_device *dev, void __user *useraddr)
 			sprintf(p, "\tAction: Drop\n");
 		else
 			sprintf(p, "\tAction: Direct to queue %d\n",
-			        fsc->fs.action);
+				fsc->fs.action);
 		p += ETH_GSTRING_LEN;
 		num_strings++;
 unknown_filter:
@@ -857,7 +872,8 @@ static int ethtool_set_eeprom(struct net_device *dev, void __user *useraddr)
 	return ret;
 }
 
-static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
 
@@ -871,7 +887,8 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev, void
 	return 0;
 }
 
-static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev, void __user *useraddr)
+static noinline_for_stack int ethtool_set_coalesce(struct net_device *dev,
+						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce;
 
@@ -975,6 +992,7 @@ static int ethtool_set_tx_csum(struct net_device *dev, char __user *useraddr)
 
 	return dev->ethtool_ops->set_tx_csum(dev, edata.data);
 }
+EXPORT_SYMBOL(ethtool_op_set_tx_csum);
 
 static int ethtool_set_rx_csum(struct net_device *dev, char __user *useraddr)
 {
@@ -1046,7 +1064,7 @@ static int ethtool_get_gso(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GSO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -1069,7 +1087,7 @@ static int ethtool_get_gro(struct net_device *dev, char __user *useraddr)
 
 	edata.data = dev->features & NETIF_F_GRO;
 	if (copy_to_user(useraddr, &edata, sizeof(edata)))
-		 return -EFAULT;
+		return -EFAULT;
 	return 0;
 }
 
@@ -1281,7 +1299,8 @@ static int ethtool_set_value(struct net_device *dev, char __user *useraddr,
 	return actor(dev, edata.data);
 }
 
-static noinline_for_stack int ethtool_flash_device(struct net_device *dev, char __user *useraddr)
+static noinline_for_stack int ethtool_flash_device(struct net_device *dev,
+						   char __user *useraddr)
 {
 	struct ethtool_flash efl;
 
@@ -1310,11 +1329,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 	if (!dev->ethtool_ops)
 		return -EOPNOTSUPP;
 
-	if (copy_from_user(&ethcmd, useraddr, sizeof (ethcmd)))
+	if (copy_from_user(&ethcmd, useraddr, sizeof(ethcmd)))
 		return -EFAULT;
 
 	/* Allow some commands to be done by anyone */
-	switch(ethcmd) {
+	switch (ethcmd) {
 	case ETHTOOL_GDRVINFO:
 	case ETHTOOL_GMSGLVL:
 	case ETHTOOL_GCOALESCE:
@@ -1342,10 +1361,11 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 			return -EPERM;
 	}
 
-	if (dev->ethtool_ops->begin)
-		if ((rc = dev->ethtool_ops->begin(dev)) < 0)
+	if (dev->ethtool_ops->begin) {
+		rc = dev->ethtool_ops->begin(dev);
+		if (rc  < 0)
 			return rc;
-
+	}
 	old_features = dev->features;
 
 	switch (ethcmd) {
@@ -1535,16 +1555,3 @@ int dev_ethtool(struct net *net, struct ifreq *ifr)
 
 	return rc;
 }
-
-EXPORT_SYMBOL(ethtool_op_get_link);
-EXPORT_SYMBOL(ethtool_op_get_sg);
-EXPORT_SYMBOL(ethtool_op_get_tso);
-EXPORT_SYMBOL(ethtool_op_set_sg);
-EXPORT_SYMBOL(ethtool_op_set_tso);
-EXPORT_SYMBOL(ethtool_op_set_tx_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_hw_csum);
-EXPORT_SYMBOL(ethtool_op_set_tx_ipv6_csum);
-EXPORT_SYMBOL(ethtool_op_set_ufo);
-EXPORT_SYMBOL(ethtool_op_get_ufo);
-EXPORT_SYMBOL(ethtool_op_set_flags);
-EXPORT_SYMBOL(ethtool_op_get_flags);
-- 
cgit v1.2.3


From 97ad9139fd68b5c71f44d28d3f9788d89cfd4916 Mon Sep 17 00:00:00 2001
From: Javier Cardona <javier@cozybit.com>
Date: Mon, 29 Mar 2010 11:00:21 -0700
Subject: mac80211: Moved mesh action codes to a more visible location

Grouped the mesh action category codes together with the other action
category codes in ieee80211.h.

Signed-off-by: Javier Cardona <javier@cozybit.com>
Reviewed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 2 ++
 net/mac80211/mesh.c       | 4 ++--
 net/mac80211/mesh.h       | 2 --
 net/mac80211/mesh_hwmp.c  | 4 ++--
 net/mac80211/mesh_plink.c | 2 +-
 net/mac80211/rx.c         | 6 +++---
 6 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index e9e03b02cb08..1252ba1fbff5 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1211,6 +1211,8 @@ enum ieee80211_category {
 	WLAN_CATEGORY_SA_QUERY = 8,
 	WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9,
 	WLAN_CATEGORY_WMM = 17,
+	WLAN_CATEGORY_MESH_PLINK = 30,		/* Pending ANA approval */
+	WLAN_CATEGORY_MESH_PATH_SEL = 32,	/* Pending ANA approval */
 	WLAN_CATEGORY_VENDOR_SPECIFIC_PROTECTED = 126,
 	WLAN_CATEGORY_VENDOR_SPECIFIC = 127,
 };
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 7a6bebce7f2f..2669fbf8c812 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -600,10 +600,10 @@ static void ieee80211_mesh_rx_mgmt_action(struct ieee80211_sub_if_data *sdata,
 					  struct ieee80211_rx_status *rx_status)
 {
 	switch (mgmt->u.action.category) {
-	case MESH_PLINK_CATEGORY:
+	case WLAN_CATEGORY_MESH_PLINK:
 		mesh_rx_plink_frame(sdata, mgmt, len, rx_status);
 		break;
-	case MESH_PATH_SEL_CATEGORY:
+	case WLAN_CATEGORY_MESH_PATH_SEL:
 		mesh_rx_path_sel_frame(sdata, mgmt, len);
 		break;
 	}
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 85562c59d7d6..c88087f1cd0f 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -209,8 +209,6 @@ struct mesh_rmc {
 #define MESH_MAX_MPATHS		1024
 
 /* Pending ANA approval */
-#define MESH_PLINK_CATEGORY	30
-#define MESH_PATH_SEL_CATEGORY	32
 #define MESH_PATH_SEL_ACTION	0
 
 /* PERR reason codes */
diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c
index ccff6133e19a..36141d6e701b 100644
--- a/net/mac80211/mesh_hwmp.c
+++ b/net/mac80211/mesh_hwmp.c
@@ -131,7 +131,7 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags,
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID == SA */
 	memcpy(mgmt->bssid, sdata->vif.addr, ETH_ALEN);
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 
 	switch (action) {
@@ -224,7 +224,7 @@ int mesh_path_error_tx(u8 ttl, u8 *target, __le32 target_sn,
 	memcpy(mgmt->da, ra, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PATH_SEL_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PATH_SEL;
 	mgmt->u.action.u.mesh_action.action_code = MESH_PATH_SEL_ACTION;
 	ie_len = 15;
 	pos = skb_put(skb, 2 + ie_len);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index bc4e20e57ff5..c384154ac895 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -171,7 +171,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 	memcpy(mgmt->da, da, ETH_ALEN);
 	memcpy(mgmt->sa, sdata->vif.addr, ETH_ALEN);
 	/* BSSID is left zeroed, wildcard value */
-	mgmt->u.action.category = MESH_PLINK_CATEGORY;
+	mgmt->u.action.category = WLAN_CATEGORY_MESH_PLINK;
 	mgmt->u.action.u.plink_action.action_code = action;
 
 	if (action == PLINK_CLOSE)
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index c0ad7e879a6e..d08ede44ac7e 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -490,7 +490,7 @@ ieee80211_rx_mesh_check(struct ieee80211_rx_data *rx)
 
 		if (ieee80211_is_action(hdr->frame_control)) {
 			mgmt = (struct ieee80211_mgmt *)hdr;
-			if (mgmt->u.action.category != MESH_PLINK_CATEGORY)
+			if (mgmt->u.action.category != WLAN_CATEGORY_MESH_PLINK)
 				return RX_DROP_MONITOR;
 			return RX_CONTINUE;
 		}
@@ -1994,8 +1994,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			goto handled;
 		}
 		break;
-	case MESH_PLINK_CATEGORY:
-	case MESH_PATH_SEL_CATEGORY:
+	case WLAN_CATEGORY_MESH_PLINK:
+	case WLAN_CATEGORY_MESH_PATH_SEL:
 		if (ieee80211_vif_is_mesh(&sdata->vif))
 			return ieee80211_mesh_rx_mgmt(sdata, rx->skb);
 		break;
-- 
cgit v1.2.3


From 6dad2a29646ce3792c40cfc52d77e9b65a7bb143 Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Wed, 31 Mar 2010 21:56:46 +0200
Subject: cpufreq: Unify sysfs attribute definition macros

Multiple modules defined their own copies of these sysfs attribute
definition macros, which have identical functionality and were
needlessly replicated among the different cpufreq drivers. Push them
into the header and remove the duplication.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <1270065406-1814-7-git-send-email-bp@amd64.org>
Reviewed-by: Thomas Renninger <trenn@suse.de>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 drivers/cpufreq/cpufreq.c              | 40 ++++++++++------------------
 drivers/cpufreq/cpufreq_conservative.c | 48 ++++++++++++----------------------
 drivers/cpufreq/cpufreq_ondemand.c     | 40 +++++++++-------------------
 include/linux/cpufreq.h                | 30 +++++++++++++++++++++
 4 files changed, 72 insertions(+), 86 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 2d5d575e889d..e02e4174c2c8 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -662,32 +662,20 @@ static ssize_t show_bios_limit(struct cpufreq_policy *policy, char *buf)
 	return sprintf(buf, "%u\n", policy->cpuinfo.max_freq);
 }
 
-#define define_one_ro(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0444, show_##_name, NULL)
-
-#define define_one_ro0400(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0400, show_##_name, NULL)
-
-#define define_one_rw(_name) \
-static struct freq_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_ro0400(cpuinfo_cur_freq);
-define_one_ro(cpuinfo_min_freq);
-define_one_ro(cpuinfo_max_freq);
-define_one_ro(cpuinfo_transition_latency);
-define_one_ro(scaling_available_governors);
-define_one_ro(scaling_driver);
-define_one_ro(scaling_cur_freq);
-define_one_ro(bios_limit);
-define_one_ro(related_cpus);
-define_one_ro(affected_cpus);
-define_one_rw(scaling_min_freq);
-define_one_rw(scaling_max_freq);
-define_one_rw(scaling_governor);
-define_one_rw(scaling_setspeed);
+cpufreq_freq_attr_ro_perm(cpuinfo_cur_freq, 0400);
+cpufreq_freq_attr_ro(cpuinfo_min_freq);
+cpufreq_freq_attr_ro(cpuinfo_max_freq);
+cpufreq_freq_attr_ro(cpuinfo_transition_latency);
+cpufreq_freq_attr_ro(scaling_available_governors);
+cpufreq_freq_attr_ro(scaling_driver);
+cpufreq_freq_attr_ro(scaling_cur_freq);
+cpufreq_freq_attr_ro(bios_limit);
+cpufreq_freq_attr_ro(related_cpus);
+cpufreq_freq_attr_ro(affected_cpus);
+cpufreq_freq_attr_rw(scaling_min_freq);
+cpufreq_freq_attr_rw(scaling_max_freq);
+cpufreq_freq_attr_rw(scaling_governor);
+cpufreq_freq_attr_rw(scaling_setspeed);
 
 static struct attribute *default_attrs[] = {
 	&cpuinfo_min_freq.attr,
diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index 599a40b25cb0..ce5248e04218 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -178,12 +178,8 @@ static ssize_t show_sampling_rate_min(struct kobject *kobj,
 	return sprintf(buf, "%u\n", min_sampling_rate);
 }
 
-#define define_one_ro(_name)		\
-static struct global_attr _name =	\
-__ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(sampling_rate_max);
-define_one_ro(sampling_rate_min);
+define_one_global_ro(sampling_rate_max);
+define_one_global_ro(sampling_rate_min);
 
 /* cpufreq_conservative Governor Tunables */
 #define show_one(file_name, object)					\
@@ -221,12 +217,8 @@ show_one_old(freq_step);
 show_one_old(sampling_rate_min);
 show_one_old(sampling_rate_max);
 
-#define define_one_ro_old(object, _name)	\
-static struct freq_attr object =		\
-__ATTR(_name, 0444, show_##_name##_old, NULL)
-
-define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
-define_one_ro_old(sampling_rate_max_old, sampling_rate_max);
+cpufreq_freq_attr_ro_old(sampling_rate_min);
+cpufreq_freq_attr_ro_old(sampling_rate_max);
 
 /*** delete after deprecation time ***/
 
@@ -364,16 +356,12 @@ static ssize_t store_freq_step(struct kobject *a, struct attribute *b,
 	return count;
 }
 
-#define define_one_rw(_name) \
-static struct global_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_rw(sampling_rate);
-define_one_rw(sampling_down_factor);
-define_one_rw(up_threshold);
-define_one_rw(down_threshold);
-define_one_rw(ignore_nice_load);
-define_one_rw(freq_step);
+define_one_global_rw(sampling_rate);
+define_one_global_rw(sampling_down_factor);
+define_one_global_rw(up_threshold);
+define_one_global_rw(down_threshold);
+define_one_global_rw(ignore_nice_load);
+define_one_global_rw(freq_step);
 
 static struct attribute *dbs_attributes[] = {
 	&sampling_rate_max.attr,
@@ -409,16 +397,12 @@ write_one_old(down_threshold);
 write_one_old(ignore_nice_load);
 write_one_old(freq_step);
 
-#define define_one_rw_old(object, _name)	\
-static struct freq_attr object =		\
-__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)
-
-define_one_rw_old(sampling_rate_old, sampling_rate);
-define_one_rw_old(sampling_down_factor_old, sampling_down_factor);
-define_one_rw_old(up_threshold_old, up_threshold);
-define_one_rw_old(down_threshold_old, down_threshold);
-define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
-define_one_rw_old(freq_step_old, freq_step);
+cpufreq_freq_attr_rw_old(sampling_rate);
+cpufreq_freq_attr_rw_old(sampling_down_factor);
+cpufreq_freq_attr_rw_old(up_threshold);
+cpufreq_freq_attr_rw_old(down_threshold);
+cpufreq_freq_attr_rw_old(ignore_nice_load);
+cpufreq_freq_attr_rw_old(freq_step);
 
 static struct attribute *dbs_attributes_old[] = {
 	&sampling_rate_max_old.attr,
diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c
index bd444dc93cf2..c00b25f4d243 100644
--- a/drivers/cpufreq/cpufreq_ondemand.c
+++ b/drivers/cpufreq/cpufreq_ondemand.c
@@ -234,12 +234,8 @@ static ssize_t show_sampling_rate_min(struct kobject *kobj,
 	return sprintf(buf, "%u\n", min_sampling_rate);
 }
 
-#define define_one_ro(_name)		\
-static struct global_attr _name =	\
-__ATTR(_name, 0444, show_##_name, NULL)
-
-define_one_ro(sampling_rate_max);
-define_one_ro(sampling_rate_min);
+define_one_global_ro(sampling_rate_max);
+define_one_global_ro(sampling_rate_min);
 
 /* cpufreq_ondemand Governor Tunables */
 #define show_one(file_name, object)					\
@@ -274,12 +270,8 @@ show_one_old(powersave_bias);
 show_one_old(sampling_rate_min);
 show_one_old(sampling_rate_max);
 
-#define define_one_ro_old(object, _name)       \
-static struct freq_attr object =               \
-__ATTR(_name, 0444, show_##_name##_old, NULL)
-
-define_one_ro_old(sampling_rate_min_old, sampling_rate_min);
-define_one_ro_old(sampling_rate_max_old, sampling_rate_max);
+cpufreq_freq_attr_ro_old(sampling_rate_min);
+cpufreq_freq_attr_ro_old(sampling_rate_max);
 
 /*** delete after deprecation time ***/
 
@@ -376,14 +368,10 @@ static ssize_t store_powersave_bias(struct kobject *a, struct attribute *b,
 	return count;
 }
 
-#define define_one_rw(_name) \
-static struct global_attr _name = \
-__ATTR(_name, 0644, show_##_name, store_##_name)
-
-define_one_rw(sampling_rate);
-define_one_rw(up_threshold);
-define_one_rw(ignore_nice_load);
-define_one_rw(powersave_bias);
+define_one_global_rw(sampling_rate);
+define_one_global_rw(up_threshold);
+define_one_global_rw(ignore_nice_load);
+define_one_global_rw(powersave_bias);
 
 static struct attribute *dbs_attributes[] = {
 	&sampling_rate_max.attr,
@@ -415,14 +403,10 @@ write_one_old(up_threshold);
 write_one_old(ignore_nice_load);
 write_one_old(powersave_bias);
 
-#define define_one_rw_old(object, _name)       \
-static struct freq_attr object =               \
-__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)
-
-define_one_rw_old(sampling_rate_old, sampling_rate);
-define_one_rw_old(up_threshold_old, up_threshold);
-define_one_rw_old(ignore_nice_load_old, ignore_nice_load);
-define_one_rw_old(powersave_bias_old, powersave_bias);
+cpufreq_freq_attr_rw_old(sampling_rate);
+cpufreq_freq_attr_rw_old(up_threshold);
+cpufreq_freq_attr_rw_old(ignore_nice_load);
+cpufreq_freq_attr_rw_old(powersave_bias);
 
 static struct attribute *dbs_attributes_old[] = {
        &sampling_rate_max_old.attr,
diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h
index 4de02b10007f..9f15150ce8d6 100644
--- a/include/linux/cpufreq.h
+++ b/include/linux/cpufreq.h
@@ -278,6 +278,27 @@ struct freq_attr {
 	ssize_t (*store)(struct cpufreq_policy *, const char *, size_t count);
 };
 
+#define cpufreq_freq_attr_ro(_name)		\
+static struct freq_attr _name =			\
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define cpufreq_freq_attr_ro_perm(_name, _perm)	\
+static struct freq_attr _name =			\
+__ATTR(_name, _perm, show_##_name, NULL)
+
+#define cpufreq_freq_attr_ro_old(_name)		\
+static struct freq_attr _name##_old =		\
+__ATTR(_name, 0444, show_##_name##_old, NULL)
+
+#define cpufreq_freq_attr_rw(_name)		\
+static struct freq_attr _name =			\
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+#define cpufreq_freq_attr_rw_old(_name)		\
+static struct freq_attr _name##_old =		\
+__ATTR(_name, 0644, show_##_name##_old, store_##_name##_old)
+
+
 struct global_attr {
 	struct attribute attr;
 	ssize_t (*show)(struct kobject *kobj,
@@ -286,6 +307,15 @@ struct global_attr {
 			 const char *c, size_t count);
 };
 
+#define define_one_global_ro(_name)		\
+static struct global_attr _name =		\
+__ATTR(_name, 0444, show_##_name, NULL)
+
+#define define_one_global_rw(_name)		\
+static struct global_attr _name =		\
+__ATTR(_name, 0644, show_##_name, store_##_name)
+
+
 /*********************************************************************
  *                        CPUFREQ 2.6. INTERFACE                     *
  *********************************************************************/
-- 
cgit v1.2.3


From 353633100d8d684ac0acae4ce93fb833f92881f4 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:15 -0400
Subject: security: remove sb_check_sb hooks

Unused hook.  Remove it.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  5 -----
 include/linux/security.h | 14 --------------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 30 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..c1d0d877bab7 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1435,11 +1435,6 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 	if (IS_DEADDIR(path->dentry->d_inode))
 		goto out_unlock;
 
-	err = security_sb_check_sb(mnt, path);
-	if (err)
-		goto out_unlock;
-
-	err = -ENOENT;
 	if (!d_unlinked(path->dentry))
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
diff --git a/include/linux/security.h b/include/linux/security.h
index 233d20b52c1b..9ebd2e411d2d 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -267,12 +267,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@orig the original mount data copied from userspace.
  *	@copy copied data which will be passed to the security module.
  *	Returns 0 if the copy was successful.
- * @sb_check_sb:
- *	Check permission before the device with superblock @mnt->sb is mounted
- *	on the mount point named by @nd.
- *	@mnt contains the vfsmount for device being mounted.
- *	@path contains the path for the mount point.
- *	Return 0 if permission is granted.
  * @sb_umount:
  *	Check permission before the @mnt file system is unmounted.
  *	@mnt contains the mounted file system.
@@ -1484,7 +1478,6 @@ struct security_operations {
 	int (*sb_statfs) (struct dentry *dentry);
 	int (*sb_mount) (char *dev_name, struct path *path,
 			 char *type, unsigned long flags, void *data);
-	int (*sb_check_sb) (struct vfsmount *mnt, struct path *path);
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
 	void (*sb_umount_close) (struct vfsmount *mnt);
 	void (*sb_umount_busy) (struct vfsmount *mnt);
@@ -1783,7 +1776,6 @@ int security_sb_show_options(struct seq_file *m, struct super_block *sb);
 int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
-int security_sb_check_sb(struct vfsmount *mnt, struct path *path);
 int security_sb_umount(struct vfsmount *mnt, int flags);
 void security_sb_umount_close(struct vfsmount *mnt);
 void security_sb_umount_busy(struct vfsmount *mnt);
@@ -2099,12 +2091,6 @@ static inline int security_sb_mount(char *dev_name, struct path *path,
 	return 0;
 }
 
-static inline int security_sb_check_sb(struct vfsmount *mnt,
-				       struct path *path)
-{
-	return 0;
-}
-
 static inline int security_sb_umount(struct vfsmount *mnt, int flags)
 {
 	return 0;
diff --git a/security/capability.c b/security/capability.c
index 4875142b858d..a7e905d8f1d3 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -80,11 +80,6 @@ static int cap_sb_mount(char *dev_name, struct path *path, char *type,
 	return 0;
 }
 
-static int cap_sb_check_sb(struct vfsmount *mnt, struct path *path)
-{
-	return 0;
-}
-
 static int cap_sb_umount(struct vfsmount *mnt, int flags)
 {
 	return 0;
@@ -941,7 +936,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_show_options);
 	set_to_cap_if_null(ops, sb_statfs);
 	set_to_cap_if_null(ops, sb_mount);
-	set_to_cap_if_null(ops, sb_check_sb);
 	set_to_cap_if_null(ops, sb_umount);
 	set_to_cap_if_null(ops, sb_umount_close);
 	set_to_cap_if_null(ops, sb_umount_busy);
diff --git a/security/security.c b/security/security.c
index b98334b0a576..a0bee70ed4b6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -306,11 +306,6 @@ int security_sb_mount(char *dev_name, struct path *path,
 	return security_ops->sb_mount(dev_name, path, type, flags, data);
 }
 
-int security_sb_check_sb(struct vfsmount *mnt, struct path *path)
-{
-	return security_ops->sb_check_sb(mnt, path);
-}
-
 int security_sb_umount(struct vfsmount *mnt, int flags)
 {
 	return security_ops->sb_umount(mnt, flags);
-- 
cgit v1.2.3


From 231923bd0e06cba69f7c2028f4a68602b8d22160 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:21 -0400
Subject: security: remove dead hook sb_umount_close

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  1 -
 include/linux/security.h | 10 ----------
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 21 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index c1d0d877bab7..8aea78c8e760 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -628,7 +628,6 @@ repeat:
 		mnt->mnt_pinned = 0;
 		spin_unlock(&vfsmount_lock);
 		acct_auto_close_mnt(mnt);
-		security_sb_umount_close(mnt);
 		goto repeat;
 	}
 }
diff --git a/include/linux/security.h b/include/linux/security.h
index 9ebd2e411d2d..354b0ceefc87 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -272,11 +272,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mnt contains the mounted file system.
  *	@flags contains the unmount flags, e.g. MNT_FORCE.
  *	Return 0 if permission is granted.
- * @sb_umount_close:
- *	Close any files in the @mnt mounted filesystem that are held open by
- *	the security module.  This hook is called during an umount operation
- *	prior to checking whether the filesystem is still busy.
- *	@mnt contains the mounted filesystem.
  * @sb_umount_busy:
  *	Handle a failed umount of the @mnt mounted filesystem, e.g.  re-opening
  *	any files that were closed by umount_close.  This hook is called during
@@ -1479,7 +1474,6 @@ struct security_operations {
 	int (*sb_mount) (char *dev_name, struct path *path,
 			 char *type, unsigned long flags, void *data);
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
-	void (*sb_umount_close) (struct vfsmount *mnt);
 	void (*sb_umount_busy) (struct vfsmount *mnt);
 	void (*sb_post_remount) (struct vfsmount *mnt,
 				 unsigned long flags, void *data);
@@ -1777,7 +1771,6 @@ int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
 int security_sb_umount(struct vfsmount *mnt, int flags);
-void security_sb_umount_close(struct vfsmount *mnt);
 void security_sb_umount_busy(struct vfsmount *mnt);
 void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *data);
 void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint);
@@ -2096,9 +2089,6 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static inline void security_sb_umount_close(struct vfsmount *mnt)
-{ }
-
 static inline void security_sb_umount_busy(struct vfsmount *mnt)
 { }
 
diff --git a/security/capability.c b/security/capability.c
index a7e905d8f1d3..db72f4f5a9e1 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -85,10 +85,6 @@ static int cap_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static void cap_sb_umount_close(struct vfsmount *mnt)
-{
-}
-
 static void cap_sb_umount_busy(struct vfsmount *mnt)
 {
 }
@@ -937,7 +933,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_statfs);
 	set_to_cap_if_null(ops, sb_mount);
 	set_to_cap_if_null(ops, sb_umount);
-	set_to_cap_if_null(ops, sb_umount_close);
 	set_to_cap_if_null(ops, sb_umount_busy);
 	set_to_cap_if_null(ops, sb_post_remount);
 	set_to_cap_if_null(ops, sb_post_addmount);
diff --git a/security/security.c b/security/security.c
index a0bee70ed4b6..591c25fd430f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -311,11 +311,6 @@ int security_sb_umount(struct vfsmount *mnt, int flags)
 	return security_ops->sb_umount(mnt, flags);
 }
 
-void security_sb_umount_close(struct vfsmount *mnt)
-{
-	security_ops->sb_umount_close(mnt);
-}
-
 void security_sb_umount_busy(struct vfsmount *mnt)
 {
 	security_ops->sb_umount_busy(mnt);
-- 
cgit v1.2.3


From 4b61d12c84293ac061909f27f567c1905e4d90e3 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:27 -0400
Subject: security: remove dead hook sb_umount_busy

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  2 --
 include/linux/security.h | 11 -----------
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 8aea78c8e760..6c9ca7358aad 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1116,8 +1116,6 @@ static int do_umount(struct vfsmount *mnt, int flags)
 		retval = 0;
 	}
 	spin_unlock(&vfsmount_lock);
-	if (retval)
-		security_sb_umount_busy(mnt);
 	up_write(&namespace_sem);
 	release_mounts(&umount_list);
 	return retval;
diff --git a/include/linux/security.h b/include/linux/security.h
index 354b0ceefc87..b206795c09f7 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -272,12 +272,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mnt contains the mounted file system.
  *	@flags contains the unmount flags, e.g. MNT_FORCE.
  *	Return 0 if permission is granted.
- * @sb_umount_busy:
- *	Handle a failed umount of the @mnt mounted filesystem, e.g.  re-opening
- *	any files that were closed by umount_close.  This hook is called during
- *	an umount operation if the umount fails after a call to the
- *	umount_close hook.
- *	@mnt contains the mounted filesystem.
  * @sb_post_remount:
  *	Update the security module's state when a filesystem is remounted.
  *	This hook is only called if the remount was successful.
@@ -1474,7 +1468,6 @@ struct security_operations {
 	int (*sb_mount) (char *dev_name, struct path *path,
 			 char *type, unsigned long flags, void *data);
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
-	void (*sb_umount_busy) (struct vfsmount *mnt);
 	void (*sb_post_remount) (struct vfsmount *mnt,
 				 unsigned long flags, void *data);
 	void (*sb_post_addmount) (struct vfsmount *mnt,
@@ -1771,7 +1764,6 @@ int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
 int security_sb_umount(struct vfsmount *mnt, int flags);
-void security_sb_umount_busy(struct vfsmount *mnt);
 void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *data);
 void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint);
 int security_sb_pivotroot(struct path *old_path, struct path *new_path);
@@ -2089,9 +2081,6 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static inline void security_sb_umount_busy(struct vfsmount *mnt)
-{ }
-
 static inline void security_sb_post_remount(struct vfsmount *mnt,
 					     unsigned long flags, void *data)
 { }
diff --git a/security/capability.c b/security/capability.c
index db72f4f5a9e1..476d43030906 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -85,10 +85,6 @@ static int cap_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static void cap_sb_umount_busy(struct vfsmount *mnt)
-{
-}
-
 static void cap_sb_post_remount(struct vfsmount *mnt, unsigned long flags,
 				void *data)
 {
@@ -933,7 +929,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_statfs);
 	set_to_cap_if_null(ops, sb_mount);
 	set_to_cap_if_null(ops, sb_umount);
-	set_to_cap_if_null(ops, sb_umount_busy);
 	set_to_cap_if_null(ops, sb_post_remount);
 	set_to_cap_if_null(ops, sb_post_addmount);
 	set_to_cap_if_null(ops, sb_pivotroot);
diff --git a/security/security.c b/security/security.c
index 591c25fd430f..c1341fbe98ca 100644
--- a/security/security.c
+++ b/security/security.c
@@ -311,11 +311,6 @@ int security_sb_umount(struct vfsmount *mnt, int flags)
 	return security_ops->sb_umount(mnt, flags);
 }
 
-void security_sb_umount_busy(struct vfsmount *mnt)
-{
-	security_ops->sb_umount_busy(mnt);
-}
-
 void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *data)
 {
 	security_ops->sb_post_remount(mnt, flags, data);
-- 
cgit v1.2.3


From 82dab10453d65ad9ca551de5b8925673ca05c7e9 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:33 -0400
Subject: security: remove dead hook sb_post_remount

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  2 --
 include/linux/security.h | 13 -------------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 26 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 6c9ca7358aad..f87f56e348fd 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1573,8 +1573,6 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
 	}
 	up_write(&sb->s_umount);
 	if (!err) {
-		security_sb_post_remount(path->mnt, flags, data);
-
 		spin_lock(&vfsmount_lock);
 		touch_mnt_namespace(path->mnt->mnt_ns);
 		spin_unlock(&vfsmount_lock);
diff --git a/include/linux/security.h b/include/linux/security.h
index b206795c09f7..338617a50fa3 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -272,12 +272,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mnt contains the mounted file system.
  *	@flags contains the unmount flags, e.g. MNT_FORCE.
  *	Return 0 if permission is granted.
- * @sb_post_remount:
- *	Update the security module's state when a filesystem is remounted.
- *	This hook is only called if the remount was successful.
- *	@mnt contains the mounted file system.
- *	@flags contains the new filesystem flags.
- *	@data contains the filesystem-specific data.
  * @sb_post_addmount:
  *	Update the security module's state when a filesystem is mounted.
  *	This hook is called any time a mount is successfully grafetd to
@@ -1468,8 +1462,6 @@ struct security_operations {
 	int (*sb_mount) (char *dev_name, struct path *path,
 			 char *type, unsigned long flags, void *data);
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
-	void (*sb_post_remount) (struct vfsmount *mnt,
-				 unsigned long flags, void *data);
 	void (*sb_post_addmount) (struct vfsmount *mnt,
 				  struct path *mountpoint);
 	int (*sb_pivotroot) (struct path *old_path,
@@ -1764,7 +1756,6 @@ int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
 int security_sb_umount(struct vfsmount *mnt, int flags);
-void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *data);
 void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint);
 int security_sb_pivotroot(struct path *old_path, struct path *new_path);
 void security_sb_post_pivotroot(struct path *old_path, struct path *new_path);
@@ -2081,10 +2072,6 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static inline void security_sb_post_remount(struct vfsmount *mnt,
-					     unsigned long flags, void *data)
-{ }
-
 static inline void security_sb_post_addmount(struct vfsmount *mnt,
 					     struct path *mountpoint)
 { }
diff --git a/security/capability.c b/security/capability.c
index 476d43030906..26fc92caa339 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -85,11 +85,6 @@ static int cap_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static void cap_sb_post_remount(struct vfsmount *mnt, unsigned long flags,
-				void *data)
-{
-}
-
 static void cap_sb_post_addmount(struct vfsmount *mnt, struct path *path)
 {
 }
@@ -929,7 +924,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_statfs);
 	set_to_cap_if_null(ops, sb_mount);
 	set_to_cap_if_null(ops, sb_umount);
-	set_to_cap_if_null(ops, sb_post_remount);
 	set_to_cap_if_null(ops, sb_post_addmount);
 	set_to_cap_if_null(ops, sb_pivotroot);
 	set_to_cap_if_null(ops, sb_post_pivotroot);
diff --git a/security/security.c b/security/security.c
index c1341fbe98ca..d9538d98736b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -311,11 +311,6 @@ int security_sb_umount(struct vfsmount *mnt, int flags)
 	return security_ops->sb_umount(mnt, flags);
 }
 
-void security_sb_post_remount(struct vfsmount *mnt, unsigned long flags, void *data)
-{
-	security_ops->sb_post_remount(mnt, flags, data);
-}
-
 void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint)
 {
 	security_ops->sb_post_addmount(mnt, mountpoint);
-- 
cgit v1.2.3


From 3db291017753e539af64c8bab373785f34e43ed2 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:39 -0400
Subject: security: remove dead hook sb_post_addmount

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  2 --
 include/linux/security.h | 13 -------------
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index f87f56e348fd..7a0c9ce62bed 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1436,8 +1436,6 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 		err = attach_recursive_mnt(mnt, path, NULL);
 out_unlock:
 	mutex_unlock(&path->dentry->d_inode->i_mutex);
-	if (!err)
-		security_sb_post_addmount(mnt, path);
 	return err;
 }
 
diff --git a/include/linux/security.h b/include/linux/security.h
index 338617a50fa3..a42d733086a1 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -272,12 +272,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mnt contains the mounted file system.
  *	@flags contains the unmount flags, e.g. MNT_FORCE.
  *	Return 0 if permission is granted.
- * @sb_post_addmount:
- *	Update the security module's state when a filesystem is mounted.
- *	This hook is called any time a mount is successfully grafetd to
- *	the tree.
- *	@mnt contains the mounted filesystem.
- *	@mountpoint contains the path for the mount point.
  * @sb_pivotroot:
  *	Check permission before pivoting the root filesystem.
  *	@old_path contains the path for the new location of the current root (put_old).
@@ -1462,8 +1456,6 @@ struct security_operations {
 	int (*sb_mount) (char *dev_name, struct path *path,
 			 char *type, unsigned long flags, void *data);
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
-	void (*sb_post_addmount) (struct vfsmount *mnt,
-				  struct path *mountpoint);
 	int (*sb_pivotroot) (struct path *old_path,
 			     struct path *new_path);
 	void (*sb_post_pivotroot) (struct path *old_path,
@@ -1756,7 +1748,6 @@ int security_sb_statfs(struct dentry *dentry);
 int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
 int security_sb_umount(struct vfsmount *mnt, int flags);
-void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint);
 int security_sb_pivotroot(struct path *old_path, struct path *new_path);
 void security_sb_post_pivotroot(struct path *old_path, struct path *new_path);
 int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts);
@@ -2072,10 +2063,6 @@ static inline int security_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static inline void security_sb_post_addmount(struct vfsmount *mnt,
-					     struct path *mountpoint)
-{ }
-
 static inline int security_sb_pivotroot(struct path *old_path,
 					struct path *new_path)
 {
diff --git a/security/capability.c b/security/capability.c
index 26fc92caa339..38752ddf69d7 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -85,10 +85,6 @@ static int cap_sb_umount(struct vfsmount *mnt, int flags)
 	return 0;
 }
 
-static void cap_sb_post_addmount(struct vfsmount *mnt, struct path *path)
-{
-}
-
 static int cap_sb_pivotroot(struct path *old_path, struct path *new_path)
 {
 	return 0;
@@ -924,7 +920,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_statfs);
 	set_to_cap_if_null(ops, sb_mount);
 	set_to_cap_if_null(ops, sb_umount);
-	set_to_cap_if_null(ops, sb_post_addmount);
 	set_to_cap_if_null(ops, sb_pivotroot);
 	set_to_cap_if_null(ops, sb_post_pivotroot);
 	set_to_cap_if_null(ops, sb_set_mnt_opts);
diff --git a/security/security.c b/security/security.c
index d9538d98736b..6a8e5411d7a0 100644
--- a/security/security.c
+++ b/security/security.c
@@ -311,11 +311,6 @@ int security_sb_umount(struct vfsmount *mnt, int flags)
 	return security_ops->sb_umount(mnt, flags);
 }
 
-void security_sb_post_addmount(struct vfsmount *mnt, struct path *mountpoint)
-{
-	security_ops->sb_post_addmount(mnt, mountpoint);
-}
-
 int security_sb_pivotroot(struct path *old_path, struct path *new_path)
 {
 	return security_ops->sb_pivotroot(old_path, new_path);
-- 
cgit v1.2.3


From 91a9420f5826db482030c21eca8c507271bbc441 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:45 -0400
Subject: security: remove dead hook sb_post_pivotroot

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/namespace.c           |  1 -
 include/linux/security.h | 11 -----------
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 22 deletions(-)

(limited to 'include/linux')

diff --git a/fs/namespace.c b/fs/namespace.c
index 7a0c9ce62bed..c36785a2fd86 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2265,7 +2265,6 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	touch_mnt_namespace(current->nsproxy->mnt_ns);
 	spin_unlock(&vfsmount_lock);
 	chroot_fs_refs(&root, &new);
-	security_sb_post_pivotroot(&root, &new);
 	error = 0;
 	path_put(&root_parent);
 	path_put(&parent_path);
diff --git a/include/linux/security.h b/include/linux/security.h
index a42d733086a1..4de5bbb39052 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -277,10 +277,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@old_path contains the path for the new location of the current root (put_old).
  *	@new_path contains the path for the new root (new_root).
  *	Return 0 if permission is granted.
- * @sb_post_pivotroot:
- *	Update module state after a successful pivot.
- *	@old_path contains the path for the old root.
- *	@new_path contains the path for the new root.
  * @sb_set_mnt_opts:
  *	Set the security relevant mount options used for a superblock
  *	@sb the superblock to set security mount options for
@@ -1458,8 +1454,6 @@ struct security_operations {
 	int (*sb_umount) (struct vfsmount *mnt, int flags);
 	int (*sb_pivotroot) (struct path *old_path,
 			     struct path *new_path);
-	void (*sb_post_pivotroot) (struct path *old_path,
-				   struct path *new_path);
 	int (*sb_set_mnt_opts) (struct super_block *sb,
 				struct security_mnt_opts *opts);
 	void (*sb_clone_mnt_opts) (const struct super_block *oldsb,
@@ -1749,7 +1743,6 @@ int security_sb_mount(char *dev_name, struct path *path,
 		      char *type, unsigned long flags, void *data);
 int security_sb_umount(struct vfsmount *mnt, int flags);
 int security_sb_pivotroot(struct path *old_path, struct path *new_path);
-void security_sb_post_pivotroot(struct path *old_path, struct path *new_path);
 int security_sb_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts);
 void security_sb_clone_mnt_opts(const struct super_block *oldsb,
 				struct super_block *newsb);
@@ -2069,10 +2062,6 @@ static inline int security_sb_pivotroot(struct path *old_path,
 	return 0;
 }
 
-static inline void security_sb_post_pivotroot(struct path *old_path,
-					      struct path *new_path)
-{ }
-
 static inline int security_sb_set_mnt_opts(struct super_block *sb,
 					   struct security_mnt_opts *opts)
 {
diff --git a/security/capability.c b/security/capability.c
index 38752ddf69d7..b64b50105a04 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -90,10 +90,6 @@ static int cap_sb_pivotroot(struct path *old_path, struct path *new_path)
 	return 0;
 }
 
-static void cap_sb_post_pivotroot(struct path *old_path, struct path *new_path)
-{
-}
-
 static int cap_sb_set_mnt_opts(struct super_block *sb,
 			       struct security_mnt_opts *opts)
 {
@@ -921,7 +917,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, sb_mount);
 	set_to_cap_if_null(ops, sb_umount);
 	set_to_cap_if_null(ops, sb_pivotroot);
-	set_to_cap_if_null(ops, sb_post_pivotroot);
 	set_to_cap_if_null(ops, sb_set_mnt_opts);
 	set_to_cap_if_null(ops, sb_clone_mnt_opts);
 	set_to_cap_if_null(ops, sb_parse_opts_str);
diff --git a/security/security.c b/security/security.c
index 6a8e5411d7a0..5223fa408271 100644
--- a/security/security.c
+++ b/security/security.c
@@ -316,11 +316,6 @@ int security_sb_pivotroot(struct path *old_path, struct path *new_path)
 	return security_ops->sb_pivotroot(old_path, new_path);
 }
 
-void security_sb_post_pivotroot(struct path *old_path, struct path *new_path)
-{
-	security_ops->sb_post_pivotroot(old_path, new_path);
-}
-
 int security_sb_set_mnt_opts(struct super_block *sb,
 				struct security_mnt_opts *opts)
 {
-- 
cgit v1.2.3


From 9d5ed77dadc66a72b40419c91df942adfa55a102 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:50 -0400
Subject: security: remove dead hook inode_delete

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 fs/inode.c               |  2 --
 include/linux/security.h | 11 -----------
 security/capability.c    |  5 -----
 security/security.c      |  7 -------
 4 files changed, 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/inode.c b/fs/inode.c
index 407bf392e20a..258ec22bb298 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1205,8 +1205,6 @@ void generic_delete_inode(struct inode *inode)
 	inodes_stat.nr_inodes--;
 	spin_unlock(&inode_lock);
 
-	security_inode_delete(inode);
-
 	if (op->delete_inode) {
 		void (*delete)(struct inode *) = op->delete_inode;
 		/* Filesystems implementing their own
diff --git a/include/linux/security.h b/include/linux/security.h
index 4de5bbb39052..ef6edc759891 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -478,12 +478,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@mnt is the vfsmount where the dentry was looked up
  *	@dentry contains the dentry structure for the file.
  *	Return 0 if permission is granted.
- * @inode_delete:
- *	@inode contains the inode structure for deleted inode.
- *	This hook is called when a deleted inode is released (i.e. an inode
- *	with no hard links has its use count drop to zero).  A security module
- *	can use this hook to release any persistent label associated with the
- *	inode.
  * @inode_setxattr:
  *	Check permission before setting the extended attributes
  *	@value identified by @name for @dentry.
@@ -1502,7 +1496,6 @@ struct security_operations {
 	int (*inode_permission) (struct inode *inode, int mask);
 	int (*inode_setattr)	(struct dentry *dentry, struct iattr *attr);
 	int (*inode_getattr) (struct vfsmount *mnt, struct dentry *dentry);
-	void (*inode_delete) (struct inode *inode);
 	int (*inode_setxattr) (struct dentry *dentry, const char *name,
 			       const void *value, size_t size, int flags);
 	void (*inode_post_setxattr) (struct dentry *dentry, const char *name,
@@ -1768,7 +1761,6 @@ int security_inode_follow_link(struct dentry *dentry, struct nameidata *nd);
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry);
-void security_inode_delete(struct inode *inode);
 int security_inode_setxattr(struct dentry *dentry, const char *name,
 			    const void *value, size_t size, int flags);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
@@ -2177,9 +2169,6 @@ static inline int security_inode_getattr(struct vfsmount *mnt,
 	return 0;
 }
 
-static inline void security_inode_delete(struct inode *inode)
-{ }
-
 static inline int security_inode_setxattr(struct dentry *dentry,
 		const char *name, const void *value, size_t size, int flags)
 {
diff --git a/security/capability.c b/security/capability.c
index b64b50105a04..b855e9f27f0e 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -195,10 +195,6 @@ static int cap_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 	return 0;
 }
 
-static void cap_inode_delete(struct inode *ino)
-{
-}
-
 static void cap_inode_post_setxattr(struct dentry *dentry, const char *name,
 				    const void *value, size_t size, int flags)
 {
@@ -936,7 +932,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, inode_permission);
 	set_to_cap_if_null(ops, inode_setattr);
 	set_to_cap_if_null(ops, inode_getattr);
-	set_to_cap_if_null(ops, inode_delete);
 	set_to_cap_if_null(ops, inode_setxattr);
 	set_to_cap_if_null(ops, inode_post_setxattr);
 	set_to_cap_if_null(ops, inode_getxattr);
diff --git a/security/security.c b/security/security.c
index 5223fa408271..c65b0bca05bb 100644
--- a/security/security.c
+++ b/security/security.c
@@ -550,13 +550,6 @@ int security_inode_getattr(struct vfsmount *mnt, struct dentry *dentry)
 	return security_ops->inode_getattr(mnt, dentry);
 }
 
-void security_inode_delete(struct inode *inode)
-{
-	if (unlikely(IS_PRIVATE(inode)))
-		return;
-	security_ops->inode_delete(inode);
-}
-
 int security_inode_setxattr(struct dentry *dentry, const char *name,
 			    const void *value, size_t size, int flags)
 {
-- 
cgit v1.2.3


From 0968d0060a3c885e53d453380266c7792a55d302 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:14:56 -0400
Subject: security: remove dead hook cred_commit

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 11 -----------
 kernel/cred.c            |  2 --
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ef6edc759891..33b0c1b27f82 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -652,10 +652,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@old points to the original credentials.
  *	@gfp indicates the atomicity of any memory allocations.
  *	Prepare a new set of credentials by copying the data from the old set.
- * @cred_commit:
- *	@new points to the new credentials.
- *	@old points to the original credentials.
- *	Install a new set of credentials.
  * @cred_transfer:
  *	@new points to the new credentials.
  *	@old points to the original credentials.
@@ -1536,7 +1532,6 @@ struct security_operations {
 	void (*cred_free) (struct cred *cred);
 	int (*cred_prepare)(struct cred *new, const struct cred *old,
 			    gfp_t gfp);
-	void (*cred_commit)(struct cred *new, const struct cred *old);
 	void (*cred_transfer)(struct cred *new, const struct cred *old);
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
@@ -1794,7 +1789,6 @@ int security_task_create(unsigned long clone_flags);
 int security_cred_alloc_blank(struct cred *cred, gfp_t gfp);
 void security_cred_free(struct cred *cred);
 int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp);
-void security_commit_creds(struct cred *new, const struct cred *old);
 void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
@@ -2315,11 +2309,6 @@ static inline int security_prepare_creds(struct cred *new,
 	return 0;
 }
 
-static inline void security_commit_creds(struct cred *new,
-					 const struct cred *old)
-{
-}
-
 static inline void security_transfer_creds(struct cred *new,
 					   const struct cred *old)
 {
diff --git a/kernel/cred.c b/kernel/cred.c
index 1b1129d0cce8..fecb34640482 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -520,8 +520,6 @@ int commit_creds(struct cred *new)
 #endif
 	BUG_ON(atomic_read(&new->usage) < 1);
 
-	security_commit_creds(new, old);
-
 	get_cred(new); /* we will require a ref for the subj creds too */
 
 	/* dumpability changes */
diff --git a/security/capability.c b/security/capability.c
index b855e9f27f0e..a927bdea1816 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -373,10 +373,6 @@ static int cap_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp)
 	return 0;
 }
 
-static void cap_cred_commit(struct cred *new, const struct cred *old)
-{
-}
-
 static void cap_cred_transfer(struct cred *new, const struct cred *old)
 {
 }
@@ -972,7 +968,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, cred_alloc_blank);
 	set_to_cap_if_null(ops, cred_free);
 	set_to_cap_if_null(ops, cred_prepare);
-	set_to_cap_if_null(ops, cred_commit);
 	set_to_cap_if_null(ops, cred_transfer);
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
diff --git a/security/security.c b/security/security.c
index c65b0bca05bb..6e5942653d4f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -712,11 +712,6 @@ int security_prepare_creds(struct cred *new, const struct cred *old, gfp_t gfp)
 	return security_ops->cred_prepare(new, old, gfp);
 }
 
-void security_commit_creds(struct cred *new, const struct cred *old)
-{
-	security_ops->cred_commit(new, old);
-}
-
 void security_transfer_creds(struct cred *new, const struct cred *old)
 {
 	security_ops->cred_transfer(new, old);
-- 
cgit v1.2.3


From 43ed8c3b4573d5f5cd314937fee63b4ab046ac5f Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:15:02 -0400
Subject: security: remove dead hook task_setuid

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 20 --------------------
 kernel/sys.c             | 15 ---------------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 46 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 33b0c1b27f82..447c57fcec88 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -674,18 +674,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	userspace to load a kernel module with the given name.
  *	@kmod_name name of the module requested by the kernel
  *	Return 0 if successful.
- * @task_setuid:
- *	Check permission before setting one or more of the user identity
- *	attributes of the current process.  The @flags parameter indicates
- *	which of the set*uid system calls invoked this hook and how to
- *	interpret the @id0, @id1, and @id2 parameters.  See the LSM_SETID
- *	definitions at the beginning of this file for the @flags values and
- *	their meanings.
- *	@id0 contains a uid.
- *	@id1 contains a uid.
- *	@id2 contains a uid.
- *	@flags contains one of the LSM_SETID_* values.
- *	Return 0 if permission is granted.
  * @task_fix_setuid:
  *	Update the module's state after setting one or more of the user
  *	identity attributes of the current process.  The @flags parameter
@@ -1536,7 +1524,6 @@ struct security_operations {
 	int (*kernel_act_as)(struct cred *new, u32 secid);
 	int (*kernel_create_files_as)(struct cred *new, struct inode *inode);
 	int (*kernel_module_request)(char *kmod_name);
-	int (*task_setuid) (uid_t id0, uid_t id1, uid_t id2, int flags);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
 	int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags);
@@ -1793,7 +1780,6 @@ void security_transfer_creds(struct cred *new, const struct cred *old);
 int security_kernel_act_as(struct cred *new, u32 secid);
 int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
-int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
 int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags);
@@ -2330,12 +2316,6 @@ static inline int security_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
-static inline int security_task_setuid(uid_t id0, uid_t id1, uid_t id2,
-				       int flags)
-{
-	return 0;
-}
-
 static inline int security_task_fix_setuid(struct cred *new,
 					   const struct cred *old,
 					   int flags)
diff --git a/kernel/sys.c b/kernel/sys.c
index 8298878f4f71..396c11cd9a20 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -609,10 +609,6 @@ SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
 		return -ENOMEM;
 	old = current_cred();
 
-	retval = security_task_setuid(ruid, euid, (uid_t)-1, LSM_SETID_RE);
-	if (retval)
-		goto error;
-
 	retval = -EPERM;
 	if (ruid != (uid_t) -1) {
 		new->uid = ruid;
@@ -674,10 +670,6 @@ SYSCALL_DEFINE1(setuid, uid_t, uid)
 		return -ENOMEM;
 	old = current_cred();
 
-	retval = security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_ID);
-	if (retval)
-		goto error;
-
 	retval = -EPERM;
 	if (capable(CAP_SETUID)) {
 		new->suid = new->uid = uid;
@@ -718,9 +710,6 @@ SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
 	if (!new)
 		return -ENOMEM;
 
-	retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES);
-	if (retval)
-		goto error;
 	old = current_cred();
 
 	retval = -EPERM;
@@ -850,9 +839,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 	old = current_cred();
 	old_fsuid = old->fsuid;
 
-	if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS) < 0)
-		goto error;
-
 	if (uid == old->uid  || uid == old->euid  ||
 	    uid == old->suid || uid == old->fsuid ||
 	    capable(CAP_SETUID)) {
@@ -863,7 +849,6 @@ SYSCALL_DEFINE1(setfsuid, uid_t, uid)
 		}
 	}
 
-error:
 	abort_creds(new);
 	return old_fsuid;
 
diff --git a/security/capability.c b/security/capability.c
index a927bdea1816..41ff54f3b4d8 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -392,11 +392,6 @@ static int cap_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
-static int cap_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
-{
-	return 0;
-}
-
 static int cap_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
 {
 	return 0;
@@ -972,7 +967,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, kernel_act_as);
 	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, kernel_module_request);
-	set_to_cap_if_null(ops, task_setuid);
 	set_to_cap_if_null(ops, task_fix_setuid);
 	set_to_cap_if_null(ops, task_setgid);
 	set_to_cap_if_null(ops, task_setpgid);
diff --git a/security/security.c b/security/security.c
index 6e5942653d4f..3900da3da87b 100644
--- a/security/security.c
+++ b/security/security.c
@@ -732,11 +732,6 @@ int security_kernel_module_request(char *kmod_name)
 	return security_ops->kernel_module_request(kmod_name);
 }
 
-int security_task_setuid(uid_t id0, uid_t id1, uid_t id2, int flags)
-{
-	return security_ops->task_setuid(id0, id1, id2, flags);
-}
-
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags)
 {
-- 
cgit v1.2.3


From 06ad187e280e725e356c62c3a30ddcd01564f8be Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:15:08 -0400
Subject: security: remove dead hook task_setgid

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 20 --------------------
 kernel/sys.c             | 16 ----------------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 447c57fcec88..04ce0d6e4edf 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -683,18 +683,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@old is the set of credentials that are being replaces
  *	@flags contains one of the LSM_SETID_* values.
  *	Return 0 on success.
- * @task_setgid:
- *	Check permission before setting one or more of the group identity
- *	attributes of the current process.  The @flags parameter indicates
- *	which of the set*gid system calls invoked this hook and how to
- *	interpret the @id0, @id1, and @id2 parameters.  See the LSM_SETID
- *	definitions at the beginning of this file for the @flags values and
- *	their meanings.
- *	@id0 contains a gid.
- *	@id1 contains a gid.
- *	@id2 contains a gid.
- *	@flags contains one of the LSM_SETID_* values.
- *	Return 0 if permission is granted.
  * @task_setpgid:
  *	Check permission before setting the process group identifier of the
  *	process @p to @pgid.
@@ -1526,7 +1514,6 @@ struct security_operations {
 	int (*kernel_module_request)(char *kmod_name);
 	int (*task_fix_setuid) (struct cred *new, const struct cred *old,
 				int flags);
-	int (*task_setgid) (gid_t id0, gid_t id1, gid_t id2, int flags);
 	int (*task_setpgid) (struct task_struct *p, pid_t pgid);
 	int (*task_getpgid) (struct task_struct *p);
 	int (*task_getsid) (struct task_struct *p);
@@ -1782,7 +1769,6 @@ int security_kernel_create_files_as(struct cred *new, struct inode *inode);
 int security_kernel_module_request(char *kmod_name);
 int security_task_fix_setuid(struct cred *new, const struct cred *old,
 			     int flags);
-int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags);
 int security_task_setpgid(struct task_struct *p, pid_t pgid);
 int security_task_getpgid(struct task_struct *p);
 int security_task_getsid(struct task_struct *p);
@@ -2323,12 +2309,6 @@ static inline int security_task_fix_setuid(struct cred *new,
 	return cap_task_fix_setuid(new, old, flags);
 }
 
-static inline int security_task_setgid(gid_t id0, gid_t id1, gid_t id2,
-				       int flags)
-{
-	return 0;
-}
-
 static inline int security_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return 0;
diff --git a/kernel/sys.c b/kernel/sys.c
index 396c11cd9a20..eb1ec5c7f03b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -491,10 +491,6 @@ SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
 		return -ENOMEM;
 	old = current_cred();
 
-	retval = security_task_setgid(rgid, egid, (gid_t)-1, LSM_SETID_RE);
-	if (retval)
-		goto error;
-
 	retval = -EPERM;
 	if (rgid != (gid_t) -1) {
 		if (old->gid == rgid ||
@@ -542,10 +538,6 @@ SYSCALL_DEFINE1(setgid, gid_t, gid)
 		return -ENOMEM;
 	old = current_cred();
 
-	retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID);
-	if (retval)
-		goto error;
-
 	retval = -EPERM;
 	if (capable(CAP_SETGID))
 		new->gid = new->egid = new->sgid = new->fsgid = gid;
@@ -776,10 +768,6 @@ SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
 		return -ENOMEM;
 	old = current_cred();
 
-	retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES);
-	if (retval)
-		goto error;
-
 	retval = -EPERM;
 	if (!capable(CAP_SETGID)) {
 		if (rgid != (gid_t) -1 && rgid != old->gid &&
@@ -872,9 +860,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 	old = current_cred();
 	old_fsgid = old->fsgid;
 
-	if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS))
-		goto error;
-
 	if (gid == old->gid  || gid == old->egid  ||
 	    gid == old->sgid || gid == old->fsgid ||
 	    capable(CAP_SETGID)) {
@@ -884,7 +869,6 @@ SYSCALL_DEFINE1(setfsgid, gid_t, gid)
 		}
 	}
 
-error:
 	abort_creds(new);
 	return old_fsgid;
 
diff --git a/security/capability.c b/security/capability.c
index 41ff54f3b4d8..66cd7453563f 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -392,11 +392,6 @@ static int cap_kernel_module_request(char *kmod_name)
 	return 0;
 }
 
-static int cap_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
-{
-	return 0;
-}
-
 static int cap_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return 0;
@@ -968,7 +963,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, kernel_create_files_as);
 	set_to_cap_if_null(ops, kernel_module_request);
 	set_to_cap_if_null(ops, task_fix_setuid);
-	set_to_cap_if_null(ops, task_setgid);
 	set_to_cap_if_null(ops, task_setpgid);
 	set_to_cap_if_null(ops, task_getpgid);
 	set_to_cap_if_null(ops, task_getsid);
diff --git a/security/security.c b/security/security.c
index 3900da3da87b..1e35dd669209 100644
--- a/security/security.c
+++ b/security/security.c
@@ -738,11 +738,6 @@ int security_task_fix_setuid(struct cred *new, const struct cred *old,
 	return security_ops->task_fix_setuid(new, old, flags);
 }
 
-int security_task_setgid(gid_t id0, gid_t id1, gid_t id2, int flags)
-{
-	return security_ops->task_setgid(id0, id1, id2, flags);
-}
-
 int security_task_setpgid(struct task_struct *p, pid_t pgid)
 {
 	return security_ops->task_setpgid(p, pgid);
-- 
cgit v1.2.3


From 6307f8fee295b364716d28686df6e69c2fee751a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:15:13 -0400
Subject: security: remove dead hook task_setgroups

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 12 ------------
 kernel/groups.c          |  6 ------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 04ce0d6e4edf..73505f0c9b75 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -704,11 +704,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@p contains the task_struct for the process and place is into @secid.
  *	In case of failure, @secid will be set to zero.
  *
- * @task_setgroups:
- *	Check permission before setting the supplementary group set of the
- *	current process.
- *	@group_info contains the new group information.
- *	Return 0 if permission is granted.
  * @task_setnice:
  *	Check permission before setting the nice value of @p to @nice.
  *	@p contains the task_struct of process.
@@ -1518,7 +1513,6 @@ struct security_operations {
 	int (*task_getpgid) (struct task_struct *p);
 	int (*task_getsid) (struct task_struct *p);
 	void (*task_getsecid) (struct task_struct *p, u32 *secid);
-	int (*task_setgroups) (struct group_info *group_info);
 	int (*task_setnice) (struct task_struct *p, int nice);
 	int (*task_setioprio) (struct task_struct *p, int ioprio);
 	int (*task_getioprio) (struct task_struct *p);
@@ -1773,7 +1767,6 @@ int security_task_setpgid(struct task_struct *p, pid_t pgid);
 int security_task_getpgid(struct task_struct *p);
 int security_task_getsid(struct task_struct *p);
 void security_task_getsecid(struct task_struct *p, u32 *secid);
-int security_task_setgroups(struct group_info *group_info);
 int security_task_setnice(struct task_struct *p, int nice);
 int security_task_setioprio(struct task_struct *p, int ioprio);
 int security_task_getioprio(struct task_struct *p);
@@ -2329,11 +2322,6 @@ static inline void security_task_getsecid(struct task_struct *p, u32 *secid)
 	*secid = 0;
 }
 
-static inline int security_task_setgroups(struct group_info *group_info)
-{
-	return 0;
-}
-
 static inline int security_task_setnice(struct task_struct *p, int nice)
 {
 	return cap_task_setnice(p, nice);
diff --git a/kernel/groups.c b/kernel/groups.c
index 2b45b2ee3964..53b1916c9492 100644
--- a/kernel/groups.c
+++ b/kernel/groups.c
@@ -164,12 +164,6 @@ int groups_search(const struct group_info *group_info, gid_t grp)
  */
 int set_groups(struct cred *new, struct group_info *group_info)
 {
-	int retval;
-
-	retval = security_task_setgroups(group_info);
-	if (retval)
-		return retval;
-
 	put_group_info(new->group_info);
 	groups_sort(group_info);
 	get_group_info(group_info);
diff --git a/security/capability.c b/security/capability.c
index 66cd7453563f..247c04edd468 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -412,11 +412,6 @@ static void cap_task_getsecid(struct task_struct *p, u32 *secid)
 	*secid = 0;
 }
 
-static int cap_task_setgroups(struct group_info *group_info)
-{
-	return 0;
-}
-
 static int cap_task_getioprio(struct task_struct *p)
 {
 	return 0;
@@ -967,7 +962,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, task_getpgid);
 	set_to_cap_if_null(ops, task_getsid);
 	set_to_cap_if_null(ops, task_getsecid);
-	set_to_cap_if_null(ops, task_setgroups);
 	set_to_cap_if_null(ops, task_setnice);
 	set_to_cap_if_null(ops, task_setioprio);
 	set_to_cap_if_null(ops, task_getioprio);
diff --git a/security/security.c b/security/security.c
index 1e35dd669209..5cf9ca6890f6 100644
--- a/security/security.c
+++ b/security/security.c
@@ -759,11 +759,6 @@ void security_task_getsecid(struct task_struct *p, u32 *secid)
 }
 EXPORT_SYMBOL(security_task_getsecid);
 
-int security_task_setgroups(struct group_info *group_info)
-{
-	return security_ops->task_setgroups(group_info);
-}
-
 int security_task_setnice(struct task_struct *p, int nice)
 {
 	return security_ops->task_setnice(p, nice);
-- 
cgit v1.2.3


From 3011a344cdcda34cdbcb40c3fb3d1a6e89954abb Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:15:19 -0400
Subject: security: remove dead hook key_session_to_parent

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 20 --------------------
 security/capability.c    |  8 --------
 security/keys/keyctl.c   |  7 -------
 security/security.c      |  7 -------
 4 files changed, 42 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index 73505f0c9b75..ac536eedec90 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1067,13 +1067,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	Return the length of the string (including terminating NUL) or -ve if
  *      an error.
  *	May also return 0 (and a NULL buffer pointer) if there is no label.
- * @key_session_to_parent:
- *	Forcibly assign the session keyring from a process to its parent
- *	process.
- *	@cred: Pointer to process's credentials
- *	@parent_cred: Pointer to parent process's credentials
- *	@keyring: Proposed new session keyring
- *	Return 0 if permission is granted, -ve error otherwise.
  *
  * Security hooks affecting all System V IPC operations.
  *
@@ -1642,9 +1635,6 @@ struct security_operations {
 			       const struct cred *cred,
 			       key_perm_t perm);
 	int (*key_getsecurity)(struct key *key, char **_buffer);
-	int (*key_session_to_parent)(const struct cred *cred,
-				     const struct cred *parent_cred,
-				     struct key *key);
 #endif	/* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -2918,9 +2908,6 @@ void security_key_free(struct key *key);
 int security_key_permission(key_ref_t key_ref,
 			    const struct cred *cred, key_perm_t perm);
 int security_key_getsecurity(struct key *key, char **_buffer);
-int security_key_session_to_parent(const struct cred *cred,
-				   const struct cred *parent_cred,
-				   struct key *key);
 
 #else
 
@@ -2948,13 +2935,6 @@ static inline int security_key_getsecurity(struct key *key, char **_buffer)
 	return 0;
 }
 
-static inline int security_key_session_to_parent(const struct cred *cred,
-						 const struct cred *parent_cred,
-						 struct key *key)
-{
-	return 0;
-}
-
 #endif
 #endif /* CONFIG_KEYS */
 
diff --git a/security/capability.c b/security/capability.c
index 247c04edd468..8cc2b8f3b166 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -826,13 +826,6 @@ static int cap_key_getsecurity(struct key *key, char **_buffer)
 	return 0;
 }
 
-static int cap_key_session_to_parent(const struct cred *cred,
-				     const struct cred *parent_cred,
-				     struct key *key)
-{
-	return 0;
-}
-
 #endif /* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
@@ -1053,7 +1046,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, key_free);
 	set_to_cap_if_null(ops, key_permission);
 	set_to_cap_if_null(ops, key_getsecurity);
-	set_to_cap_if_null(ops, key_session_to_parent);
 #endif	/* CONFIG_KEYS */
 #ifdef CONFIG_AUDIT
 	set_to_cap_if_null(ops, audit_rule_init);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e9c2e7c584d9..34b302b40dea 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -1295,13 +1295,6 @@ long keyctl_session_to_parent(void)
 	    mycred->tgcred->session_keyring->uid != mycred->euid)
 		goto not_permitted;
 
-	/* the LSM must permit the replacement of the parent's keyring with the
-	 * keyring from this process */
-	ret = security_key_session_to_parent(mycred, pcred,
-					     key_ref_to_ptr(keyring_r));
-	if (ret < 0)
-		goto not_permitted;
-
 	/* if there's an already pending keyring replacement, then we replace
 	 * that */
 	oldcred = parent->replacement_session_keyring;
diff --git a/security/security.c b/security/security.c
index 5cf9ca6890f6..490f77753b2d 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1262,13 +1262,6 @@ int security_key_getsecurity(struct key *key, char **_buffer)
 	return security_ops->key_getsecurity(key, _buffer);
 }
 
-int security_key_session_to_parent(const struct cred *cred,
-				   const struct cred *parent_cred,
-				   struct key *key)
-{
-	return security_ops->key_session_to_parent(cred, parent_cred, key);
-}
-
 #endif	/* CONFIG_KEYS */
 
 #ifdef CONFIG_AUDIT
-- 
cgit v1.2.3


From 05b90496f2f366b9d3eea468351888ddf010782a Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Wed, 7 Apr 2010 15:15:25 -0400
Subject: security: remove dead hook acct

Unused hook.  Remove.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 14 --------------
 kernel/acct.c            | 20 +++++---------------
 security/capability.c    |  6 ------
 security/security.c      |  5 -----
 4 files changed, 5 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/security.h b/include/linux/security.h
index ac536eedec90..d670c9a3ec2b 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -1254,13 +1254,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@cap contains the capability <include/linux/capability.h>.
  *	@audit: Whether to write an audit message or not
  *	Return 0 if the capability is granted for @tsk.
- * @acct:
- *	Check permission before enabling or disabling process accounting.  If
- *	accounting is being enabled, then @file refers to the open file used to
- *	store accounting records.  If accounting is being disabled, then @file
- *	is NULL.
- *	@file contains the file structure for the accounting file (may be NULL).
- *	Return 0 if permission is granted.
  * @sysctl:
  *	Check permission before accessing the @table sysctl variable in the
  *	manner specified by @op.
@@ -1383,7 +1376,6 @@ struct security_operations {
 		       const kernel_cap_t *permitted);
 	int (*capable) (struct task_struct *tsk, const struct cred *cred,
 			int cap, int audit);
-	int (*acct) (struct file *file);
 	int (*sysctl) (struct ctl_table *table, int op);
 	int (*quotactl) (int cmds, int type, int id, struct super_block *sb);
 	int (*quota_on) (struct dentry *dentry);
@@ -1665,7 +1657,6 @@ int security_capset(struct cred *new, const struct cred *old,
 int security_capable(int cap);
 int security_real_capable(struct task_struct *tsk, int cap);
 int security_real_capable_noaudit(struct task_struct *tsk, int cap);
-int security_acct(struct file *file);
 int security_sysctl(struct ctl_table *table, int op);
 int security_quotactl(int cmds, int type, int id, struct super_block *sb);
 int security_quota_on(struct dentry *dentry);
@@ -1883,11 +1874,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
 	return ret;
 }
 
-static inline int security_acct(struct file *file)
-{
-	return 0;
-}
-
 static inline int security_sysctl(struct ctl_table *table, int op)
 {
 	return 0;
diff --git a/kernel/acct.c b/kernel/acct.c
index 24f8c81fc48d..9e53bb2acfff 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -216,7 +216,6 @@ static int acct_on(char *name)
 {
 	struct file *file;
 	struct vfsmount *mnt;
-	int error;
 	struct pid_namespace *ns;
 	struct bsd_acct_struct *acct = NULL;
 
@@ -244,13 +243,6 @@ static int acct_on(char *name)
 		}
 	}
 
-	error = security_acct(file);
-	if (error) {
-		kfree(acct);
-		filp_close(file, NULL);
-		return error;
-	}
-
 	spin_lock(&acct_lock);
 	if (ns->bacct == NULL) {
 		ns->bacct = acct;
@@ -281,7 +273,7 @@ static int acct_on(char *name)
  */
 SYSCALL_DEFINE1(acct, const char __user *, name)
 {
-	int error;
+	int error = 0;
 
 	if (!capable(CAP_SYS_PACCT))
 		return -EPERM;
@@ -299,13 +291,11 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
 		if (acct == NULL)
 			return 0;
 
-		error = security_acct(NULL);
-		if (!error) {
-			spin_lock(&acct_lock);
-			acct_file_reopen(acct, NULL, NULL);
-			spin_unlock(&acct_lock);
-		}
+		spin_lock(&acct_lock);
+		acct_file_reopen(acct, NULL, NULL);
+		spin_unlock(&acct_lock);
 	}
+
 	return error;
 }
 
diff --git a/security/capability.c b/security/capability.c
index 8cc2b8f3b166..7f093d573ede 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -12,11 +12,6 @@
 
 #include <linux/security.h>
 
-static int cap_acct(struct file *file)
-{
-	return 0;
-}
-
 static int cap_sysctl(ctl_table *table, int op)
 {
 	return 0;
@@ -865,7 +860,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, ptrace_traceme);
 	set_to_cap_if_null(ops, capget);
 	set_to_cap_if_null(ops, capset);
-	set_to_cap_if_null(ops, acct);
 	set_to_cap_if_null(ops, capable);
 	set_to_cap_if_null(ops, quotactl);
 	set_to_cap_if_null(ops, quota_on);
diff --git a/security/security.c b/security/security.c
index 490f77753b2d..8585019a1a59 100644
--- a/security/security.c
+++ b/security/security.c
@@ -190,11 +190,6 @@ int security_real_capable_noaudit(struct task_struct *tsk, int cap)
 	return ret;
 }
 
-int security_acct(struct file *file)
-{
-	return security_ops->acct(file);
-}
-
 int security_sysctl(struct ctl_table *table, int op)
 {
 	return security_ops->sysctl(table, op);
-- 
cgit v1.2.3


From 43b8774dc409ea5d9369b978e2e7bc79289f0522 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Tue, 13 Apr 2010 14:43:03 +0900
Subject: sh: intc: userimask support.

This adds support for hardware-assisted userspace irq masking for
special priority levels. Due to the SR.IMASK interactivity, only some
platforms implement this in hardware (including but not limited to
SH-4A interrupt controllers, and ARM-based SH-Mobile CPUs). Each CPU
needs to wire this up on its own, for now only SH7786 is wired up as an
example.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/Kconfig                        |  2 +
 arch/sh/kernel/cpu/sh4a/setup-sh7786.c |  3 ++
 drivers/sh/Kconfig                     | 13 +++++++
 drivers/sh/intc.c                      | 69 +++++++++++++++++++++++++++++++++-
 include/linux/sh_intc.h                |  9 +++++
 5 files changed, 94 insertions(+), 2 deletions(-)
 create mode 100644 drivers/sh/Kconfig

(limited to 'include/linux')

diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8d90564c2bcf..ba86bfba95ac 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -732,6 +732,8 @@ config GUSA_RB
 	  LLSC, this should be more efficient than the other alternative of
 	  disabling interrupts around the atomic sequence.
 
+source "drivers/sh/Kconfig"
+
 endmenu
 
 menu "Boot options"
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 61e549190873..235edf8065df 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -21,6 +21,7 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 #include <linux/sh_timer.h>
+#include <linux/sh_intc.h>
 #include <cpu/dma-register.h>
 #include <asm/mmzone.h>
 #include <asm/dmaengine.h>
@@ -907,6 +908,7 @@ static DECLARE_INTC_DESC(intc_desc_irl4567, "sh7786-irl4567", vectors_irl4567,
 #define INTC_INTMSK2	INTMSK2
 #define INTC_INTMSKCLR1	CnINTMSKCLR1
 #define INTC_INTMSKCLR2	INTMSKCLR2
+#define INTC_USERIMASK	0xfe411000
 
 void __init plat_irq_setup(void)
 {
@@ -921,6 +923,7 @@ void __init plat_irq_setup(void)
 	__raw_writel(__raw_readl(INTC_ICR0) & ~0x00c00000, INTC_ICR0);
 
 	register_intc_controller(&intc_desc);
+	register_intc_userimask(INTC_USERIMASK);
 }
 
 void __init plat_irq_setup_pins(int mode)
diff --git a/drivers/sh/Kconfig b/drivers/sh/Kconfig
new file mode 100644
index 000000000000..22c3cdaf22fe
--- /dev/null
+++ b/drivers/sh/Kconfig
@@ -0,0 +1,13 @@
+config INTC_USERIMASK
+	bool "Userspace interrupt masking support"
+	depends on ARCH_SHMOBILE || (SUPERH && CPU_SH4A)
+	help
+	  This enables support for hardware-assisted userspace hardirq
+	  masking.
+
+	  SH-4A and newer interrupt blocks all support a special shadowed
+	  page with all non-masking registers obscured when mapped in to
+	  userspace. This is primarily for use by userspace device
+	  drivers that are using special priority levels.
+
+	  If in doubt, say N.
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 65e15828faaa..77d10acf1884 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -27,6 +27,7 @@
 #include <linux/topology.h>
 #include <linux/bitmap.h>
 #include <linux/cpumask.h>
+#include <asm/sizes.h>
 
 #define _INTC_MK(fn, mode, addr_e, addr_d, width, shift) \
 	((shift) | ((width) << 5) | ((fn) << 9) | ((mode) << 13) | \
@@ -94,7 +95,8 @@ static DEFINE_SPINLOCK(vector_lock);
 #define SMP_NR(d, x) 1
 #endif
 
-static unsigned int intc_prio_level[NR_IRQS]; /* for now */
+static unsigned int intc_prio_level[NR_IRQS];	/* for now */
+static unsigned int default_prio_level = 2;	/* 2 - 16 */
 static unsigned long ack_handle[NR_IRQS];
 
 static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
@@ -787,7 +789,7 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	/* set priority level
 	 * - this needs to be at least 2 for 5-bit priorities on 7780
 	 */
-	intc_prio_level[irq] = 2;
+	intc_prio_level[irq] = default_prio_level;
 
 	/* enable secondary masking method if present */
 	if (data[!primary])
@@ -1037,6 +1039,64 @@ err0:
 	return -ENOMEM;
 }
 
+#ifdef CONFIG_INTC_USERIMASK
+static void __iomem *uimask;
+
+int register_intc_userimask(unsigned long addr)
+{
+	if (unlikely(uimask))
+		return -EBUSY;
+
+	uimask = ioremap_nocache(addr, SZ_4K);
+	if (unlikely(!uimask))
+		return -ENOMEM;
+
+	pr_info("intc: userimask support registered for levels 0 -> %d\n",
+		default_prio_level - 1);
+
+	return 0;
+}
+
+static ssize_t
+show_intc_userimask(struct sysdev_class *cls,
+		    struct sysdev_class_attribute *attr, char *buf)
+{
+	return sprintf(buf, "%d\n", (__raw_readl(uimask) >> 4) & 0xf);
+}
+
+static ssize_t
+store_intc_userimask(struct sysdev_class *cls,
+		     struct sysdev_class_attribute *attr,
+		     const char *buf, size_t count)
+{
+	unsigned long level;
+
+	level = simple_strtoul(buf, NULL, 10);
+
+	/*
+	 * Minimal acceptable IRQ levels are in the 2 - 16 range, but
+	 * these are chomped so as to not interfere with normal IRQs.
+	 *
+	 * Level 1 is a special case on some CPUs in that it's not
+	 * directly settable, but given that USERIMASK cuts off below a
+	 * certain level, we don't care about this limitation here.
+	 * Level 0 on the other hand equates to user masking disabled.
+	 *
+	 * We use default_prio_level as a cut off so that only special
+	 * case opt-in IRQs can be mangled.
+	 */
+	if (level >= default_prio_level)
+		return -EINVAL;
+
+	__raw_writel(0xa5 << 24 | level << 4, uimask);
+
+	return count;
+}
+
+static SYSDEV_CLASS_ATTR(userimask, S_IRUSR | S_IWUSR,
+			 show_intc_userimask, store_intc_userimask);
+#endif
+
 static ssize_t
 show_intc_name(struct sys_device *dev, struct sysdev_attribute *attr, char *buf)
 {
@@ -1108,6 +1168,11 @@ static int __init register_intc_sysdevs(void)
 	int id = 0;
 
 	error = sysdev_class_register(&intc_sysdev_class);
+#ifdef CONFIG_INTC_USERIMASK
+	if (!error && uimask)
+		error = sysdev_class_create_file(&intc_sysdev_class,
+						 &attr_userimask);
+#endif
 	if (!error) {
 		list_for_each_entry(d, &intc_list, list) {
 			d->sysdev.id = id;
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index 01d8168c5a1b..f0e8cca199c7 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -99,6 +99,15 @@ struct intc_desc symbol __initdata = {					\
 int __init register_intc_controller(struct intc_desc *desc);
 int intc_set_priority(unsigned int irq, unsigned int prio);
 
+#ifdef CONFIG_INTC_USERIMASK
+int register_intc_userimask(unsigned long addr);
+#else
+static inline int register_intc_userimask(unsigned long addr)
+{
+	return 0;
+}
+#endif
+
 int reserve_irq_vector(unsigned int irq);
 void reserve_irq_legacy(void);
 
-- 
cgit v1.2.3


From ed85b565b825566da34e55eee9ad150ed93fdda0 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Wed, 7 Apr 2010 22:41:28 +0000
Subject: packet: support for TX time stamps on RAW sockets

Enable the SO_TIMESTAMPING socket infrastructure for raw packet sockets.
We introduce PACKET_TX_TIMESTAMP for the control message cmsg_type.

Similar support for UDP and CAN sockets was added in commit
51f31cabe3ce5345b51e4a4f82138b38c4d5dc91

Signed-off-by: Richard Cochran <richard.cochran@omicron.at>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_packet.h |  1 +
 net/packet/af_packet.c    | 61 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 61 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index aa57a5f993fc..6ac23ef1801a 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -47,6 +47,7 @@ struct sockaddr_ll {
 #define PACKET_TX_RING			13
 #define PACKET_LOSS			14
 #define PACKET_VNET_HDR			15
+#define PACKET_TX_TIMESTAMP		16
 
 struct tpacket_stats {
 	unsigned int	tp_packets;
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index d7d0310dca9d..f162d59d8161 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -82,6 +82,7 @@
 #include <linux/mutex.h>
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
+#include <linux/errqueue.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -315,6 +316,8 @@ static inline struct packet_sock *pkt_sk(struct sock *sk)
 
 static void packet_sock_destruct(struct sock *sk)
 {
+	skb_queue_purge(&sk->sk_error_queue);
+
 	WARN_ON(atomic_read(&sk->sk_rmem_alloc));
 	WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 
@@ -483,6 +486,9 @@ retry:
 	skb->dev = dev;
 	skb->priority = sk->sk_priority;
 	skb->mark = sk->sk_mark;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_unlock;
 
 	dev_queue_xmit(skb);
 	rcu_read_unlock();
@@ -1188,6 +1194,9 @@ static int packet_snd(struct socket *sock,
 	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
 	if (err)
 		goto out_free;
+	err = sock_tx_timestamp(msg, sk, skb_tx(skb));
+	if (err < 0)
+		goto out_free;
 
 	skb->protocol = proto;
 	skb->dev = dev;
@@ -1487,6 +1496,51 @@ out:
 	return err;
 }
 
+static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct sock_exterr_skb *serr;
+	struct sk_buff *skb, *skb2;
+	int copied, err;
+
+	err = -EAGAIN;
+	skb = skb_dequeue(&sk->sk_error_queue);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	serr = SKB_EXT_ERR(skb);
+	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
+		 sizeof(serr->ee), &serr->ee);
+
+	msg->msg_flags |= MSG_ERRQUEUE;
+	err = copied;
+
+	/* Reset and regenerate socket error */
+	spin_lock_bh(&sk->sk_error_queue.lock);
+	sk->sk_err = 0;
+	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
+		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+		sk->sk_error_report(sk);
+	} else
+		spin_unlock_bh(&sk->sk_error_queue.lock);
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
+
 /*
  *	Pull a packet from our receive queue and hand it to the user.
  *	If necessary we block.
@@ -1502,7 +1556,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 	int vnet_hdr_len = 0;
 
 	err = -EINVAL;
-	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
+	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
 		goto out;
 
 #if 0
@@ -1511,6 +1565,11 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
 		return -ENODEV;
 #endif
 
+	if (flags & MSG_ERRQUEUE) {
+		err = packet_recv_error(sk, msg, len);
+		goto out;
+	}
+
 	/*
 	 *	Call the generic datagram receiver. This handles all sorts
 	 *	of horrible races and re-entrancy so we can forget about it
-- 
cgit v1.2.3


From a79ff731a1b277d0e92d9453bdf374e04cec717a Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 13 Apr 2010 11:21:46 +0200
Subject: netfilter: xtables: make XT_ALIGN() usable in exported headers by
 exporting __ALIGN_KERNEL()

XT_ALIGN() was rewritten through ALIGN() by commit 42107f5009da223daa800d6da6904d77297ae829
"netfilter: xtables: symmetric COMPAT_XT_ALIGN definition".
ALIGN() is not exported in userspace headers, which created a compile problem
for tc(8) and will create a problem for iptables(8).

We can't export the generic-looking name ALIGN(), but we can export the less
generic __ALIGN_KERNEL() (suggested by Ben Hutchings).
Google knows nothing about __ALIGN_KERNEL().

COMPAT_XT_ALIGN() changed for symmetry.

Reported-by: Andreas Henriksson <andreas@fatal.se>
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/kernel.h             | 5 +++--
 include/linux/netfilter/x_tables.h | 6 +++---
 2 files changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 7f0707463360..284ea995646e 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -4,6 +4,8 @@
 /*
  * 'kernel.h' contains some often-used function prototypes etc
  */
+#define __ALIGN_KERNEL(x, a)		__ALIGN_KERNEL_MASK(x, (typeof(x))(a) - 1)
+#define __ALIGN_KERNEL_MASK(x, mask)	(((x) + (mask)) & ~(mask))
 
 #ifdef __KERNEL__
 
@@ -37,8 +39,7 @@ extern const char linux_proc_banner[];
 
 #define STACK_MAGIC	0xdeadbeef
 
-#define ALIGN(x,a)		__ALIGN_MASK(x,(typeof(x))(a)-1)
-#define __ALIGN_MASK(x,mask)	(((x)+(mask))&~(mask))
+#define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
 #define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
 
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 1a65d45ee4f5..26ced0c323a5 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -1,6 +1,6 @@
 #ifndef _X_TABLES_H
 #define _X_TABLES_H
-
+#include <linux/kernel.h>
 #include <linux/types.h>
 
 #define XT_FUNCTION_MAXNAMELEN 30
@@ -93,7 +93,7 @@ struct _xt_align {
 	__u64 u64;
 };
 
-#define XT_ALIGN(s) ALIGN((s), __alignof__(struct _xt_align))
+#define XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _xt_align))
 
 /* Standard return verdict, or do jump. */
 #define XT_STANDARD_TARGET ""
@@ -603,7 +603,7 @@ struct _compat_xt_align {
 	compat_u64 u64;
 };
 
-#define COMPAT_XT_ALIGN(s) ALIGN((s), __alignof__(struct _compat_xt_align))
+#define COMPAT_XT_ALIGN(s) __ALIGN_KERNEL((s), __alignof__(struct _compat_xt_align))
 
 extern void xt_compat_lock(u_int8_t af);
 extern void xt_compat_unlock(u_int8_t af);
-- 
cgit v1.2.3


From cd58950a5345f006a318f178705b9250aa54425c Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Fri, 9 Apr 2010 10:01:37 +0000
Subject: skbuff: remove unused dev_consume_skb macro definition

dev_consume_skb and kfree_skb_clean have no users and in the case of
kfree_skb_clean could cause potential build issues since I cannot find
where it is defined.  Based on the patch in which it was introduced it
appears to have been a bit of leftover code from an earlier version of the
patch in which kfree_skb_clean was dropped in favor of consume_skb.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index cf42f194616e..38501d20650c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -470,7 +470,6 @@ extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
 				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
-#define dev_consume_skb(a)	kfree_skb_clean(a)
 extern void	      skb_over_panic(struct sk_buff *skb, int len,
 				     void *here);
 extern void	      skb_under_panic(struct sk_buff *skb, int len,
-- 
cgit v1.2.3


From 829e0015431537176e38812f88fffe1d3250083e Mon Sep 17 00:00:00 2001
From: "Hans J. Koch" <hjk@linutronix.de>
Date: Tue, 13 Apr 2010 00:03:25 +0000
Subject: Fix some #includes in CAN drivers (rebased for net-next-2.6)

In the current implementation, CAN drivers need to #include <linux/can.h>
_before_ they #include <linux/can/dev.h>, which is both ugly and
unnecessary.

Fix this by including <linux/can.h> in <linux/can/dev.h> and removing the
#include <linux/can.h> lines from drivers.

Signed-off-by: Hans J. Koch <hjk@linutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/can/at91_can.c                    | 1 -
 drivers/net/can/bfin_can.c                    | 1 -
 drivers/net/can/mcp251x.c                     | 1 -
 drivers/net/can/mscan/mpc5xxx_can.c           | 1 -
 drivers/net/can/mscan/mscan.c                 | 1 -
 drivers/net/can/sja1000/ems_pci.c             | 1 -
 drivers/net/can/sja1000/kvaser_pci.c          | 1 -
 drivers/net/can/sja1000/plx_pci.c             | 1 -
 drivers/net/can/sja1000/sja1000.c             | 1 -
 drivers/net/can/sja1000/sja1000_isa.c         | 1 -
 drivers/net/can/sja1000/sja1000_of_platform.c | 1 -
 drivers/net/can/sja1000/sja1000_platform.c    | 1 -
 drivers/net/can/ti_hecc.c                     | 1 -
 include/linux/can/dev.h                       | 1 +
 14 files changed, 1 insertion(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c
index a2f29a38798a..3f7ceaebd687 100644
--- a/drivers/net/can/at91_can.c
+++ b/drivers/net/can/at91_can.c
@@ -35,7 +35,6 @@
 #include <linux/string.h>
 #include <linux/types.h>
 
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 
diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c
index 03489864376d..d77264ad326d 100644
--- a/drivers/net/can/bfin_can.c
+++ b/drivers/net/can/bfin_can.c
@@ -18,7 +18,6 @@
 #include <linux/skbuff.h>
 #include <linux/platform_device.h>
 
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 
diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c
index eb898515352c..8431eb08075d 100644
--- a/drivers/net/can/mcp251x.c
+++ b/drivers/net/can/mcp251x.c
@@ -58,7 +58,6 @@
  *
  */
 
-#include <linux/can.h>
 #include <linux/can/core.h>
 #include <linux/can/dev.h>
 #include <linux/can/platform/mcp251x.h>
diff --git a/drivers/net/can/mscan/mpc5xxx_can.c b/drivers/net/can/mscan/mpc5xxx_can.c
index 03e7c48465a2..225fd147774a 100644
--- a/drivers/net/can/mscan/mpc5xxx_can.c
+++ b/drivers/net/can/mscan/mpc5xxx_can.c
@@ -25,7 +25,6 @@
 #include <linux/interrupt.h>
 #include <linux/platform_device.h>
 #include <linux/netdevice.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/of_platform.h>
 #include <sysdev/fsl_soc.h>
diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c
index 6b7dd578d417..64c378cd0c34 100644
--- a/drivers/net/can/mscan/mscan.c
+++ b/drivers/net/can/mscan/mscan.c
@@ -28,7 +28,6 @@
 #include <linux/if_arp.h>
 #include <linux/if_ether.h>
 #include <linux/list.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 #include <linux/io.h>
diff --git a/drivers/net/can/sja1000/ems_pci.c b/drivers/net/can/sja1000/ems_pci.c
index 5f53da0bc40c..36f4f9780c30 100644
--- a/drivers/net/can/sja1000/ems_pci.c
+++ b/drivers/net/can/sja1000/ems_pci.c
@@ -24,7 +24,6 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/io.h>
 
diff --git a/drivers/net/can/sja1000/kvaser_pci.c b/drivers/net/can/sja1000/kvaser_pci.c
index 441e776a7f59..ed004cebd31f 100644
--- a/drivers/net/can/sja1000/kvaser_pci.c
+++ b/drivers/net/can/sja1000/kvaser_pci.c
@@ -36,7 +36,6 @@
 #include <linux/netdevice.h>
 #include <linux/delay.h>
 #include <linux/pci.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/io.h>
 
diff --git a/drivers/net/can/sja1000/plx_pci.c b/drivers/net/can/sja1000/plx_pci.c
index d5efd68085fd..437b5c716a24 100644
--- a/drivers/net/can/sja1000/plx_pci.c
+++ b/drivers/net/can/sja1000/plx_pci.c
@@ -27,7 +27,6 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pci.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/io.h>
 
diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c
index 145b1a731a53..618c11222abc 100644
--- a/drivers/net/can/sja1000/sja1000.c
+++ b/drivers/net/can/sja1000/sja1000.c
@@ -60,7 +60,6 @@
 #include <linux/skbuff.h>
 #include <linux/delay.h>
 
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 
diff --git a/drivers/net/can/sja1000/sja1000_isa.c b/drivers/net/can/sja1000/sja1000_isa.c
index a6a51f155962..496223e9e2fc 100644
--- a/drivers/net/can/sja1000/sja1000_isa.c
+++ b/drivers/net/can/sja1000/sja1000_isa.c
@@ -23,7 +23,6 @@
 #include <linux/delay.h>
 #include <linux/irq.h>
 #include <linux/io.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/platform/sja1000.h>
 
diff --git a/drivers/net/can/sja1000/sja1000_of_platform.c b/drivers/net/can/sja1000/sja1000_of_platform.c
index 9dd076a626a5..34e79efbd2fc 100644
--- a/drivers/net/can/sja1000/sja1000_of_platform.c
+++ b/drivers/net/can/sja1000/sja1000_of_platform.c
@@ -38,7 +38,6 @@
 #include <linux/interrupt.h>
 #include <linux/netdevice.h>
 #include <linux/delay.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 
 #include <linux/of_platform.h>
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index 1083b42ab6cb..b65cabb361ab 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -24,7 +24,6 @@
 #include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/irq.h>
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/platform/sja1000.h>
 #include <linux/io.h>
diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c
index 0c3d2ba0d178..4d07f1ee7168 100644
--- a/drivers/net/can/ti_hecc.c
+++ b/drivers/net/can/ti_hecc.c
@@ -47,7 +47,6 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 
-#include <linux/can.h>
 #include <linux/can/dev.h>
 #include <linux/can/error.h>
 #include <linux/can/platform/ti_hecc.h>
diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h
index 6e5a7f00223d..cc0bb4961669 100644
--- a/include/linux/can/dev.h
+++ b/include/linux/can/dev.h
@@ -14,6 +14,7 @@
 #ifndef CAN_DEV_H
 #define CAN_DEV_H
 
+#include <linux/can.h>
 #include <linux/can/netlink.h>
 #include <linux/can/error.h>
 
-- 
cgit v1.2.3


From acbbc07145b919248c410e1852b953d385be5c97 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sun, 11 Apr 2010 06:56:11 +0000
Subject: net: uninline skb_bond_should_drop()

skb_bond_should_drop() is too big to be inlined.

This patch reduces kernel text size, and its compilation time as well
(shrinking include/linux/netdevice.h)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 48 ++++------------------------------------------
 net/core/dev.c            | 49 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+), 44 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index d1a21b576a40..470f7c951afb 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2089,54 +2089,14 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
 	dev->gso_max_size = size;
 }
 
-static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
-					      struct net_device *master)
-{
-	if (skb->pkt_type == PACKET_HOST) {
-		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
-
-		memcpy(dest, master->dev_addr, ETH_ALEN);
-	}
-}
+extern int __skb_bond_should_drop(struct sk_buff *skb,
+				  struct net_device *master);
 
-/* On bonding slaves other than the currently active slave, suppress
- * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
- * ARP on active-backup slaves with arp_validate enabled.
- */
 static inline int skb_bond_should_drop(struct sk_buff *skb,
 				       struct net_device *master)
 {
-	if (master) {
-		struct net_device *dev = skb->dev;
-
-		if (master->priv_flags & IFF_MASTER_ARPMON)
-			dev->last_rx = jiffies;
-
-		if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
-			/* Do address unmangle. The local destination address
-			 * will be always the one master has. Provides the right
-			 * functionality in a bridge.
-			 */
-			skb_bond_set_mac_by_master(skb, master);
-		}
-
-		if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
-			if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
-			    skb->protocol == __cpu_to_be16(ETH_P_ARP))
-				return 0;
-
-			if (master->priv_flags & IFF_MASTER_ALB) {
-				if (skb->pkt_type != PACKET_BROADCAST &&
-				    skb->pkt_type != PACKET_MULTICAST)
-					return 0;
-			}
-			if (master->priv_flags & IFF_MASTER_8023AD &&
-			    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
-				return 0;
-
-			return 1;
-		}
-	}
+	if (master)
+		return __skb_bond_should_drop(skb, master);
 	return 0;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index ca4cdef74a1b..876b1112d5ba 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2646,6 +2646,55 @@ void netif_nit_deliver(struct sk_buff *skb)
 	rcu_read_unlock();
 }
 
+static inline void skb_bond_set_mac_by_master(struct sk_buff *skb,
+					      struct net_device *master)
+{
+	if (skb->pkt_type == PACKET_HOST) {
+		u16 *dest = (u16 *) eth_hdr(skb)->h_dest;
+
+		memcpy(dest, master->dev_addr, ETH_ALEN);
+	}
+}
+
+/* On bonding slaves other than the currently active slave, suppress
+ * duplicates except for 802.3ad ETH_P_SLOW, alb non-mcast/bcast, and
+ * ARP on active-backup slaves with arp_validate enabled.
+ */
+int __skb_bond_should_drop(struct sk_buff *skb, struct net_device *master)
+{
+	struct net_device *dev = skb->dev;
+
+	if (master->priv_flags & IFF_MASTER_ARPMON)
+		dev->last_rx = jiffies;
+
+	if ((master->priv_flags & IFF_MASTER_ALB) && master->br_port) {
+		/* Do address unmangle. The local destination address
+		 * will be always the one master has. Provides the right
+		 * functionality in a bridge.
+		 */
+		skb_bond_set_mac_by_master(skb, master);
+	}
+
+	if (dev->priv_flags & IFF_SLAVE_INACTIVE) {
+		if ((dev->priv_flags & IFF_SLAVE_NEEDARP) &&
+		    skb->protocol == __cpu_to_be16(ETH_P_ARP))
+			return 0;
+
+		if (master->priv_flags & IFF_MASTER_ALB) {
+			if (skb->pkt_type != PACKET_BROADCAST &&
+			    skb->pkt_type != PACKET_MULTICAST)
+				return 0;
+		}
+		if (master->priv_flags & IFF_MASTER_8023AD &&
+		    skb->protocol == __cpu_to_be16(ETH_P_SLOW))
+			return 0;
+
+		return 1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(__skb_bond_should_drop);
+
 static int __netif_receive_skb(struct sk_buff *skb)
 {
 	struct packet_type *ptype, *pt_prev;
-- 
cgit v1.2.3


From 6a867a395558a7f882d041783e4cdea6744ca2bf Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Tue, 6 Apr 2010 14:30:51 -0700
Subject: time: Remove xtime_cache
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

With the earlier logarithmic time accumulation patch, xtime will now
always be within one "tick" of the current time, instead of possibly
half a second off.

This removes the need for the xtime_cache value, which always stored the
time at the last interrupt, so this patch cleans that up removing the
xtime_cache related code.

This patch also addresses an issue with an earlier version of this change,
where xtime_cache was normalizing xtime, which could in some cases be
invalid (i.e. tv_nsec == NSEC_PER_SEC). This is fixed by handling
the edge case in update_wall_time().

Signed-off-by: John Stultz <johnstul@us.ibm.com>
Cc: Petr Titěra <P.Titera@century.cz>
LKML-Reference: <1270589451-30773-1-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/time.h      |  1 -
 kernel/time/timekeeping.c | 35 ++++++++++++++++-------------------
 2 files changed, 16 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/time.h b/include/linux/time.h
index 6e026e45a179..ea3559f0b3f2 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -150,7 +150,6 @@ extern struct timespec timespec_trunc(struct timespec t, unsigned gran);
 extern int timekeeping_valid_for_hres(void);
 extern u64 timekeeping_max_deferment(void);
 extern void update_wall_time(void);
-extern void update_xtime_cache(u64 nsec);
 extern void timekeeping_leap_insert(int leapsecond);
 
 struct tms;
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 16736379a9ca..1137f245a4ba 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -165,13 +165,6 @@ struct timespec raw_time;
 /* flag for if timekeeping is suspended */
 int __read_mostly timekeeping_suspended;
 
-static struct timespec xtime_cache __attribute__ ((aligned (16)));
-void update_xtime_cache(u64 nsec)
-{
-	xtime_cache = xtime;
-	timespec_add_ns(&xtime_cache, nsec);
-}
-
 /* must hold xtime_lock */
 void timekeeping_leap_insert(int leapsecond)
 {
@@ -332,8 +325,6 @@ int do_settimeofday(struct timespec *tv)
 
 	xtime = *tv;
 
-	update_xtime_cache(0);
-
 	timekeeper.ntp_error = 0;
 	ntp_clear();
 
@@ -559,7 +550,6 @@ void __init timekeeping_init(void)
 	}
 	set_normalized_timespec(&wall_to_monotonic,
 				-boot.tv_sec, -boot.tv_nsec);
-	update_xtime_cache(0);
 	total_sleep_time.tv_sec = 0;
 	total_sleep_time.tv_nsec = 0;
 	write_sequnlock_irqrestore(&xtime_lock, flags);
@@ -593,7 +583,6 @@ static int timekeeping_resume(struct sys_device *dev)
 		wall_to_monotonic = timespec_sub(wall_to_monotonic, ts);
 		total_sleep_time = timespec_add_safe(total_sleep_time, ts);
 	}
-	update_xtime_cache(0);
 	/* re-base the last cycle value */
 	timekeeper.clock->cycle_last = timekeeper.clock->read(timekeeper.clock);
 	timekeeper.ntp_error = 0;
@@ -788,7 +777,6 @@ void update_wall_time(void)
 {
 	struct clocksource *clock;
 	cycle_t offset;
-	u64 nsecs;
 	int shift = 0, maxshift;
 
 	/* Make sure we're fully resumed: */
@@ -846,7 +834,9 @@ void update_wall_time(void)
 		timekeeper.ntp_error += neg << timekeeper.ntp_error_shift;
 	}
 
-	/* store full nanoseconds into xtime after rounding it up and
+
+	/*
+	 * Store full nanoseconds into xtime after rounding it up and
 	 * add the remainder to the error difference.
 	 */
 	xtime.tv_nsec =	((s64) timekeeper.xtime_nsec >> timekeeper.shift) + 1;
@@ -854,8 +844,15 @@ void update_wall_time(void)
 	timekeeper.ntp_error +=	timekeeper.xtime_nsec <<
 				timekeeper.ntp_error_shift;
 
-	nsecs = clocksource_cyc2ns(offset, timekeeper.mult, timekeeper.shift);
-	update_xtime_cache(nsecs);
+	/*
+	 * Finally, make sure that after the rounding
+	 * xtime.tv_nsec isn't larger than NSEC_PER_SEC
+	 */
+	if (unlikely(xtime.tv_nsec >= NSEC_PER_SEC)) {
+		xtime.tv_nsec -= NSEC_PER_SEC;
+		xtime.tv_sec++;
+		second_overflow();
+	}
 
 	/* check to see if there is a new clocksource to use */
 	update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
@@ -895,13 +892,13 @@ EXPORT_SYMBOL_GPL(monotonic_to_bootbased);
 
 unsigned long get_seconds(void)
 {
-	return xtime_cache.tv_sec;
+	return xtime.tv_sec;
 }
 EXPORT_SYMBOL(get_seconds);
 
 struct timespec __current_kernel_time(void)
 {
-	return xtime_cache;
+	return xtime;
 }
 
 struct timespec current_kernel_time(void)
@@ -912,7 +909,7 @@ struct timespec current_kernel_time(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		now = xtime_cache;
+		now = xtime;
 	} while (read_seqretry(&xtime_lock, seq));
 
 	return now;
@@ -927,7 +924,7 @@ struct timespec get_monotonic_coarse(void)
 	do {
 		seq = read_seqbegin(&xtime_lock);
 
-		now = xtime_cache;
+		now = xtime;
 		mono = wall_to_monotonic;
 	} while (read_seqretry(&xtime_lock, seq));
 
-- 
cgit v1.2.3


From 9f93ff5be54108066372d1c4100c515d9d9acc1b Mon Sep 17 00:00:00 2001
From: Alexey Dobriyan <adobriyan@gmail.com>
Date: Tue, 13 Apr 2010 14:09:15 +0200
Subject: Restore __ALIGN_MASK()

Fix lib/bitmap.c compile failure due to __ALIGN_KERNEL changes.

Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/kernel.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 284ea995646e..db6717d0fd6f 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -40,6 +40,7 @@ extern const char linux_proc_banner[];
 #define STACK_MAGIC	0xdeadbeef
 
 #define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
+#define __ALIGN_MASK(x, mask)	__ALIGN_KERNEL_MASK((x), (mask))
 #define PTR_ALIGN(p, a)		((typeof(p))ALIGN((unsigned long)(p), (a)))
 #define IS_ALIGNED(x, a)		(((x) & ((typeof(x))(a) - 1)) == 0)
 
-- 
cgit v1.2.3


From 9c6eb28aca52d562f3ffbaebaa56385df9972a43 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 13 Apr 2010 15:32:16 +0200
Subject: netfilter: ipv6: add IPSKB_REROUTED exclusion to NF_HOOK/POSTROUTING
 invocation

Similar to how IPv4's ip_output.c works, have ip6_output also check
the IPSKB_REROUTED flag. It will be set from xt_TEE for cloned packets
since Xtables can currently only deal with a single packet in flight
at a time.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Acked-by: David S. Miller <davem@davemloft.net>
[Patrick: changed to use an IP6SKB value instead of IPSKB]
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ipv6.h  | 1 +
 net/ipv6/ip6_output.c | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0cc9a7db2b5..7bdf6ffe2b49 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -250,6 +250,7 @@ struct inet6_skb_parm {
 
 #define IP6SKB_XFRM_TRANSFORMED	1
 #define IP6SKB_FORWARDED	2
+#define IP6SKB_REROUTED		4
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 236ac7813744..c10a38a71a5e 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -172,8 +172,9 @@ int ip6_output(struct sk_buff *skb)
 		return 0;
 	}
 
-	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
-		       ip6_finish_output);
+	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
+			    ip6_finish_output,
+			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
 
 /*
-- 
cgit v1.2.3


From ae731f8d0785ccd3380f511bae888933b6562e45 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <maz@misterjones.org>
Date: Mon, 15 Mar 2010 22:56:33 +0000
Subject: genirq: Introduce request_any_context_irq()

Now that we enjoy threaded interrupts, we're starting to see irq_chip
implementations (wm831x, pca953x) that make use of threaded interrupts
for the controller, and nested interrupts for the client interrupt. It
all works very well, with one drawback:

Drivers requesting an IRQ must now know whether the handler will
run in a thread context or not, and call request_threaded_irq() or
request_irq() accordingly.

The problem is that the requesting driver sometimes doesn't know
about the nature of the interrupt, especially when the interrupt
controller is a discrete chip (typically a GPIO expander connected
over I2C) that can be connected to a wide variety of otherwise perfectly
supported hardware.

This patch introduces the request_any_context_irq() function that mostly
mimics the usual request_irq(), except that it checks whether the irq
level is configured as nested or not, and calls the right backend.
On success, it also returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.

[ tglx: Made return value an enum, simplified code and made the export
  	of request_any_context_irq GPL ]

Signed-off-by: Marc Zyngier <maz@misterjones.org>
Cc: <joachim.eastwood@jotron.com>
LKML-Reference: <927ea285bd0c68934ddae1a47e44a9ba@localhost>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h | 23 +++++++++++++++++++++++
 kernel/irq/manage.c       | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 75f3f00ac1e5..d7e7a7660c6c 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -77,6 +77,18 @@ enum {
 	IRQTF_AFFINITY,
 };
 
+/**
+ * These values can be returned by request_any_context_irq() and
+ * describe the context the interrupt will be run in.
+ *
+ * IRQC_IS_HARDIRQ - interrupt runs in hardirq context
+ * IRQC_IS_NESTED - interrupt runs in a nested threaded context
+ */
+enum {
+	IRQC_IS_HARDIRQ	= 0,
+	IRQC_IS_NESTED,
+};
+
 typedef irqreturn_t (*irq_handler_t)(int, void *);
 
 /**
@@ -120,6 +132,10 @@ request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags,
 	return request_threaded_irq(irq, handler, NULL, flags, name, dev);
 }
 
+extern int __must_check
+request_any_context_irq(unsigned int irq, irq_handler_t handler,
+			unsigned long flags, const char *name, void *dev_id);
+
 extern void exit_irq_thread(void);
 #else
 
@@ -141,6 +157,13 @@ request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	return request_irq(irq, handler, flags, name, dev);
 }
 
+static inline int __must_check
+request_any_context_irq(unsigned int irq, irq_handler_t handler,
+			unsigned long flags, const char *name, void *dev_id)
+{
+	return request_irq(irq, handler, flags, name, dev_id);
+}
+
 static inline void exit_irq_thread(void) { }
 #endif
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 704e488730a5..84f32278ff1f 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -1120,3 +1120,40 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	return retval;
 }
 EXPORT_SYMBOL(request_threaded_irq);
+
+/**
+ *	request_any_context_irq - allocate an interrupt line
+ *	@irq: Interrupt line to allocate
+ *	@handler: Function to be called when the IRQ occurs.
+ *		  Threaded handler for threaded interrupts.
+ *	@flags: Interrupt type flags
+ *	@name: An ascii name for the claiming device
+ *	@dev_id: A cookie passed back to the handler function
+ *
+ *	This call allocates interrupt resources and enables the
+ *	interrupt line and IRQ handling. It selects either a
+ *	hardirq or threaded handling method depending on the
+ *	context.
+ *
+ *	On failure, it returns a negative value. On success,
+ *	it returns either IRQC_IS_HARDIRQ or IRQC_IS_NESTED.
+ */
+int request_any_context_irq(unsigned int irq, irq_handler_t handler,
+			    unsigned long flags, const char *name, void *dev_id)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	int ret;
+
+	if (!desc)
+		return -EINVAL;
+
+	if (desc->status & IRQ_NESTED_THREAD) {
+		ret = request_threaded_irq(irq, NULL, handler,
+					   flags, name, dev_id);
+		return !ret ? IRQC_IS_NESTED : ret;
+	}
+
+	ret = request_irq(irq, handler, flags, name, dev_id);
+	return !ret ? IRQC_IS_HARDIRQ : ret;
+}
+EXPORT_SYMBOL_GPL(request_any_context_irq);
-- 
cgit v1.2.3


From 6932bf37bed45ce8ed531928b1b0f98162fe6df6 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 26 Mar 2010 00:06:55 +0000
Subject: genirq: Remove IRQF_DISABLED from core code

Remove all code which is related to IRQF_DISABLED from the core kernel
code. IRQF_DISABLED still exists as a flag, but becomes a NOOP and
will be removed after a grace period. That way we can easily revert to
the previous behaviour by just restoring the core code.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Alan Cox <alan@lxorguk.ukuu.org.uk>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Miller <davem@davemloft.net>
Cc: Greg Kroah-Hartman <gregkh@suse.de>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Linus Torvalds <torvalds@osdl.org>
LKML-Reference: <20100326000405.991244690@linutronix.de>
---
 Documentation/feature-removal-schedule.txt |  7 +++++++
 include/linux/interrupt.h                  |  3 ++-
 kernel/irq/manage.c                        | 30 ------------------------------
 3 files changed, 9 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt
index ed511af0f79a..9c31c2e63684 100644
--- a/Documentation/feature-removal-schedule.txt
+++ b/Documentation/feature-removal-schedule.txt
@@ -589,3 +589,10 @@ Why:	Useful in 2003, implementation is a hack.
 	Generally invoked by accident today.
 	Seen as doing more harm than good.
 Who:	Len Brown <len.brown@intel.com>
+
+----------------------------
+
+What:	IRQF_DISABLED
+When:	2.6.36
+Why:	The flag is a NOOP as we run interrupt handlers with interrupts disabled
+Who:	Thomas Gleixner <tglx@linutronix.de>
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index d7e7a7660c6c..e6d2f4441fda 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -39,7 +39,8 @@
  * These flags used only by the kernel as part of the
  * irq handling routines.
  *
- * IRQF_DISABLED - keep irqs disabled when calling the action handler
+ * IRQF_DISABLED - keep irqs disabled when calling the action handler.
+ *                 DEPRECATED. This flag is a NOOP and scheduled to be removed
  * IRQF_SAMPLE_RANDOM - irq is used to feed the random generator
  * IRQF_SHARED - allow sharing the irq among several devices
  * IRQF_PROBE_SHARED - set by callers when they expect sharing mismatches to occur
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 84f32278ff1f..444d5a81a209 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -757,16 +757,6 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new)
 		if (new->flags & IRQF_ONESHOT)
 			desc->status |= IRQ_ONESHOT;
 
-		/*
-		 * Force MSI interrupts to run with interrupts
-		 * disabled. The multi vector cards can cause stack
-		 * overflows due to nested interrupts when enough of
-		 * them are directed to a core and fire at the same
-		 * time.
-		 */
-		if (desc->msi_desc)
-			new->flags |= IRQF_DISABLED;
-
 		if (!(desc->status & IRQ_NOAUTOEN)) {
 			desc->depth = 0;
 			desc->status &= ~IRQ_DISABLED;
@@ -1027,7 +1017,6 @@ EXPORT_SYMBOL(free_irq);
  *	Flags:
  *
  *	IRQF_SHARED		Interrupt is shared
- *	IRQF_DISABLED	Disable local interrupts while processing
  *	IRQF_SAMPLE_RANDOM	The interrupt can be used for entropy
  *	IRQF_TRIGGER_*		Specify active edge(s) or level
  *
@@ -1040,25 +1029,6 @@ int request_threaded_irq(unsigned int irq, irq_handler_t handler,
 	struct irq_desc *desc;
 	int retval;
 
-	/*
-	 * handle_IRQ_event() always ignores IRQF_DISABLED except for
-	 * the _first_ irqaction (sigh).  That can cause oopsing, but
-	 * the behavior is classified as "will not fix" so we need to
-	 * start nudging drivers away from using that idiom.
-	 */
-	if ((irqflags & (IRQF_SHARED|IRQF_DISABLED)) ==
-					(IRQF_SHARED|IRQF_DISABLED)) {
-		pr_warning(
-		  "IRQ %d/%s: IRQF_DISABLED is not guaranteed on shared IRQs\n",
-			irq, devname);
-	}
-
-#ifdef CONFIG_LOCKDEP
-	/*
-	 * Lockdep wants atomic interrupt handlers:
-	 */
-	irqflags |= IRQF_DISABLED;
-#endif
 	/*
 	 * Sanity-check: shared interrupts must pass in a real dev-ID,
 	 * otherwise we'll have trouble later trying to figure out
-- 
cgit v1.2.3


From 0f87b1dd01b51dc3c789f7a212656a4a87eee1bd Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:17 +0000
Subject: net: fib_rules: decouple address families from real address families

Decouple the address family values used for fib_rules from the real
address families in socket.h. This allows fib_rules to be used for
code that is not a real address family without increasing AF_MAX/NPROTO.

Values up to 127 are reserved for real address families and map directly
to the corresponding AF value, values starting from 128 are for other
uses. rtnetlink is changed to invoke the AF_UNSPEC dumpit/doit handlers
for these families.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |  7 +++++++
 net/core/rtnetlink.c      | 15 ++++++++++-----
 net/decnet/dn_rules.c     |  2 +-
 net/ipv4/fib_rules.c      |  2 +-
 net/ipv6/fib6_rules.c     |  2 +-
 5 files changed, 20 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 51da65b68b85..405e41139a4d 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -15,6 +15,13 @@
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
 
+/* fib_rules families. values up to 127 are reserved for real address
+ * families, values from 128 upwards may be used arbitrarily.
+ */
+#define FIB_RULES_IPV4		AF_INET
+#define FIB_RULES_IPV6		AF_INET6
+#define FIB_RULES_DECNET	AF_DECnet
+
 struct fib_rule_hdr {
 	__u8		family;
 	__u8		dst_len;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index bf919b6acea2..78c85985cb30 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -118,7 +118,11 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol < NPROTO)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].doit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -129,7 +133,11 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	tab = rtnl_msg_handlers[protocol];
+	if (protocol < NPROTO)
+		tab = rtnl_msg_handlers[protocol];
+	else
+		tab = NULL;
+
 	if (tab == NULL || tab[msgindex].dumpit == NULL)
 		tab = rtnl_msg_handlers[PF_UNSPEC];
 
@@ -1444,9 +1452,6 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)
 		return 0;
 
 	family = ((struct rtgenmsg *)NLMSG_DATA(nlh))->rtgen_family;
-	if (family >= NPROTO)
-		return -EAFNOSUPPORT;
-
 	sz_idx = type>>2;
 	kind = type&3;
 
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 1c8cc6d5b645..af28dcc21844 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static struct fib_rules_ops dn_fib_rules_ops_template = {
-	.family		= AF_DECnet,
+	.family		= FIB_RULES_DECNET,
 	.rule_size	= sizeof(struct dn_fib_rule),
 	.addr_size	= sizeof(u16),
 	.action		= dn_fib_rule_action,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index a18355e15111..3ec84fea5b71 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static struct fib_rules_ops fib4_rules_ops_template = {
-	.family		= AF_INET,
+	.family		= FIB_RULES_IPV4,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 92b2b7fb6c3d..8124f16f2ac2 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 }
 
 static struct fib_rules_ops fib6_rules_ops_template = {
-	.family			= AF_INET6,
+	.family			= FIB_RULES_IPV6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
-- 
cgit v1.2.3


From d658f8a0e63b6476148162aa7a3ffffc58dcad52 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:20 +0000
Subject: ipv4: ipmr: remove net pointer from struct mfc_cache

Now that cache entries in unres_queue don't need to be distinguished by their
network namespace pointer anymore, we can remove it from struct mfc_cache
and pass the namespace as a function argument to the functions that need it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h | 15 ------------
 net/ipv4/ipmr.c        | 65 +++++++++++++++++++++++++-------------------------
 2 files changed, 32 insertions(+), 48 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index c5f3d53548e2..de7780a6dd32 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -192,9 +192,6 @@ struct vif_device {
 
 struct mfc_cache {
 	struct mfc_cache *next;			/* Next entry on cache line 	*/
-#ifdef CONFIG_NET_NS
-	struct net *mfc_net;
-#endif
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
 	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
@@ -217,18 +214,6 @@ struct mfc_cache {
 	} mfc_un;
 };
 
-static inline
-struct net *mfc_net(const struct mfc_cache *mfc)
-{
-	return read_pnet(&mfc->mfc_net);
-}
-
-static inline
-void mfc_net_set(struct mfc_cache *mfc, struct net *net)
-{
-	write_pnet(&mfc->mfc_net, hold_net(net));
-}
-
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index d6aa65e2b08f..f8e25c8ba070 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -93,10 +93,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local);
+static int ip_mr_forward(struct net *net, struct sk_buff *skb,
+			 struct mfc_cache *cache, int local);
 static int ipmr_cache_report(struct net *net,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
-static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
+static int ipmr_fill_mroute(struct net *net, struct sk_buff *skb,
+			    struct mfc_cache *c, struct rtmsg *rtm);
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
@@ -325,7 +327,6 @@ static int vif_delete(struct net *net, int vifi, int notify,
 
 static inline void ipmr_cache_free(struct mfc_cache *c)
 {
-	release_net(mfc_net(c));
 	kmem_cache_free(mrt_cachep, c);
 }
 
@@ -333,11 +334,10 @@ static inline void ipmr_cache_free(struct mfc_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ipmr_destroy_unres(struct mfc_cache *c)
+static void ipmr_destroy_unres(struct net *net, struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
-	struct net *net = mfc_net(c);
 
 	atomic_dec(&net->ipv4.cache_resolve_queue_len);
 
@@ -392,7 +392,7 @@ static void ipmr_expire_process(unsigned long arg)
 
 		*cp = c->next;
 
-		ipmr_destroy_unres(c);
+		ipmr_destroy_unres(net, c);
 	}
 
 	if (net->ipv4.mfc_unres_queue != NULL)
@@ -404,10 +404,10 @@ out:
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ipmr_update_thresholds(struct mfc_cache *cache, unsigned char *ttls)
+static void ipmr_update_thresholds(struct net *net, struct mfc_cache *cache,
+				   unsigned char *ttls)
 {
 	int vifi;
-	struct net *net = mfc_net(cache);
 
 	cache->mfc_un.res.minvif = MAXVIFS;
 	cache->mfc_un.res.maxvif = 0;
@@ -547,24 +547,22 @@ static struct mfc_cache *ipmr_cache_find(struct net *net,
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc_cache *ipmr_cache_alloc(struct net *net)
+static struct mfc_cache *ipmr_cache_alloc(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXVIFS;
-	mfc_net_set(c, net);
 	return c;
 }
 
-static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
+static struct mfc_cache *ipmr_cache_alloc_unres(void)
 {
 	struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10*HZ;
-	mfc_net_set(c, net);
 	return c;
 }
 
@@ -572,7 +570,8 @@ static struct mfc_cache *ipmr_cache_alloc_unres(struct net *net)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
+static void ipmr_cache_resolve(struct net *net, struct mfc_cache *uc,
+			       struct mfc_cache *c)
 {
 	struct sk_buff *skb;
 	struct nlmsgerr *e;
@@ -585,7 +584,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 		if (ip_hdr(skb)->version == 0) {
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
 
-			if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ipmr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = (skb_tail_pointer(skb) -
 						  (u8 *)nlh);
 			} else {
@@ -597,9 +596,9 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c)
 				memset(&e->msg, 0, sizeof(e->msg));
 			}
 
-			rtnl_unicast(skb, mfc_net(c), NETLINK_CB(skb).pid);
+			rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip_mr_forward(skb, c, 0);
+			ip_mr_forward(net, skb, c, 0);
 	}
 }
 
@@ -717,7 +716,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&net->ipv4.cache_resolve_queue_len) >= 10 ||
-		    (c = ipmr_cache_alloc_unres(net)) == NULL) {
+		    (c = ipmr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -814,7 +813,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	if (c != NULL) {
 		write_lock_bh(&mrt_lock);
 		c->mfc_parent = mfc->mfcc_parent;
-		ipmr_update_thresholds(c, mfc->mfcc_ttls);
+		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -824,14 +823,14 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
 		return -EINVAL;
 
-	c = ipmr_cache_alloc(net);
+	c = ipmr_cache_alloc();
 	if (c == NULL)
 		return -ENOMEM;
 
 	c->mfc_origin = mfc->mfcc_origin.s_addr;
 	c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
 	c->mfc_parent = mfc->mfcc_parent;
-	ipmr_update_thresholds(c, mfc->mfcc_ttls);
+	ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
@@ -859,7 +858,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
-		ipmr_cache_resolve(uc, c);
+		ipmr_cache_resolve(net, uc, c);
 		ipmr_cache_free(uc);
 	}
 	return 0;
@@ -910,7 +909,7 @@ static void mroute_clean_tables(struct net *net)
 		cp = &net->ipv4.mfc_unres_queue;
 		while ((c = *cp) != NULL) {
 			*cp = c->next;
-			ipmr_destroy_unres(c);
+			ipmr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1221,9 +1220,9 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
  *	Processing handlers for ipmr_forward
  */
 
-static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
+static void ipmr_queue_xmit(struct net *net, struct sk_buff *skb,
+			    struct mfc_cache *c, int vifi)
 {
-	struct net *net = mfc_net(c);
 	const struct iphdr *iph = ip_hdr(skb);
 	struct vif_device *vif = &net->ipv4.vif_table[vifi];
 	struct net_device *dev;
@@ -1335,11 +1334,11 @@ static int ipmr_find_vif(struct net_device *dev)
 
 /* "local" means that we should preserve one skb (for local delivery) */
 
-static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local)
+static int ip_mr_forward(struct net *net, struct sk_buff *skb,
+			 struct mfc_cache *cache, int local)
 {
 	int psend = -1;
 	int vif, ct;
-	struct net *net = mfc_net(cache);
 
 	vif = cache->mfc_parent;
 	cache->mfc_un.res.pkt++;
@@ -1396,7 +1395,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ipmr_queue_xmit(skb2, cache, psend);
+					ipmr_queue_xmit(net, skb2, cache, psend);
 			}
 			psend = ct;
 		}
@@ -1405,9 +1404,9 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
 		if (local) {
 			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 			if (skb2)
-				ipmr_queue_xmit(skb2, cache, psend);
+				ipmr_queue_xmit(net, skb2, cache, psend);
 		} else {
-			ipmr_queue_xmit(skb, cache, psend);
+			ipmr_queue_xmit(net, skb, cache, psend);
 			return 0;
 		}
 	}
@@ -1488,7 +1487,7 @@ int ip_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip_mr_forward(skb, cache, local);
+	ip_mr_forward(net, skb, cache, local);
 
 	read_unlock(&mrt_lock);
 
@@ -1602,11 +1601,11 @@ drop:
 #endif
 
 static int
-ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm)
+ipmr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc_cache *c,
+		 struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net *net = mfc_net(c);
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1686,7 +1685,7 @@ int ipmr_get_route(struct net *net,
 
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
-	err = ipmr_fill_mroute(skb, cache, rtm);
+	err = ipmr_fill_mroute(net, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3


From 862465f2e7e90975e7bf0ecfbb171dd3adedd950 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:21 +0000
Subject: ipv4: ipmr: convert struct mfc_cache to struct list_head

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mroute.h   |   2 +-
 include/net/netns/ipv4.h |   4 +-
 net/ipv4/ipmr.c          | 125 +++++++++++++++++++++++------------------------
 3 files changed, 64 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index de7780a6dd32..7ff6c77d6008 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -191,7 +191,7 @@ struct vif_device {
 #define VIFF_STATIC 0x8000
 
 struct mfc_cache {
-	struct mfc_cache *next;			/* Next entry on cache line 	*/
+	struct list_head list;
 	__be32 mfc_mcastgrp;			/* Group the entry belongs to 	*/
 	__be32 mfc_origin;			/* Source of packet 		*/
 	vifi_t mfc_parent;			/* Source interface		*/
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index b15e518f952a..5d06429968d5 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -61,8 +61,8 @@ struct netns_ipv4 {
 #ifdef CONFIG_IP_MROUTE
 	struct sock		*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
-	struct mfc_cache	*mfc_unres_queue;
-	struct mfc_cache	**mfc_cache_array;
+	struct list_head	mfc_unres_queue;
+	struct list_head	*mfc_cache_array;
 	struct vif_device	*vif_table;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index f8e25c8ba070..21b5edc2f343 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -367,35 +367,32 @@ static void ipmr_expire_process(unsigned long arg)
 	struct net *net = (struct net *)arg;
 	unsigned long now;
 	unsigned long expires;
-	struct mfc_cache *c, **cp;
+	struct mfc_cache *c, *next;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
 		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies+HZ/10);
 		return;
 	}
 
-	if (net->ipv4.mfc_unres_queue == NULL)
+	if (list_empty(&net->ipv4.mfc_unres_queue))
 		goto out;
 
 	now = jiffies;
 	expires = 10*HZ;
-	cp = &net->ipv4.mfc_unres_queue;
 
-	while ((c=*cp) != NULL) {
+	list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			unsigned long interval = c->mfc_un.unres.expires - now;
 			if (interval < expires)
 				expires = interval;
-			cp = &c->next;
 			continue;
 		}
 
-		*cp = c->next;
-
+		list_del(&c->list);
 		ipmr_destroy_unres(net, c);
 	}
 
-	if (net->ipv4.mfc_unres_queue != NULL)
+	if (!list_empty(&net->ipv4.mfc_unres_queue))
 		mod_timer(&net->ipv4.ipmr_expire_timer, jiffies + expires);
 
 out:
@@ -537,11 +534,11 @@ static struct mfc_cache *ipmr_cache_find(struct net *net,
 	int line = MFC_HASH(mcastgrp, origin);
 	struct mfc_cache *c;
 
-	for (c = net->ipv4.mfc_cache_array[line]; c; c = c->next) {
-		if (c->mfc_origin==origin && c->mfc_mcastgrp==mcastgrp)
-			break;
+	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
+		if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
+			return c;
 	}
-	return c;
+	return NULL;
 }
 
 /*
@@ -699,18 +696,21 @@ static int ipmr_cache_report(struct net *net,
 static int
 ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 {
+	bool found = false;
 	int err;
 	struct mfc_cache *c;
 	const struct iphdr *iph = ip_hdr(skb);
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c=net->ipv4.mfc_unres_queue; c; c=c->next) {
+	list_for_each_entry(c, &net->ipv4.mfc_unres_queue, list) {
 		if (c->mfc_mcastgrp == iph->daddr &&
-		    c->mfc_origin == iph->saddr)
+		    c->mfc_origin == iph->saddr) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c == NULL) {
+	if (!found) {
 		/*
 		 *	Create a new entry if allowable
 		 */
@@ -746,8 +746,7 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv4.cache_resolve_queue_len);
-		c->next = net->ipv4.mfc_unres_queue;
-		net->ipv4.mfc_unres_queue = c;
+		list_add(&c->list, &net->ipv4.mfc_unres_queue);
 
 		mod_timer(&net->ipv4.ipmr_expire_timer, c->mfc_un.unres.expires);
 	}
@@ -774,16 +773,15 @@ ipmr_cache_unresolved(struct net *net, vifi_t vifi, struct sk_buff *skb)
 static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 {
 	int line;
-	struct mfc_cache *c, **cp;
+	struct mfc_cache *c, *next;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &net->ipv4.mfc_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
 		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ipmr_cache_free(c);
@@ -795,22 +793,24 @@ static int ipmr_mfc_delete(struct net *net, struct mfcctl *mfc)
 
 static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 {
+	bool found = false;
 	int line;
-	struct mfc_cache *uc, *c, **cp;
+	struct mfc_cache *uc, *c;
 
 	if (mfc->mfcc_parent >= MAXVIFS)
 		return -ENFILE;
 
 	line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);
 
-	for (cp = &net->ipv4.mfc_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry(c, &net->ipv4.mfc_cache_array[line], list) {
 		if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
-		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr)
+		    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c != NULL) {
+	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mfc_parent = mfc->mfcc_parent;
 		ipmr_update_thresholds(net, c, mfc->mfcc_ttls);
@@ -835,8 +835,7 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = net->ipv4.mfc_cache_array[line];
-	net->ipv4.mfc_cache_array[line] = c;
+	list_add(&c->list, &net->ipv4.mfc_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -844,16 +843,15 @@ static int ipmr_mfc_add(struct net *net, struct mfcctl *mfc, int mrtsock)
 	 *	need to send on the frames and tidy up.
 	 */
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &net->ipv4.mfc_unres_queue; (uc=*cp) != NULL;
-	     cp = &uc->next) {
+	list_for_each_entry(uc, &net->ipv4.mfc_unres_queue, list) {
 		if (uc->mfc_origin == c->mfc_origin &&
 		    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
-			*cp = uc->next;
+			list_del(&uc->list);
 			atomic_dec(&net->ipv4.cache_resolve_queue_len);
 			break;
 		}
 	}
-	if (net->ipv4.mfc_unres_queue == NULL)
+	if (list_empty(&net->ipv4.mfc_unres_queue))
 		del_timer(&net->ipv4.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
@@ -872,6 +870,7 @@ static void mroute_clean_tables(struct net *net)
 {
 	int i;
 	LIST_HEAD(list);
+	struct mfc_cache *c, *next;
 
 	/*
 	 *	Shut down all active vif entries
@@ -885,17 +884,12 @@ static void mroute_clean_tables(struct net *net)
 	/*
 	 *	Wipe the cache
 	 */
-	for (i=0; i<MFC_LINES; i++) {
-		struct mfc_cache *c, **cp;
-
-		cp = &net->ipv4.mfc_cache_array[i];
-		while ((c = *cp) != NULL) {
-			if (c->mfc_flags&MFC_STATIC) {
-				cp = &c->next;
+	for (i = 0; i < MFC_LINES; i++) {
+		list_for_each_entry_safe(c, next, &net->ipv4.mfc_cache_array[i], list) {
+			if (c->mfc_flags&MFC_STATIC)
 				continue;
-			}
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ipmr_cache_free(c);
@@ -903,12 +897,9 @@ static void mroute_clean_tables(struct net *net)
 	}
 
 	if (atomic_read(&net->ipv4.cache_resolve_queue_len) != 0) {
-		struct mfc_cache *c, **cp;
-
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &net->ipv4.mfc_unres_queue;
-		while ((c = *cp) != NULL) {
-			*cp = c->next;
+		list_for_each_entry_safe(c, next, &net->ipv4.mfc_unres_queue, list) {
+			list_del(&c->list);
 			ipmr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
@@ -1789,7 +1780,7 @@ static const struct file_operations ipmr_vif_fops = {
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
-	struct mfc_cache **cache;
+	struct list_head *cache;
 	int ct;
 };
 
@@ -1799,18 +1790,18 @@ static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 {
 	struct mfc_cache *mfc;
 
-	it->cache = net->ipv4.mfc_cache_array;
 	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < MFC_LINES; it->ct++)
-		for (mfc = net->ipv4.mfc_cache_array[it->ct];
-		     mfc; mfc = mfc->next)
+	for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
+		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
+	}
 	read_unlock(&mrt_lock);
 
-	it->cache = &net->ipv4.mfc_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = net->ipv4.mfc_unres_queue; mfc; mfc = mfc->next)
+	it->cache = &net->ipv4.mfc_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1842,18 +1833,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->next)
-		return mfc->next;
+	if (mfc->list.next != it->cache)
+		return list_entry(mfc->list.next, struct mfc_cache, list);
 
 	if (it->cache == &net->ipv4.mfc_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != net->ipv4.mfc_cache_array);
+	BUG_ON(it->cache != &net->ipv4.mfc_cache_array[it->ct]);
 
 	while (++it->ct < MFC_LINES) {
-		mfc = net->ipv4.mfc_cache_array[it->ct];
-		if (mfc)
-			return mfc;
+		it->cache = &net->ipv4.mfc_cache_array[it->ct];
+		if (list_empty(it->cache))
+			continue;
+		return list_first_entry(it->cache, struct mfc_cache, list);
 	}
 
 	/* exhausted cache_array, show unresolved */
@@ -1862,9 +1854,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = net->ipv4.mfc_unres_queue;
-	if (mfc)
-		return mfc;
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mfc_cache, list);
 
  end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -1880,7 +1871,7 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 
 	if (it->cache == &net->ipv4.mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == net->ipv4.mfc_cache_array)
+	else if (it->cache == &net->ipv4.mfc_cache_array[it->ct])
 		read_unlock(&mrt_lock);
 }
 
@@ -1960,6 +1951,7 @@ static const struct net_protocol pim_protocol = {
  */
 static int __net_init ipmr_net_init(struct net *net)
 {
+	unsigned int i;
 	int err = 0;
 
 	net->ipv4.vif_table = kcalloc(MAXVIFS, sizeof(struct vif_device),
@@ -1971,13 +1963,18 @@ static int __net_init ipmr_net_init(struct net *net)
 
 	/* Forwarding cache */
 	net->ipv4.mfc_cache_array = kcalloc(MFC_LINES,
-					    sizeof(struct mfc_cache *),
+					    sizeof(struct list_head),
 					    GFP_KERNEL);
 	if (!net->ipv4.mfc_cache_array) {
 		err = -ENOMEM;
 		goto fail_mfc_cache;
 	}
 
+	for (i = 0; i < MFC_LINES; i++)
+		INIT_LIST_HEAD(&net->ipv4.mfc_cache_array[i]);
+
+	INIT_LIST_HEAD(&net->ipv4.mfc_unres_queue);
+
 	setup_timer(&net->ipv4.ipmr_expire_timer, ipmr_expire_process,
 		    (unsigned long)net);
 
-- 
cgit v1.2.3


From f0ad0860d01e47a3ffd220564c5c653b3afbe962 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 13 Apr 2010 05:03:23 +0000
Subject: ipv4: ipmr: support multiple tables

This patch adds support for multiple independent multicast routing instances,
named "tables".

Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT_TABLE. The table number is
stored in the raw socket data and affects all following ipmr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT_TABLE_DEFAULT)
is created with a default routing rule pointing to it. Newly created pimreg
devices have the table number appended ("pimregX"), with the exception of
devices created in the default table, which are named just "pimreg" for
compatibility reasons.

Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys; source and destination addresses could be supported
additionally.

Example usage:

- bind pimd/xorp/... to a specific table:

uint32_t table = 123;
setsockopt(fd, IPPROTO_IP, MRT_TABLE, &table, sizeof(table));

- create routing rules directing packets to the new table:

# ip mrule add iif eth0 lookup 123
# ip mrule add oif eth0 lookup 123

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fib_rules.h |   1 +
 include/linux/mroute.h    |   3 +-
 include/net/netns/ipv4.h  |   5 +
 include/net/raw.h         |   1 +
 net/ipv4/Kconfig          |  14 ++
 net/ipv4/ipmr.c           | 399 +++++++++++++++++++++++++++++++++++++++-------
 6 files changed, 361 insertions(+), 62 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 405e41139a4d..04a397619ebe 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -21,6 +21,7 @@
 #define FIB_RULES_IPV4		AF_INET
 #define FIB_RULES_IPV6		AF_INET6
 #define FIB_RULES_DECNET	AF_DECnet
+#define FIB_RULES_IPMR		128
 
 struct fib_rule_hdr {
 	__u8		family;
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 7ff6c77d6008..fa04b246c9ae 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -27,7 +27,8 @@
 #define MRT_DEL_MFC	(MRT_BASE+5)	/* Delete a multicast forwarding entry	*/
 #define MRT_VERSION	(MRT_BASE+6)	/* Get the kernel multicast version	*/
 #define MRT_ASSERT	(MRT_BASE+7)	/* Activate PIM assert mode		*/
-#define MRT_PIM		(MRT_BASE+8)	/* enable PIM code	*/
+#define MRT_PIM		(MRT_BASE+8)	/* enable PIM code			*/
+#define MRT_TABLE	(MRT_BASE+9)	/* Specify mroute table ID		*/
 
 #define SIOCGETVIFCNT	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT	(SIOCPROTOPRIVATE+1)
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 72e762ab3e5d..ae07feec6446 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -59,7 +59,12 @@ struct netns_ipv4 {
 	atomic_t rt_genid;
 
 #ifdef CONFIG_IP_MROUTE
+#ifndef CONFIG_IP_MROUTE_MULTIPLE_TABLES
 	struct mr_table		*mrt;
+#else
+	struct list_head	mr_tables;
+	struct fib_rules_ops	*mr_rules_ops;
+#endif
 #endif
 };
 #endif
diff --git a/include/net/raw.h b/include/net/raw.h
index 67cc64369432..43c57502659b 100644
--- a/include/net/raw.h
+++ b/include/net/raw.h
@@ -61,6 +61,7 @@ struct raw_sock {
 	/* inet_sock has to be the first member */
 	struct inet_sock   inet;
 	struct icmp_filter filter;
+	u32		   ipmr_table;
 };
 
 static inline struct raw_sock *raw_sk(const struct sock *sk)
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index c9a1c68767ff..be597749c385 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -250,6 +250,20 @@ config IP_MROUTE
 	  <file:Documentation/networking/multicast.txt>. If you haven't heard
 	  about it, you don't need it.
 
+config IP_MROUTE_MULTIPLE_TABLES
+	bool "IP: multicast policy routing"
+	depends on IP_ADVANCED_ROUTER
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IP_PIMSM_V1
 	bool "IP: PIM-SM version 1 support"
 	depends on IP_MROUTE
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 498f4e907d52..5df5fd74c6d1 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -63,12 +63,15 @@
 #include <net/ipip.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
+#include <net/fib_rules.h>
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
 #define CONFIG_IP_PIMSM	1
 #endif
 
 struct mr_table {
+	struct list_head	list;
+	u32			id;
 	struct sock		*mroute_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc_unres_queue;
@@ -83,6 +86,14 @@ struct mr_table {
 #endif
 };
 
+struct ipmr_rule {
+	struct fib_rule		common;
+};
+
+struct ipmr_result {
+	struct mr_table		*mrt;
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -108,6 +119,7 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
+static struct mr_table *ipmr_new_table(struct net *net, u32 id);
 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
 			 struct sk_buff *skb, struct mfc_cache *cache,
 			 int local);
@@ -115,6 +127,206 @@ static int ipmr_cache_report(struct mr_table *mrt,
 			     struct sk_buff *pkt, vifi_t vifi, int assert);
 static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
 			    struct mfc_cache *c, struct rtmsg *rtm);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+#define ipmr_for_each_table(mrt, net) \
+	list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)
+
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+	struct mr_table *mrt;
+
+	ipmr_for_each_table(mrt, net) {
+		if (mrt->id == id)
+			return mrt;
+	}
+	return NULL;
+}
+
+static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
+			   struct mr_table **mrt)
+{
+	struct ipmr_result res;
+	struct fib_lookup_arg arg = { .result = &res, };
+	int err;
+
+	err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
+	if (err < 0)
+		return err;
+	*mrt = res.mrt;
+	return 0;
+}
+
+static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
+			    int flags, struct fib_lookup_arg *arg)
+{
+	struct ipmr_result *res = arg->result;
+	struct mr_table *mrt;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	mrt = ipmr_get_table(rule->fr_net, rule->table);
+	if (mrt == NULL)
+		return -EAGAIN;
+	res->mrt = mrt;
+	return 0;
+}
+
+static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
+{
+	return 1;
+}
+
+static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+			       struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+	return 0;
+}
+
+static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			     struct nlattr **tb)
+{
+	return 1;
+}
+
+static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			  struct fib_rule_hdr *frh)
+{
+	frh->dst_len = 0;
+	frh->src_len = 0;
+	frh->tos     = 0;
+	return 0;
+}
+
+static struct fib_rules_ops ipmr_rules_ops_template = {
+	.family		= FIB_RULES_IPMR,
+	.rule_size	= sizeof(struct ipmr_rule),
+	.addr_size	= sizeof(u32),
+	.action		= ipmr_rule_action,
+	.match		= ipmr_rule_match,
+	.configure	= ipmr_rule_configure,
+	.compare	= ipmr_rule_compare,
+	.default_pref	= fib_default_rule_pref,
+	.fill		= ipmr_rule_fill,
+	.nlgroup	= RTNLGRP_IPV4_RULE,
+	.policy		= ipmr_rule_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ipmr_rules_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	struct mr_table *mrt;
+	int err;
+
+	ops = fib_rules_register(&ipmr_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	INIT_LIST_HEAD(&net->ipv4.mr_tables);
+
+	mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
+	if (err < 0)
+		goto err2;
+
+	net->ipv4.mr_rules_ops = ops;
+	return 0;
+
+err2:
+	kfree(mrt);
+err1:
+	fib_rules_unregister(ops);
+	return err;
+}
+
+static void __net_exit ipmr_rules_exit(struct net *net)
+{
+	struct mr_table *mrt, *next;
+
+	list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list)
+		kfree(mrt);
+	fib_rules_unregister(net->ipv4.mr_rules_ops);
+}
+#else
+#define ipmr_for_each_table(mrt, net) \
+	for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
+
+static struct mr_table *ipmr_get_table(struct net *net, u32 id)
+{
+	return net->ipv4.mrt;
+}
+
+static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
+			   struct mr_table **mrt)
+{
+	*mrt = net->ipv4.mrt;
+	return 0;
+}
+
+static int __net_init ipmr_rules_init(struct net *net)
+{
+	net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
+	return net->ipv4.mrt ? 0 : -ENOMEM;
+}
+
+static void __net_exit ipmr_rules_exit(struct net *net)
+{
+	kfree(net->ipv4.mrt);
+}
+#endif
+
+static struct mr_table *ipmr_new_table(struct net *net, u32 id)
+{
+	struct mr_table *mrt;
+	unsigned int i;
+
+	mrt = ipmr_get_table(net, id);
+	if (mrt != NULL)
+		return mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL)
+		return NULL;
+	mrt->id = id;
+
+	/* Forwarding cache */
+	for (i = 0; i < MFC_LINES; i++)
+		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
+
+	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
+
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
+
+#ifdef CONFIG_IP_PIMSM
+	mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
+#endif
+	return mrt;
+}
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
@@ -215,7 +427,17 @@ failure:
 static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+	struct flowi fl = {
+		.oif		= dev->ifindex,
+		.iif		= skb->skb_iif,
+		.mark		= skb->mark,
+	};
+	int err;
+
+	err = ipmr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -240,12 +462,18 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ipmr_reg_vif(struct net *net)
+static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
 {
 	struct net_device *dev;
 	struct in_device *in_dev;
+	char name[IFNAMSIZ];
+
+	if (mrt->id == RT_TABLE_DEFAULT)
+		sprintf(name, "pimreg");
+	else
+		sprintf(name, "pimreg%u", mrt->id);
 
-	dev = alloc_netdev(0, "pimreg", reg_vif_setup);
+	dev = alloc_netdev(0, name, reg_vif_setup);
 
 	if (dev == NULL)
 		return NULL;
@@ -461,7 +689,7 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 		 */
 		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
-		dev = ipmr_reg_vif(net);
+		dev = ipmr_reg_vif(net, mrt);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -928,17 +1156,19 @@ static void mroute_clean_tables(struct mr_table *mrt)
 static void mrtsock_destruct(struct sock *sk)
 {
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 
 	rtnl_lock();
-	if (sk == mrt->mroute_sk) {
-		IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
+	ipmr_for_each_table(mrt, net) {
+		if (sk == mrt->mroute_sk) {
+			IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
 
-		write_lock_bh(&mrt_lock);
-		mrt->mroute_sk = NULL;
-		write_unlock_bh(&mrt_lock);
+			write_lock_bh(&mrt_lock);
+			mrt->mroute_sk = NULL;
+			write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(mrt);
+			mroute_clean_tables(mrt);
+		}
 	}
 	rtnl_unlock();
 }
@@ -956,7 +1186,11 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 	struct vifctl vif;
 	struct mfcctl mfc;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT_INIT) {
 		if (sk != mrt->mroute_sk && !capable(CAP_NET_ADMIN))
@@ -1054,6 +1288,27 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsi
 		rtnl_unlock();
 		return ret;
 	}
+#endif
+#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
+	case MRT_TABLE:
+	{
+		u32 v;
+
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+		if (get_user(v, (u32 __user *)optval))
+			return -EFAULT;
+		if (sk == mrt->mroute_sk)
+			return -EBUSY;
+
+		rtnl_lock();
+		ret = 0;
+		if (!ipmr_new_table(net, v))
+			ret = -ENOMEM;
+		raw_sk(sk)->ipmr_table = v;
+		rtnl_unlock();
+		return ret;
+	}
 #endif
 	/*
 	 *	Spurious command, or MRT_VERSION which you cannot
@@ -1073,7 +1328,11 @@ int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT_VERSION &&
 #ifdef CONFIG_IP_PIMSM
@@ -1115,7 +1374,11 @@ int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct vif_device *vif;
 	struct mfc_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (cmd) {
 	case SIOCGETVIFCNT:
@@ -1166,17 +1429,20 @@ static int ipmr_device_event(struct notifier_block *this, unsigned long event, v
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 	struct vif_device *v;
 	int ct;
 	LIST_HEAD(list);
 
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
-	v = &mrt->vif_table[0];
-	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
-		if (v->dev == dev)
-			vif_delete(mrt, ct, 1, &list);
+
+	ipmr_for_each_table(mrt, net) {
+		v = &mrt->vif_table[0];
+		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+			if (v->dev == dev)
+				vif_delete(mrt, ct, 1, &list);
+		}
 	}
 	unregister_netdevice_many(&list);
 	return NOTIFY_DONE;
@@ -1443,8 +1709,9 @@ int ip_mr_input(struct sk_buff *skb)
 {
 	struct mfc_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
 	int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
+	struct mr_table *mrt;
+	int err;
 
 	/* Packet is looped back after forward, it should not be
 	   forwarded second time, but still can be delivered locally.
@@ -1452,6 +1719,10 @@ int ip_mr_input(struct sk_buff *skb)
 	if (IPCB(skb)->flags&IPSKB_FORWARDED)
 		goto dont_forward;
 
+	err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
+	if (err < 0)
+		return err;
+
 	if (!local) {
 		    if (IPCB(skb)->opt.router_alert) {
 			    if (ip_call_ra_chain(skb))
@@ -1522,12 +1793,11 @@ dont_forward:
 }
 
 #ifdef CONFIG_IP_PIMSM
-static int __pim_rcv(struct sk_buff *skb, unsigned int pimlen)
+static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
+		     unsigned int pimlen)
 {
 	struct net_device *reg_dev = NULL;
 	struct iphdr *encap;
-	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
 
 	encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
 	/*
@@ -1578,18 +1848,21 @@ int pim_rcv_v1(struct sk_buff * skb)
 {
 	struct igmphdr *pim;
 	struct net *net = dev_net(skb->dev);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
 
 	pim = igmp_hdr(skb);
 
+	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+		goto drop;
+
 	if (!mrt->mroute_do_pim ||
 	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
 		goto drop;
 
-	if (__pim_rcv(skb, sizeof(*pim))) {
+	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
 drop:
 		kfree_skb(skb);
 	}
@@ -1601,6 +1874,8 @@ drop:
 static int pim_rcv(struct sk_buff * skb)
 {
 	struct pimreghdr *pim;
+	struct net *net = dev_net(skb->dev);
+	struct mr_table *mrt;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
 		goto drop;
@@ -1612,7 +1887,10 @@ static int pim_rcv(struct sk_buff * skb)
 	     csum_fold(skb_checksum(skb, 0, skb->len, 0))))
 		goto drop;
 
-	if (__pim_rcv(skb, sizeof(*pim))) {
+	if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
+		goto drop;
+
+	if (__pim_rcv(mrt, skb, sizeof(*pim))) {
 drop:
 		kfree_skb(skb);
 	}
@@ -1663,10 +1941,14 @@ int ipmr_get_route(struct net *net,
 		   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt;
 	struct mfc_cache *cache;
 	struct rtable *rt = skb_rtable(skb);
 
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return -ENOENT;
+
 	read_lock(&mrt_lock);
 	cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);
 
@@ -1717,6 +1999,7 @@ int ipmr_get_route(struct net *net,
  */
 struct ipmr_vif_iter {
 	struct seq_net_private p;
+	struct mr_table *mrt;
 	int ct;
 };
 
@@ -1724,7 +2007,7 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
 					   struct ipmr_vif_iter *iter,
 					   loff_t pos)
 {
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 		if (!VIF_EXISTS(mrt, iter->ct))
@@ -1738,7 +2021,15 @@ static struct vif_device *ipmr_vif_seq_idx(struct net *net,
 static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
+	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
 	return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -1749,7 +2040,7 @@ static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = iter->mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
@@ -1771,8 +2062,8 @@ static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct ipmr_vif_iter *iter = seq->private;
+	struct mr_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1814,6 +2105,7 @@ static const struct file_operations ipmr_vif_fops = {
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
+	struct mr_table *mrt;
 	struct list_head *cache;
 	int ct;
 };
@@ -1822,7 +2114,7 @@ struct ipmr_mfc_iter {
 static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
 					  struct ipmr_mfc_iter *it, loff_t pos)
 {
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 	struct mfc_cache *mfc;
 
 	read_lock(&mrt_lock);
@@ -1850,7 +2142,13 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr_table *mrt;
+
+	mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
 
+	it->mrt = mrt;
 	it->cache = NULL;
 	it->ct = 0;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
@@ -1862,7 +2160,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 
 	++*pos;
 
@@ -1903,8 +2201,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
+	struct mr_table *mrt = it->mrt;
 
 	if (it->cache == &mrt->mfc_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
@@ -1915,8 +2212,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
-	struct net *net = seq_file_net(seq);
-	struct mr_table *mrt = net->ipv4.mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -1924,6 +2219,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	} else {
 		const struct mfc_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
+		const struct mr_table *mrt = it->mrt;
 
 		seq_printf(seq, "%08lX %08lX %-3hd",
 			   (unsigned long) mfc->mfc_mcastgrp,
@@ -1989,28 +2285,11 @@ static const struct net_protocol pim_protocol = {
  */
 static int __net_init ipmr_net_init(struct net *net)
 {
-	struct mr_table *mrt;
-	unsigned int i;
-	int err = 0;
+	int err;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (mrt == NULL) {
-		err = -ENOMEM;
+	err = ipmr_rules_init(net);
+	if (err < 0)
 		goto fail;
-	}
-
-	/* Forwarding cache */
-	for (i = 0; i < MFC_LINES; i++)
-		INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);
-
-	INIT_LIST_HEAD(&mrt->mfc_unres_queue);
-
-	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)net);
-
-#ifdef CONFIG_IP_PIMSM
-	mrt->mroute_reg_vif_num = -1;
-#endif
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -2019,15 +2298,13 @@ static int __net_init ipmr_net_init(struct net *net)
 	if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
 		goto proc_cache_fail;
 #endif
-
-	net->ipv4.mrt = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip_mr_vif");
 proc_vif_fail:
-	kfree(mrt);
+	ipmr_rules_exit(net);
 #endif
 fail:
 	return err;
@@ -2039,7 +2316,7 @@ static void __net_exit ipmr_net_exit(struct net *net)
 	proc_net_remove(net, "ip_mr_cache");
 	proc_net_remove(net, "ip_mr_vif");
 #endif
-	kfree(net->ipv4.mrt);
+	ipmr_rules_exit(net);
 }
 
 static struct pernet_operations ipmr_net_ops = {
-- 
cgit v1.2.3


From 97f5f0cd8cd0a05449cbb77d1e6f02e026875802 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Sun, 21 Mar 2010 22:31:26 -0700
Subject: Input: implement SysRq as a separate input handler

Instead of keeping SysRq support inside the legacy keyboard driver, split
it out into a separate input handler (filter). This stops most SysRq input
events from leaking into evdev clients (some events, such as first SysRq
scancode - not keycode - event, are still leaked into both legacy keyboard
and evdev).

[martinez.javier@gmail.com: fix compile error when CONFIG_MAGIC_SYSRQ is
 not defined]
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/char/keyboard.c |  40 +-------
 drivers/char/sysrq.c    | 243 ++++++++++++++++++++++++++++++++++++++++++------
 include/linux/sysrq.h   |  23 ++---
 kernel/sysctl.c         |  23 ++++-
 4 files changed, 246 insertions(+), 83 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/char/keyboard.c b/drivers/char/keyboard.c
index ada25bb8941e..50f6c01f44ec 100644
--- a/drivers/char/keyboard.c
+++ b/drivers/char/keyboard.c
@@ -38,7 +38,6 @@
 #include <linux/kbd_kern.h>
 #include <linux/kbd_diacr.h>
 #include <linux/vt_kern.h>
-#include <linux/sysrq.h>
 #include <linux/input.h>
 #include <linux/reboot.h>
 #include <linux/notifier.h>
@@ -82,8 +81,7 @@ void compute_shiftstate(void);
 typedef void (k_handler_fn)(struct vc_data *vc, unsigned char value,
 			    char up_flag);
 static k_handler_fn K_HANDLERS;
-k_handler_fn *k_handler[16] = { K_HANDLERS };
-EXPORT_SYMBOL_GPL(k_handler);
+static k_handler_fn *k_handler[16] = { K_HANDLERS };
 
 #define FN_HANDLERS\
 	fn_null,	fn_enter,	fn_show_ptregs,	fn_show_mem,\
@@ -147,22 +145,6 @@ static struct ledptr {
 	unsigned char valid:1;
 } ledptrs[3];
 
-/* Simple translation table for the SysRq keys */
-
-#ifdef CONFIG_MAGIC_SYSRQ
-unsigned char kbd_sysrq_xlate[KEY_MAX + 1] =
-        "\000\0331234567890-=\177\t"                    /* 0x00 - 0x0f */
-        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
-        "dfghjkl;'`\000\\zxcv"                          /* 0x20 - 0x2f */
-        "bnm,./\000*\000 \000\201\202\203\204\205"      /* 0x30 - 0x3f */
-        "\206\207\210\211\212\000\000789-456+1"         /* 0x40 - 0x4f */
-        "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
-        "\r\000/";                                      /* 0x60 - 0x6f */
-static int sysrq_down;
-static int sysrq_alt_use;
-#endif
-static int sysrq_alt;
-
 /*
  * Notifier list for console keyboard events
  */
@@ -1108,7 +1090,8 @@ static int emulate_raw(struct vc_data *vc, unsigned int keycode,
 			 * pressing PrtSc/SysRq alone, but simply 0x54
 			 * when pressing Alt+PrtSc/SysRq.
 			 */
-			if (sysrq_alt) {
+			if (test_bit(KEY_LEFTALT, key_down) ||
+			    test_bit(KEY_RIGHTALT, key_down)) {
 				put_queue(vc, 0x54 | up_flag);
 			} else {
 				put_queue(vc, 0xe0);
@@ -1176,8 +1159,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 
 	kbd = kbd_table + vc->vc_num;
 
-	if (keycode == KEY_LEFTALT || keycode == KEY_RIGHTALT)
-		sysrq_alt = down ? keycode : 0;
 #ifdef CONFIG_SPARC
 	if (keycode == KEY_STOP)
 		sparc_l1_a_state = down;
@@ -1190,21 +1171,6 @@ static void kbd_keycode(unsigned int keycode, int down, int hw_raw)
 			if (keycode < BTN_MISC && printk_ratelimit())
 				printk(KERN_WARNING "keyboard.c: can't emulate rawmode for keycode %d\n", keycode);
 
-#ifdef CONFIG_MAGIC_SYSRQ	       /* Handle the SysRq Hack */
-	if (keycode == KEY_SYSRQ && (sysrq_down || (down == 1 && sysrq_alt))) {
-		if (!sysrq_down) {
-			sysrq_down = down;
-			sysrq_alt_use = sysrq_alt;
-		}
-		return;
-	}
-	if (sysrq_down && !down && keycode == sysrq_alt_use)
-		sysrq_down = 0;
-	if (sysrq_down && down && !rep) {
-		handle_sysrq(kbd_sysrq_xlate[keycode], tty);
-		return;
-	}
-#endif
 #ifdef CONFIG_SPARC
 	if (keycode == KEY_A && sparc_l1_a_state) {
 		sparc_l1_a_state = 0;
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 59de2525d303..193f9c214946 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -1,7 +1,4 @@
-/* -*- linux-c -*-
- *
- *	$Id: sysrq.c,v 1.15 1998/08/23 14:56:41 mj Exp $
- *
+/*
  *	Linux Magic System Request Key Hacks
  *
  *	(c) 1997 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
@@ -10,8 +7,13 @@
  *	(c) 2000 Crutcher Dunnavant <crutcher+kernel@datastacks.com>
  *	overhauled to use key registration
  *	based upon discusions in irc://irc.openprojects.net/#kernelnewbies
+ *
+ *	Copyright (c) 2010 Dmitry Torokhov
+ *	Input handler conversion
  */
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
 #include <linux/sched.h>
 #include <linux/interrupt.h>
 #include <linux/mm.h>
@@ -39,33 +41,34 @@
 #include <linux/hrtimer.h>
 #include <linux/oom.h>
 #include <linux/slab.h>
+#include <linux/input.h>
 
 #include <asm/ptrace.h>
 #include <asm/irq_regs.h>
 
 /* Whether we react on sysrq keys or just ignore them */
-int __read_mostly __sysrq_enabled = 1;
-
-static int __read_mostly sysrq_always_enabled;
+static int __read_mostly sysrq_enabled = 1;
+static bool __read_mostly sysrq_always_enabled;
 
-int sysrq_on(void)
+static bool sysrq_on(void)
 {
-	return __sysrq_enabled || sysrq_always_enabled;
+	return sysrq_enabled || sysrq_always_enabled;
 }
 
 /*
  * A value of 1 means 'all', other nonzero values are an op mask:
  */
-static inline int sysrq_on_mask(int mask)
+static bool sysrq_on_mask(int mask)
 {
-	return sysrq_always_enabled || __sysrq_enabled == 1 ||
-						(__sysrq_enabled & mask);
+	return sysrq_always_enabled ||
+	       sysrq_enabled == 1 ||
+	       (sysrq_enabled & mask);
 }
 
 static int __init sysrq_always_enabled_setup(char *str)
 {
-	sysrq_always_enabled = 1;
-	printk(KERN_INFO "debug: sysrq always enabled.\n");
+	sysrq_always_enabled = true;
+	pr_info("sysrq always enabled.\n");
 
 	return 1;
 }
@@ -76,6 +79,7 @@ __setup("sysrq_always_enabled", sysrq_always_enabled_setup);
 static void sysrq_handle_loglevel(int key, struct tty_struct *tty)
 {
 	int i;
+
 	i = key - '0';
 	console_loglevel = 7;
 	printk("Loglevel set to %d\n", i);
@@ -101,7 +105,7 @@ static struct sysrq_key_op sysrq_SAK_op = {
 	.enable_mask	= SYSRQ_ENABLE_KEYBOARD,
 };
 #else
-#define sysrq_SAK_op (*(struct sysrq_key_op *)0)
+#define sysrq_SAK_op (*(struct sysrq_key_op *)NULL)
 #endif
 
 #ifdef CONFIG_VT
@@ -119,7 +123,7 @@ static struct sysrq_key_op sysrq_unraw_op = {
 	.enable_mask	= SYSRQ_ENABLE_KEYBOARD,
 };
 #else
-#define sysrq_unraw_op (*(struct sysrq_key_op *)0)
+#define sysrq_unraw_op (*(struct sysrq_key_op *)NULL)
 #endif /* CONFIG_VT */
 
 static void sysrq_handle_crash(int key, struct tty_struct *tty)
@@ -195,7 +199,7 @@ static struct sysrq_key_op sysrq_showlocks_op = {
 	.action_msg	= "Show Locks Held",
 };
 #else
-#define sysrq_showlocks_op (*(struct sysrq_key_op *)0)
+#define sysrq_showlocks_op (*(struct sysrq_key_op *)NULL)
 #endif
 
 #ifdef CONFIG_SMP
@@ -298,7 +302,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
 	.enable_mask	= SYSRQ_ENABLE_DUMP,
 };
 #else
-#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)0)
+#define sysrq_ftrace_dump_op (*(struct sysrq_key_op *)NULL)
 #endif
 
 static void sysrq_handle_showmem(int key, struct tty_struct *tty)
@@ -477,6 +481,7 @@ struct sysrq_key_op *__sysrq_get_key_op(int key)
 	i = sysrq_key_table_key2index(key);
 	if (i != -1)
 	        op_p = sysrq_key_table[i];
+
         return op_p;
 }
 
@@ -488,11 +493,7 @@ static void __sysrq_put_key_op(int key, struct sysrq_key_op *op_p)
                 sysrq_key_table[i] = op_p;
 }
 
-/*
- * This is the non-locking version of handle_sysrq.  It must/can only be called
- * by sysrq key handlers, as they are inside of the lock
- */
-void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
+static void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 {
 	struct sysrq_key_op *op_p;
 	int orig_log_level;
@@ -544,10 +545,6 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
 	spin_unlock_irqrestore(&sysrq_key_table_lock, flags);
 }
 
-/*
- * This function is called by the keyboard handler when SysRq is pressed
- * and any other keycode arrives.
- */
 void handle_sysrq(int key, struct tty_struct *tty)
 {
 	if (sysrq_on())
@@ -555,10 +552,177 @@ void handle_sysrq(int key, struct tty_struct *tty)
 }
 EXPORT_SYMBOL(handle_sysrq);
 
+#ifdef CONFIG_INPUT
+
+/* Simple translation table for the SysRq keys */
+static const unsigned char sysrq_xlate[KEY_MAX + 1] =
+        "\000\0331234567890-=\177\t"                    /* 0x00 - 0x0f */
+        "qwertyuiop[]\r\000as"                          /* 0x10 - 0x1f */
+        "dfghjkl;'`\000\\zxcv"                          /* 0x20 - 0x2f */
+        "bnm,./\000*\000 \000\201\202\203\204\205"      /* 0x30 - 0x3f */
+        "\206\207\210\211\212\000\000789-456+1"         /* 0x40 - 0x4f */
+        "230\177\000\000\213\214\000\000\000\000\000\000\000\000\000\000" /* 0x50 - 0x5f */
+        "\r\000/";                                      /* 0x60 - 0x6f */
+
+static bool sysrq_down;
+static int sysrq_alt_use;
+static int sysrq_alt;
+
+/* Input filter: true (SysRq mode armed) tells input core to drop the event */
+static bool sysrq_filter(struct input_handle *handle, unsigned int type,
+		         unsigned int code, int value)
+{
+	if (type != EV_KEY)
+		goto out;
+
+	switch (code) {
+	case KEY_LEFTALT:
+	case KEY_RIGHTALT:
+		if (!value && sysrq_down && code == sysrq_alt_use)
+			sysrq_down = false;
+		/* Forget Alt on release, else a later bare SysRq re-arms us. */
+		sysrq_alt = value ? code : 0;
+		break;
+
+	case KEY_SYSRQ:
+		if (value == 1 && sysrq_alt) {
+			sysrq_down = true;
+			sysrq_alt_use = sysrq_alt;
+		}
+		break;
+
+	default:
+		if (sysrq_down && value && value != 2)
+			__handle_sysrq(sysrq_xlate[code], NULL, 1);
+		break;
+	}
+
+out:
+	return sysrq_down;
+}
+
+static int sysrq_connect(struct input_handler *handler,
+			 struct input_dev *dev,
+			 const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int error;
+
+	sysrq_down = false;
+	sysrq_alt = 0;
+
+	handle = kzalloc(sizeof(struct input_handle), GFP_KERNEL);
+	if (!handle)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = "sysrq";
+
+	error = input_register_handle(handle);
+	if (error) {
+		pr_err("Failed to register input sysrq handler, error %d\n",
+			error);
+		goto err_free;
+	}
+
+	error = input_open_device(handle);
+	if (error) {
+		pr_err("Failed to open input device, error %d\n", error);
+		goto err_unregister;
+	}
+
+	return 0;
+
+ err_unregister:
+	input_unregister_handle(handle);
+ err_free:
+	kfree(handle);
+	return error;
+}
+
+static void sysrq_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+
+/*
+ * We are matching on KEY_LEFTALT instead of KEY_SYSRQ because not all
+ * keyboards have SysRq key predefined and so user may add it to keymap
+ * later, but we expect all such keyboards to have left alt.
+ */
+static const struct input_device_id sysrq_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT |
+				INPUT_DEVICE_ID_MATCH_KEYBIT,
+		.evbit = { BIT_MASK(EV_KEY) },
+		.keybit = { BIT_MASK(KEY_LEFTALT) },
+	},
+	{ },
+};
+
+static struct input_handler sysrq_handler = {
+	.filter		= sysrq_filter,
+	.connect	= sysrq_connect,
+	.disconnect	= sysrq_disconnect,
+	.name		= "sysrq",
+	.id_table	= sysrq_ids,
+};
+
+static bool sysrq_handler_registered;
+
+/* Register the SysRq input handler; record success for later unregister. */
+static inline void sysrq_register_handler(void)
+{
+	int error = input_register_handler(&sysrq_handler);
+
+	if (error)
+		pr_err("Failed to register input handler, error %d\n", error);
+	else
+		sysrq_handler_registered = true;
+}
+
+static inline void sysrq_unregister_handler(void)
+{
+	if (sysrq_handler_registered) {
+		input_unregister_handler(&sysrq_handler);
+		sysrq_handler_registered = false;
+	}
+}
+
+#else
+
+static inline void sysrq_register_handler(void)
+{
+}
+
+static inline void sysrq_unregister_handler(void)
+{
+}
+
+#endif /* CONFIG_INPUT */
+
+int sysrq_toggle_support(int enable_mask)
+{
+	bool was_enabled = sysrq_on();
+
+	sysrq_enabled = enable_mask;
+
+	if (was_enabled != sysrq_on()) {
+		if (sysrq_on())
+			sysrq_register_handler();
+		else
+			sysrq_unregister_handler();
+	}
+
+	return 0;
+}
+
 static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
                                 struct sysrq_key_op *remove_op_p)
 {
-
 	int retval;
 	unsigned long flags;
 
@@ -599,6 +763,7 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
 			return -EFAULT;
 		__handle_sysrq(c, NULL, 0);
 	}
+
 	return count;
 }
 
@@ -606,10 +771,28 @@ static const struct file_operations proc_sysrq_trigger_operations = {
 	.write		= write_sysrq_trigger,
 };
 
+static void sysrq_init_procfs(void)
+{
+	if (!proc_create("sysrq-trigger", S_IWUSR, NULL,
+			 &proc_sysrq_trigger_operations))
+		pr_err("Failed to register proc interface\n");
+}
+
+#else
+
+static inline void sysrq_init_procfs(void)
+{
+}
+
+#endif /* CONFIG_PROC_FS */
+
 static int __init sysrq_init(void)
 {
-	proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
+	sysrq_init_procfs();
+
+	if (sysrq_on())
+		sysrq_register_handler();
+
 	return 0;
 }
 module_init(sysrq_init);
-#endif
diff --git a/include/linux/sysrq.h b/include/linux/sysrq.h
index 99adcdc0d3ca..4496322e28dd 100644
--- a/include/linux/sysrq.h
+++ b/include/linux/sysrq.h
@@ -39,41 +39,34 @@ struct sysrq_key_op {
 
 #ifdef CONFIG_MAGIC_SYSRQ
 
-extern int sysrq_on(void);
-
-/*
- * Do not use this one directly:
- */
-extern int __sysrq_enabled;
-
 /* Generic SysRq interface -- you may call it from any device driver, supplying
  * ASCII code of the key, pointer to registers and kbd/tty structs (if they
  * are available -- else NULL's).
  */
 
 void handle_sysrq(int key, struct tty_struct *tty);
-void __handle_sysrq(int key, struct tty_struct *tty, int check_mask);
 int register_sysrq_key(int key, struct sysrq_key_op *op);
 int unregister_sysrq_key(int key, struct sysrq_key_op *op);
 struct sysrq_key_op *__sysrq_get_key_op(int key);
 
+int sysrq_toggle_support(int enable_mask);
+
 #else
 
-static inline int sysrq_on(void)
+static inline void handle_sysrq(int key, struct tty_struct *tty)
 {
-	return 0;
 }
-static inline int __reterr(void)
+
+static inline int register_sysrq_key(int key, struct sysrq_key_op *op)
 {
 	return -EINVAL;
 }
-static inline void handle_sysrq(int key, struct tty_struct *tty)
+
+static inline int unregister_sysrq_key(int key, struct sysrq_key_op *op)
 {
+	return -EINVAL;
 }
 
-#define register_sysrq_key(ig,nore) __reterr()
-#define unregister_sysrq_key(ig,nore) __reterr()
-
 #endif
 
 #endif /* _LINUX_SYSRQ_H */
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8686b0f5fc12..ce724a0dd0bb 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -163,6 +163,27 @@ static int proc_taint(struct ctl_table *table, int write,
 			       void __user *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
+#ifdef CONFIG_MAGIC_SYSRQ
+/* Sysctl-side copy; sysrq.c keeps its own private one, which defaults to 1. */
+static int __sysrq_enabled = 1;
+
+static int sysrq_sysctl_handler(ctl_table *table, int write,
+				void __user *buffer, size_t *lenp,
+				loff_t *ppos)
+{
+	int error = proc_dointvec(table, write, buffer, lenp, ppos);
+
+	if (error)
+		return error;
+
+	if (write)
+		sysrq_toggle_support(__sysrq_enabled);
+
+	return 0;
+}
+
+#endif
+
 static struct ctl_table root_table[];
 static struct ctl_table_root sysctl_table_root;
 static struct ctl_table_header root_table_header = {
@@ -567,7 +588,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &__sysrq_enabled,
 		.maxlen		= sizeof (int),
 		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= sysrq_sysctl_handler,
 	},
 #endif
 #ifdef CONFIG_PROC_SYSCTL
-- 
cgit v1.2.3


From 31a6296333b94964e9a073649840bb34d4603369 Mon Sep 17 00:00:00 2001
From: Bryan Wu <cooloney@kernel.org>
Date: Sun, 21 Mar 2010 23:23:24 -0700
Subject: Input: add Analog Devices AD714x captouch input driver

AD7142 and AD7147 are integrated capacitance-to-digital converters
(CDCs) with on-chip environmental calibration for use in systems
requiring a novel user input method. The AD7142 and AD7147 can interface
to external capacitance sensors implementing functions such as buttons,
scrollwheels, sliders, touchpads and so on.

The chips don't restrict the specific usage. Depending on the hardware
connection, one target board can include one or several of these
components. The platform_data of the device's "struct device" holds
this information. The data structures defined in the header file describe
the hardware features of the button/scrollwheel/slider/touchpad components
on target boards, and need to be filled in under arch/mach-/.

As a result, the driver is independent of boards. It gets the
component layout from the platform_data, registers the related devices,
fulfills the algorithms and state machines for these components and
reports the related input events to the upper level.

Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: Michael Hennerich <michael.hennerich@analog.com>
Signed-off-by: Barry Song <21cnbao@gmail.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/misc/Kconfig      |   30 +
 drivers/input/misc/Makefile     |    3 +
 drivers/input/misc/ad714x-i2c.c |  137 ++++
 drivers/input/misc/ad714x-spi.c |  103 +++
 drivers/input/misc/ad714x.c     | 1331 +++++++++++++++++++++++++++++++++++++++
 drivers/input/misc/ad714x.h     |   26 +
 include/linux/input.h           |    1 +
 include/linux/input/ad714x.h    |   63 ++
 8 files changed, 1694 insertions(+)
 create mode 100644 drivers/input/misc/ad714x-i2c.c
 create mode 100644 drivers/input/misc/ad714x-spi.c
 create mode 100644 drivers/input/misc/ad714x.c
 create mode 100644 drivers/input/misc/ad714x.h
 create mode 100644 include/linux/input/ad714x.h

(limited to 'include/linux')

diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
index 54a9c2d0ba1c..a4b9dc5cf456 100644
--- a/drivers/input/misc/Kconfig
+++ b/drivers/input/misc/Kconfig
@@ -22,6 +22,36 @@ config INPUT_88PM860X_ONKEY
 	  To compile this driver as a module, choose M here: the module
 	  will be called 88pm860x_onkey.
 
+config INPUT_AD714X
+	tristate "Analog Devices AD714x Capacitance Touch Sensor"
+	help
+	  Say Y here if you want to support an AD7142/AD7147 touch sensor.
+
+	  You should select a bus connection too.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad714x.
+
+config INPUT_AD714X_I2C
+	tristate "support I2C bus connection"
+	depends on INPUT_AD714X && I2C
+	default y
+	help
+	  Say Y here if you have AD7142/AD7147 hooked to an I2C bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad714x-i2c.
+
+config INPUT_AD714X_SPI
+	tristate "support SPI bus connection"
+	depends on INPUT_AD714X && SPI
+	default y
+	help
+	  Say Y here if you have AD7142/AD7147 hooked to a SPI bus.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called ad714x-spi.
+
 config INPUT_PCSPKR
 	tristate "PC Speaker support"
 	depends on PCSPKR_PLATFORM
diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile
index a662df21bf57..f9f577031e06 100644
--- a/drivers/input/misc/Makefile
+++ b/drivers/input/misc/Makefile
@@ -5,6 +5,9 @@
 # Each configuration option enables a list of files.
 
 obj-$(CONFIG_INPUT_88PM860X_ONKEY)	+= 88pm860x_onkey.o
+obj-$(CONFIG_INPUT_AD714X)		+= ad714x.o
+obj-$(CONFIG_INPUT_AD714X_I2C)		+= ad714x-i2c.o
+obj-$(CONFIG_INPUT_AD714X_SPI)		+= ad714x-spi.o
 obj-$(CONFIG_INPUT_APANEL)		+= apanel.o
 obj-$(CONFIG_INPUT_ATI_REMOTE)		+= ati_remote.o
 obj-$(CONFIG_INPUT_ATI_REMOTE2)		+= ati_remote2.o
diff --git a/drivers/input/misc/ad714x-i2c.c b/drivers/input/misc/ad714x-i2c.c
new file mode 100644
index 000000000000..a2cb6b426dc7
--- /dev/null
+++ b/drivers/input/misc/ad714x-i2c.c
@@ -0,0 +1,137 @@
+/*
+ * AD714X CapTouch Programmable Controller driver (I2C bus)
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>	/* BUS_I2C */
+#include <linux/i2c.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include "ad714x.h"
+
+#ifdef CONFIG_PM
+static int ad714x_i2c_suspend(struct i2c_client *client, pm_message_t message)
+{
+	return ad714x_disable(i2c_get_clientdata(client));
+}
+
+static int ad714x_i2c_resume(struct i2c_client *client)
+{
+	return ad714x_enable(i2c_get_clientdata(client));
+}
+#else
+# define ad714x_i2c_suspend NULL
+# define ad714x_i2c_resume  NULL
+#endif
+
+static int ad714x_i2c_write(struct device *dev, unsigned short reg,
+				unsigned short data)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret = 0;
+	u8 *_reg = (u8 *)&reg;
+	u8 *_data = (u8 *)&data;
+
+	u8 tx[4] = {
+		_reg[1],
+		_reg[0],
+		_data[1],
+		_data[0]
+	};
+
+	ret = i2c_master_send(client, tx, 4);
+	if (ret < 0)
+		dev_err(&client->dev, "I2C write error\n");
+
+	return ret;
+}
+
+static int ad714x_i2c_read(struct device *dev, unsigned short reg,
+				unsigned short *data)
+{
+	struct i2c_client *client = to_i2c_client(dev);
+	int ret = 0;
+	u8 *_reg = (u8 *)&reg;
+	u8 *_data = (u8 *)data;
+
+	u8 tx[2] = {
+		_reg[1],
+		_reg[0]
+	};
+	u8 rx[2];
+
+	ret = i2c_master_send(client, tx, 2);
+	if (ret >= 0)
+		ret = i2c_master_recv(client, rx, 2);
+
+	if (unlikely(ret < 0)) {
+		dev_err(&client->dev, "I2C read error\n");
+	} else {
+		_data[0] = rx[1];
+		_data[1] = rx[0];
+	}
+
+	return ret;
+}
+
+static int __devinit ad714x_i2c_probe(struct i2c_client *client,
+					const struct i2c_device_id *id)
+{
+	struct ad714x_chip *chip;
+
+	chip = ad714x_probe(&client->dev, BUS_I2C, client->irq,
+			    ad714x_i2c_read, ad714x_i2c_write);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
+
+	i2c_set_clientdata(client, chip);
+
+	return 0;
+}
+
+static int __devexit ad714x_i2c_remove(struct i2c_client *client)
+{
+	struct ad714x_chip *chip = i2c_get_clientdata(client);
+
+	ad714x_remove(chip);
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+static const struct i2c_device_id ad714x_id[] = {
+	{ "ad7142_captouch", 0 },
+	{ "ad7147_captouch", 0 },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, ad714x_id);
+
+static struct i2c_driver ad714x_i2c_driver = {
+	.driver = {
+		.name = "ad714x_captouch",
+	},
+	.probe    = ad714x_i2c_probe,
+	.remove   = __devexit_p(ad714x_i2c_remove),
+	.suspend  = ad714x_i2c_suspend,
+	.resume	  = ad714x_i2c_resume,
+	.id_table = ad714x_id,
+};
+
+static __init int ad714x_i2c_init(void)
+{
+	return i2c_add_driver(&ad714x_i2c_driver);
+}
+module_init(ad714x_i2c_init);
+
+static __exit void ad714x_i2c_exit(void)
+{
+	i2c_del_driver(&ad714x_i2c_driver);
+}
+module_exit(ad714x_i2c_exit);
+
+MODULE_DESCRIPTION("Analog Devices AD714X Capacitance Touch Sensor I2C Bus Driver");
+MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/ad714x-spi.c b/drivers/input/misc/ad714x-spi.c
new file mode 100644
index 000000000000..7f8dedfd1bfe
--- /dev/null
+++ b/drivers/input/misc/ad714x-spi.c
@@ -0,0 +1,103 @@
+/*
+ * AD714X CapTouch Programmable Controller driver (SPI bus)
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/input.h>	/* BUS_SPI */
+#include <linux/module.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+#include "ad714x.h"
+
+#define AD714x_SPI_CMD_PREFIX      0xE000   /* bits 15:11 */
+#define AD714x_SPI_READ            BIT(10)
+
+#ifdef CONFIG_PM
+static int ad714x_spi_suspend(struct spi_device *spi, pm_message_t message)
+{
+	return ad714x_disable(spi_get_drvdata(spi));
+}
+
+static int ad714x_spi_resume(struct spi_device *spi)
+{
+	return ad714x_enable(spi_get_drvdata(spi));
+}
+#else
+# define ad714x_spi_suspend NULL
+# define ad714x_spi_resume  NULL
+#endif
+
+static int ad714x_spi_read(struct device *dev, unsigned short reg,
+		unsigned short *data)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	unsigned short tx = AD714x_SPI_CMD_PREFIX | AD714x_SPI_READ | reg;
+
+	return spi_write_then_read(spi, (u8 *)&tx, 2, (u8 *)data, 2);
+}
+
+static int ad714x_spi_write(struct device *dev, unsigned short reg,
+		unsigned short data)
+{
+	struct spi_device *spi = to_spi_device(dev);
+	unsigned short tx[2] = {
+		AD714x_SPI_CMD_PREFIX | reg,
+		data
+	};
+
+	return spi_write(spi, (u8 *)tx, 4);
+}
+
+static int __devinit ad714x_spi_probe(struct spi_device *spi)
+{
+	struct ad714x_chip *chip;
+
+	chip = ad714x_probe(&spi->dev, BUS_SPI, spi->irq,
+			    ad714x_spi_read, ad714x_spi_write);
+	if (IS_ERR(chip))
+		return PTR_ERR(chip);
+
+	spi_set_drvdata(spi, chip);
+
+	return 0;
+}
+
+static int __devexit ad714x_spi_remove(struct spi_device *spi)
+{
+	struct ad714x_chip *chip = spi_get_drvdata(spi);
+
+	ad714x_remove(chip);
+	spi_set_drvdata(spi, NULL);
+
+	return 0;
+}
+
+static struct spi_driver ad714x_spi_driver = {
+	.driver = {
+		.name	= "ad714x_captouch",
+		.owner	= THIS_MODULE,
+	},
+	.probe		= ad714x_spi_probe,
+	.remove		= __devexit_p(ad714x_spi_remove),
+	.suspend	= ad714x_spi_suspend,
+	.resume		= ad714x_spi_resume,
+};
+
+static __init int ad714x_spi_init(void)
+{
+	return spi_register_driver(&ad714x_spi_driver);
+}
+module_init(ad714x_spi_init);
+
+static __exit void ad714x_spi_exit(void)
+{
+	spi_unregister_driver(&ad714x_spi_driver);
+}
+module_exit(ad714x_spi_exit);
+
+MODULE_DESCRIPTION("Analog Devices AD714X Capacitance Touch Sensor SPI Bus Driver");
+MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/ad714x.c b/drivers/input/misc/ad714x.c
new file mode 100644
index 000000000000..691b1d37331f
--- /dev/null
+++ b/drivers/input/misc/ad714x.c
@@ -0,0 +1,1331 @@
+/*
+ * AD714X CapTouch Programmable Controller driver
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#include <linux/device.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/interrupt.h>
+#include <linux/slab.h>
+#include <linux/input/ad714x.h>
+#include "ad714x.h"
+
+#define AD714X_PWR_CTRL           0x0
+#define AD714X_STG_CAL_EN_REG     0x1
+#define AD714X_AMB_COMP_CTRL0_REG 0x2
+#define AD714X_PARTID_REG         0x17
+#define AD7147_PARTID             0x1470
+#define AD7142_PARTID             0xE620
+#define AD714X_STAGECFG_REG       0x80
+#define AD714X_SYSCFG_REG         0x0
+
+#define STG_LOW_INT_EN_REG     0x5
+#define STG_HIGH_INT_EN_REG    0x6
+#define STG_COM_INT_EN_REG     0x7
+#define STG_LOW_INT_STA_REG    0x8
+#define STG_HIGH_INT_STA_REG   0x9
+#define STG_COM_INT_STA_REG    0xA
+
+#define CDC_RESULT_S0          0xB
+#define CDC_RESULT_S1          0xC
+#define CDC_RESULT_S2          0xD
+#define CDC_RESULT_S3          0xE
+#define CDC_RESULT_S4          0xF
+#define CDC_RESULT_S5          0x10
+#define CDC_RESULT_S6          0x11
+#define CDC_RESULT_S7          0x12
+#define CDC_RESULT_S8          0x13
+#define CDC_RESULT_S9          0x14
+#define CDC_RESULT_S10         0x15
+#define CDC_RESULT_S11         0x16
+
+#define STAGE0_AMBIENT		0xF1	/* +0x24 (36 regs) per stage */
+#define STAGE1_AMBIENT		0x115
+#define STAGE2_AMBIENT		0x139
+#define STAGE3_AMBIENT		0x15D
+#define STAGE4_AMBIENT		0x181
+#define STAGE5_AMBIENT		0x1A5
+#define STAGE6_AMBIENT		0x1C9
+#define STAGE7_AMBIENT		0x1ED
+#define STAGE8_AMBIENT		0x211
+#define STAGE9_AMBIENT		0x235
+#define STAGE10_AMBIENT		0x259
+#define STAGE11_AMBIENT		0x27D
+
+#define PER_STAGE_REG_NUM      36
+#define STAGE_NUM              12
+#define STAGE_CFGREG_NUM       8
+#define SYS_CFGREG_NUM         8
+
+/*
+ * driver information which will be used to maintain the software flow
+ */
+enum ad714x_device_state { IDLE, JITTER, ACTIVE, SPACE };
+
+struct ad714x_slider_drv {
+	int highest_stage;
+	int abs_pos;
+	int flt_pos;
+	enum ad714x_device_state state;
+	struct input_dev *input;
+};
+
+struct ad714x_wheel_drv {
+	int abs_pos;
+	int flt_pos;
+	int pre_mean_value;
+	int pre_highest_stage;
+	int pre_mean_value_no_offset;
+	int mean_value;
+	int mean_value_no_offset;
+	int pos_offset;
+	int pos_ratio;
+	int highest_stage;
+	enum ad714x_device_state state;
+	struct input_dev *input;
+};
+
+struct ad714x_touchpad_drv {
+	int x_highest_stage;
+	int x_flt_pos;
+	int x_abs_pos;
+	int y_highest_stage;
+	int y_flt_pos;
+	int y_abs_pos;
+	int left_ep;
+	int left_ep_val;
+	int right_ep;
+	int right_ep_val;
+	int top_ep;
+	int top_ep_val;
+	int bottom_ep;
+	int bottom_ep_val;
+	enum ad714x_device_state state;
+	struct input_dev *input;
+};
+
+struct ad714x_button_drv {
+	enum ad714x_device_state state;
+	/*
+	 * Unlike slider/wheel/touchpad, all buttons point to
+	 * same input_dev instance
+	 */
+	struct input_dev *input;
+};
+
+struct ad714x_driver_data {
+	struct ad714x_slider_drv *slider;
+	struct ad714x_wheel_drv *wheel;
+	struct ad714x_touchpad_drv *touchpad;
+	struct ad714x_button_drv *button;
+};
+
+/*
+ * information to integrate all things which will be private data
+ * of spi/i2c device
+ */
+struct ad714x_chip {
+	unsigned short h_state;
+	unsigned short l_state;
+	unsigned short c_state;
+	unsigned short adc_reg[STAGE_NUM];
+	unsigned short amb_reg[STAGE_NUM];
+	unsigned short sensor_val[STAGE_NUM];
+
+	struct ad714x_platform_data *hw;
+	struct ad714x_driver_data *sw;
+
+	int irq;
+	struct device *dev;
+	ad714x_read_t read;
+	ad714x_write_t write;
+
+	struct mutex mutex;
+
+	unsigned product;
+	unsigned version;
+};
+
+/* Enable the STG_COM interrupt; mask STG_HIGH interrupts for start..end. */
+static void ad714x_use_com_int(struct ad714x_chip *ad714x,
+				int start_stage, int end_stage)
+{
+	unsigned short data, mask;
+
+	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
+
+	ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+	data |= 1 << start_stage;
+	ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+
+	ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+	data &= ~mask;
+	ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+}
+
+/* Disable the STG_COM interrupt; enable STG_HIGH interrupts for start..end. */
+static void ad714x_use_thr_int(struct ad714x_chip *ad714x,
+				int start_stage, int end_stage)
+{
+	unsigned short data, mask;
+
+	mask = ((1 << (end_stage + 1)) - 1) - ((1 << start_stage) - 1);
+
+	ad714x->read(ad714x->dev, STG_COM_INT_EN_REG, &data);
+	data &= ~(1 << start_stage);
+	ad714x->write(ad714x->dev, STG_COM_INT_EN_REG, data);
+
+	ad714x->read(ad714x->dev, STG_HIGH_INT_EN_REG, &data);
+	data |= mask;
+	ad714x->write(ad714x->dev, STG_HIGH_INT_EN_REG, data);
+}
+
+static int ad714x_cal_highest_stage(struct ad714x_chip *ad714x,
+					int start_stage, int end_stage)
+{
+	int max_res = 0;
+	int max_idx = 0;
+	int i;
+
+	for (i = start_stage; i <= end_stage; i++) {
+		if (ad714x->sensor_val[i] > max_res) {
+			max_res = ad714x->sensor_val[i];
+			max_idx = i;
+		}
+	}
+
+	return max_idx;
+}
+
+static int ad714x_cal_abs_pos(struct ad714x_chip *ad714x,
+				int start_stage, int end_stage,
+				int highest_stage, int max_coord)
+{
+	int a_param, b_param;
+
+	if (highest_stage == start_stage) {
+		a_param = ad714x->sensor_val[start_stage + 1];
+		b_param = ad714x->sensor_val[start_stage] +
+			ad714x->sensor_val[start_stage + 1];
+	} else if (highest_stage == end_stage) {
+		a_param = ad714x->sensor_val[end_stage] *
+			(end_stage - start_stage) +
+			ad714x->sensor_val[end_stage - 1] *
+			(end_stage - start_stage - 1);
+		b_param = ad714x->sensor_val[end_stage] +
+			ad714x->sensor_val[end_stage - 1];
+	} else {
+		a_param = ad714x->sensor_val[highest_stage] *
+			(highest_stage - start_stage) +
+			ad714x->sensor_val[highest_stage - 1] *
+			(highest_stage - start_stage - 1) +
+			ad714x->sensor_val[highest_stage + 1] *
+			(highest_stage - start_stage + 1);
+		b_param = ad714x->sensor_val[highest_stage] +
+			ad714x->sensor_val[highest_stage - 1] +
+			ad714x->sensor_val[highest_stage + 1];
+	}
+
+	return (max_coord / (end_stage - start_stage)) * a_param / b_param;
+}
+
+/*
+ * One button can connect to multi positive and negative of CDCs
+ * Multi-buttons can connect to same positive/negative of one CDC
+ */
+static void ad714x_button_state_machine(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_button_plat *hw = &ad714x->hw->button[idx];
+	struct ad714x_button_drv *sw = &ad714x->sw->button[idx];
+
+	switch (sw->state) {
+	case IDLE:
+		if (((ad714x->h_state & hw->h_mask) == hw->h_mask) &&
+		    ((ad714x->l_state & hw->l_mask) == hw->l_mask)) {
+			dev_dbg(ad714x->dev, "button %d touched\n", idx);
+			input_report_key(sw->input, hw->keycode, 1);
+			input_sync(sw->input);
+			sw->state = ACTIVE;
+		}
+		break;
+
+	case ACTIVE:
+		if (((ad714x->h_state & hw->h_mask) != hw->h_mask) ||
+		    ((ad714x->l_state & hw->l_mask) != hw->l_mask)) {
+			dev_dbg(ad714x->dev, "button %d released\n", idx);
+			input_report_key(sw->input, hw->keycode, 0);
+			input_sync(sw->input);
+			sw->state = IDLE;
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * The response of a sensor is defined by the absolute number of codes
+ * between the current CDC value and the ambient value.
+ */
+static void ad714x_slider_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
+	int i;
+
+	for (i = hw->start_stage; i <= hw->end_stage; i++) {
+		ad714x->read(ad714x->dev, CDC_RESULT_S0 + i,
+			&ad714x->adc_reg[i]);
+		ad714x->read(ad714x->dev,
+				STAGE0_AMBIENT + i * PER_STAGE_REG_NUM,
+				&ad714x->amb_reg[i]);
+
+		ad714x->sensor_val[i] = abs(ad714x->adc_reg[i] -
+				ad714x->amb_reg[i]);
+	}
+}
+
+static void ad714x_slider_cal_highest_stage(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
+	struct ad714x_slider_drv *sw = &ad714x->sw->slider[idx];
+
+	sw->highest_stage = ad714x_cal_highest_stage(ad714x, hw->start_stage,
+			hw->end_stage);
+
+	dev_dbg(ad714x->dev, "slider %d highest_stage:%d\n", idx,
+		sw->highest_stage);
+}
+
+/*
+ * The formulae are very straight forward. It uses the sensor with the
+ * highest response and the 2 adjacent ones.
+ * When Sensor 0 has the highest response, only sensor 0 and sensor 1
+ * are used in the calculations. Similarly when the last sensor has the
+ * highest response, only the last sensor and the second last sensors
+ * are used in the calculations.
+ *
+ * For i= idx_of_peak_Sensor-1 to i= idx_of_peak_Sensor+1
+ *         v += Sensor response(i)*i
+ *         w += Sensor response(i)
+ * POS=(Number_of_Positions_Wanted/(Number_of_Sensors_Used-1)) *(v/w)
+ */
+static void ad714x_slider_cal_abs_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_slider_plat *plat = &ad714x->hw->slider[idx];
+	struct ad714x_slider_drv *drv = &ad714x->sw->slider[idx];
+
+	/* Delegate to the shared helper using this slider's stage range. */
+	drv->abs_pos = ad714x_cal_abs_pos(ad714x, plat->start_stage,
+			plat->end_stage, drv->highest_stage, plat->max_coord);
+	dev_dbg(ad714x->dev, "slider %d absolute position:%d\n",
+		idx, drv->abs_pos);
+}
+
+/*
+ * To minimise the Impact of the noise on the algorithm, ADI developed a
+ * routine that filters the CDC results after they have been read by the
+ * host processor.
+ * The filter used is an Infinite Impulse Response (IIR) filter implemented
+ * in firmware and attenuates the noise on the CDC results after they've
+ * been read by the host processor.
+ * Filtered_CDC_result = (Filtered_CDC_result * (10 - Coefficient) +
+ *				Latest_CDC_result * Coefficient)/10
+ */
+static void ad714x_slider_cal_flt_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_slider_drv *drv = &ad714x->sw->slider[idx];
+
+	/* IIR step with coefficient 4: 6/10 previous output, 4/10 new sample. */
+	drv->flt_pos = (6 * drv->flt_pos + 4 * drv->abs_pos) / 10;
+
+	dev_dbg(ad714x->dev, "slider %d filter position:%d\n",
+		idx, drv->flt_pos);
+}
+
+static void ad714x_slider_use_com_int(struct ad714x_chip *ad714x, int idx)
+{
+	/* Pass slider @idx's stage range to ad714x_use_com_int(). */
+	ad714x_use_com_int(ad714x, ad714x->hw->slider[idx].start_stage,
+			   ad714x->hw->slider[idx].end_stage);
+}
+
+static void ad714x_slider_use_thr_int(struct ad714x_chip *ad714x, int idx)
+{
+	/* Pass slider @idx's stage range to ad714x_use_thr_int(). */
+	ad714x_use_thr_int(ad714x, ad714x->hw->slider[idx].start_stage,
+			   ad714x->hw->slider[idx].end_stage);
+}
+
+static void ad714x_slider_state_machine(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_slider_plat *hw = &ad714x->hw->slider[idx];
+	struct ad714x_slider_drv *sw = &ad714x->sw->slider[idx];
+	unsigned short h_state, c_state;
+	unsigned short mask;
+	/* One bit per stage, set for start_stage..end_stage inclusive. */
+	mask = ((1 << (hw->end_stage + 1)) - 1) - ((1 << hw->start_stage) - 1);
+
+	h_state = ad714x->h_state & mask;
+	c_state = ad714x->c_state & mask;
+	/* IDLE -(touch)-> JITTER -(data)-> ACTIVE -(release)-> IDLE */
+	switch (sw->state) {
+	case IDLE:
+		if (h_state) {
+			sw->state = JITTER;
+			/* In End of Conversion interrupt mode, the AD714X
+			 * continuously generates hardware interrupts.
+			 */
+			ad714x_slider_use_com_int(ad714x, idx);
+			dev_dbg(ad714x->dev, "slider %d touched\n", idx);
+		}
+		break;
+	/* All c_state bits set: take a first position, seed the filter. */
+	case JITTER:
+		if (c_state == mask) {
+			ad714x_slider_cal_sensor_val(ad714x, idx);
+			ad714x_slider_cal_highest_stage(ad714x, idx);
+			ad714x_slider_cal_abs_pos(ad714x, idx);
+			sw->flt_pos = sw->abs_pos;
+			sw->state = ACTIVE;
+		}
+		break;
+	/* Report filtered ABS_X while h_state is set, else release to IDLE. */
+	case ACTIVE:
+		if (c_state == mask) {
+			if (h_state) {
+				ad714x_slider_cal_sensor_val(ad714x, idx);
+				ad714x_slider_cal_highest_stage(ad714x, idx);
+				ad714x_slider_cal_abs_pos(ad714x, idx);
+				ad714x_slider_cal_flt_pos(ad714x, idx);
+
+				input_report_abs(sw->input, ABS_X, sw->flt_pos);
+				input_report_key(sw->input, BTN_TOUCH, 1);
+			} else {
+				/* When the user lifts off the sensor, configure
+				 * the AD714X back to threshold interrupt mode.
+				 */
+				ad714x_slider_use_thr_int(ad714x, idx);
+				sw->state = IDLE;
+				input_report_key(sw->input, BTN_TOUCH, 0);
+				dev_dbg(ad714x->dev, "slider %d released\n",
+					idx);
+			}
+			input_sync(sw->input);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * When the scroll wheel is activated, we compute the absolute position based
+ * on the sensor values. To calculate the position, we first determine the
+ * sensor that has the greatest response among the 8 sensors that constitutes
+ * the scrollwheel. Then we determined the 2 sensors on either sides of the
+ * sensor with the highest response and we apply weights to these sensors.
+ */
+static void ad714x_wheel_cal_highest_stage(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_wheel_plat *plat = &ad714x->hw->wheel[idx];
+	struct ad714x_wheel_drv *drv = &ad714x->sw->wheel[idx];
+
+	/* Remember the previous peak before replacing it with the new one. */
+	drv->pre_highest_stage = drv->highest_stage;
+	drv->highest_stage = ad714x_cal_highest_stage(ad714x,
+				plat->start_stage, plat->end_stage);
+	dev_dbg(ad714x->dev, "wheel %d highest_stage:%d\n",
+		idx, drv->highest_stage);
+}
+
+static void ad714x_wheel_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_wheel_plat *plat = &ad714x->hw->wheel[idx];
+	int stage;
+
+	/* Sample each stage of the wheel: CDC result first, then ambient. */
+	for (stage = plat->start_stage; stage <= plat->end_stage; stage++) {
+		unsigned short *adc = &ad714x->adc_reg[stage];
+		unsigned short *amb = &ad714x->amb_reg[stage];
+
+		ad714x->read(ad714x->dev, CDC_RESULT_S0 + stage, adc);
+		ad714x->read(ad714x->dev,
+			     STAGE0_AMBIENT + stage * PER_STAGE_REG_NUM, amb);
+
+		/* Readings at or below ambient count as no response. */
+		ad714x->sensor_val[stage] = (*adc > *amb) ? *adc - *amb : 0;
+	}
+}
+
+/*
+ * When the scroll wheel is activated, we compute the absolute position based
+ * on the sensor values. To calculate the position, we first determine the
+ * sensor that has the greatest response among the 8 sensors that constitutes
+ * the scrollwheel. Then we determined the 2 sensors on either sides of the
+ * sensor with the highest response and we apply weights to these sensors. The
+ * result of this computation gives us the mean value which is defined by the
+ * following formula:
+ * For i= second_before_highest_stage to i= second_after_highest_stage
+ *         v += Sensor response(i)*WEIGHT*(i+3)
+ *         w += Sensor response(i)
+ * Mean_Value=v/w
+ * pos_on_scrollwheel = (Mean_Value - position_offset) / position_ratio
+ */
+
+#define WEIGHT_FACTOR 30
+/* This constant prevents the "PositionOffset" from reaching a big value */
+#define OFFSET_POSITION_CLAMP	120
+static void ad714x_wheel_cal_abs_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
+	struct ad714x_wheel_drv *sw = &ad714x->sw->wheel[idx];
+	int stage_num = hw->end_stage - hw->start_stage + 1;
+	int second_before, first_before, highest, first_after, second_after;
+	int a_param, b_param;
+
+	/* Calculate Mean value */
+	second_before = (sw->highest_stage + stage_num - 2) % stage_num;
+	first_before = (sw->highest_stage + stage_num - 1) % stage_num;
+	highest = sw->highest_stage;
+	first_after = (sw->highest_stage + stage_num + 1) % stage_num;
+	second_after = (sw->highest_stage + stage_num + 2) % stage_num;
+
+	if (((sw->highest_stage - hw->start_stage) > 1) &&
+	    ((hw->end_stage - sw->highest_stage) > 1)) {
+		/* each stage i is weighted by (i + 3), as the formula states */
+		a_param = ad714x->sensor_val[second_before] *
+			(second_before - hw->start_stage + 3) +
+			ad714x->sensor_val[first_before] *
+			(first_before - hw->start_stage + 3) +
+			ad714x->sensor_val[highest] *
+			(highest - hw->start_stage + 3) +
+			ad714x->sensor_val[first_after] *
+			(first_after - hw->start_stage + 3) +
+			ad714x->sensor_val[second_after] *
+			(second_after - hw->start_stage + 3);
+	} else {
+		a_param = ad714x->sensor_val[second_before] *
+			(second_before - hw->start_stage + 1) +
+			ad714x->sensor_val[first_before] *
+			(second_before - hw->start_stage + 2) +
+			ad714x->sensor_val[highest] *
+			(second_before - hw->start_stage + 3) +
+			ad714x->sensor_val[first_after] *
+			(first_after - hw->start_stage + 4) +
+			ad714x->sensor_val[second_after] *
+			(second_after - hw->start_stage + 5);
+	}
+	a_param *= WEIGHT_FACTOR;
+
+	b_param = ad714x->sensor_val[second_before] +
+		ad714x->sensor_val[first_before] +
+		ad714x->sensor_val[highest] +
+		ad714x->sensor_val[first_after] +
+		ad714x->sensor_val[second_after];
+	if (!b_param)	/* no stage responded: keep the previous position */
+		return;
+
+	sw->pre_mean_value = sw->mean_value;
+	sw->mean_value = a_param / b_param;
+
+	/* Calculate the offset */
+	if ((sw->pre_highest_stage == hw->end_stage) &&
+			(sw->highest_stage == hw->start_stage))
+		sw->pos_offset = sw->mean_value;
+	else if ((sw->pre_highest_stage == hw->start_stage) &&
+			(sw->highest_stage == hw->end_stage))
+		sw->pos_offset = sw->pre_mean_value;
+
+	if (sw->pos_offset > OFFSET_POSITION_CLAMP)
+		sw->pos_offset = OFFSET_POSITION_CLAMP;
+
+	/* Calculate the mean value without the offset */
+	sw->pre_mean_value_no_offset = sw->mean_value_no_offset;
+	sw->mean_value_no_offset = sw->mean_value - sw->pos_offset;
+	if (sw->mean_value_no_offset < 0)
+		sw->mean_value_no_offset = 0;
+
+	/* Calculate ratio to scale down to NUMBER_OF_WANTED_POSITIONS */
+	if ((sw->pre_highest_stage == hw->end_stage) &&
+			(sw->highest_stage == hw->start_stage))
+		sw->pos_ratio = (sw->pre_mean_value_no_offset * 100) /
+			hw->max_coord;
+	else if ((sw->pre_highest_stage == hw->start_stage) &&
+			(sw->highest_stage == hw->end_stage))
+		sw->pos_ratio = (sw->mean_value_no_offset * 100) /
+			hw->max_coord;
+	if (sw->pos_ratio)	/* 0 means no usable ratio yet: keep abs_pos */
+		sw->abs_pos = (sw->mean_value_no_offset * 100) / sw->pos_ratio;
+	if (sw->abs_pos > hw->max_coord)
+		sw->abs_pos = hw->max_coord;
+}
+
+static void ad714x_wheel_cal_flt_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_wheel_plat *plat = &ad714x->hw->wheel[idx];
+	struct ad714x_wheel_drv *drv = &ad714x->sw->wheel[idx];
+	int wrapped = (drv->pre_highest_stage == plat->end_stage &&
+		       drv->highest_stage == plat->start_stage) ||
+		      (drv->pre_highest_stage == plat->start_stage &&
+		       drv->highest_stage == plat->end_stage);
+
+	/* A first<->last stage jump restarts the filter from abs_pos. */
+	drv->flt_pos = wrapped ? drv->abs_pos :
+		(drv->flt_pos * 30 + drv->abs_pos * 71) / 100;
+	if (drv->flt_pos > plat->max_coord)
+		drv->flt_pos = plat->max_coord;
+}
+
+static void ad714x_wheel_use_com_int(struct ad714x_chip *ad714x, int idx)
+{
+	/* Pass wheel @idx's stage range to ad714x_use_com_int(). */
+	ad714x_use_com_int(ad714x, ad714x->hw->wheel[idx].start_stage,
+			   ad714x->hw->wheel[idx].end_stage);
+}
+
+static void ad714x_wheel_use_thr_int(struct ad714x_chip *ad714x, int idx)
+{
+	/* Pass wheel @idx's stage range to ad714x_use_thr_int(). */
+	ad714x_use_thr_int(ad714x, ad714x->hw->wheel[idx].start_stage,
+			   ad714x->hw->wheel[idx].end_stage);
+}
+
+static void ad714x_wheel_state_machine(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_wheel_plat *hw = &ad714x->hw->wheel[idx];
+	struct ad714x_wheel_drv *sw = &ad714x->sw->wheel[idx];
+	unsigned short h_state, c_state;
+	unsigned short mask;
+	/* One bit per stage, set for start_stage..end_stage inclusive. */
+	mask = ((1 << (hw->end_stage + 1)) - 1) - ((1 << hw->start_stage) - 1);
+
+	h_state = ad714x->h_state & mask;
+	c_state = ad714x->c_state & mask;
+	/* IDLE -(touch)-> JITTER -(data)-> ACTIVE -(release)-> IDLE */
+	switch (sw->state) {
+	case IDLE:
+		if (h_state) {
+			sw->state = JITTER;
+			/* In End of Conversion interrupt mode, the AD714X
+			 * continuously generates hardware interrupts.
+			 */
+			ad714x_wheel_use_com_int(ad714x, idx);
+			dev_dbg(ad714x->dev, "wheel %d touched\n", idx);
+		}
+		break;
+	/* All c_state bits set: take a first position, seed the filter. */
+	case JITTER:
+		if (c_state == mask)	{
+			ad714x_wheel_cal_sensor_val(ad714x, idx);
+			ad714x_wheel_cal_highest_stage(ad714x, idx);
+			ad714x_wheel_cal_abs_pos(ad714x, idx);
+			sw->flt_pos = sw->abs_pos;
+			sw->state = ACTIVE;
+		}
+		break;
+
+	case ACTIVE:
+		if (c_state == mask) {
+			if (h_state) {
+				ad714x_wheel_cal_sensor_val(ad714x, idx);
+				ad714x_wheel_cal_highest_stage(ad714x, idx);
+				ad714x_wheel_cal_abs_pos(ad714x, idx);
+				ad714x_wheel_cal_flt_pos(ad714x, idx);
+				/* report the filtered value just computed */
+				input_report_abs(sw->input, ABS_WHEEL,
+					sw->flt_pos);
+				input_report_key(sw->input, BTN_TOUCH, 1);
+			} else {
+				/* When the user lifts off the sensor, configure
+				 * the AD714X back to threshold interrupt mode.
+				 */
+				ad714x_wheel_use_thr_int(ad714x, idx);
+				sw->state = IDLE;
+				input_report_key(sw->input, BTN_TOUCH, 0);
+
+				dev_dbg(ad714x->dev, "wheel %d released\n",
+					idx);
+			}
+			input_sync(sw->input);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+static void touchpad_cal_sensor_val(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *plat = &ad714x->hw->touchpad[idx];
+	int stage;
+
+	/* Only the X stage range (x_start..x_end inclusive) is sampled here. */
+	for (stage = plat->x_start_stage; stage <= plat->x_end_stage; stage++) {
+		unsigned short *adc = &ad714x->adc_reg[stage];
+		unsigned short *amb = &ad714x->amb_reg[stage];
+
+		ad714x->read(ad714x->dev, CDC_RESULT_S0 + stage, adc);
+		ad714x->read(ad714x->dev,
+			     STAGE0_AMBIENT + stage * PER_STAGE_REG_NUM, amb);
+
+		/* Readings at or below ambient count as no response. */
+		ad714x->sensor_val[stage] = (*adc > *amb) ? *adc - *amb : 0;
+	}
+}
+
+static void touchpad_cal_highest_stage(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *plat = &ad714x->hw->touchpad[idx];
+	struct ad714x_touchpad_drv *drv = &ad714x->sw->touchpad[idx];
+
+	/* Each axis is evaluated over its own stage range, X first. */
+	drv->x_highest_stage = ad714x_cal_highest_stage(ad714x,
+				plat->x_start_stage, plat->x_end_stage);
+	drv->y_highest_stage = ad714x_cal_highest_stage(ad714x,
+				plat->y_start_stage, plat->y_end_stage);
+
+	dev_dbg(ad714x->dev, "touchpad %d x_highest_stage:%d, y_highest_stage:%d\n",
+		idx, drv->x_highest_stage, drv->y_highest_stage);
+}
+
+/*
+ * If 2 fingers are touching the sensor then 2 peaks can be observed in the
+ * distribution.
+ * The algorithm cannot yet compute absolute coordinates for multi-touch,
+ * so a second peak is only detected here, not resolved.
+ */
+static int touchpad_check_second_peak(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
+	struct ad714x_touchpad_drv *sw = &ad714x->sw->touchpad[idx];
+	int i;
+	/* X, start..peak: is stage i above stage i + 1 by over a tenth of it? */
+	for (i = hw->x_start_stage; i < sw->x_highest_stage; i++) {
+		if ((ad714x->sensor_val[i] - ad714x->sensor_val[i + 1])
+			> (ad714x->sensor_val[i + 1] / 10))
+			return 1;
+	}
+	/* X, peak..end: is stage i + 1 above stage i by over a tenth of it? */
+	for (i = sw->x_highest_stage; i < hw->x_end_stage; i++) {
+		if ((ad714x->sensor_val[i + 1] - ad714x->sensor_val[i])
+			> (ad714x->sensor_val[i] / 10))
+			return 1;
+	}
+	/* Y, start..peak: same test as on the X axis. */
+	for (i = hw->y_start_stage; i < sw->y_highest_stage; i++) {
+		if ((ad714x->sensor_val[i] - ad714x->sensor_val[i + 1])
+			> (ad714x->sensor_val[i + 1] / 10))
+			return 1;
+	}
+	/* Y, peak..end: same test as on the X axis. */
+	for (i = sw->y_highest_stage; i < hw->y_end_stage; i++) {
+		if ((ad714x->sensor_val[i + 1] - ad714x->sensor_val[i])
+			> (ad714x->sensor_val[i] / 10))
+			return 1;
+	}
+
+	return 0;	/* no stage pair tripped the test */
+}
+
+/*
+ * If only one finger is used to activate the touch pad then only 1 peak will be
+ * registered in the distribution. This peak and the 2 adjacent sensors will be
+ * used in the calculation of the absolute position. This will prevent hand
+ * shadows to affect the absolute position calculation.
+ */
+static void touchpad_cal_abs_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *plat = &ad714x->hw->touchpad[idx];
+	struct ad714x_touchpad_drv *drv = &ad714x->sw->touchpad[idx];
+
+	/* X first, then Y; each axis uses its own range, peak and maximum. */
+	drv->x_abs_pos = ad714x_cal_abs_pos(ad714x, plat->x_start_stage,
+		plat->x_end_stage, drv->x_highest_stage, plat->x_max_coord);
+	drv->y_abs_pos = ad714x_cal_abs_pos(ad714x, plat->y_start_stage,
+		plat->y_end_stage, drv->y_highest_stage, plat->y_max_coord);
+	dev_dbg(ad714x->dev, "touchpad %d absolute position:(%d, %d)\n",
+		idx, drv->x_abs_pos, drv->y_abs_pos);
+}
+
+static void touchpad_cal_flt_pos(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_drv *drv = &ad714x->sw->touchpad[idx];
+
+	/* Both axes use the same blend: 6/10 of the previous filtered value
+	 * plus 4/10 of the new absolute position. */
+	drv->x_flt_pos = (6 * drv->x_flt_pos + 4 * drv->x_abs_pos) / 10;
+	drv->y_flt_pos = (6 * drv->y_flt_pos + 4 * drv->y_abs_pos) / 10;
+
+	dev_dbg(ad714x->dev, "touchpad %d filter position:(%d, %d)\n",
+		idx, drv->x_flt_pos, drv->y_flt_pos);
+}
+
+/*
+ * To prevent distortion from showing in the absolute position, it is
+ * necessary to detect the end points. When endpoints are detected, the
+ * driver stops updating the status variables with absolute positions.
+ * End points are detected on the 4 edges of the touchpad sensor. The
+ * method to detect them is the same for all 4.
+ * To detect the end points, the firmware computes the difference in
+ * percent between the sensor on the edge and the adjacent one. The
+ * difference is calculated in percent in order to make the end point
+ * detection independent of the pressure.
+ */
+
+#define LEFT_END_POINT_DETECTION_LEVEL                  550
+#define RIGHT_END_POINT_DETECTION_LEVEL                 750
+#define LEFT_RIGHT_END_POINT_DEAVTIVALION_LEVEL         850
+#define TOP_END_POINT_DETECTION_LEVEL                   550
+#define BOTTOM_END_POINT_DETECTION_LEVEL                950
+#define TOP_BOTTOM_END_POINT_DEAVTIVALION_LEVEL         700
+static int touchpad_check_endpoint(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
+	struct ad714x_touchpad_drv *sw  = &ad714x->sw->touchpad[idx];
+	int percent_sensor_diff;
+	int next;	/* response of the stage adjacent to the edge stage */
+
+	/* A neighbour response of 0 (sensor_val is clamped at ambient) would
+	 * divide by zero; use 1 instead so the percentage stays defined.
+	 */
+	/* left endpoint detect */
+	next = ad714x->sensor_val[hw->x_start_stage + 1];
+	percent_sensor_diff = (ad714x->sensor_val[hw->x_start_stage] - next) *
+			100 / (next ? next : 1);
+	if (!sw->left_ep) {
+		if (percent_sensor_diff >= LEFT_END_POINT_DETECTION_LEVEL)  {
+			sw->left_ep = 1;
+			sw->left_ep_val = next;
+		}
+	} else {
+		if ((percent_sensor_diff < LEFT_END_POINT_DETECTION_LEVEL) &&
+		    (next >
+		     LEFT_RIGHT_END_POINT_DEAVTIVALION_LEVEL + sw->left_ep_val))
+			sw->left_ep = 0;
+	}
+
+	/* right endpoint detect */
+	next = ad714x->sensor_val[hw->x_end_stage - 1];
+	percent_sensor_diff = (ad714x->sensor_val[hw->x_end_stage] - next) *
+			100 / (next ? next : 1);
+	if (!sw->right_ep) {
+		if (percent_sensor_diff >= RIGHT_END_POINT_DETECTION_LEVEL)  {
+			sw->right_ep = 1;
+			sw->right_ep_val = next;
+		}
+	} else {
+		if ((percent_sensor_diff < RIGHT_END_POINT_DETECTION_LEVEL) &&
+		    (next >
+		     LEFT_RIGHT_END_POINT_DEAVTIVALION_LEVEL + sw->right_ep_val))
+			sw->right_ep = 0;
+	}
+
+	/* top endpoint detect */
+	next = ad714x->sensor_val[hw->y_start_stage + 1];
+	percent_sensor_diff = (ad714x->sensor_val[hw->y_start_stage] - next) *
+			100 / (next ? next : 1);
+	if (!sw->top_ep) {
+		if (percent_sensor_diff >= TOP_END_POINT_DETECTION_LEVEL)  {
+			sw->top_ep = 1;
+			sw->top_ep_val = next;
+		}
+	} else {
+		if ((percent_sensor_diff < TOP_END_POINT_DETECTION_LEVEL) &&
+		    (next >
+		     TOP_BOTTOM_END_POINT_DEAVTIVALION_LEVEL + sw->top_ep_val))
+			sw->top_ep = 0;
+	}
+
+	/* bottom endpoint detect */
+	next = ad714x->sensor_val[hw->y_end_stage - 1];
+	percent_sensor_diff = (ad714x->sensor_val[hw->y_end_stage] - next) *
+			100 / (next ? next : 1);
+	if (!sw->bottom_ep) {
+		if (percent_sensor_diff >= BOTTOM_END_POINT_DETECTION_LEVEL)  {
+			sw->bottom_ep = 1;
+			sw->bottom_ep_val = next;
+		}
+	} else {
+		if ((percent_sensor_diff < BOTTOM_END_POINT_DETECTION_LEVEL) &&
+		    (next >
+		     TOP_BOTTOM_END_POINT_DEAVTIVALION_LEVEL + sw->bottom_ep_val))
+			sw->bottom_ep = 0;
+	}
+
+	return sw->left_ep || sw->right_ep || sw->top_ep || sw->bottom_ep;
+}
+
+static void touchpad_use_com_int(struct ad714x_chip *ad714x, int idx)
+{
+	/* Only the X stage range is passed to ad714x_use_com_int(). */
+	ad714x_use_com_int(ad714x, ad714x->hw->touchpad[idx].x_start_stage,
+			   ad714x->hw->touchpad[idx].x_end_stage);
+}
+
+static void touchpad_use_thr_int(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *plat = &ad714x->hw->touchpad[idx];
+	/* Both axes are handed to ad714x_use_thr_int(), X first. */
+	ad714x_use_thr_int(ad714x, plat->x_start_stage, plat->x_end_stage);
+	ad714x_use_thr_int(ad714x, plat->y_start_stage, plat->y_end_stage);
+}
+
+static void ad714x_touchpad_state_machine(struct ad714x_chip *ad714x, int idx)
+{
+	struct ad714x_touchpad_plat *hw = &ad714x->hw->touchpad[idx];
+	struct ad714x_touchpad_drv *sw = &ad714x->sw->touchpad[idx];
+	unsigned short h_state, c_state;
+	unsigned short mask;
+	/* Sum of the X and Y stage bit ranges (assumes they do not overlap). */
+	mask = (((1 << (hw->x_end_stage + 1)) - 1) -
+		((1 << hw->x_start_stage) - 1)) +
+		(((1 << (hw->y_end_stage + 1)) - 1) -
+		((1 << hw->y_start_stage) - 1));
+
+	h_state = ad714x->h_state & mask;
+	c_state = ad714x->c_state & mask;
+	/* IDLE -(touch)-> JITTER -(usable data)-> ACTIVE -(release)-> IDLE */
+	switch (sw->state) {
+	case IDLE:
+		if (h_state) {
+			sw->state = JITTER;
+			/* In End of Conversion interrupt mode, the AD714X
+			 * continuously generates hardware interrupts.
+			 */
+			touchpad_use_com_int(ad714x, idx);
+			dev_dbg(ad714x->dev, "touchpad %d touched\n", idx);
+		}
+		break;
+
+	case JITTER:
+		if (c_state == mask) {
+			touchpad_cal_sensor_val(ad714x, idx);
+			touchpad_cal_highest_stage(ad714x, idx);
+			if ((!touchpad_check_second_peak(ad714x, idx)) &&
+				(!touchpad_check_endpoint(ad714x, idx))) {
+				dev_dbg(ad714x->dev,
+					"touchpad%d, single touch, no endpoint\n",
+					idx);
+				touchpad_cal_abs_pos(ad714x, idx);
+				sw->x_flt_pos = sw->x_abs_pos;
+				sw->y_flt_pos = sw->y_abs_pos;
+				sw->state = ACTIVE;
+			}
+		}
+		break;
+
+	case ACTIVE:
+		if (c_state == mask) {
+			if (h_state) {
+				touchpad_cal_sensor_val(ad714x, idx);
+				touchpad_cal_highest_stage(ad714x, idx);
+				if ((!touchpad_check_second_peak(ad714x, idx))
+				  && (!touchpad_check_endpoint(ad714x, idx))) {
+					touchpad_cal_abs_pos(ad714x, idx);
+					touchpad_cal_flt_pos(ad714x, idx);
+					input_report_abs(sw->input, ABS_X,
+						sw->x_flt_pos);
+					input_report_abs(sw->input, ABS_Y,
+						sw->y_flt_pos);
+					input_report_key(sw->input, BTN_TOUCH,
+						1);
+				}
+			} else {
+				/* When the user lifts off the sensor, configure
+				 * the AD714X back to threshold interrupt mode.
+				 */
+				touchpad_use_thr_int(ad714x, idx);
+				sw->state = IDLE;
+				input_report_key(sw->input, BTN_TOUCH, 0);
+				dev_dbg(ad714x->dev, "touchpad %d released\n",
+					idx);
+			}
+			input_sync(sw->input);
+		}
+		break;
+
+	default:
+		break;
+	}
+}
+
+static int ad714x_hw_detect(struct ad714x_chip *ad714x)
+{
+	unsigned short data = 0; /* defined even if read() stores nothing */
+
+	ad714x->read(ad714x->dev, AD714X_PARTID_REG, &data);
+	switch (data & 0xFFF0) {	/* low nibble is the revision */
+	case AD7147_PARTID:
+		ad714x->product = 0x7147;
+		ad714x->version = data & 0xF;
+		dev_info(ad714x->dev, "found AD7147 captouch, rev:%d\n",
+				ad714x->version);
+		return 0;
+
+	case AD7142_PARTID:
+		ad714x->product = 0x7142;
+		ad714x->version = data & 0xF;
+		dev_info(ad714x->dev, "found AD7142 captouch, rev:%d\n",
+				ad714x->version);
+		return 0;
+
+	default:	/* unknown part ID: refuse to bind */
+		dev_err(ad714x->dev,
+			"fail to detect AD714X captouch, read ID is %04x\n",
+			data);
+		return -ENODEV;
+	}
+}
+
+static void ad714x_hw_init(struct ad714x_chip *ad714x)
+{
+	struct ad714x_platform_data *plat = ad714x->hw;
+	unsigned short base, scratch;
+	int stage, reg;
+
+	/* Per-stage configuration banks, written in register order. */
+	for (stage = 0; stage < STAGE_NUM; stage++) {
+		base = AD714X_STAGECFG_REG + stage * STAGE_CFGREG_NUM;
+		for (reg = 0; reg < STAGE_CFGREG_NUM; reg++)
+			ad714x->write(ad714x->dev, base + reg,
+				      plat->stage_cfg_reg[stage][reg]);
+	}
+
+	/* System configuration bank: write it all, then read it all back. */
+	for (reg = 0; reg < SYS_CFGREG_NUM; reg++)
+		ad714x->write(ad714x->dev, AD714X_SYSCFG_REG + reg,
+			      plat->sys_cfg_reg[reg]);
+	for (reg = 0; reg < SYS_CFGREG_NUM; reg++)
+		ad714x->read(ad714x->dev, AD714X_SYSCFG_REG + reg, &scratch);
+
+	/* NOTE(review): 0xFFF presumably enables calibration on all stages. */
+	ad714x->write(ad714x->dev, AD714X_STG_CAL_EN_REG, 0xFFF);
+
+	/* Reading the three status registers clears all interrupts. */
+	ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &scratch);
+	ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &scratch);
+	ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &scratch);
+}
+
+static irqreturn_t ad714x_interrupt_thread(int irq, void *data)
+{
+	struct ad714x_chip *chip = data;
+	int n;
+
+	mutex_lock(&chip->mutex);
+	/* Snapshot the low, high and COM interrupt status registers. */
+	chip->read(chip->dev, STG_LOW_INT_STA_REG, &chip->l_state);
+	chip->read(chip->dev, STG_HIGH_INT_STA_REG, &chip->h_state);
+	chip->read(chip->dev, STG_COM_INT_STA_REG, &chip->c_state);
+
+	/* Feed the snapshot to every configured widget's state machine. */
+	for (n = 0; n < chip->hw->button_num; n++)
+		ad714x_button_state_machine(chip, n);
+	for (n = 0; n < chip->hw->slider_num; n++)
+		ad714x_slider_state_machine(chip, n);
+	for (n = 0; n < chip->hw->wheel_num; n++)
+		ad714x_wheel_state_machine(chip, n);
+	for (n = 0; n < chip->hw->touchpad_num; n++)
+		ad714x_touchpad_state_machine(chip, n);
+	mutex_unlock(&chip->mutex);
+
+	return IRQ_HANDLED;
+}
+
+#define MAX_DEVICE_NUM 8
+struct ad714x_chip *ad714x_probe(struct device *dev, u16 bus_type, int irq,
+				 ad714x_read_t read, ad714x_write_t write)
+{
+	int i, alloc_idx, error;
+	struct input_dev *input[MAX_DEVICE_NUM];
+	struct ad714x_platform_data *plat_data = dev->platform_data;
+	struct ad714x_chip *ad714x;
+	void *drv_mem;
+	struct ad714x_button_drv *bt_drv;
+	struct ad714x_slider_drv *sd_drv;
+	struct ad714x_wheel_drv *wl_drv;
+	struct ad714x_touchpad_drv *tp_drv;
+
+	if (irq <= 0) {
+		dev_err(dev, "IRQ not configured!\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	if (dev->platform_data == NULL) {
+		dev_err(dev, "platform data for ad714x doesn't exist\n");
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	/* input[] is a fixed on-stack array; all buttons share one entry */
+	if (plat_data->slider_num + plat_data->wheel_num +
+	    plat_data->touchpad_num + !!plat_data->button_num >
+	    MAX_DEVICE_NUM) {
+		error = -EINVAL;
+		goto err_out;
+	}
+
+	ad714x = kzalloc(sizeof(*ad714x) + sizeof(*ad714x->sw) +
+			 sizeof(*sd_drv) * plat_data->slider_num +
+			 sizeof(*wl_drv) * plat_data->wheel_num +
+			 sizeof(*tp_drv) * plat_data->touchpad_num +
+			 sizeof(*bt_drv) * plat_data->button_num, GFP_KERNEL);
+	if (!ad714x) {
+		error = -ENOMEM;
+		goto err_out;
+	}
+
+	ad714x->hw = plat_data;
+
+	drv_mem = ad714x + 1;	/* driver data follows the chip struct */
+	ad714x->sw = drv_mem;
+	drv_mem += sizeof(*ad714x->sw);
+	ad714x->sw->slider = sd_drv = drv_mem;
+	drv_mem += sizeof(*sd_drv) * ad714x->hw->slider_num;
+	ad714x->sw->wheel = wl_drv = drv_mem;
+	drv_mem += sizeof(*wl_drv) * ad714x->hw->wheel_num;
+	ad714x->sw->touchpad = tp_drv = drv_mem;
+	drv_mem += sizeof(*tp_drv) * ad714x->hw->touchpad_num;
+	ad714x->sw->button = bt_drv = drv_mem;
+
+	ad714x->read = read;
+	ad714x->write = write;
+	ad714x->irq = irq;
+	ad714x->dev = dev;
+
+	error = ad714x_hw_detect(ad714x);
+	if (error)
+		goto err_free_mem;
+
+	/* initialize and request sw/hw resources */
+	ad714x_hw_init(ad714x);
+	mutex_init(&ad714x->mutex);
+
+	/* Allocate and register AD714X input devices */
+	alloc_idx = 0;
+
+	/* a slider uses one input_dev instance */
+	if (ad714x->hw->slider_num > 0) {
+		struct ad714x_slider_plat *sd_plat = ad714x->hw->slider;
+
+		for (i = 0; i < ad714x->hw->slider_num; i++) {
+			sd_drv[i].input = input[alloc_idx] = input_allocate_device();
+			if (!input[alloc_idx]) {
+				error = -ENOMEM;
+				goto err_free_dev;
+			}
+
+			__set_bit(EV_ABS, input[alloc_idx]->evbit);
+			__set_bit(EV_KEY, input[alloc_idx]->evbit);
+			__set_bit(ABS_X, input[alloc_idx]->absbit);
+			__set_bit(BTN_TOUCH, input[alloc_idx]->keybit);
+			input_set_abs_params(input[alloc_idx],
+				ABS_X, 0, sd_plat[i].max_coord, 0, 0);
+
+			input[alloc_idx]->id.bustype = bus_type;
+			input[alloc_idx]->id.product = ad714x->product;
+			input[alloc_idx]->id.version = ad714x->version;
+
+			error = input_register_device(input[alloc_idx]);
+			if (error)
+				goto err_free_dev;
+
+			alloc_idx++;
+		}
+	}
+
+	/* a wheel uses one input_dev instance */
+	if (ad714x->hw->wheel_num > 0) {
+		struct ad714x_wheel_plat *wl_plat = ad714x->hw->wheel;
+
+		for (i = 0; i < ad714x->hw->wheel_num; i++) {
+			wl_drv[i].input = input[alloc_idx] = input_allocate_device();
+			if (!input[alloc_idx]) {
+				error = -ENOMEM;
+				goto err_free_dev;
+			}
+
+			__set_bit(EV_KEY, input[alloc_idx]->evbit);
+			__set_bit(EV_ABS, input[alloc_idx]->evbit);
+			__set_bit(ABS_WHEEL, input[alloc_idx]->absbit);
+			__set_bit(BTN_TOUCH, input[alloc_idx]->keybit);
+			input_set_abs_params(input[alloc_idx],
+				ABS_WHEEL, 0, wl_plat[i].max_coord, 0, 0);
+
+			input[alloc_idx]->id.bustype = bus_type;
+			input[alloc_idx]->id.product = ad714x->product;
+			input[alloc_idx]->id.version = ad714x->version;
+
+			error = input_register_device(input[alloc_idx]);
+			if (error)
+				goto err_free_dev;
+
+			alloc_idx++;
+		}
+	}
+
+	/* a touchpad uses one input_dev instance */
+	if (ad714x->hw->touchpad_num > 0) {
+		struct ad714x_touchpad_plat *tp_plat = ad714x->hw->touchpad;
+
+		for (i = 0; i < ad714x->hw->touchpad_num; i++) {
+			tp_drv[i].input = input[alloc_idx] = input_allocate_device();
+			if (!input[alloc_idx]) {
+				error = -ENOMEM;
+				goto err_free_dev;
+			}
+
+			__set_bit(EV_ABS, input[alloc_idx]->evbit);
+			__set_bit(EV_KEY, input[alloc_idx]->evbit);
+			__set_bit(ABS_X, input[alloc_idx]->absbit);
+			__set_bit(ABS_Y, input[alloc_idx]->absbit);
+			__set_bit(BTN_TOUCH, input[alloc_idx]->keybit);
+			input_set_abs_params(input[alloc_idx],
+				ABS_X, 0, tp_plat[i].x_max_coord, 0, 0);
+			input_set_abs_params(input[alloc_idx],
+				ABS_Y, 0, tp_plat[i].y_max_coord, 0, 0);
+
+			input[alloc_idx]->id.bustype = bus_type;
+			input[alloc_idx]->id.product = ad714x->product;
+			input[alloc_idx]->id.version = ad714x->version;
+
+			error = input_register_device(input[alloc_idx]);
+			if (error)
+				goto err_free_dev;
+
+			alloc_idx++;
+		}
+	}
+
+	/* all buttons use one input node */
+	if (ad714x->hw->button_num > 0) {
+		struct ad714x_button_plat *bt_plat = ad714x->hw->button;
+
+		input[alloc_idx] = input_allocate_device();
+		if (!input[alloc_idx]) {
+			error = -ENOMEM;
+			goto err_free_dev;
+		}
+
+		__set_bit(EV_KEY, input[alloc_idx]->evbit);
+		for (i = 0; i < ad714x->hw->button_num; i++) {
+			bt_drv[i].input = input[alloc_idx];
+			__set_bit(bt_plat[i].keycode, input[alloc_idx]->keybit);
+		}
+
+		input[alloc_idx]->id.bustype = bus_type;
+		input[alloc_idx]->id.product = ad714x->product;
+		input[alloc_idx]->id.version = ad714x->version;
+
+		error = input_register_device(input[alloc_idx]);
+		if (error)
+			goto err_free_dev;
+
+		alloc_idx++;
+	}
+
+	error = request_threaded_irq(ad714x->irq, NULL, ad714x_interrupt_thread,
+			IRQF_TRIGGER_FALLING, "ad714x_captouch", ad714x);
+	if (error) {
+		dev_err(dev, "can't allocate irq %d\n", ad714x->irq);
+		goto err_unreg_dev;
+	}
+
+	return ad714x;
+
+ err_free_dev:
+	dev_err(dev, "failed to setup AD714x input device %i\n", alloc_idx);
+	input_free_device(input[alloc_idx]);
+ err_unreg_dev:
+	while (--alloc_idx >= 0)
+		input_unregister_device(input[alloc_idx]);
+ err_free_mem:
+	kfree(ad714x);
+ err_out:
+	return ERR_PTR(error);
+}
+EXPORT_SYMBOL(ad714x_probe);
+
+void ad714x_remove(struct ad714x_chip *ad714x)
+{
+	struct ad714x_platform_data *plat = ad714x->hw;
+	struct ad714x_driver_data *drv = ad714x->sw;
+	int n;
+
+	/* Release the interrupt before the input devices go away. */
+	free_irq(ad714x->irq, ad714x);
+
+	/* Sliders, wheels and touchpads each own one input device. */
+	for (n = 0; n < plat->slider_num; n++)
+		input_unregister_device(drv->slider[n].input);
+	for (n = 0; n < plat->wheel_num; n++)
+		input_unregister_device(drv->wheel[n].input);
+	for (n = 0; n < plat->touchpad_num; n++)
+		input_unregister_device(drv->touchpad[n].input);
+
+	/* All buttons share the device stored in button[0]. */
+	if (plat->button_num)
+		input_unregister_device(drv->button[0].input);
+
+	/* The chip and its driver data were obtained with one allocation. */
+	kfree(ad714x);
+}
+EXPORT_SYMBOL(ad714x_remove);
+
+#ifdef CONFIG_PM
+int ad714x_disable(struct ad714x_chip *ad714x)
+{
+	unsigned short pwr_ctrl;
+
+	dev_dbg(ad714x->dev, "%s enter\n", __func__);
+
+	mutex_lock(&ad714x->mutex);
+	/* Platform power-control value with the two lowest bits forced on. */
+	pwr_ctrl = ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL];
+	pwr_ctrl |= 0x3;
+	ad714x->write(ad714x->dev, AD714X_PWR_CTRL, pwr_ctrl);
+	mutex_unlock(&ad714x->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(ad714x_disable);
+
+int ad714x_enable(struct ad714x_chip *ad714x)
+{
+	unsigned short scratch;
+
+	dev_dbg(ad714x->dev, "%s enter\n", __func__);
+
+	mutex_lock(&ad714x->mutex);
+
+	/* Restore the platform's power-control value (non-shutdown mode). */
+	ad714x->write(ad714x->dev, AD714X_PWR_CTRL,
+		      ad714x->hw->sys_cfg_reg[AD714X_PWR_CTRL]);
+
+	/*
+	 * Read all three status registers so the interrupt output is not
+	 * left low after resume; otherwise the falling-edge irq would get
+	 * no chance to trigger again.
+	 */
+	ad714x->read(ad714x->dev, STG_LOW_INT_STA_REG, &scratch);
+	ad714x->read(ad714x->dev, STG_HIGH_INT_STA_REG, &scratch);
+	ad714x->read(ad714x->dev, STG_COM_INT_STA_REG, &scratch);
+
+	mutex_unlock(&ad714x->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(ad714x_enable);
+#endif
+
+MODULE_DESCRIPTION("Analog Devices AD714X Capacitance Touch Sensor Driver");
+MODULE_AUTHOR("Barry Song <21cnbao@gmail.com>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/ad714x.h b/drivers/input/misc/ad714x.h
new file mode 100644
index 000000000000..45c54fb13f07
--- /dev/null
+++ b/drivers/input/misc/ad714x.h
@@ -0,0 +1,26 @@
+/*
+ * AD714X CapTouch Programmable Controller driver (bus interfaces)
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef _AD714X_H_
+#define _AD714X_H_
+
+#include <linux/types.h>
+
+struct device;
+struct ad714x_chip;
+
+typedef int (*ad714x_read_t)(struct device *, unsigned short, unsigned short *);
+typedef int (*ad714x_write_t)(struct device *, unsigned short, unsigned short);
+
+int ad714x_disable(struct ad714x_chip *ad714x);
+int ad714x_enable(struct ad714x_chip *ad714x);
+struct ad714x_chip *ad714x_probe(struct device *dev, u16 bus_type, int irq,
+				 ad714x_read_t read, ad714x_write_t write);
+void ad714x_remove(struct ad714x_chip *ad714x);
+
+#endif
diff --git a/include/linux/input.h b/include/linux/input.h
index 7ed2251b33f1..83524e4f3290 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -806,6 +806,7 @@ struct input_absinfo {
 #define BUS_HOST		0x19
 #define BUS_GSC			0x1A
 #define BUS_ATARI		0x1B
+#define BUS_SPI			0x1C
 
 /*
  * MT_TOOL types
diff --git a/include/linux/input/ad714x.h b/include/linux/input/ad714x.h
new file mode 100644
index 000000000000..0cbe5e81482e
--- /dev/null
+++ b/include/linux/input/ad714x.h
@@ -0,0 +1,63 @@
+/*
+ * include/linux/input/ad714x.h
+ *
+ * AD714x is very flexible, it can be used as buttons, scrollwheel,
+ * slider, touchpad at the same time. That depends on the boards.
+ * The platform_data for the device's "struct device" holds this
+ * information.
+ *
+ * Copyright 2009 Analog Devices Inc.
+ *
+ * Licensed under the GPL-2 or later.
+ */
+
+#ifndef __LINUX_INPUT_AD714X_H__
+#define __LINUX_INPUT_AD714X_H__
+
+#define STAGE_NUM              12
+#define STAGE_CFGREG_NUM       8
+#define SYS_CFGREG_NUM         8
+
+/* board information which needs to be initialized in arch/mach... */
+struct ad714x_slider_plat {
+	int start_stage;
+	int end_stage;
+	int max_coord;
+};
+
+struct ad714x_wheel_plat {
+	int start_stage;
+	int end_stage;
+	int max_coord;
+};
+
+struct ad714x_touchpad_plat {
+	int x_start_stage;
+	int x_end_stage;
+	int x_max_coord;
+
+	int y_start_stage;
+	int y_end_stage;
+	int y_max_coord;
+};
+
+struct ad714x_button_plat {
+	int keycode;
+	unsigned short l_mask;
+	unsigned short h_mask;
+};
+
+struct ad714x_platform_data {
+	int slider_num;
+	int wheel_num;
+	int touchpad_num;
+	int button_num;
+	struct ad714x_slider_plat *slider;
+	struct ad714x_wheel_plat *wheel;
+	struct ad714x_touchpad_plat *touchpad;
+	struct ad714x_button_plat *button;
+	unsigned short stage_cfg_reg[STAGE_NUM][STAGE_CFGREG_NUM];
+	unsigned short sys_cfg_reg[SYS_CFGREG_NUM];
+};
+
+#endif
-- 
cgit v1.2.3


From 422dee56b8155e4627f657ee67e962066ca7aff3 Mon Sep 17 00:00:00 2001
From: Adam Bennett <abennett72@gmail.com>
Date: Mon, 12 Apr 2010 19:54:38 -0700
Subject: Input: add driver for hampshire serial touchscreens

Adds support for Hampshire TSHARC serial touchscreens.  Implements
Hampshire's 4-byte communication protocol.

Signed-off-by: Adam Bennett <abennett72@gmail.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/touchscreen/Kconfig     |  12 ++
 drivers/input/touchscreen/Makefile    |   1 +
 drivers/input/touchscreen/hampshire.c | 205 ++++++++++++++++++++++++++++++++++
 include/linux/serio.h                 |   1 +
 4 files changed, 219 insertions(+)
 create mode 100644 drivers/input/touchscreen/hampshire.c

(limited to 'include/linux')

diff --git a/drivers/input/touchscreen/Kconfig b/drivers/input/touchscreen/Kconfig
index 3d1ade2e5196..cc471983ac0e 100644
--- a/drivers/input/touchscreen/Kconfig
+++ b/drivers/input/touchscreen/Kconfig
@@ -119,6 +119,18 @@ config TOUCHSCREEN_DYNAPRO
 	  To compile this driver as a module, choose M here: the
 	  module will be called dynapro.
 
+config TOUCHSCREEN_HAMPSHIRE
+	tristate "Hampshire serial touchscreen"
+	select SERIO
+	help
+	  Say Y here if you have a Hampshire serial touchscreen connected to
+	  your system.
+
+	  If unsure, say N.
+
+	  To compile this driver as a module, choose M here: the
+	  module will be called hampshire.
+
 config TOUCHSCREEN_EETI
 	tristate "EETI touchscreen panel support"
 	depends on I2C
diff --git a/drivers/input/touchscreen/Makefile b/drivers/input/touchscreen/Makefile
index 41145d074dec..8ad36eef90a2 100644
--- a/drivers/input/touchscreen/Makefile
+++ b/drivers/input/touchscreen/Makefile
@@ -13,6 +13,7 @@ obj-$(CONFIG_TOUCHSCREEN_ADS7846)	+= ads7846.o
 obj-$(CONFIG_TOUCHSCREEN_ATMEL_TSADCC)	+= atmel_tsadcc.o
 obj-$(CONFIG_TOUCHSCREEN_BITSY)		+= h3600_ts_input.o
 obj-$(CONFIG_TOUCHSCREEN_DYNAPRO)	+= dynapro.o
+obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE)	+= hampshire.o
 obj-$(CONFIG_TOUCHSCREEN_GUNZE)		+= gunze.o
 obj-$(CONFIG_TOUCHSCREEN_EETI)		+= eeti_ts.o
 obj-$(CONFIG_TOUCHSCREEN_ELO)		+= elo.o
diff --git a/drivers/input/touchscreen/hampshire.c b/drivers/input/touchscreen/hampshire.c
new file mode 100644
index 000000000000..2da6cc31bb21
--- /dev/null
+++ b/drivers/input/touchscreen/hampshire.c
@@ -0,0 +1,205 @@
+/*
+ * Hampshire serial touchscreen driver
+ *
+ * Copyright (c) 2010 Adam Bennett
+ * Based on the dynapro driver (c) Tias Guns
+ *
+ */
+
+/*
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ */
+
+/*
+ * 2010/04/08 Adam Bennett <abennett72@gmail.com>
+ *   Copied dynapro.c and edited for Hampshire 4-byte protocol
+ */
+
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/input.h>
+#include <linux/serio.h>
+#include <linux/init.h>
+
+#define DRIVER_DESC	"Hampshire serial touchscreen driver"
+
+MODULE_AUTHOR("Adam Bennett <abennett72@gmail.com>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
+
+/*
+ * Definitions & global arrays.
+ */
+
+#define HAMPSHIRE_FORMAT_TOUCH_BIT 0x40
+#define HAMPSHIRE_FORMAT_LENGTH 4
+#define HAMPSHIRE_RESPONSE_BEGIN_BYTE 0x80
+
+#define HAMPSHIRE_MIN_XC 0
+#define HAMPSHIRE_MAX_XC 0x1000
+#define HAMPSHIRE_MIN_YC 0
+#define HAMPSHIRE_MAX_YC 0x1000
+
+#define HAMPSHIRE_GET_XC(data) (((data[3] & 0x0c) >> 2) | (data[1] << 2) | ((data[0] & 0x38) << 6))
+#define HAMPSHIRE_GET_YC(data) ((data[3] & 0x03) | (data[2] << 2) | ((data[0] & 0x07) << 9))
+#define HAMPSHIRE_GET_TOUCHED(data) (HAMPSHIRE_FORMAT_TOUCH_BIT & data[0])
+
+/*
+ * Per-touchscreen data.
+ */
+
+struct hampshire {
+	struct input_dev *dev;
+	struct serio *serio;
+	int idx;
+	unsigned char data[HAMPSHIRE_FORMAT_LENGTH];
+	char phys[32];
+};
+
+static void hampshire_process_data(struct hampshire *phampshire)
+{
+	struct input_dev *dev = phampshire->dev;
+
+	if (HAMPSHIRE_FORMAT_LENGTH == ++phampshire->idx) {
+		input_report_abs(dev, ABS_X, HAMPSHIRE_GET_XC(phampshire->data));
+		input_report_abs(dev, ABS_Y, HAMPSHIRE_GET_YC(phampshire->data));
+		input_report_key(dev, BTN_TOUCH,
+				 HAMPSHIRE_GET_TOUCHED(phampshire->data));
+		input_sync(dev);
+
+		phampshire->idx = 0;
+	}
+}
+
+static irqreturn_t hampshire_interrupt(struct serio *serio,
+		unsigned char data, unsigned int flags)
+{
+	struct hampshire *phampshire = serio_get_drvdata(serio);
+
+	phampshire->data[phampshire->idx] = data;
+
+	if (HAMPSHIRE_RESPONSE_BEGIN_BYTE & phampshire->data[0])
+		hampshire_process_data(phampshire);
+	else
+		dev_dbg(&serio->dev, "unknown/unsynchronized data: %x\n",
+			phampshire->data[0]);
+
+	return IRQ_HANDLED;
+}
+
+static void hampshire_disconnect(struct serio *serio)
+{
+	struct hampshire *phampshire = serio_get_drvdata(serio);
+
+	input_get_device(phampshire->dev);
+	input_unregister_device(phampshire->dev);
+	serio_close(serio);
+	serio_set_drvdata(serio, NULL);
+	input_put_device(phampshire->dev);
+	kfree(phampshire);
+}
+
+/*
+ * hampshire_connect() is the routine that is called when someone adds a
+ * new serio device that supports hampshire protocol and registers it as
+ * an input device. This is usually accomplished using inputattach.
+ */
+
+static int hampshire_connect(struct serio *serio, struct serio_driver *drv)
+{
+	struct hampshire *phampshire;
+	struct input_dev *input_dev;
+	int err;
+
+	phampshire = kzalloc(sizeof(struct hampshire), GFP_KERNEL);
+	input_dev = input_allocate_device();
+	if (!phampshire || !input_dev) {
+		err = -ENOMEM;
+		goto fail1;
+	}
+
+	phampshire->serio = serio;
+	phampshire->dev = input_dev;
+	snprintf(phampshire->phys, sizeof(phampshire->phys),
+		 "%s/input0", serio->phys);
+
+	input_dev->name = "Hampshire Serial TouchScreen";
+	input_dev->phys = phampshire->phys;
+	input_dev->id.bustype = BUS_RS232;
+	input_dev->id.vendor = SERIO_HAMPSHIRE;
+	input_dev->id.product = 0;
+	input_dev->id.version = 0x0001;
+	input_dev->dev.parent = &serio->dev;
+	input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_ABS);
+	input_dev->keybit[BIT_WORD(BTN_TOUCH)] = BIT_MASK(BTN_TOUCH);
+	input_set_abs_params(phampshire->dev, ABS_X,
+			     HAMPSHIRE_MIN_XC, HAMPSHIRE_MAX_XC, 0, 0);
+	input_set_abs_params(phampshire->dev, ABS_Y,
+			     HAMPSHIRE_MIN_YC, HAMPSHIRE_MAX_YC, 0, 0);
+
+	serio_set_drvdata(serio, phampshire);
+
+	err = serio_open(serio, drv);
+	if (err)
+		goto fail2;
+
+	err = input_register_device(phampshire->dev);
+	if (err)
+		goto fail3;
+
+	return 0;
+
+ fail3:	serio_close(serio);
+ fail2:	serio_set_drvdata(serio, NULL);
+ fail1:	input_free_device(input_dev);
+	kfree(phampshire);
+	return err;
+}
+
+/*
+ * The serio driver structure.
+ */
+
+static struct serio_device_id hampshire_serio_ids[] = {
+	{
+		.type	= SERIO_RS232,
+		.proto	= SERIO_HAMPSHIRE,
+		.id	= SERIO_ANY,
+		.extra	= SERIO_ANY,
+	},
+	{ 0 }
+};
+
+MODULE_DEVICE_TABLE(serio, hampshire_serio_ids);
+
+static struct serio_driver hampshire_drv = {
+	.driver		= {
+		.name	= "hampshire",
+	},
+	.description	= DRIVER_DESC,
+	.id_table	= hampshire_serio_ids,
+	.interrupt	= hampshire_interrupt,
+	.connect	= hampshire_connect,
+	.disconnect	= hampshire_disconnect,
+};
+
+/*
+ * The functions for inserting/removing us as a module.
+ */
+
+static int __init hampshire_init(void)
+{
+	return serio_register_driver(&hampshire_drv);
+}
+
+static void __exit hampshire_exit(void)
+{
+	serio_unregister_driver(&hampshire_drv);
+}
+
+module_init(hampshire_init);
+module_exit(hampshire_exit);
diff --git a/include/linux/serio.h b/include/linux/serio.h
index 64b473066b9a..b5552568178d 100644
--- a/include/linux/serio.h
+++ b/include/linux/serio.h
@@ -196,5 +196,6 @@ static inline void serio_continue_rx(struct serio *serio)
 #define SERIO_TOUCHIT213	0x38
 #define SERIO_W8001	0x39
 #define SERIO_DYNAPRO	0x3a
+#define SERIO_HAMPSHIRE	0x3b
 
 #endif
-- 
cgit v1.2.3


From e326e8503dfc73e4f79d488a78ee3d7cde10081f Mon Sep 17 00:00:00 2001
From: Giuseppe CAVALLARO <peppe.cavallaro@st.com>
Date: Tue, 13 Apr 2010 20:21:14 +0000
Subject: stmmac: new descriptor field for the driver's platform

The new enh_desc is used for selecting the enhanced descriptors
structure. There are several scenarios; some chips (mac10/100
or gmac) want to use the enhanced descriptors; others want the normal
ones.
For example, on ST platforms: MAC10/100 uses the normal desc structure
and the GMAC uses the enhanced one.
It can be useful to get this information from the platform.
This could also be decided at run-time looking at the chip's ID number;
but it could happen that chips with the same ID want to use different
descriptor structures.

Signed-off-by: Giuseppe Cavallaro <peppe.cavallaro@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/stmmac.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h
index 32bfd1a8a48d..632ff7c03280 100644
--- a/include/linux/stmmac.h
+++ b/include/linux/stmmac.h
@@ -33,6 +33,7 @@ struct plat_stmmacenet_data {
 	int bus_id;
 	int pbl;
 	int has_gmac;
+	int enh_desc;
 	void (*fix_mac_speed)(void *priv, unsigned int speed);
 	void (*bus_setup)(unsigned long ioaddr);
 #ifdef CONFIG_STM_DRIVERS
-- 
cgit v1.2.3


From 808d97ccbe8e8251b1435e86c762965fd7e8a75e Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@stericsson.com>
Date: Thu, 8 Apr 2010 07:39:38 +0100
Subject: ARM: 6033/1: ARM: MMCI: pass max frequency from platform

This introduces the field f_max into the mmci_platform_data,
making it possible to pass in a desired block clocking frequency
from a board configuration. This is often more desirable than
using a module parameter. We keep the module parameter as a
fallback as well as the default frequency specified for this
parameter if a parameter is not provided.

This also adds some kerneldoc style documentation to the
platform data struct in mmci.h.

Signed-off-by: Linus Walleij <linus.walleij@stericsson.com>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
---
 drivers/mmc/host/mmci.c   | 13 ++++++++++++-
 include/linux/amba/mmci.h | 23 ++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index 7e70c1a06d8a..ff115d920888 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -631,7 +631,18 @@ static int __devinit mmci_probe(struct amba_device *dev, struct amba_id *id)
 
 	mmc->ops = &mmci_ops;
 	mmc->f_min = (host->mclk + 511) / 512;
-	mmc->f_max = min(host->mclk, fmax);
+	/*
+	 * If the platform data supplies a maximum operating
+	 * frequency, this takes precedence. Else, we fall back
+	 * to using the module parameter, which has a (low)
+	 * default value in case it is not specified. Either
+	 * value must not exceed the clock rate into the block,
+	 * of course.
+	 */
+	if (plat->f_max)
+		mmc->f_max = min(host->mclk, plat->f_max);
+	else
+		mmc->f_max = min(host->mclk, fmax);
 	dev_dbg(mmc_dev(mmc), "clocking block at %u Hz\n", mmc->f_max);
 
 #ifdef CONFIG_REGULATOR
diff --git a/include/linux/amba/mmci.h b/include/linux/amba/mmci.h
index 6b4241748dda..7e466fe72025 100644
--- a/include/linux/amba/mmci.h
+++ b/include/linux/amba/mmci.h
@@ -6,8 +6,29 @@
 
 #include <linux/mmc/host.h>
 
+/**
+ * struct mmci_platform_data - platform configuration for the MMCI
+ * (also known as PL180) block.
+ * @f_max: the maximum operational frequency for this host in this
+ * platform configuration. When this is specified it takes precedence
+ * over the module parameter for the same frequency.
+ * @ocr_mask: available voltages on the 4 pins from the block, this
+ * is ignored if a regulator is used, see the MMC_VDD_* masks in
+ * mmc/host.h
+ * @translate_vdd: a callback function to translate a MMC_VDD_*
+ * mask into a value to be binary or:ed and written into the
+ * MMCIPWR register of the block
+ * @status: if no GPIO read function was given to the block in
+ * gpio_cd (below) this function will be called to determine
+ * whether a card is present in the MMC slot or not
+ * @gpio_wp: read this GPIO pin to see if the card is write protected
+ * @gpio_cd: read this GPIO pin to detect card insertion
+ * @capabilities: the capabilities of the block as implemented in
+ * this platform, signify anything MMC_CAP_* from mmc/host.h
+ */
 struct mmci_platform_data {
-	unsigned int ocr_mask;			/* available voltages */
+	unsigned int f_max;
+	unsigned int ocr_mask;
 	u32 (*translate_vdd)(struct device *, unsigned int);
 	unsigned int (*status)(struct device *);
 	int	gpio_wp;
-- 
cgit v1.2.3


From 76e1d9047e4edefb8ada20aa90d5762306082bd6 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Mon, 5 Apr 2010 15:35:57 +0200
Subject: perf: Store active software events in a hashlist

Each time a software event triggers, we need to walk through
the entire list of events from the current cpu and task contexts
to retrieve a running perf event that matches.
We also need to check that a matching perf event is actually counting.

This walk is wasteful and makes the event fast path scale
poorly as the number of events running on the same contexts
grows.

To solve this, we store the running perf events in a hashlist to
get immediate access to them by their type:event_id when
they trigger.

v2: - Fix SWEVENT_HLIST_SIZE definition (and re-learn some basic
      maths along the way)
    - Only allocate hlist for online cpus, but keep track of the
      refcount on offline possible cpus too, so that we allocate it
      if needed when it becomes online.
    - Drop the kref use as it's not adapted to our tricks anymore.

v3: - Fix bad refcount check (address instead of value). Thanks to
      Eric Dumazet who spotted this.
    - While exiting cpu, move the hlist release out of the IPI path
      to lock the hlist mutex sanely.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h |  12 +++
 kernel/perf_event.c        | 246 +++++++++++++++++++++++++++++++++------------
 2 files changed, 195 insertions(+), 63 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6e96cc8225d4..bf896d0b2e9c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -589,6 +589,14 @@ enum perf_group_flag {
 	PERF_GROUP_SOFTWARE = 0x1,
 };
 
+#define SWEVENT_HLIST_BITS	8
+#define SWEVENT_HLIST_SIZE	(1 << SWEVENT_HLIST_BITS)
+
+struct swevent_hlist {
+	struct hlist_head	heads[SWEVENT_HLIST_SIZE];
+	struct rcu_head		rcu_head;
+};
+
 /**
  * struct perf_event - performance event kernel representation:
  */
@@ -597,6 +605,7 @@ struct perf_event {
 	struct list_head		group_entry;
 	struct list_head		event_entry;
 	struct list_head		sibling_list;
+	struct hlist_node		hlist_entry;
 	int				nr_siblings;
 	int				group_flags;
 	struct perf_event		*group_leader;
@@ -744,6 +753,9 @@ struct perf_cpu_context {
 	int				active_oncpu;
 	int				max_pertask;
 	int				exclusive;
+	struct swevent_hlist		*swevent_hlist;
+	struct mutex			hlist_mutex;
+	int				hlist_refcount;
 
 	/*
 	 * Recursion avoidance:
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fcf42dcd6089..9efdfe5b8d3b 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -16,6 +16,7 @@
 #include <linux/file.h>
 #include <linux/poll.h>
 #include <linux/slab.h>
+#include <linux/hash.h>
 #include <linux/sysfs.h>
 #include <linux/dcache.h>
 #include <linux/percpu.h>
@@ -3966,36 +3967,6 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
 	perf_swevent_overflow(event, 0, nmi, data, regs);
 }
 
-static int perf_swevent_is_counting(struct perf_event *event)
-{
-	/*
-	 * The event is active, we're good!
-	 */
-	if (event->state == PERF_EVENT_STATE_ACTIVE)
-		return 1;
-
-	/*
-	 * The event is off/error, not counting.
-	 */
-	if (event->state != PERF_EVENT_STATE_INACTIVE)
-		return 0;
-
-	/*
-	 * The event is inactive, if the context is active
-	 * we're part of a group that didn't make it on the 'pmu',
-	 * not counting.
-	 */
-	if (event->ctx->is_active)
-		return 0;
-
-	/*
-	 * We're inactive and the context is too, this means the
-	 * task is scheduled out, we're counting events that happen
-	 * to us, like migration events.
-	 */
-	return 1;
-}
-
 static int perf_tp_event_match(struct perf_event *event,
 				struct perf_sample_data *data);
 
@@ -4019,12 +3990,6 @@ static int perf_swevent_match(struct perf_event *event,
 				struct perf_sample_data *data,
 				struct pt_regs *regs)
 {
-	if (event->cpu != -1 && event->cpu != smp_processor_id())
-		return 0;
-
-	if (!perf_swevent_is_counting(event))
-		return 0;
-
 	if (event->attr.type != type)
 		return 0;
 
@@ -4041,18 +4006,53 @@ static int perf_swevent_match(struct perf_event *event,
 	return 1;
 }
 
-static void perf_swevent_ctx_event(struct perf_event_context *ctx,
-				     enum perf_type_id type,
-				     u32 event_id, u64 nr, int nmi,
-				     struct perf_sample_data *data,
-				     struct pt_regs *regs)
+static inline u64 swevent_hash(u64 type, u32 event_id)
+{
+	u64 val = event_id | (type << 32);
+
+	return hash_64(val, SWEVENT_HLIST_BITS);
+}
+
+static struct hlist_head *
+find_swevent_head(struct perf_cpu_context *ctx, u64 type, u32 event_id)
+{
+	u64 hash;
+	struct swevent_hlist *hlist;
+
+	hash = swevent_hash(type, event_id);
+
+	hlist = rcu_dereference(ctx->swevent_hlist);
+	if (!hlist)
+		return NULL;
+
+	return &hlist->heads[hash];
+}
+
+static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
+				    u64 nr, int nmi,
+				    struct perf_sample_data *data,
+				    struct pt_regs *regs)
 {
+	struct perf_cpu_context *cpuctx;
 	struct perf_event *event;
+	struct hlist_node *node;
+	struct hlist_head *head;
 
-	list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
+	cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	rcu_read_lock();
+
+	head = find_swevent_head(cpuctx, type, event_id);
+
+	if (!head)
+		goto end;
+
+	hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
 		if (perf_swevent_match(event, type, event_id, data, regs))
 			perf_swevent_add(event, nr, nmi, data, regs);
 	}
+end:
+	rcu_read_unlock();
 }
 
 int perf_swevent_get_recursion_context(void)
@@ -4090,27 +4090,6 @@ void perf_swevent_put_recursion_context(int rctx)
 }
 EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
 
-static void do_perf_sw_event(enum perf_type_id type, u32 event_id,
-				    u64 nr, int nmi,
-				    struct perf_sample_data *data,
-				    struct pt_regs *regs)
-{
-	struct perf_cpu_context *cpuctx;
-	struct perf_event_context *ctx;
-
-	cpuctx = &__get_cpu_var(perf_cpu_context);
-	rcu_read_lock();
-	perf_swevent_ctx_event(&cpuctx->ctx, type, event_id,
-				 nr, nmi, data, regs);
-	/*
-	 * doesn't really matter which of the child contexts the
-	 * events ends up in.
-	 */
-	ctx = rcu_dereference(current->perf_event_ctxp);
-	if (ctx)
-		perf_swevent_ctx_event(ctx, type, event_id, nr, nmi, data, regs);
-	rcu_read_unlock();
-}
 
 void __perf_sw_event(u32 event_id, u64 nr, int nmi,
 			    struct pt_regs *regs, u64 addr)
@@ -4136,16 +4115,28 @@ static void perf_swevent_read(struct perf_event *event)
 static int perf_swevent_enable(struct perf_event *event)
 {
 	struct hw_perf_event *hwc = &event->hw;
+	struct perf_cpu_context *cpuctx;
+	struct hlist_head *head;
+
+	cpuctx = &__get_cpu_var(perf_cpu_context);
 
 	if (hwc->sample_period) {
 		hwc->last_period = hwc->sample_period;
 		perf_swevent_set_period(event);
 	}
+
+	head = find_swevent_head(cpuctx, event->attr.type, event->attr.config);
+	if (WARN_ON_ONCE(!head))
+		return -EINVAL;
+
+	hlist_add_head_rcu(&event->hlist_entry, head);
+
 	return 0;
 }
 
 static void perf_swevent_disable(struct perf_event *event)
 {
+	hlist_del_rcu(&event->hlist_entry);
 }
 
 static const struct pmu perf_ops_generic = {
@@ -4359,13 +4350,115 @@ static int perf_tp_event_match(struct perf_event *event,
 	return 0;
 }
 
+static void swevent_hlist_release_rcu(struct rcu_head *rcu_head)
+{
+	struct swevent_hlist *hlist;
+
+	hlist = container_of(rcu_head, struct swevent_hlist, rcu_head);
+	kfree(hlist);
+}
+
+static void swevent_hlist_release(struct perf_cpu_context *cpuctx)
+{
+	struct swevent_hlist *hlist;
+
+	if (!cpuctx->swevent_hlist)
+		return;
+
+	hlist = cpuctx->swevent_hlist;
+	rcu_assign_pointer(cpuctx->swevent_hlist, NULL);
+	call_rcu(&hlist->rcu_head, swevent_hlist_release_rcu);
+}
+
+static void swevent_hlist_put_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!--cpuctx->hlist_refcount)
+		swevent_hlist_release(cpuctx);
+
+	mutex_unlock(&cpuctx->hlist_mutex);
+}
+
+static void swevent_hlist_put(struct perf_event *event)
+{
+	int cpu;
+
+	if (event->cpu != -1) {
+		swevent_hlist_put_cpu(event, event->cpu);
+		return;
+	}
+
+	for_each_possible_cpu(cpu)
+		swevent_hlist_put_cpu(event, cpu);
+}
+
+static int swevent_hlist_get_cpu(struct perf_event *event, int cpu)
+{
+	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
+	int err = 0;
+
+	mutex_lock(&cpuctx->hlist_mutex);
+
+	if (!cpuctx->swevent_hlist && cpu_online(cpu)) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		if (!hlist) {
+			err = -ENOMEM;
+			goto exit;
+		}
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	cpuctx->hlist_refcount++;
+ exit:
+	mutex_unlock(&cpuctx->hlist_mutex);
+
+	return err;
+}
+
+static int swevent_hlist_get(struct perf_event *event)
+{
+	int err;
+	int cpu, failed_cpu;
+
+	if (event->cpu != -1)
+		return swevent_hlist_get_cpu(event, event->cpu);
+
+	get_online_cpus();
+	for_each_possible_cpu(cpu) {
+		err = swevent_hlist_get_cpu(event, cpu);
+		if (err) {
+			failed_cpu = cpu;
+			goto fail;
+		}
+	}
+	put_online_cpus();
+
+	return 0;
+ fail:
+	for_each_possible_cpu(cpu) {
+		if (cpu == failed_cpu)
+			break;
+		swevent_hlist_put_cpu(event, cpu);
+	}
+
+	put_online_cpus();
+	return err;
+}
+
 static void tp_perf_event_destroy(struct perf_event *event)
 {
 	perf_trace_disable(event->attr.config);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *tp_perf_event_init(struct perf_event *event)
 {
+	int err;
+
 	/*
 	 * Raw tracepoint data is a severe data leak, only allow root to
 	 * have these.
@@ -4379,6 +4472,11 @@ static const struct pmu *tp_perf_event_init(struct perf_event *event)
 		return NULL;
 
 	event->destroy = tp_perf_event_destroy;
+	err = swevent_hlist_get(event);
+	if (err) {
+		perf_trace_disable(event->attr.config);
+		return ERR_PTR(err);
+	}
 
 	return &perf_ops_generic;
 }
@@ -4479,6 +4577,7 @@ static void sw_perf_event_destroy(struct perf_event *event)
 	WARN_ON(event->parent);
 
 	atomic_dec(&perf_swevent_enabled[event_id]);
+	swevent_hlist_put(event);
 }
 
 static const struct pmu *sw_perf_event_init(struct perf_event *event)
@@ -4517,6 +4616,12 @@ static const struct pmu *sw_perf_event_init(struct perf_event *event)
 	case PERF_COUNT_SW_ALIGNMENT_FAULTS:
 	case PERF_COUNT_SW_EMULATION_FAULTS:
 		if (!event->parent) {
+			int err;
+
+			err = swevent_hlist_get(event);
+			if (err)
+				return ERR_PTR(err);
+
 			atomic_inc(&perf_swevent_enabled[event_id]);
 			event->destroy = sw_perf_event_destroy;
 		}
@@ -5389,6 +5494,7 @@ static void __init perf_event_init_all_cpus(void)
 
 	for_each_possible_cpu(cpu) {
 		cpuctx = &per_cpu(perf_cpu_context, cpu);
+		mutex_init(&cpuctx->hlist_mutex);
 		__perf_event_init_context(&cpuctx->ctx, NULL);
 	}
 }
@@ -5402,6 +5508,16 @@ static void __cpuinit perf_event_init_cpu(int cpu)
 	spin_lock(&perf_resource_lock);
 	cpuctx->max_pertask = perf_max_events - perf_reserved_percpu;
 	spin_unlock(&perf_resource_lock);
+
+	mutex_lock(&cpuctx->hlist_mutex);
+	if (cpuctx->hlist_refcount > 0) {
+		struct swevent_hlist *hlist;
+
+		hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
+		WARN_ON_ONCE(!hlist);
+		rcu_assign_pointer(cpuctx->swevent_hlist, hlist);
+	}
+	mutex_unlock(&cpuctx->hlist_mutex);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
@@ -5421,6 +5537,10 @@ static void perf_event_exit_cpu(int cpu)
 	struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
 	struct perf_event_context *ctx = &cpuctx->ctx;
 
+	mutex_lock(&cpuctx->hlist_mutex);
+	swevent_hlist_release(cpuctx);
+	mutex_unlock(&cpuctx->hlist_mutex);
+
 	mutex_lock(&ctx->mutex);
 	smp_call_function_single(cpu, __perf_event_exit_cpu, NULL, 1);
 	mutex_unlock(&ctx->mutex);
-- 
cgit v1.2.3


From dc825b17904a06bbd2f79d720b23156e4c01a22f Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 15 Apr 2010 13:13:52 +0900
Subject: sh: intc: IRQ auto-distribution support.

This implements support for hardware-managed IRQ balancing as implemented
by SH-X3 cores (presently only hooked up for SH7786, but can probably be
carried over to other SH-X3 cores, too).

CPUs need to specify their distribution register along with the mask
definitions, as these follow the same format. Peripheral IRQs that don't
opt out of balancing will be automatically distributed at the whim of the
hardware block, while each CPU needs to verify whether it is handling the
IRQ or not, especially before clearing the mask.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/irq.h              |  16 +++
 arch/sh/kernel/cpu/sh4a/setup-sh7786.c |  36 +++++-
 arch/sh/kernel/irq.c                   |  49 ++++----
 drivers/sh/Kconfig                     |  11 ++
 drivers/sh/intc.c                      | 200 ++++++++++++++++++++++++---------
 include/linux/sh_intc.h                |  11 +-
 6 files changed, 244 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/irq.h b/arch/sh/include/asm/irq.h
index df8e1500527c..99c593b3a827 100644
--- a/arch/sh/include/asm/irq.h
+++ b/arch/sh/include/asm/irq.h
@@ -11,6 +11,14 @@
 #define NR_IRQS			256
 #define NR_IRQS_LEGACY		8	/* Legacy external IRQ0-7 */
 
+/*
+ * This is a special IRQ number for indicating that no IRQ has been
+ * triggered and to simply ignore the IRQ dispatch. This is a special
+ * case that can happen with IRQ auto-distribution when multiple CPUs
+ * are woken up and signalled in parallel.
+ */
+#define NO_IRQ_IGNORE		((unsigned int)-1)
+
 /*
  * Convert back and forth between INTEVT and IRQ values.
  */
@@ -53,6 +61,14 @@ extern void irq_ctx_exit(int cpu);
 # define irq_ctx_exit(cpu) do { } while (0)
 #endif
 
+#ifdef CONFIG_INTC_BALANCING
+extern unsigned int irq_lookup(unsigned int irq);
+extern void irq_finish(unsigned int irq);
+#else
+#define irq_lookup(irq)		(irq)
+#define irq_finish(irq)		do { } while (0)
+#endif
+
 #include <asm-generic/irq.h>
 #ifdef CONFIG_CPU_SH5
 #include <cpu/irq.h>
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index 235edf8065df..d7336036d04d 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -573,7 +573,6 @@ static struct platform_device *sh7786_devices[] __initdata = {
 	&usb_ohci_device,
 };
 
-
 /*
  * Please call this function if your platform board
  * use external clock for USB
@@ -581,6 +580,7 @@ static struct platform_device *sh7786_devices[] __initdata = {
 #define USBCTL0		0xffe70858
 #define CLOCK_MODE_MASK 0xffffff7f
 #define EXT_CLOCK_MODE  0x00000080
+
 void __init sh7786_usb_use_exclock(void)
 {
 	u32 val = __raw_readl(USBCTL0) & CLOCK_MODE_MASK;
@@ -598,6 +598,7 @@ void __init sh7786_usb_use_exclock(void)
 #define PLL_ENB		0x00000002
 #define PHY_RST		0x00000004
 #define ACT_PLL_STATUS	0xc0000000
+
 static void __init sh7786_usb_setup(void)
 {
 	int i = 1000000;
@@ -753,9 +754,19 @@ static struct intc_vect vectors[] __initdata = {
 #define INTMSK2		0xfe410068
 #define INTMSKCLR2	0xfe41006c
 
+#define INTDISTCR0	0xfe4100b0
+#define INTDISTCR1	0xfe4100b4
+#define INTACK		0xfe4100b8
+#define INTACKCLR	0xfe4100bc
+#define INT2DISTCR0	0xfe410900
+#define INT2DISTCR1	0xfe410904
+#define INT2DISTCR2	0xfe410908
+#define INT2DISTCR3	0xfe41090c
+
 static struct intc_mask_reg mask_registers[] __initdata = {
 	{ CnINTMSK0, CnINTMSKCLR0, 32,
-	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 } },
+	  { IRQ0, IRQ1, IRQ2, IRQ3, IRQ4, IRQ5, IRQ6, IRQ7 },
+	    INTC_SMP_BALANCING(INTDISTCR0) },
 	{ INTMSK2, INTMSKCLR2, 32,
 	  { IRL0_LLLL, IRL0_LLLH, IRL0_LLHL, IRL0_LLHH,
 	    IRL0_LHLL, IRL0_LHLH, IRL0_LHHL, IRL0_LHHH,
@@ -767,7 +778,8 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 	    IRL4_HHLL, IRL4_HHLH, IRL4_HHHL, 0, } },
 	{ CnINT2MSKR0, CnINT2MSKCR0 , 32,
 	  { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, WDT } },
+	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, WDT },
+	    INTC_SMP_BALANCING(INT2DISTCR0) },
 	{ CnINT2MSKR1, CnINT2MSKCR1, 32,
 	  { TMU0_0, TMU0_1, TMU0_2, TMU0_3, TMU1_0, TMU1_1, TMU1_2, 0,
 	    DMAC0_0, DMAC0_1, DMAC0_2, DMAC0_3, DMAC0_4, DMAC0_5, DMAC0_6,
@@ -776,14 +788,14 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 	    HPB_0, HPB_1, HPB_2,
 	    SCIF0_0, SCIF0_1, SCIF0_2, SCIF0_3,
 	    SCIF1,
-	    TMU2, TMU3, 0, } },
+	    TMU2, TMU3, 0, }, INTC_SMP_BALANCING(INT2DISTCR1) },
 	{ CnINT2MSKR2, CnINT2MSKCR2, 32,
 	  { 0, 0, SCIF2, SCIF3, SCIF4, SCIF5,
 	    Eth_0, Eth_1,
 	    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 	    PCIeC0_0, PCIeC0_1, PCIeC0_2,
 	    PCIeC1_0, PCIeC1_1, PCIeC1_2,
-	    USB, 0, 0 } },
+	    USB, 0, 0 }, INTC_SMP_BALANCING(INT2DISTCR2) },
 	{ CnINT2MSKR3, CnINT2MSKCR3, 32,
 	  { 0, 0, 0, 0, 0, 0,
 	    I2C0, I2C1,
@@ -792,7 +804,7 @@ static struct intc_mask_reg mask_registers[] __initdata = {
 	    HAC0, HAC1,
 	    FLCTL, 0,
 	    HSPI, GPIO0, GPIO1, Thermal,
-	    0, 0, 0, 0, 0, 0, 0, 0 } },
+	    0, 0, 0, 0, 0, 0, 0, 0 }, INTC_SMP_BALANCING(INT2DISTCR3) },
 };
 
 static struct intc_prio_reg prio_registers[] __initdata = {
@@ -910,6 +922,18 @@ static DECLARE_INTC_DESC(intc_desc_irl4567, "sh7786-irl4567", vectors_irl4567,
 #define INTC_INTMSKCLR2	INTMSKCLR2
 #define INTC_USERIMASK	0xfe411000
 
+#ifdef CONFIG_INTC_BALANCING
+unsigned int irq_lookup(unsigned int irq)
+{
+	return __raw_readl(INTACK) & 1 ? irq : NO_IRQ_IGNORE;
+}
+
+void irq_finish(unsigned int irq)
+{
+	__raw_writel(irq2evt(irq), INTACKCLR);
+}
+#endif
+
 void __init plat_irq_setup(void)
 {
 	/* disable IRQ3-0 + IRQ7-4 */
diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c
index d2d41d046657..f6a9319c28e2 100644
--- a/arch/sh/kernel/irq.c
+++ b/arch/sh/kernel/irq.c
@@ -113,19 +113,14 @@ union irq_ctx {
 
 static union irq_ctx *hardirq_ctx[NR_CPUS] __read_mostly;
 static union irq_ctx *softirq_ctx[NR_CPUS] __read_mostly;
-#endif
 
-asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
+static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
+
+static inline void handle_one_irq(unsigned int irq)
 {
-	struct pt_regs *old_regs = set_irq_regs(regs);
-#ifdef CONFIG_IRQSTACKS
 	union irq_ctx *curctx, *irqctx;
-#endif
-
-	irq_enter();
-	irq = irq_demux(irq);
 
-#ifdef CONFIG_IRQSTACKS
 	curctx = (union irq_ctx *)current_thread_info();
 	irqctx = hardirq_ctx[smp_processor_id()];
 
@@ -164,20 +159,9 @@ asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
 			  "r5", "r6", "r7", "r8", "t", "pr"
 		);
 	} else
-#endif
 		generic_handle_irq(irq);
-
-	irq_exit();
-
-	set_irq_regs(old_regs);
-	return 1;
 }
 
-#ifdef CONFIG_IRQSTACKS
-static char softirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-
-static char hardirq_stack[NR_CPUS * THREAD_SIZE] __page_aligned_bss;
-
 /*
  * allocate per-cpu stacks for hardirq and for softirq processing
  */
@@ -257,8 +241,33 @@ asmlinkage void do_softirq(void)
 
 	local_irq_restore(flags);
 }
+#else
+static inline void handle_one_irq(unsigned int irq)
+{
+	generic_handle_irq(irq);
+}
 #endif
 
+asmlinkage __irq_entry int do_IRQ(unsigned int irq, struct pt_regs *regs)
+{
+	struct pt_regs *old_regs = set_irq_regs(regs);
+
+	irq_enter();
+
+	irq = irq_demux(irq_lookup(irq));
+
+	if (irq != NO_IRQ_IGNORE) {
+		handle_one_irq(irq);
+		irq_finish(irq);
+	}
+
+	irq_exit();
+
+	set_irq_regs(old_regs);
+
+	return IRQ_HANDLED;
+}
+
 void __init init_IRQ(void)
 {
 	plat_irq_setup();
diff --git a/drivers/sh/Kconfig b/drivers/sh/Kconfig
index 22c3cdaf22fe..a54de0b9b3df 100644
--- a/drivers/sh/Kconfig
+++ b/drivers/sh/Kconfig
@@ -11,3 +11,14 @@ config INTC_USERIMASK
 	  drivers that are using special priority levels.
 
 	  If in doubt, say N.
+
+config INTC_BALANCING
+	bool "Hardware IRQ balancing support"
+	depends on SMP && SUPERH && CPU_SUBTYPE_SH7786
+	help
+	  This enables support for IRQ auto-distribution mode on SH-X3
+	  SMP parts. All of the balancing and CPU wakeup decisions are
+	  taken care of automatically by hardware for distributed
+	  vectors.
+
+	  If in doubt, say N.
diff --git a/drivers/sh/intc.c b/drivers/sh/intc.c
index 77d10acf1884..dcb4c833820b 100644
--- a/drivers/sh/intc.c
+++ b/drivers/sh/intc.c
@@ -98,6 +98,9 @@ static DEFINE_SPINLOCK(vector_lock);
 static unsigned int intc_prio_level[NR_IRQS];	/* for now */
 static unsigned int default_prio_level = 2;	/* 2 - 16 */
 static unsigned long ack_handle[NR_IRQS];
+#ifdef CONFIG_INTC_BALANCING
+static unsigned long dist_handle[NR_IRQS];
+#endif
 
 static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
 {
@@ -105,6 +108,47 @@ static inline struct intc_desc_int *get_intc_desc(unsigned int irq)
 	return container_of(chip, struct intc_desc_int, chip);
 }
 
+static unsigned long intc_phys_to_virt(struct intc_desc_int *d,
+				       unsigned long address)
+{
+	struct intc_window *window;
+	int k;
+
+	/* scan through physical windows and convert address */
+	for (k = 0; k < d->nr_windows; k++) {
+		window = d->window + k;
+
+		if (address < window->phys)
+			continue;
+
+		if (address >= (window->phys + window->size))
+			continue;
+
+		address -= window->phys;
+		address += (unsigned long)window->virt;
+
+		return address;
+	}
+
+	/* no windows defined, register must be 1:1 mapped virt:phys */
+	return address;
+}
+
+static unsigned int intc_get_reg(struct intc_desc_int *d, unsigned long address)
+{
+	unsigned int k;
+
+	address = intc_phys_to_virt(d, address);
+
+	for (k = 0; k < d->nr_reg; k++) {
+		if (d->reg[k] == address)
+			return k;
+	}
+
+	BUG();
+	return 0;
+}
+
 static inline unsigned int set_field(unsigned int value,
 				     unsigned int field_value,
 				     unsigned int handle)
@@ -238,6 +282,85 @@ static void (*intc_disable_fns[])(unsigned long addr,
 	[MODE_PCLR_REG] = intc_mode_field,
 };
 
+#ifdef CONFIG_INTC_BALANCING
+static inline void intc_balancing_enable(unsigned int irq)
+{
+	struct intc_desc_int *d = get_intc_desc(irq);
+	unsigned long handle = dist_handle[irq];
+	unsigned long addr;
+
+	if (irq_balancing_disabled(irq) || !handle)
+		return;
+
+	addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
+	intc_reg_fns[_INTC_FN(handle)](addr, handle, 1);
+}
+
+static inline void intc_balancing_disable(unsigned int irq)
+{
+	struct intc_desc_int *d = get_intc_desc(irq);
+	unsigned long handle = dist_handle[irq];
+	unsigned long addr;
+
+	if (irq_balancing_disabled(irq) || !handle)
+		return;
+
+	addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
+	intc_reg_fns[_INTC_FN(handle)](addr, handle, 0);
+}
+
+static unsigned int intc_dist_data(struct intc_desc *desc,
+				   struct intc_desc_int *d,
+				   intc_enum enum_id)
+{
+	struct intc_mask_reg *mr = desc->hw.mask_regs;
+	unsigned int i, j, fn, mode;
+	unsigned long reg_e, reg_d;
+
+	for (i = 0; mr && enum_id && i < desc->hw.nr_mask_regs; i++) {
+		mr = desc->hw.mask_regs + i;
+
+		/*
+		 * Skip this entry if there's no auto-distribution
+		 * register associated with it.
+		 */
+		if (!mr->dist_reg)
+			continue;
+
+		for (j = 0; j < ARRAY_SIZE(mr->enum_ids); j++) {
+			if (mr->enum_ids[j] != enum_id)
+				continue;
+
+			fn = REG_FN_MODIFY_BASE;
+			mode = MODE_ENABLE_REG;
+			reg_e = mr->dist_reg;
+			reg_d = mr->dist_reg;
+
+			fn += (mr->reg_width >> 3) - 1;
+			return _INTC_MK(fn, mode,
+					intc_get_reg(d, reg_e),
+					intc_get_reg(d, reg_d),
+					1,
+					(mr->reg_width - 1) - j);
+		}
+	}
+
+	/*
+	 * It's possible we've gotten here with no distribution options
+	 * available for the IRQ in question, so we just skip over those.
+	 */
+	return 0;
+}
+#else
+static inline void intc_balancing_enable(unsigned int irq)
+{
+}
+
+static inline void intc_balancing_disable(unsigned int irq)
+{
+}
+#endif
+
 static inline void _intc_enable(unsigned int irq, unsigned long handle)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
@@ -253,6 +376,8 @@ static inline void _intc_enable(unsigned int irq, unsigned long handle)
 		intc_enable_fns[_INTC_MODE(handle)](addr, handle, intc_reg_fns\
 						    [_INTC_FN(handle)], irq);
 	}
+
+	intc_balancing_enable(irq);
 }
 
 static void intc_enable(unsigned int irq)
@@ -263,10 +388,12 @@ static void intc_enable(unsigned int irq)
 static void intc_disable(unsigned int irq)
 {
 	struct intc_desc_int *d = get_intc_desc(irq);
-	unsigned long handle = (unsigned long) get_irq_chip_data(irq);
+	unsigned long handle = (unsigned long)get_irq_chip_data(irq);
 	unsigned long addr;
 	unsigned int cpu;
 
+	intc_balancing_disable(irq);
+
 	for (cpu = 0; cpu < SMP_NR(d, _INTC_ADDR_D(handle)); cpu++) {
 #ifdef CONFIG_SMP
 		if (!cpumask_test_cpu(cpu, irq_to_desc(irq)->affinity))
@@ -345,8 +472,7 @@ static void intc_mask_ack(unsigned int irq)
 
 	intc_disable(irq);
 
-	/* read register and write zero only to the assocaited bit */
-
+	/* read register and write zero only to the associated bit */
 	if (handle) {
 		addr = INTC_REG(d, _INTC_ADDR_D(handle), 0);
 		switch (_INTC_FN(handle)) {
@@ -375,7 +501,8 @@ static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp,
 {
 	int i;
 
-	/* this doesn't scale well, but...
+	/*
+	 * this doesn't scale well, but...
 	 *
 	 * this function should only be used for cerain uncommon
 	 * operations such as intc_set_priority() and intc_set_sense()
@@ -386,7 +513,6 @@ static struct intc_handle_int *intc_find_irq(struct intc_handle_int *hp,
 	 * memory footprint down is to make sure the array is sorted
 	 * and then perform a bisect to lookup the irq.
 	 */
-
 	for (i = 0; i < nr_hp; i++) {
 		if ((hp + i)->irq != irq)
 			continue;
@@ -417,7 +543,6 @@ int intc_set_priority(unsigned int irq, unsigned int prio)
 		 * primary masking method is using intc_prio_level[irq]
 		 * priority level will be set during next enable()
 		 */
-
 		if (_INTC_FN(ihp->handle) != REG_FN_ERR)
 			_intc_enable(irq, ihp->handle);
 	}
@@ -456,48 +581,6 @@ static int intc_set_sense(unsigned int irq, unsigned int type)
 	return 0;
 }
 
-static unsigned long intc_phys_to_virt(struct intc_desc_int *d,
-				       unsigned long address)
-{
-	struct intc_window *window;
-	int k;
-
-	/* scan through physical windows and convert address */
-	for (k = 0; k < d->nr_windows; k++) {
-		window = d->window + k;
-
-		if (address < window->phys)
-			continue;
-
-		if (address >= (window->phys + window->size))
-			continue;
-
-		address -= window->phys;
-		address += (unsigned long)window->virt;
-
-		return address;
-	}
-
-	/* no windows defined, register must be 1:1 mapped virt:phys */
-	return address;
-}
-
-static unsigned int __init intc_get_reg(struct intc_desc_int *d,
-					unsigned long address)
-{
-	unsigned int k;
-
-	address = intc_phys_to_virt(d, address);
-
-	for (k = 0; k < d->nr_reg; k++) {
-		if (d->reg[k] == address)
-			return k;
-	}
-
-	BUG();
-	return 0;
-}
-
 static intc_enum __init intc_grp_id(struct intc_desc *desc,
 				    intc_enum enum_id)
 {
@@ -755,13 +838,14 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	 */
 	set_bit(irq, intc_irq_map);
 
-	/* Prefer single interrupt source bitmap over other combinations:
+	/*
+	 * Prefer single interrupt source bitmap over other combinations:
+	 *
 	 * 1. bitmap, single interrupt source
 	 * 2. priority, single interrupt source
 	 * 3. bitmap, multiple interrupt sources (groups)
 	 * 4. priority, multiple interrupt sources (groups)
 	 */
-
 	data[0] = intc_mask_data(desc, d, enum_id, 0);
 	data[1] = intc_prio_data(desc, d, enum_id, 0);
 
@@ -786,7 +870,8 @@ static void __init intc_register_irq(struct intc_desc *desc,
 				      handle_level_irq, "level");
 	set_irq_chip_data(irq, (void *)data[primary]);
 
-	/* set priority level
+	/*
+	 * set priority level
 	 * - this needs to be at least 2 for 5-bit priorities on 7780
 	 */
 	intc_prio_level[irq] = default_prio_level;
@@ -806,7 +891,6 @@ static void __init intc_register_irq(struct intc_desc *desc,
 			 * only secondary priority should access registers, so
 			 * set _INTC_FN(h) = REG_FN_ERR for intc_set_priority()
 			 */
-
 			hp->handle &= ~_INTC_MK(0x0f, 0, 0, 0, 0, 0);
 			hp->handle |= _INTC_MK(REG_FN_ERR, 0, 0, 0, 0, 0);
 		}
@@ -827,6 +911,11 @@ static void __init intc_register_irq(struct intc_desc *desc,
 	if (desc->hw.ack_regs)
 		ack_handle[irq] = intc_ack_data(desc, d, enum_id);
 
+#ifdef CONFIG_INTC_BALANCING
+	if (desc->hw.mask_regs)
+		dist_handle[irq] = intc_dist_data(desc, d, enum_id);
+#endif
+
 #ifdef CONFIG_ARM
 	set_irq_flags(irq, IRQF_VALID); /* Enable IRQ on ARM systems */
 #endif
@@ -892,6 +981,10 @@ int __init register_intc_controller(struct intc_desc *desc)
 	}
 
 	d->nr_reg = hw->mask_regs ? hw->nr_mask_regs * 2 : 0;
+#ifdef CONFIG_INTC_BALANCING
+	if (d->nr_reg)
+		d->nr_reg += hw->nr_mask_regs;
+#endif
 	d->nr_reg += hw->prio_regs ? hw->nr_prio_regs * 2 : 0;
 	d->nr_reg += hw->sense_regs ? hw->nr_sense_regs : 0;
 	d->nr_reg += hw->ack_regs ? hw->nr_ack_regs : 0;
@@ -912,6 +1005,9 @@ int __init register_intc_controller(struct intc_desc *desc)
 			smp = IS_SMP(hw->mask_regs[i]);
 			k += save_reg(d, k, hw->mask_regs[i].set_reg, smp);
 			k += save_reg(d, k, hw->mask_regs[i].clr_reg, smp);
+#ifdef CONFIG_INTC_BALANCING
+			k += save_reg(d, k, hw->mask_regs[i].dist_reg, 0);
+#endif
 		}
 	}
 
diff --git a/include/linux/sh_intc.h b/include/linux/sh_intc.h
index f0e8cca199c7..0d6cd38e673d 100644
--- a/include/linux/sh_intc.h
+++ b/include/linux/sh_intc.h
@@ -23,6 +23,9 @@ struct intc_group {
 struct intc_mask_reg {
 	unsigned long set_reg, clr_reg, reg_width;
 	intc_enum enum_ids[32];
+#ifdef CONFIG_INTC_BALANCING
+	unsigned long dist_reg;
+#endif
 #ifdef CONFIG_SMP
 	unsigned long smp;
 #endif
@@ -41,8 +44,14 @@ struct intc_sense_reg {
 	intc_enum enum_ids[16];
 };
 
+#ifdef CONFIG_INTC_BALANCING
+#define INTC_SMP_BALANCING(reg)	.dist_reg = (reg)
+#else
+#define INTC_SMP_BALANCING(reg)
+#endif
+
 #ifdef CONFIG_SMP
-#define INTC_SMP(stride, nr) .smp = (stride) | ((nr) << 8)
+#define INTC_SMP(stride, nr)	.smp = (stride) | ((nr) << 8)
 #else
 #define INTC_SMP(stride, nr)
 #endif
-- 
cgit v1.2.3


From fd793d8905720595caede6bd26c5df6c0ecd37f8 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Thu, 15 Apr 2010 00:16:59 -0700
Subject: net: CONFIG_SMP should be CONFIG_RPS

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 470f7c951afb..55c2086e1f06 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1331,7 +1331,7 @@ struct softnet_data {
 	struct sk_buff		*completion_queue;
 
 	/* Elements below can be accessed between CPUs for RPS */
-#ifdef CONFIG_SMP
+#ifdef CONFIG_RPS
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
 #endif
 	struct sk_buff_head	input_pkt_queue;
-- 
cgit v1.2.3


From ea2d9b41bd418894d1ee25de1642c3325d71c397 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 15 Apr 2010 12:14:51 +0200
Subject: netfilter: bridge-netfilter: simplify IP DNAT

Remove br_netfilter.c::br_nf_local_out(). The function
br_nf_local_out() was needed because the PF_BRIDGE::LOCAL_OUT hook
could be called when IP DNAT happens on to-be-bridged traffic. The
new scheme eliminates this mess.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h |  17 ++++--
 net/bridge/br_device.c           |   9 +++-
 net/bridge/br_netfilter.c        | 114 +++++++--------------------------------
 3 files changed, 40 insertions(+), 100 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index f8105e54716a..ffab6c423a57 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -41,9 +41,8 @@ enum nf_br_hook_priorities {
 
 #define BRNF_PKT_TYPE			0x01
 #define BRNF_BRIDGED_DNAT		0x02
-#define BRNF_DONT_TAKE_PARENT		0x04
-#define BRNF_BRIDGED			0x08
-#define BRNF_NF_BRIDGE_PREROUTING	0x10
+#define BRNF_BRIDGED			0x04
+#define BRNF_NF_BRIDGE_PREROUTING	0x08
 
 
 /* Only used in br_forward.c */
@@ -68,6 +67,18 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+extern int br_handle_frame_finish(struct sk_buff *skb);
+/* Only used in br_device.c */
+static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+
+	skb_pull(skb, ETH_HLEN);
+	nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
+	skb->dev = nf_bridge->physindev;
+	return br_handle_frame_finish(skb);
+}
+
 /* This is called by the IP fragmenting code and it ensures there is
  * enough room for the encapsulating header (if there is one). */
 static inline unsigned int nf_bridge_pad(const struct sk_buff *skb)
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 5b8a6e73b02f..007bde87415d 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -15,7 +15,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
-
+#include <linux/netfilter_bridge.h>
 #include <asm/uaccess.h>
 #include "br_private.h"
 
@@ -28,6 +28,13 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct net_bridge_mdb_entry *mdst;
 	struct br_cpu_netstats *brstats = this_cpu_ptr(br->stats);
 
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge && (skb->nf_bridge->mask & BRNF_BRIDGED_DNAT)) {
+		br_nf_pre_routing_finish_bridge_slow(skb);
+		return NETDEV_TX_OK;
+	}
+#endif
+
 	brstats->tx_packets++;
 	brstats->tx_bytes += skb->len;
 
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index dd6f538ba0b0..05dc6304992c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -246,8 +246,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 }
 
 /* This requires some explaining. If DNAT has taken place,
- * we will need to fix up the destination Ethernet address,
- * and this is a tricky process.
+ * we will need to fix up the destination Ethernet address.
  *
  * There are two cases to consider:
  * 1. The packet was DNAT'ed to a device in the same bridge
@@ -261,52 +260,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
  * call ip_route_input() and to look at skb->dst->dev, which is
  * changed to the destination device if ip_route_input() succeeds.
  *
- * Let us first consider the case that ip_route_input() succeeds:
- *
- * If skb->dst->dev equals the logical bridge device the packet
- * came in on, we can consider this bridging. The packet is passed
- * through the neighbour output function to build a new destination
- * MAC address, which will make the packet enter br_nf_local_out()
- * not much later. In that function it is assured that the iptables
- * FORWARD chain is traversed for the packet.
+ * Let's first consider the case that ip_route_input() succeeds:
  *
+ * If the output device equals the logical bridge device the packet
+ * came in on, we can consider this bridging. The corresponding MAC
+ * address will be obtained in br_nf_pre_routing_finish_bridge.
  * Otherwise, the packet is considered to be routed and we just
  * change the destination MAC address so that the packet will
  * later be passed up to the IP stack to be routed. For a redirected
  * packet, ip_route_input() will give back the localhost as output device,
  * which differs from the bridge device.
  *
- * Let us now consider the case that ip_route_input() fails:
+ * Let's now consider the case that ip_route_input() fails:
  *
  * This can be because the destination address is martian, in which case
  * the packet will be dropped.
- * After a "echo '0' > /proc/sys/net/ipv4/ip_forward" ip_route_input()
- * will fail, while __ip_route_output_key() will return success. The source
- * address for __ip_route_output_key() is set to zero, so __ip_route_output_key
+ * If IP forwarding is disabled, ip_route_input() will fail, while
+ * ip_route_output_key() can return success. The source
+ * address for ip_route_output_key() is set to zero, so ip_route_output_key()
  * thinks we're handling a locally generated packet and won't care
- * if IP forwarding is allowed. We send a warning message to the users's
- * log telling her to put IP forwarding on.
- *
- * ip_route_input() will also fail if there is no route available.
- * In that case we just drop the packet.
- *
- * --Lennert, 20020411
- * --Bart, 20020416 (updated)
- * --Bart, 20021007 (updated)
- * --Bart, 20062711 (updated) */
+ * if IP forwarding is enabled. If the output device equals the logical bridge
+ * device, we proceed as if ip_route_input() succeeded. If it differs from the
+ * logical bridge port or if ip_route_output_key() fails we drop the packet.
+ */
+
 static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
 {
-	if (skb->pkt_type == PACKET_OTHERHOST) {
-		skb->pkt_type = PACKET_HOST;
-		skb->nf_bridge->mask |= BRNF_PKT_TYPE;
-	}
-	skb->nf_bridge->mask ^= BRNF_NF_BRIDGE_PREROUTING;
-
 	skb->dev = bridge_parent(skb->dev);
 	if (skb->dev) {
 		struct dst_entry *dst = skb_dst(skb);
 
 		nf_bridge_pull_encap_header(skb);
+		skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT;
 
 		if (dst->hh)
 			return neigh_hh_output(dst->hh, skb);
@@ -368,9 +353,6 @@ free_skb:
 		} else {
 			if (skb_dst(skb)->dev == dev) {
 bridged_dnat:
-				/* Tell br_nf_local_out this is a
-				 * bridged frame */
-				nf_bridge->mask |= BRNF_BRIDGED_DNAT;
 				skb->dev = nf_bridge->physindev;
 				nf_bridge_push_encap_header(skb);
 				NF_HOOK_THRESH(NFPROTO_BRIDGE,
@@ -721,54 +703,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 	return NF_STOLEN;
 }
 
-/* PF_BRIDGE/LOCAL_OUT ***********************************************
- *
- * This function sees both locally originated IP packets and forwarded
- * IP packets (in both cases the destination device is a bridge
- * device). It also sees bridged-and-DNAT'ed packets.
- *
- * If (nf_bridge->mask & BRNF_BRIDGED_DNAT) then the packet is bridged
- * and we fake the PF_BRIDGE/FORWARD hook. The function br_nf_forward()
- * will then fake the PF_INET/FORWARD hook. br_nf_local_out() has priority
- * NF_BR_PRI_FIRST, so no relevant PF_BRIDGE/INPUT functions have been nor
- * will be executed.
- */
-static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff *skb,
-				    const struct net_device *in,
-				    const struct net_device *out,
-				    int (*okfn)(struct sk_buff *))
-{
-	struct net_device *realindev;
-	struct nf_bridge_info *nf_bridge;
-
-	if (!skb->nf_bridge)
-		return NF_ACCEPT;
-
-	/* Need exclusive nf_bridge_info since we might have multiple
-	 * different physoutdevs. */
-	if (!nf_bridge_unshare(skb))
-		return NF_DROP;
-
-	nf_bridge = skb->nf_bridge;
-	if (!(nf_bridge->mask & BRNF_BRIDGED_DNAT))
-		return NF_ACCEPT;
-
-	/* Bridged, take PF_BRIDGE/FORWARD.
-	 * (see big note in front of br_nf_pre_routing_finish) */
-	nf_bridge->physoutdev = skb->dev;
-	realindev = nf_bridge->physindev;
-
-	if (nf_bridge->mask & BRNF_PKT_TYPE) {
-		skb->pkt_type = PACKET_OTHERHOST;
-		nf_bridge->mask ^= BRNF_PKT_TYPE;
-	}
-	nf_bridge_push_encap_header(skb);
-
-	NF_HOOK(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, realindev, skb->dev,
-		br_forward_finish);
-	return NF_STOLEN;
-}
-
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
@@ -797,10 +731,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 	struct net_device *realoutdev = bridge_parent(skb->dev);
 	u_int8_t pf;
 
-	if (!nf_bridge)
-		return NF_ACCEPT;
-
-	if (!(nf_bridge->mask & (BRNF_BRIDGED | BRNF_BRIDGED_DNAT)))
+	if (!nf_bridge || !(nf_bridge->mask & BRNF_BRIDGED))
 		return NF_ACCEPT;
 
 	if (!realoutdev)
@@ -847,10 +778,8 @@ static unsigned int ip_sabotage_in(unsigned int hook, struct sk_buff *skb,
 	return NF_ACCEPT;
 }
 
-/* For br_nf_local_out we need (prio = NF_BR_PRI_FIRST), to insure that innocent
- * PF_BRIDGE/NF_BR_LOCAL_OUT functions don't get bridged traffic as input.
- * For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
- * ip_refrag() can return NF_STOLEN. */
+/* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because
+ * br_dev_queue_push_xmit is called afterwards */
 static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 	{
 		.hook = br_nf_pre_routing,
@@ -880,13 +809,6 @@ static struct nf_hook_ops br_nf_ops[] __read_mostly = {
 		.hooknum = NF_BR_FORWARD,
 		.priority = NF_BR_PRI_BRNF,
 	},
-	{
-		.hook = br_nf_local_out,
-		.owner = THIS_MODULE,
-		.pf = PF_BRIDGE,
-		.hooknum = NF_BR_LOCAL_OUT,
-		.priority = NF_BR_PRI_FIRST,
-	},
 	{
 		.hook = br_nf_post_routing,
 		.owner = THIS_MODULE,
-- 
cgit v1.2.3


From e179e6322ac334e21a3c6d669d95bc967e5d0a80 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Thu, 15 Apr 2010 12:26:39 +0200
Subject: netfilter: bridge-netfilter: Fix MAC header handling with IP DNAT

- fix IP DNAT on vlan- or pppoe-encapsulated traffic: The functions
neigh_hh_output() or dst->neighbour->output() overwrite the complete
Ethernet header, although we only need the destination MAC address.
For encapsulated packets, they ended up overwriting the encapsulating
header. The new code copies the Ethernet source MAC address and
protocol number before calling dst->neighbour->output(). The Ethernet
source MAC and protocol number are copied back in place in
br_nf_pre_routing_finish_bridge_slow(). This also makes the IP DNAT
more transparent because in the old scheme the source MAC of the
bridge was copied into the source address in the Ethernet header. We
also let skb->protocol equal ETH_P_IP or ETH_P_IPV6 during the
execution of the PF_INET or PF_INET6 hooks, respectively.

- Speed up IP DNAT by calling neigh_hh_bridge() instead of
neigh_hh_output(): if dst->hh is available, we already know the MAC
address so we can just copy it.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h |  5 ++-
 include/net/neighbour.h          | 14 +++++++
 net/bridge/br_netfilter.c        | 90 +++++++++++++++++++++++++++++-----------
 3 files changed, 83 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ffab6c423a57..ea0e44b90432 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -43,7 +43,8 @@ enum nf_br_hook_priorities {
 #define BRNF_BRIDGED_DNAT		0x02
 #define BRNF_BRIDGED			0x04
 #define BRNF_NF_BRIDGE_PREROUTING	0x08
-
+#define BRNF_8021Q			0x10
+#define BRNF_PPPoE			0x20
 
 /* Only used in br_forward.c */
 extern int nf_bridge_copy_header(struct sk_buff *skb);
@@ -75,6 +76,8 @@ static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
 
 	skb_pull(skb, ETH_HLEN);
 	nf_bridge->mask ^= BRNF_BRIDGED_DNAT;
+	skb_copy_to_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN),
+				       skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
 	skb->dev = nf_bridge->physindev;
 	return br_handle_frame_finish(skb);
 }
diff --git a/include/net/neighbour.h b/include/net/neighbour.h
index da1d58be31b7..eb21340a573b 100644
--- a/include/net/neighbour.h
+++ b/include/net/neighbour.h
@@ -299,6 +299,20 @@ static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
 	return 0;
 }
 
+#ifdef CONFIG_BRIDGE_NETFILTER
+static inline int neigh_hh_bridge(struct hh_cache *hh, struct sk_buff *skb)
+{
+	unsigned seq, hh_alen;
+
+	do {
+		seq = read_seqbegin(&hh->hh_lock);
+		hh_alen = HH_DATA_ALIGN(ETH_HLEN);
+		memcpy(skb->data - hh_alen, hh->hh_data, ETH_ALEN + hh_alen - ETH_HLEN);
+	} while (read_seqretry(&hh->hh_lock, seq));
+	return 0;
+}
+#endif
+
 static inline int neigh_hh_output(struct hh_cache *hh, struct sk_buff *skb)
 {
 	unsigned seq;
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 05dc6304992c..b7e405dc9d1c 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -196,15 +196,24 @@ static inline void nf_bridge_save_header(struct sk_buff *skb)
 					 skb->nf_bridge->data, header_size);
 }
 
-/*
- * When forwarding bridge frames, we save a copy of the original
- * header before processing.
+static inline void nf_bridge_update_protocol(struct sk_buff *skb)
+{
+	if (skb->nf_bridge->mask & BRNF_8021Q)
+		skb->protocol = htons(ETH_P_8021Q);
+	else if (skb->nf_bridge->mask & BRNF_PPPoE)
+		skb->protocol = htons(ETH_P_PPP_SES);
+}
+
+/* Fill in the header for fragmented IP packets handled by
+ * the IPv4 connection tracking code.
  */
 int nf_bridge_copy_header(struct sk_buff *skb)
 {
 	int err;
-	int header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
+	unsigned int header_size;
 
+	nf_bridge_update_protocol(skb);
+	header_size = ETH_HLEN + nf_bridge_encap_header_len(skb);
 	err = skb_cow_head(skb, header_size);
 	if (err)
 		return err;
@@ -238,6 +247,7 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	skb_dst_set(skb, &rt->u.dst);
 
 	skb->dev = nf_bridge->physindev;
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -245,6 +255,38 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
 	return 0;
 }
 
+/* Obtain the correct destination MAC address, while preserving the original
+ * source MAC address. If we already know this address, we just copy it. If we
+ * don't, we use the neighbour framework to find out. In both cases, we make
+ * sure that br_handle_frame_finish() is called afterwards.
+ */
+static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
+{
+	struct nf_bridge_info *nf_bridge = skb->nf_bridge;
+	struct dst_entry *dst;
+
+	skb->dev = bridge_parent(skb->dev);
+	if (!skb->dev)
+		goto free_skb;
+	dst = skb_dst(skb);
+	if (dst->hh) {
+		neigh_hh_bridge(dst->hh, skb);
+		skb->dev = nf_bridge->physindev;
+		return br_handle_frame_finish(skb);
+	} else if (dst->neighbour) {
+		/* the neighbour function below overwrites the complete
+		 * MAC header, so we save the Ethernet source address and
+		 * protocol number. */
+		skb_copy_from_linear_data_offset(skb, -(ETH_HLEN-ETH_ALEN), skb->nf_bridge->data, ETH_HLEN-ETH_ALEN);
+		/* tell br_dev_xmit to continue with forwarding */
+		nf_bridge->mask |= BRNF_BRIDGED_DNAT;
+		return dst->neighbour->output(skb);
+	}
+free_skb:
+	kfree_skb(skb);
+	return 0;
+}
+
 /* This requires some explaining. If DNAT has taken place,
  * we will need to fix up the destination Ethernet address.
  *
@@ -283,25 +325,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
  * device, we proceed as if ip_route_input() succeeded. If it differs from the
  * logical bridge port or if ip_route_output_key() fails we drop the packet.
  */
-
-static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
-{
-	skb->dev = bridge_parent(skb->dev);
-	if (skb->dev) {
-		struct dst_entry *dst = skb_dst(skb);
-
-		nf_bridge_pull_encap_header(skb);
-		skb->nf_bridge->mask |= BRNF_BRIDGED_DNAT;
-
-		if (dst->hh)
-			return neigh_hh_output(dst->hh, skb);
-		else if (dst->neighbour)
-			return dst->neighbour->output(skb);
-	}
-	kfree_skb(skb);
-	return 0;
-}
-
 static int br_nf_pre_routing_finish(struct sk_buff *skb)
 {
 	struct net_device *dev = skb->dev;
@@ -354,6 +377,7 @@ free_skb:
 			if (skb_dst(skb)->dev == dev) {
 bridged_dnat:
 				skb->dev = nf_bridge->physindev;
+				nf_bridge_update_protocol(skb);
 				nf_bridge_push_encap_header(skb);
 				NF_HOOK_THRESH(NFPROTO_BRIDGE,
 					       NF_BR_PRE_ROUTING,
@@ -376,6 +400,7 @@ bridged_dnat:
 	}
 
 	skb->dev = nf_bridge->physindev;
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_PRE_ROUTING, skb, skb->dev, NULL,
 		       br_handle_frame_finish, 1);
@@ -396,6 +421,10 @@ static struct net_device *setup_pre_routing(struct sk_buff *skb)
 	nf_bridge->mask |= BRNF_NF_BRIDGE_PREROUTING;
 	nf_bridge->physindev = skb->dev;
 	skb->dev = bridge_parent(skb->dev);
+	if (skb->protocol == htons(ETH_P_8021Q))
+		nf_bridge->mask |= BRNF_8021Q;
+	else if (skb->protocol == htons(ETH_P_PPP_SES))
+		nf_bridge->mask |= BRNF_PPPoE;
 
 	return skb->dev;
 }
@@ -494,6 +523,7 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 
+	skb->protocol = htons(ETH_P_IPV6);
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish_ipv6);
 
@@ -566,6 +596,7 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff *skb,
 	if (!setup_pre_routing(skb))
 		return NF_DROP;
 	store_orig_dstaddr(skb);
+	skb->protocol = htons(ETH_P_IP);
 
 	NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, skb->dev, NULL,
 		br_nf_pre_routing_finish);
@@ -614,7 +645,9 @@ static int br_nf_forward_finish(struct sk_buff *skb)
 	} else {
 		in = *((struct net_device **)(skb->cb));
 	}
+	nf_bridge_update_protocol(skb);
 	nf_bridge_push_encap_header(skb);
+
 	NF_HOOK_THRESH(NFPROTO_BRIDGE, NF_BR_FORWARD, skb, in,
 		       skb->dev, br_forward_finish, 1);
 	return 0;
@@ -666,6 +699,10 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff *skb,
 	/* The physdev module checks on this */
 	nf_bridge->mask |= BRNF_BRIDGED;
 	nf_bridge->physoutdev = skb->dev;
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_FORWARD, skb, bridge_parent(in), parent,
 		br_nf_forward_finish);
@@ -706,8 +743,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 #if defined(CONFIG_NF_CONNTRACK_IPV4) || defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
-	if (skb->nfct != NULL &&
-	    (skb->protocol == htons(ETH_P_IP) || IS_VLAN_IP(skb)) &&
+	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
 	    skb->len > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
@@ -755,6 +791,10 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff *skb,
 
 	nf_bridge_pull_encap_header(skb);
 	nf_bridge_save_header(skb);
+	if (pf == PF_INET)
+		skb->protocol = htons(ETH_P_IP);
+	else
+		skb->protocol = htons(ETH_P_IPV6);
 
 	NF_HOOK(pf, NF_INET_POST_ROUTING, skb, NULL, realoutdev,
 		br_nf_dev_queue_xmit);
-- 
cgit v1.2.3


From a02a2956809d88b42b9a1f8003d60a343353b5ea Mon Sep 17 00:00:00 2001
From: Grazvydas Ignotas <notasas@gmail.com>
Date: Fri, 16 Apr 2010 13:22:12 +0300
Subject: wl1251: add support for dedicated IRQ line

wl1251 has WLAN_IRQ pin for generating interrupts to host processor,
which is mandatory in SPI mode and optional in SDIO mode (which can
use SDIO interrupts instead). However TI recommends using dedicated
IRQ line for SDIO too.

Add support for using dedicated interrupt line with SDIO, but also leave
ability to switch to SDIO interrupts in case it's needed.

Signed-off-by: Grazvydas Ignotas <notasas@gmail.com>
Reviewed-by: Bob Copeland <me@bobcopeland.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/wl12xx/wl1251_sdio.c | 56 ++++++++++++++++++++++++++++---
 include/linux/spi/wl12xx.h                |  2 ++
 2 files changed, 53 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/wireless/wl12xx/wl1251_sdio.c b/drivers/net/wireless/wl12xx/wl1251_sdio.c
index 7409c3468e25..d234285c2c81 100644
--- a/drivers/net/wireless/wl12xx/wl1251_sdio.c
+++ b/drivers/net/wireless/wl12xx/wl1251_sdio.c
@@ -25,6 +25,7 @@
 #include <linux/mmc/sdio_ids.h>
 #include <linux/platform_device.h>
 #include <linux/spi/wl12xx.h>
+#include <linux/irq.h>
 
 #include "wl1251.h"
 
@@ -134,18 +135,36 @@ static void wl1251_sdio_disable_irq(struct wl1251 *wl)
 	sdio_release_host(func);
 }
 
+/* Interrupts when using dedicated WLAN_IRQ pin */
+static irqreturn_t wl1251_line_irq(int irq, void *cookie)
+{
+	struct wl1251 *wl = cookie;
+
+	ieee80211_queue_work(wl->hw, &wl->irq_work);
+
+	return IRQ_HANDLED;
+}
+
+static void wl1251_enable_line_irq(struct wl1251 *wl)
+{
+	return enable_irq(wl->irq);
+}
+
+static void wl1251_disable_line_irq(struct wl1251 *wl)
+{
+	return disable_irq(wl->irq);
+}
+
 static void wl1251_sdio_set_power(bool enable)
 {
 }
 
-static const struct wl1251_if_operations wl1251_sdio_ops = {
+static struct wl1251_if_operations wl1251_sdio_ops = {
 	.read = wl1251_sdio_read,
 	.write = wl1251_sdio_write,
 	.write_elp = wl1251_sdio_write_elp,
 	.read_elp = wl1251_sdio_read_elp,
 	.reset = wl1251_sdio_reset,
-	.enable_irq = wl1251_sdio_enable_irq,
-	.disable_irq = wl1251_sdio_disable_irq,
 };
 
 static int wl1251_platform_probe(struct platform_device *pdev)
@@ -191,6 +210,7 @@ static int wl1251_sdio_probe(struct sdio_func *func,
 		goto release;
 
 	sdio_set_block_size(func, 512);
+	sdio_release_host(func);
 
 	SET_IEEE80211_DEV(hw, &func->dev);
 	wl->if_priv = func;
@@ -199,17 +219,41 @@ static int wl1251_sdio_probe(struct sdio_func *func,
 
 	if (wl12xx_board_data != NULL) {
 		wl->set_power = wl12xx_board_data->set_power;
+		wl->irq = wl12xx_board_data->irq;
 		wl->use_eeprom = wl12xx_board_data->use_eeprom;
 	}
 
-	sdio_release_host(func);
+	if (wl->irq) {
+		ret = request_irq(wl->irq, wl1251_line_irq, 0, "wl1251", wl);
+		if (ret < 0) {
+			wl1251_error("request_irq() failed: %d", ret);
+			goto disable;
+		}
+
+		set_irq_type(wl->irq, IRQ_TYPE_EDGE_RISING);
+		disable_irq(wl->irq);
+
+		wl1251_sdio_ops.enable_irq = wl1251_enable_line_irq;
+		wl1251_sdio_ops.disable_irq = wl1251_disable_line_irq;
+
+		wl1251_info("using dedicated interrupt line");
+	} else {
+		wl1251_sdio_ops.enable_irq = wl1251_sdio_enable_irq;
+		wl1251_sdio_ops.disable_irq = wl1251_sdio_disable_irq;
+
+		wl1251_info("using SDIO interrupt");
+	}
+
 	ret = wl1251_init_ieee80211(wl);
 	if (ret)
-		goto disable;
+		goto out_free_irq;
 
 	sdio_set_drvdata(func, wl);
 	return ret;
 
+out_free_irq:
+	if (wl->irq)
+		free_irq(wl->irq, wl);
 disable:
 	sdio_claim_host(func);
 	sdio_disable_func(func);
@@ -222,6 +266,8 @@ static void __devexit wl1251_sdio_remove(struct sdio_func *func)
 {
 	struct wl1251 *wl = sdio_get_drvdata(func);
 
+	if (wl->irq)
+		free_irq(wl->irq, wl);
 	wl1251_free_hw(wl);
 
 	sdio_claim_host(func);
diff --git a/include/linux/spi/wl12xx.h b/include/linux/spi/wl12xx.h
index aed64ed3dc8a..a223ecbc71ef 100644
--- a/include/linux/spi/wl12xx.h
+++ b/include/linux/spi/wl12xx.h
@@ -26,6 +26,8 @@
 
 struct wl12xx_platform_data {
 	void (*set_power)(bool enable);
+	/* SDIO only: IRQ number if WLAN_IRQ line is used, 0 for SDIO IRQs */
+	int irq;
 	bool use_eeprom;
 };
 
-- 
cgit v1.2.3


From fec5e652e58fa6017b2c9e06466cb2a6538de5b4 Mon Sep 17 00:00:00 2001
From: Tom Herbert <therbert@google.com>
Date: Fri, 16 Apr 2010 16:01:27 -0700
Subject: rfs: Receive Flow Steering

This patch implements receive flow steering (RFS).  RFS steers
received packets for layer 3 and 4 processing to the CPU where
the application for the corresponding flow is running.  RFS is an
extension of Receive Packet Steering (RPS).

The basic idea of RFS is that when an application calls recvmsg
(or sendmsg) the application's running CPU is stored in a hash
table that is indexed by the connection's rxhash which is stored in
the socket structure.  The rxhash is passed in skb's received on
the connection from netif_receive_skb.  For each received packet,
the associated rxhash is used to look up the CPU in the hash table,
if a valid CPU is set then the packet is steered to that CPU using
the RPS mechanisms.

The complication of the simple approach is that it would potentially
allow OOO packets.  If threads are thrashing around CPUs or multiple
threads are trying to read from the same sockets, a quickly changing
CPU value in the hash table could cause rampant OOO packets--
we consider this a non-starter.

To avoid OOO packets, this solution implements two types of hash
tables: rps_sock_flow_table and rps_dev_flow_table.

rps_sock_flow_table is a global hash table.  Each entry is just a CPU
number and it is populated in recvmsg and sendmsg as described above.
This table contains the "desired" CPUs for flows.

rps_dev_flow_table is specific to each device queue.  Each entry
contains a CPU and a tail queue counter.  The CPU is the "current"
CPU for a matching flow.  The tail queue counter holds the value
of a tail queue counter for the associated CPU's backlog queue at
the time of last enqueue for a flow matching the entry.

Each backlog queue has a queue head counter which is incremented
on dequeue, and so a queue tail counter is computed as queue head
count + queue length.  When a packet is enqueued on a backlog queue,
the current value of the queue tail counter is saved in the hash
entry of the rps_dev_flow_table.

And now the trick: when selecting the CPU for RPS (get_rps_cpu)
the rps_sock_flow table and the rps_dev_flow table for the RX queue
are consulted.  When the desired CPU for the flow (found in the
rps_sock_flow table) does not match the current CPU (found in the
rps_dev_flow table), the current CPU is changed to the desired CPU
if one of the following is true:

- The current CPU is unset (equal to RPS_NO_CPU)
- Current CPU is offline
- The current CPU's queue head counter >= queue tail counter in the
rps_dev_flow table.  This checks if the queue tail has advanced
beyond the last packet that was enqueued using this table entry.
This guarantees that all packets queued using this entry have been
dequeued, thus preserving in order delivery.

Making each queue have its own rps_dev_flow table has two advantages:
1) the tail queue counters will be written on each receive, so
keeping the table local to the interrupting CPU is good for locality.  2)
this allows lockless access to the table-- the CPU number and queue
tail counter need to be accessed together under mutual exclusion
from netif_receive_skb, we assume that this is only called from
device napi_poll which is non-reentrant.

This patch implements RFS for TCP and connected UDP sockets.
It should be usable for other flow oriented protocols.

There are two configuration parameters for RFS.  The
"rps_sock_flow_entries" sysctl (under net.core) sets the number of
entries in the rps_sock_flow_table, the per rxqueue sysfs entry
"rps_flow_cnt" contains the number of entries in the rps_dev_flow
table for the rxqueue.  Both are rounded to power of two.

The obvious benefit of RFS (over just RPS) is that it achieves
CPU locality between the receive processing for a flow and the
applications processing; this can result in increased performance
(higher pps, lower latency).

The benefits of RFS are dependent on cache hierarchy, application
load, and other factors.  On simple benchmarks, we don't necessarily
see improvement and sometimes see degradation.  However, for more
complex benchmarks and for applications where cache pressure is
much higher this technique seems to perform very well.

Below are some benchmark results which show the potential benefit of
this patch.  The netperf test has 500 instances of netperf TCP_RR
test with 1 byte req. and resp.  The RPC test is a request/response
test similar in structure to netperf RR test with 100 threads on
each host, but does more work in userspace than netperf.

e1000e on 8 core Intel
   No RFS or RPS		104K tps at 30% CPU
   No RFS (best RPS config):    290K tps at 63% CPU
   RFS				303K tps at 61% CPU

RPC test	tps	CPU%	50/90/99% usec latency	Latency StdDev
  No RFS/RPS	103K	48%	757/900/3185		4472.35
  RPS only:	174K	73%	415/993/2468		491.66
  RFS		223K	73%	379/651/1382		315.61

Signed-off-by: Tom Herbert <therbert@google.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h  |  69 +++++++++++++++++++++++++++-
 include/net/inet_sock.h    |  38 ++++++++++++++++
 net/core/dev.c             | 111 +++++++++++++++++++++++++++++++++++++--------
 net/core/net-sysfs.c       |  94 ++++++++++++++++++++++++++++++++++++--
 net/core/sysctl_net_core.c |  68 +++++++++++++++++++++++++++
 net/ipv4/af_inet.c         |  29 ++++++++++--
 net/ipv4/tcp_ipv4.c        |   2 +
 net/ipv4/udp.c             |   7 ++-
 8 files changed, 389 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 55c2086e1f06..649a0252686e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -530,14 +530,73 @@ struct rps_map {
 };
 #define RPS_MAP_SIZE(_num) (sizeof(struct rps_map) + (_num * sizeof(u16)))
 
+/*
+ * The rps_dev_flow structure contains the mapping of a flow to a CPU and the
+ * tail pointer for that CPU's input queue at the time of last enqueue.
+ */
+struct rps_dev_flow {
+	u16 cpu;
+	u16 fill;
+	unsigned int last_qtail;
+};
+
+/*
+ * The rps_dev_flow_table structure contains a table of flow mappings.
+ */
+struct rps_dev_flow_table {
+	unsigned int mask;
+	struct rcu_head rcu;
+	struct work_struct free_work;
+	struct rps_dev_flow flows[0];
+};
+#define RPS_DEV_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_dev_flow_table) + \
+    (_num * sizeof(struct rps_dev_flow)))
+
+/*
+ * The rps_sock_flow_table contains mappings of flows to the last CPU
+ * on which they were processed by the application (set in recvmsg).
+ */
+struct rps_sock_flow_table {
+	unsigned int mask;
+	u16 ents[0];
+};
+#define	RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \
+    (_num * sizeof(u16)))
+
+#define RPS_NO_CPU 0xffff
+
+static inline void rps_record_sock_flow(struct rps_sock_flow_table *table,
+					u32 hash)
+{
+	if (table && hash) {
+		unsigned int cpu, index = hash & table->mask;
+
+		/* We only give a hint, preemption can change cpu under us */
+		cpu = raw_smp_processor_id();
+
+		if (table->ents[index] != cpu)
+			table->ents[index] = cpu;
+	}
+}
+
+static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table,
+				       u32 hash)
+{
+	if (table && hash)
+		table->ents[hash & table->mask] = RPS_NO_CPU;
+}
+
+extern struct rps_sock_flow_table *rps_sock_flow_table;
+
 /* This structure contains an instance of an RX queue. */
 struct netdev_rx_queue {
 	struct rps_map *rps_map;
+	struct rps_dev_flow_table *rps_flow_table;
 	struct kobject kobj;
 	struct netdev_rx_queue *first;
 	atomic_t count;
 } ____cacheline_aligned_in_smp;
-#endif
+#endif /* CONFIG_RPS */
 
 /*
  * This structure defines the management hooks for network devices.
@@ -1333,11 +1392,19 @@ struct softnet_data {
 	/* Elements below can be accessed between CPUs for RPS */
 #ifdef CONFIG_RPS
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	unsigned int		input_queue_head;
 #endif
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
 
+static inline void incr_input_queue_head(struct softnet_data *queue)
+{
+#ifdef CONFIG_RPS
+	queue->input_queue_head++;
+#endif
+}
+
 DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 
 #define HAVE_NETIF_QUEUE
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index 83fd34437cf1..b487bc1b99ab 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -21,6 +21,7 @@
 #include <linux/string.h>
 #include <linux/types.h>
 #include <linux/jhash.h>
+#include <linux/netdevice.h>
 
 #include <net/flow.h>
 #include <net/sock.h>
@@ -101,6 +102,7 @@ struct rtable;
  * @uc_ttl - Unicast TTL
  * @inet_sport - Source port
  * @inet_id - ID counter for DF pkts
+ * @rxhash - flow hash received from netif layer
  * @tos - TOS
  * @mc_ttl - Multicasting TTL
  * @is_icsk - is this an inet_connection_sock?
@@ -124,6 +126,9 @@ struct inet_sock {
 	__u16			cmsg_flags;
 	__be16			inet_sport;
 	__u16			inet_id;
+#ifdef CONFIG_RPS
+	__u32			rxhash;
+#endif
 
 	struct ip_options	*opt;
 	__u8			tos;
@@ -219,4 +224,37 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk)
 	return inet_sk(sk)->transparent ? FLOWI_FLAG_ANYSRC : 0;
 }
 
+static inline void inet_rps_record_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_record_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void inet_rps_reset_flow(const struct sock *sk)
+{
+#ifdef CONFIG_RPS
+	struct rps_sock_flow_table *sock_flow_table;
+
+	rcu_read_lock();
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	rps_reset_sock_flow(sock_flow_table, inet_sk(sk)->rxhash);
+	rcu_read_unlock();
+#endif
+}
+
+static inline void inet_rps_save_rxhash(const struct sock *sk, u32 rxhash)
+{
+#ifdef CONFIG_RPS
+	if (unlikely(inet_sk(sk)->rxhash != rxhash)) {
+		inet_rps_reset_flow(sk);
+		inet_sk(sk)->rxhash = rxhash;
+	}
+#endif
+}
 #endif	/* _INET_SOCK_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index e8041eb76ac1..d7107ac835fa 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2203,19 +2203,28 @@ int weight_p __read_mostly = 64;            /* old backlog weight */
 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
 
 #ifdef CONFIG_RPS
+
+/* One global table that all flow-based protocols share. */
+struct rps_sock_flow_table *rps_sock_flow_table;
+EXPORT_SYMBOL(rps_sock_flow_table);
+
 /*
  * get_rps_cpu is called from netif_receive_skb and returns the target
  * CPU from the RPS map of the receiving queue for a given skb.
  * rcu_read_lock must be held on entry.
  */
-static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
+static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
+		       struct rps_dev_flow **rflowp)
 {
 	struct ipv6hdr *ip6;
 	struct iphdr *ip;
 	struct netdev_rx_queue *rxqueue;
 	struct rps_map *map;
+	struct rps_dev_flow_table *flow_table;
+	struct rps_sock_flow_table *sock_flow_table;
 	int cpu = -1;
 	u8 ip_proto;
+	u16 tcpu;
 	u32 addr1, addr2, ports, ihl;
 
 	if (skb_rx_queue_recorded(skb)) {
@@ -2232,7 +2241,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 	} else
 		rxqueue = dev->_rx;
 
-	if (!rxqueue->rps_map)
+	if (!rxqueue->rps_map && !rxqueue->rps_flow_table)
 		goto done;
 
 	if (skb->rxhash)
@@ -2284,9 +2293,48 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb)
 		skb->rxhash = 1;
 
 got_hash:
+	flow_table = rcu_dereference(rxqueue->rps_flow_table);
+	sock_flow_table = rcu_dereference(rps_sock_flow_table);
+	if (flow_table && sock_flow_table) {
+		u16 next_cpu;
+		struct rps_dev_flow *rflow;
+
+		rflow = &flow_table->flows[skb->rxhash & flow_table->mask];
+		tcpu = rflow->cpu;
+
+		next_cpu = sock_flow_table->ents[skb->rxhash &
+		    sock_flow_table->mask];
+
+		/*
+		 * If the desired CPU (where last recvmsg was done) is
+		 * different from current CPU (one in the rx-queue flow
+		 * table entry), switch if one of the following holds:
+		 *   - Current CPU is unset (equal to RPS_NO_CPU).
+		 *   - Current CPU is offline.
+		 *   - The current CPU's queue tail has advanced beyond the
+		 *     last packet that was enqueued using this table entry.
+		 *     This guarantees that all previous packets for the flow
+		 *     have been dequeued, thus preserving in order delivery.
+		 */
+		if (unlikely(tcpu != next_cpu) &&
+		    (tcpu == RPS_NO_CPU || !cpu_online(tcpu) ||
+		     ((int)(per_cpu(softnet_data, tcpu).input_queue_head -
+		      rflow->last_qtail)) >= 0)) {
+			tcpu = rflow->cpu = next_cpu;
+			if (tcpu != RPS_NO_CPU)
+				rflow->last_qtail = per_cpu(softnet_data,
+				    tcpu).input_queue_head;
+		}
+		if (tcpu != RPS_NO_CPU && cpu_online(tcpu)) {
+			*rflowp = rflow;
+			cpu = tcpu;
+			goto done;
+		}
+	}
+
 	map = rcu_dereference(rxqueue->rps_map);
 	if (map) {
-		u16 tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
+		tcpu = map->cpus[((u64) skb->rxhash * map->len) >> 32];
 
 		if (cpu_online(tcpu)) {
 			cpu = tcpu;
@@ -2320,13 +2368,14 @@ static void trigger_softirq(void *data)
 	__napi_schedule(&queue->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
-#endif /* CONFIG_SMP */
+#endif /* CONFIG_RPS */
 
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
  */
-static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
+static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
+			      unsigned int *qtail)
 {
 	struct softnet_data *queue;
 	unsigned long flags;
@@ -2341,6 +2390,10 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu)
 		if (queue->input_pkt_queue.qlen) {
 enqueue:
 			__skb_queue_tail(&queue->input_pkt_queue, skb);
+#ifdef CONFIG_RPS
+			*qtail = queue->input_queue_head +
+			    queue->input_pkt_queue.qlen;
+#endif
 			rps_unlock(queue);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
@@ -2355,11 +2408,10 @@ enqueue:
 
 				cpu_set(cpu, rcpus->mask[rcpus->select]);
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-			} else
-				__napi_schedule(&queue->backlog);
-#else
-			__napi_schedule(&queue->backlog);
+				goto enqueue;
+			}
 #endif
+			__napi_schedule(&queue->backlog);
 		}
 		goto enqueue;
 	}
@@ -2401,18 +2453,25 @@ int netif_rx(struct sk_buff *skb)
 
 #ifdef CONFIG_RPS
 	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
 		int cpu;
 
 		rcu_read_lock();
-		cpu = get_rps_cpu(skb->dev, skb);
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 		if (cpu < 0)
 			cpu = smp_processor_id();
-		ret = enqueue_to_backlog(skb, cpu);
+
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+
 		rcu_read_unlock();
 	}
 #else
-	ret = enqueue_to_backlog(skb, get_cpu());
-	put_cpu();
+	{
+		unsigned int qtail;
+		ret = enqueue_to_backlog(skb, get_cpu(), &qtail);
+		put_cpu();
+	}
 #endif
 	return ret;
 }
@@ -2830,14 +2889,22 @@ out:
 int netif_receive_skb(struct sk_buff *skb)
 {
 #ifdef CONFIG_RPS
-	int cpu;
+	struct rps_dev_flow voidflow, *rflow = &voidflow;
+	int cpu, ret;
+
+	rcu_read_lock();
 
-	cpu = get_rps_cpu(skb->dev, skb);
+	cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	if (cpu < 0)
-		return __netif_receive_skb(skb);
-	else
-		return enqueue_to_backlog(skb, cpu);
+	if (cpu >= 0) {
+		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+		rcu_read_unlock();
+	} else {
+		rcu_read_unlock();
+		ret = __netif_receive_skb(skb);
+	}
+
+	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
@@ -2856,6 +2923,7 @@ static void flush_backlog(void *arg)
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &queue->input_pkt_queue);
 			kfree_skb(skb);
+			incr_input_queue_head(queue);
 		}
 	rps_unlock(queue);
 }
@@ -3179,6 +3247,7 @@ static int process_backlog(struct napi_struct *napi, int quota)
 			local_irq_enable();
 			break;
 		}
+		incr_input_queue_head(queue);
 		rps_unlock(queue);
 		local_irq_enable();
 
@@ -5542,8 +5611,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	local_irq_enable();
 
 	/* Process offline CPU's input_pkt_queue */
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
+	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
+		incr_input_queue_head(oldsd);
+	}
 
 	return NOTIFY_OK;
 }
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 96ed6905b823..143052a22b9b 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -17,6 +17,7 @@
 #include <net/sock.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
+#include <linux/vmalloc.h>
 #include <net/wext.h>
 
 #include "net-sysfs.h"
@@ -601,22 +602,109 @@ ssize_t store_rps_map(struct netdev_rx_queue *queue,
 	return len;
 }
 
+static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+					   struct rx_queue_attribute *attr,
+					   char *buf)
+{
+	struct rps_dev_flow_table *flow_table;
+	unsigned int val = 0;
+
+	rcu_read_lock();
+	flow_table = rcu_dereference(queue->rps_flow_table);
+	if (flow_table)
+		val = flow_table->mask + 1;
+	rcu_read_unlock();
+
+	return sprintf(buf, "%u\n", val);
+}
+
+static void rps_dev_flow_table_release_work(struct work_struct *work)
+{
+	struct rps_dev_flow_table *table = container_of(work,
+	    struct rps_dev_flow_table, free_work);
+
+	vfree(table);
+}
+
+static void rps_dev_flow_table_release(struct rcu_head *rcu)
+{
+	struct rps_dev_flow_table *table = container_of(rcu,
+	    struct rps_dev_flow_table, rcu);
+
+	INIT_WORK(&table->free_work, rps_dev_flow_table_release_work);
+	schedule_work(&table->free_work);
+}
+
+ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
+				     struct rx_queue_attribute *attr,
+				     const char *buf, size_t len)
+{
+	unsigned int count;
+	char *endp;
+	struct rps_dev_flow_table *table, *old_table;
+	static DEFINE_SPINLOCK(rps_dev_flow_lock);
+
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	count = simple_strtoul(buf, &endp, 0);
+	if (endp == buf)
+		return -EINVAL;
+
+	if (count) {
+		int i;
+
+		if (count > 1<<30) {
+			/* Enforce a limit to prevent overflow */
+			return -EINVAL;
+		}
+		count = roundup_pow_of_two(count);
+		table = vmalloc(RPS_DEV_FLOW_TABLE_SIZE(count));
+		if (!table)
+			return -ENOMEM;
+
+		table->mask = count - 1;
+		for (i = 0; i < count; i++)
+			table->flows[i].cpu = RPS_NO_CPU;
+	} else
+		table = NULL;
+
+	spin_lock(&rps_dev_flow_lock);
+	old_table = queue->rps_flow_table;
+	rcu_assign_pointer(queue->rps_flow_table, table);
+	spin_unlock(&rps_dev_flow_lock);
+
+	if (old_table)
+		call_rcu(&old_table->rcu, rps_dev_flow_table_release);
+
+	return len;
+}
+
 static struct rx_queue_attribute rps_cpus_attribute =
 	__ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map);
 
+
+static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute =
+	__ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR,
+	    show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt);
+
 static struct attribute *rx_queue_default_attrs[] = {
 	&rps_cpus_attribute.attr,
+	&rps_dev_flow_table_cnt_attribute.attr,
 	NULL
 };
 
 static void rx_queue_release(struct kobject *kobj)
 {
 	struct netdev_rx_queue *queue = to_rx_queue(kobj);
-	struct rps_map *map = queue->rps_map;
 	struct netdev_rx_queue *first = queue->first;
 
-	if (map)
-		call_rcu(&map->rcu, rps_map_release);
+	if (queue->rps_map)
+		call_rcu(&queue->rps_map->rcu, rps_map_release);
+
+	if (queue->rps_flow_table)
+		call_rcu(&queue->rps_flow_table->rcu,
+		    rps_dev_flow_table_release);
 
 	if (atomic_dec_and_test(&first->count))
 		kfree(first);
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index b7b6b8208f75..dcc7d25996ab 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -11,12 +11,72 @@
 #include <linux/socket.h>
 #include <linux/netdevice.h>
 #include <linux/ratelimit.h>
+#include <linux/vmalloc.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 
 #include <net/ip.h>
 #include <net/sock.h>
 
+#ifdef CONFIG_RPS
+static int rps_sock_flow_sysctl(ctl_table *table, int write,
+				void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	unsigned int orig_size, size;
+	int ret, i;
+	ctl_table tmp = {
+		.data = &size,
+		.maxlen = sizeof(size),
+		.mode = table->mode
+	};
+	struct rps_sock_flow_table *orig_sock_table, *sock_table;
+	static DEFINE_MUTEX(sock_flow_mutex);
+
+	mutex_lock(&sock_flow_mutex);
+
+	orig_sock_table = rps_sock_flow_table;
+	size = orig_size = orig_sock_table ? orig_sock_table->mask + 1 : 0;
+
+	ret = proc_dointvec(&tmp, write, buffer, lenp, ppos);
+
+	if (write) {
+		if (size) {
+			if (size > 1<<30) {
+				/* Enforce limit to prevent overflow */
+				mutex_unlock(&sock_flow_mutex);
+				return -EINVAL;
+			}
+			size = roundup_pow_of_two(size);
+			if (size != orig_size) {
+				sock_table =
+				    vmalloc(RPS_SOCK_FLOW_TABLE_SIZE(size));
+				if (!sock_table) {
+					mutex_unlock(&sock_flow_mutex);
+					return -ENOMEM;
+				}
+
+				sock_table->mask = size - 1;
+			} else
+				sock_table = orig_sock_table;
+
+			for (i = 0; i < size; i++)
+				sock_table->ents[i] = RPS_NO_CPU;
+		} else
+			sock_table = NULL;
+
+		if (sock_table != orig_sock_table) {
+			rcu_assign_pointer(rps_sock_flow_table, sock_table);
+			synchronize_rcu();
+			vfree(orig_sock_table);
+		}
+	}
+
+	mutex_unlock(&sock_flow_mutex);
+
+	return ret;
+}
+#endif /* CONFIG_RPS */
+
 static struct ctl_table net_core_table[] = {
 #ifdef CONFIG_NET
 	{
@@ -82,6 +142,14 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+#ifdef CONFIG_RPS
+	{
+		.procname	= "rps_sock_flow_entries",
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= rps_sock_flow_sysctl
+	},
+#endif
 #endif /* CONFIG_NET */
 	{
 		.procname	= "netdev_budget",
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 193dcd6ed64f..c5376c725503 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -419,6 +419,8 @@ int inet_release(struct socket *sock)
 	if (sk) {
 		long timeout;
 
+		inet_rps_reset_flow(sk);
+
 		/* Applications forget to leave groups before exiting */
 		ip_mc_drop_socket(sk);
 
@@ -720,6 +722,8 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 {
 	struct sock *sk = sock->sk;
 
+	inet_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
@@ -728,12 +732,13 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(inet_sendmsg);
 
-
 static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 			     size_t size, int flags)
 {
 	struct sock *sk = sock->sk;
 
+	inet_rps_record_flow(sk);
+
 	/* We may need to bind the socket. */
 	if (!inet_sk(sk)->inet_num && inet_autobind(sk))
 		return -EAGAIN;
@@ -743,6 +748,22 @@ static ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset,
 	return sock_no_sendpage(sock, page, offset, size, flags);
 }
 
+int inet_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
+		 size_t size, int flags)
+{
+	struct sock *sk = sock->sk;
+	int addr_len = 0;
+	int err;
+
+	inet_rps_record_flow(sk);
+
+	err = sk->sk_prot->recvmsg(iocb, sk, msg, size, flags & MSG_DONTWAIT,
+				   flags & ~MSG_DONTWAIT, &addr_len);
+	if (err >= 0)
+		msg->msg_namelen = addr_len;
+	return err;
+}
+EXPORT_SYMBOL(inet_recvmsg);
 
 int inet_shutdown(struct socket *sock, int how)
 {
@@ -872,7 +893,7 @@ const struct proto_ops inet_stream_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = tcp_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = tcp_sendpage,
 	.splice_read	   = tcp_splice_read,
@@ -899,7 +920,7 @@ const struct proto_ops inet_dgram_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
@@ -929,7 +950,7 @@ static const struct proto_ops inet_sockraw_ops = {
 	.setsockopt	   = sock_common_setsockopt,
 	.getsockopt	   = sock_common_getsockopt,
 	.sendmsg	   = inet_sendmsg,
-	.recvmsg	   = sock_common_recvmsg,
+	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a24995cdc4b6..ad08392a738c 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1672,6 +1672,8 @@ process:
 
 	skb->dev = NULL;
 
+	inet_rps_save_rxhash(sk, skb->rxhash);
+
 	bh_lock_sock_nested(sk);
 	ret = 0;
 	if (!sock_owned_by_user(sk)) {
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 8fef859db35d..666b963496ff 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -1217,6 +1217,7 @@ int udp_disconnect(struct sock *sk, int flags)
 	sk->sk_state = TCP_CLOSE;
 	inet->inet_daddr = 0;
 	inet->inet_dport = 0;
+	inet_rps_save_rxhash(sk, 0);
 	sk->sk_bound_dev_if = 0;
 	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
 		inet_reset_saddr(sk);
@@ -1258,8 +1259,12 @@ EXPORT_SYMBOL(udp_lib_unhash);
 
 static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 {
-	int rc = sock_queue_rcv_skb(sk, skb);
+	int rc;
+
+	if (inet_sk(sk)->inet_daddr)
+		inet_rps_save_rxhash(sk, skb->rxhash);
 
+	rc = sock_queue_rcv_skb(sk, skb);
 	if (rc < 0) {
 		int is_udplite = IS_UDPLITE(sk);
 
-- 
cgit v1.2.3


From 39447b386c846bbf1c56f6403c5282837486200f Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Mon, 19 Apr 2010 13:32:41 +0800
Subject: perf: Enhance perf to allow for guest statistic collection from host

This patch introduces perf_guest_info_callbacks and the related
register/unregister functions. It also adds PERF_RECORD_MISC_XXX bits
that denote guest kernel and guest user space.

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
---
 arch/x86/include/asm/perf_event.h | 15 ++++-----------
 arch/x86/kernel/cpu/perf_event.c  | 31 +++++++++++++++++++++++++++++++
 include/linux/perf_event.h        | 21 ++++++++++++++++++++-
 kernel/perf_event.c               | 23 ++++++++++++++++++++++-
 4 files changed, 77 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index f6d43dbfd8e7..254883d0c7e0 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -135,17 +135,10 @@ extern void perf_events_lapic_init(void);
  */
 #define PERF_EFLAGS_EXACT	(1UL << 3)
 
-#define perf_misc_flags(regs)				\
-({	int misc = 0;					\
-	if (user_mode(regs))				\
-		misc |= PERF_RECORD_MISC_USER;		\
-	else						\
-		misc |= PERF_RECORD_MISC_KERNEL;	\
-	if (regs->flags & PERF_EFLAGS_EXACT)		\
-		misc |= PERF_RECORD_MISC_EXACT;		\
-	misc; })
-
-#define perf_instruction_pointer(regs)	((regs)->ip)
+struct pt_regs;
+extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
+extern unsigned long perf_misc_flags(struct pt_regs *regs);
+#define perf_misc_flags(regs)	perf_misc_flags(regs)
 
 #else
 static inline void init_hw_perf_events(void)		{ }
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 626154a9f535..2ea78abf69d9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1720,6 +1720,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
 {
 	struct perf_callchain_entry *entry;
 
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		/* TODO: We don't support guest os callchain now */
+		return NULL;
+	}
+
 	if (in_nmi())
 		entry = &__get_cpu_var(pmc_nmi_entry);
 	else
@@ -1743,3 +1748,29 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 	regs->cs = __KERNEL_CS;
 	local_save_flags(regs->flags);
 }
+
+/*
+ * Return the instruction pointer a sample should be attributed to.
+ *
+ * When guest callbacks are registered and they report that we are in a
+ * guest, the value comes from the get_guest_ip() callback; otherwise it is
+ * read from @regs.
+ */
+unsigned long perf_instruction_pointer(struct pt_regs *regs)
+{
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
+		return perf_guest_cbs->get_guest_ip();
+
+	return instruction_pointer(regs);
+}
+
+/*
+ * Compute the PERF_RECORD_MISC_* flags for a sample taken with @regs.
+ *
+ * The CPU-mode part is GUEST_USER/GUEST_KERNEL when the registered guest
+ * callbacks report that we are in a guest, and USER/KERNEL (derived from
+ * @regs) otherwise. PERF_RECORD_MISC_EXACT is added on top whenever
+ * PERF_EFLAGS_EXACT is set in regs->flags.
+ */
+unsigned long perf_misc_flags(struct pt_regs *regs)
+{
+	int mode;
+
+	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+		if (perf_guest_cbs->is_user_mode())
+			mode = PERF_RECORD_MISC_GUEST_USER;
+		else
+			mode = PERF_RECORD_MISC_GUEST_KERNEL;
+	} else if (user_mode(regs)) {
+		mode = PERF_RECORD_MISC_USER;
+	} else {
+		mode = PERF_RECORD_MISC_KERNEL;
+	}
+
+	if (regs->flags & PERF_EFLAGS_EXACT)
+		mode |= PERF_RECORD_MISC_EXACT;
+
+	return mode;
+}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index bf896d0b2e9c..24de5f181a41 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -288,11 +288,13 @@ struct perf_event_mmap_page {
 	__u64	data_tail;		/* user-space written tail */
 };
 
-#define PERF_RECORD_MISC_CPUMODE_MASK		(3 << 0)
+#define PERF_RECORD_MISC_CPUMODE_MASK		(7 << 0)
 #define PERF_RECORD_MISC_CPUMODE_UNKNOWN	(0 << 0)
 #define PERF_RECORD_MISC_KERNEL			(1 << 0)
 #define PERF_RECORD_MISC_USER			(2 << 0)
 #define PERF_RECORD_MISC_HYPERVISOR		(3 << 0)
+#define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
+#define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
 #define PERF_RECORD_MISC_EXACT			(1 << 14)
 /*
@@ -446,6 +448,12 @@ enum perf_callchain_context {
 # include <asm/perf_event.h>
 #endif
 
+/*
+ * Callbacks through which perf asks a virtualization implementation about
+ * guest state when a sample is taken.
+ *
+ * @is_in_guest:  a non-zero return makes perf treat the sample as a guest
+ *                sample.
+ * @is_user_mode: consulted only for guest samples; non-zero selects
+ *                PERF_RECORD_MISC_GUEST_USER, zero selects
+ *                PERF_RECORD_MISC_GUEST_KERNEL.
+ * @get_guest_ip: consulted only for guest samples; its return value is
+ *                reported as the sample's instruction pointer.
+ */
+struct perf_guest_info_callbacks {
+	int (*is_in_guest) (void);
+	int (*is_user_mode) (void);
+	unsigned long (*get_guest_ip) (void);
+};
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <asm/hw_breakpoint.h>
 #endif
@@ -932,6 +940,12 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)
 		__perf_event_mmap(vma);
 }
 
+extern struct perf_guest_info_callbacks *perf_guest_cbs;
+extern int perf_register_guest_info_callbacks(
+		struct perf_guest_info_callbacks *);
+extern int perf_unregister_guest_info_callbacks(
+		struct perf_guest_info_callbacks *);
+
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
 
@@ -1001,6 +1015,11 @@ perf_sw_event(u32 event_id, u64 nr, int nmi,
 static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
+/*
+ * No-op variants of the guest callback (un)registration functions: the
+ * callbacks pointer is ignored and success (0) is returned.
+ *
+ * The parameter is named because C permits omitting parameter names only
+ * in declarations, not in function definitions.
+ */
+static inline int
+perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	return 0;
+}
+
+static inline int
+perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	return 0;
+}
+
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
 static inline void perf_event_fork(struct task_struct *tsk)		{ }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 07b7a435bf03..9dbe8cdaf145 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2797,6 +2797,27 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 }
 
 
+/*
+ * We assume there is only KVM supporting the callbacks.
+ * Later on, we might change it to a list if there is
+ * another virtualization implementation supporting the callbacks.
+ */
+struct perf_guest_info_callbacks *perf_guest_cbs;
+
+/*
+ * Install @cbs as the single, global set of guest info callbacks.
+ *
+ * Any previously registered set is replaced without notice; no locking or
+ * validation is performed here. Always returns 0.
+ */
+int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = cbs;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_register_guest_info_callbacks);
+
+/*
+ * Remove the global guest info callbacks.
+ *
+ * @cbs is not compared against the registered set: whatever is currently
+ * installed is cleared. Always returns 0.
+ */
+int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs)
+{
+	perf_guest_cbs = NULL;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
+
 /*
  * Output
  */
@@ -3749,7 +3770,7 @@ void __perf_event_mmap(struct vm_area_struct *vma)
 		.event_id  = {
 			.header = {
 				.type = PERF_RECORD_MMAP,
-				.misc = 0,
+				.misc = PERF_RECORD_MISC_USER,
 				/* .size */
 			},
 			/* .pid */
-- 
cgit v1.2.3


From b5e5a37e36cd4d355b875665312d7aaae4e5833c Mon Sep 17 00:00:00 2001
From: Bastien Nocera <hadess@hadess.net>
Date: Fri, 16 Apr 2010 17:19:50 +0100
Subject: HID: add HID_QUIRK_HIDDEV_FORCE and HID_QUIRK_NO_IGNORE

Add two quirks to make it possible for usbhid module options to
override whether a device is ignored (HID_QUIRK_NO_IGNORE) and
whether to connect a hiddev device (HID_QUIRK_HIDDEV_FORCE).

Passing HID_QUIRK_NO_IGNORE for your device means that it will
not be ignored by the HID layer, even if present in a blacklist.

HID_QUIRK_HIDDEV_FORCE will force the creation of a hiddev for that
device, making it accessible from user-space.

Tested with an Apple IR Receiver, switching it from using appleir
to using lirc's macmini driver.

Signed-off-by: Bastien Nocera <hadess@hadess.net>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/hid-core.c        | 4 +++-
 drivers/hid/usbhid/hid-core.c | 1 +
 include/linux/hid.h           | 2 ++
 3 files changed, 6 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/hid-core.c b/drivers/hid/hid-core.c
index 8617aa97a9c8..468c6c2d4ad5 100644
--- a/drivers/hid/hid-core.c
+++ b/drivers/hid/hid-core.c
@@ -1168,6 +1168,8 @@ int hid_connect(struct hid_device *hdev, unsigned int connect_mask)
 	unsigned int i;
 	int len;
 
+	if (hdev->quirks & HID_QUIRK_HIDDEV_FORCE)
+		connect_mask |= (HID_CONNECT_HIDDEV_FORCE | HID_CONNECT_HIDDEV);
 	if (hdev->bus != BUS_USB)
 		connect_mask &= ~HID_CONNECT_HIDDEV;
 	if (hid_hiddev(hdev))
@@ -1757,7 +1759,7 @@ int hid_add_device(struct hid_device *hdev)
 
 	/* we need to kill them here, otherwise they will stay allocated to
 	 * wait for coming driver */
-	if (hid_ignore(hdev))
+	if (!(hdev->quirks & HID_QUIRK_NO_IGNORE) && hid_ignore(hdev))
 		return -ENODEV;
 
 	/* XXX hack, any other cleaner solution after the driver core
diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 9cd61a52e9e3..245aef0de8f2 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1143,6 +1143,7 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
 	hid->vendor = le16_to_cpu(dev->descriptor.idVendor);
 	hid->product = le16_to_cpu(dev->descriptor.idProduct);
 	hid->name[0] = 0;
+	hid->quirks = usbhid_lookup_quirk(hid->vendor, hid->product);
 	if (intf->cur_altsetting->desc.bInterfaceProtocol ==
 			USB_INTERFACE_PROTOCOL_MOUSE)
 		hid->type = HID_TYPE_USBMOUSE;
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b1344ec4b7fc..f1f2b6f0d1c4 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -308,11 +308,13 @@ struct hid_item {
 #define HID_QUIRK_NOTOUCH			0x00000002
 #define HID_QUIRK_IGNORE			0x00000004
 #define HID_QUIRK_NOGET				0x00000008
+#define HID_QUIRK_HIDDEV_FORCE			0x00000010
 #define HID_QUIRK_BADPAD			0x00000020
 #define HID_QUIRK_MULTI_INPUT			0x00000040
 #define HID_QUIRK_SKIP_OUTPUT_REPORTS		0x00010000
 #define HID_QUIRK_FULLSPEED_INTERVAL		0x10000000
 #define HID_QUIRK_NO_INIT_REPORTS		0x20000000
+#define HID_QUIRK_NO_IGNORE			0x40000000
 
 /*
  * This is the global environment of the parser. This information is
-- 
cgit v1.2.3


From e281b19897dc21c1071802808d461627d747a877 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 14:17:47 +0200
Subject: netfilter: xtables: inclusion of xt_TEE

xt_TEE can be used to clone and reroute a packet. This can for
example be used to copy traffic at a router for logging purposes
to another dedicated machine.

References: http://www.gossamer-threads.com/lists/iptables/devel/68781
Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/Kbuild   |   1 +
 include/linux/netfilter/xt_TEE.h |   9 ++
 net/ipv4/ip_output.c             |   1 +
 net/ipv6/ip6_output.c            |   1 +
 net/netfilter/Kconfig            |   7 ++
 net/netfilter/Makefile           |   1 +
 net/netfilter/xt_TEE.c           | 256 +++++++++++++++++++++++++++++++++++++++
 7 files changed, 276 insertions(+)
 create mode 100644 include/linux/netfilter/xt_TEE.h
 create mode 100644 net/netfilter/xt_TEE.c

(limited to 'include/linux')

diff --git a/include/linux/netfilter/Kbuild b/include/linux/netfilter/Kbuild
index a5a63e41b8af..48767cd16453 100644
--- a/include/linux/netfilter/Kbuild
+++ b/include/linux/netfilter/Kbuild
@@ -16,6 +16,7 @@ header-y += xt_RATEEST.h
 header-y += xt_SECMARK.h
 header-y += xt_TCPMSS.h
 header-y += xt_TCPOPTSTRIP.h
+header-y += xt_TEE.h
 header-y += xt_TPROXY.h
 header-y += xt_comment.h
 header-y += xt_connbytes.h
diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
new file mode 100644
index 000000000000..55d4a5011523
--- /dev/null
+++ b/include/linux/netfilter/xt_TEE.h
@@ -0,0 +1,9 @@
+#ifndef _XT_TEE_TARGET_H
+#define _XT_TEE_TARGET_H
+
+/*
+ * Target info for the "TEE" target.
+ *
+ * @gw:  address the cloned packet is routed to (used as the route lookup
+ *       destination); the all-zero address is rejected at rule check time.
+ * @oif: name of an output interface, looked up by name when routing the
+ *       clone; the last byte of the buffer must be NUL.
+ */
+struct xt_tee_tginfo {
+	union nf_inet_addr gw;
+	char oif[16];
+};
+
+#endif /* _XT_TEE_TARGET_H */
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index f09135e1e14f..0abfddec1e26 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -309,6 +309,7 @@ int ip_output(struct sk_buff *skb)
 			    ip_finish_output,
 			    !(IPCB(skb)->flags & IPSKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip_output);
 
 int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
 {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c10a38a71a5e..d09be7ff8735 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -176,6 +176,7 @@ int ip6_output(struct sk_buff *skb)
 			    ip6_finish_output,
 			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
 }
+EXPORT_SYMBOL_GPL(ip6_output);
 
 /*
  *	xmit an sk_buff (used by TCP)
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 8055786b7702..673a6c8f0e95 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -502,6 +502,13 @@ config NETFILTER_XT_TARGET_RATEEST
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
+config NETFILTER_XT_TARGET_TEE
+	tristate '"TEE" - packet cloning to alternate destination'
+	depends on NETFILTER_ADVANCED
+	---help---
+	This option adds a "TEE" target with which a packet can be cloned and
+	this clone be rerouted to another nexthop.
+
 config NETFILTER_XT_TARGET_TPROXY
 	tristate '"TPROXY" target support (EXPERIMENTAL)'
 	depends on EXPERIMENTAL
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index cd31afe0692a..14e3a8fd8180 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -59,6 +59,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP) += xt_TCPOPTSTRIP.o
+obj-$(CONFIG_NETFILTER_XT_TARGET_TEE) += xt_TEE.o
 obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
 
 # matches
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
new file mode 100644
index 000000000000..b3d730163f12
--- /dev/null
+++ b/net/netfilter/xt_TEE.c
@@ -0,0 +1,256 @@
+/*
+ *	"TEE" target extension for Xtables
+ *	Copyright © Sebastian Claßen, 2007
+ *	Jan Engelhardt, 2007-2010
+ *
+ *	based on ipt_ROUTE.c from Cédric de Launois
+ *	<delaunois@info.ucl.be>
+ *
+ *	This program is free software; you can redistribute it and/or
+ *	modify it under the terms of the GNU General Public License
+ *	version 2 or later, as published by the Free Software Foundation.
+ */
+#include <linux/ip.h>
+#include <linux/module.h>
+#include <linux/route.h>
+#include <linux/skbuff.h>
+#include <net/checksum.h>
+#include <net/icmp.h>
+#include <net/ip.h>
+#include <net/ipv6.h>
+#include <net/ip6_route.h>
+#include <net/route.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_TEE.h>
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#	define WITH_CONNTRACK 1
+#	include <net/netfilter/nf_conntrack.h>
+#endif
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+#	define WITH_IPV6 1
+#endif
+
+static const union nf_inet_addr tee_zero_address;
+
+/*
+ * Choose the network namespace to use for @skb.
+ *
+ * With CONFIG_NET_NS, the namespace of skb->dev is preferred, then that of
+ * the device behind the skb's dst entry. In every other case (including
+ * !CONFIG_NET_NS) &init_net is returned.
+ */
+static struct net *pick_net(struct sk_buff *skb)
+{
+#ifdef CONFIG_NET_NS
+	const struct dst_entry *route;
+
+	if (skb->dev)
+		return dev_net(skb->dev);
+
+	route = skb_dst(skb);
+	if (route && route->dev)
+		return dev_net(route->dev);
+#endif
+	return &init_net;
+}
+
+/*
+ * Resolve the optional output interface named in @info and store its
+ * ifindex in @f->oif.
+ *
+ * Returns true if no interface name was given (the flow is left untouched)
+ * or if the name was found in @net; returns false if a name was given but
+ * no such device exists.
+ */
+static bool tee_tg_route_oif(struct flowi *f, struct net *net,
+			     const struct xt_tee_tginfo *info)
+{
+	struct net_device *dev;
+
+	/* An empty name means no interface was requested: nothing to do. */
+	if (*info->oif == '\0')
+		return true;
+	dev = dev_get_by_name(net, info->oif);
+	if (dev == NULL)
+		return false;
+	f->oif = dev->ifindex;
+	/* dev_get_by_name() took a reference; only the index is kept. */
+	dev_put(dev);
+	return true;
+}
+
+/*
+ * Route the (already copied) IPv4 @skb towards the gateway in @info.
+ *
+ * Builds a flow with the gateway as destination, the packet's TOS and
+ * universe scope, plus whatever output interface tee_tg_route_oif() fills
+ * in, and performs an output route lookup in the namespace chosen by
+ * pick_net().
+ *
+ * On success the skb's previous dst is released and replaced by the new
+ * route, skb->dev and skb->protocol are updated, and true is returned.
+ * On failure the skb is left untouched and false is returned.
+ */
+static bool
+tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+	struct net *net = pick_net(skb);
+	struct rtable *rt;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	if (!tee_tg_route_oif(&fl, net, info))
+		return false;
+	fl.nl_u.ip4_u.daddr = info->gw.ip;
+	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
+	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
+	if (ip_route_output_key(net, &rt, &fl) != 0)
+		return false;
+
+	/* Swap the dst inherited from the original packet for the new route. */
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, &rt->u.dst);
+	skb->dev      = rt->u.dst.dev;
+	skb->protocol = htons(ETH_P_IP);
+	return true;
+}
+
+/*
+ * IPv4 "TEE" target: duplicate the packet and send the duplicate towards
+ * the gateway configured in the rule.
+ *
+ * Always returns %XT_CONTINUE for the original packet, including when the
+ * copy could not be allocated or routed (the copy is then simply dropped).
+ */
+static unsigned int
+tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+	struct iphdr *iph;
+
+	/*
+	 * Copy the skb, and route the copy. Will later return %XT_CONTINUE for
+	 * the original skb, which should continue on its way as if nothing has
+	 * happened. The copy should be independently delivered to the TEE
+	 * --gateway.
+	 */
+	skb = pskb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	/* Avoid counting cloned packets towards the original connection. */
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	/*
+	 * If we are in PREROUTING/INPUT, the checksum must be recalculated
+	 * since the length could have changed as a result of defragmentation.
+	 *
+	 * We also decrease the TTL to mitigate potential TEE loops
+	 * between two hosts.
+	 *
+	 * Set %IP_DF so that the original source is notified of a potentially
+	 * decreased MTU on the clone route. IPv6 does this too.
+	 */
+	iph = ip_hdr(skb);
+	iph->frag_off |= htons(IP_DF);
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN)
+		--iph->ttl;
+	ip_send_check(iph);
+
+	/*
+	 * Xtables is not reentrant currently, so a choice has to be made:
+	 * 1. return absolute verdict for the original and let the cloned
+	 *    packet travel through the chains
+	 * 2. let the original continue travelling and not pass the clone
+	 *    to Xtables.
+	 * #2 is chosen. Normally, we would use ip_local_out for the clone.
+	 * Because iph->check is already correct and we don't pass it to
+	 * Xtables anyway, a shortcut to dst_output [forwards to ip_output] can
+	 * be taken. %IPSKB_REROUTED needs to be set so that ip_output does not
+	 * invoke POSTROUTING on the cloned packet.
+	 */
+	IPCB(skb)->flags |= IPSKB_REROUTED;
+	if (tee_tg_route4(skb, info))
+		ip_output(skb);
+	else
+		kfree_skb(skb);
+
+	return XT_CONTINUE;
+}
+
+#ifdef WITH_IPV6
+/*
+ * Route the (already copied) IPv6 @skb towards the gateway in @info.
+ *
+ * Builds a flow with the gateway as destination and a flow label assembled
+ * from the packet header's flow_lbl bytes, plus whatever output interface
+ * tee_tg_route_oif() fills in, and looks up an output route in the
+ * namespace chosen by pick_net().
+ *
+ * On success the skb's previous dst is released and replaced by the new
+ * one, skb->dev and skb->protocol are updated, and true is returned.
+ * Returns false, leaving the skb untouched, if the interface cannot be
+ * resolved or the lookup yields NULL.
+ */
+static bool
+tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
+{
+	const struct ipv6hdr *iph = ipv6_hdr(skb);
+	struct net *net = pick_net(skb);
+	struct dst_entry *dst;
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+	if (!tee_tg_route_oif(&fl, net, info))
+		return false;
+	fl.nl_u.ip6_u.daddr = info->gw.in6;
+	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
+				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
+	/*
+	 * NOTE(review): confirm whether ip6_route_output() reports failure
+	 * by returning NULL or via an error recorded in the returned dst.
+	 */
+	dst = ip6_route_output(net, NULL, &fl);
+	if (dst == NULL)
+		return false;
+
+	/* Swap the dst inherited from the original packet for the new route. */
+	dst_release(skb_dst(skb));
+	skb_dst_set(skb, dst);
+	skb->dev      = dst->dev;
+	skb->protocol = htons(ETH_P_IPV6);
+	return true;
+}
+
+/*
+ * IPv6 "TEE" target: duplicate the packet and send the duplicate towards
+ * the gateway configured in the rule.
+ *
+ * Always returns %XT_CONTINUE for the original packet, including when the
+ * copy could not be allocated or routed (the copy is then simply dropped).
+ */
+static unsigned int
+tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+{
+	const struct xt_tee_tginfo *info = par->targinfo;
+
+	/* From here on @skb refers to the copy, not the original packet. */
+	skb = pskb_copy(skb, GFP_ATOMIC);
+	if (skb == NULL)
+		return XT_CONTINUE;
+
+#ifdef WITH_CONNTRACK
+	/* Detach the copy from the original's conntrack entry. */
+	nf_conntrack_put(skb->nfct);
+	skb->nfct     = &nf_conntrack_untracked.ct_general;
+	skb->nfctinfo = IP_CT_NEW;
+	nf_conntrack_get(skb->nfct);
+#endif
+	/* Only in PREROUTING/INPUT is the copy's hop limit decremented. */
+	if (par->hooknum == NF_INET_PRE_ROUTING ||
+	    par->hooknum == NF_INET_LOCAL_IN) {
+		struct ipv6hdr *iph = ipv6_hdr(skb);
+		--iph->hop_limit;
+	}
+	/*
+	 * Presumably, as in the IPv4 variant, the REROUTED flag keeps
+	 * ip6_output from running POSTROUTING on the copy - TODO confirm.
+	 */
+	IP6CB(skb)->flags |= IP6SKB_REROUTED;
+	if (tee_tg_route6(skb, info))
+		ip6_output(skb);
+	else
+		kfree_skb(skb);
+
+	return XT_CONTINUE;
+}
+#endif /* WITH_IPV6 */
+
+/*
+ * Validate the target info of a new TEE rule.
+ *
+ * Returns 0 if the rule is acceptable, -EINVAL if the last byte of the
+ * interface name buffer is not NUL or if the gateway is the all-zero
+ * address.
+ */
+static int tee_tg_check(const struct xt_tgchk_param *par)
+{
+	const struct xt_tee_tginfo *tginfo = par->targinfo;
+	const size_t last = sizeof(tginfo->oif) - 1;
+
+	if (tginfo->oif[last] != '\0')
+		return -EINVAL;
+
+	/* 0.0.0.0 and :: not allowed */
+	if (memcmp(&tginfo->gw, &tee_zero_address,
+		   sizeof(tee_zero_address)) == 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * Target registrations for "TEE", revision 1: one entry for IPv4 and, when
+ * IPv6 support is configured, one for IPv6. Both share the same target info
+ * layout and the same checkentry routine.
+ */
+static struct xt_target tee_tg_reg[] __read_mostly = {
+	{
+		.name       = "TEE",
+		.revision   = 1,
+		.family     = NFPROTO_IPV4,
+		.target     = tee_tg4,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#ifdef WITH_IPV6
+	{
+		.name       = "TEE",
+		.revision   = 1,
+		.family     = NFPROTO_IPV6,
+		.target     = tee_tg6,
+		.targetsize = sizeof(struct xt_tee_tginfo),
+		.checkentry = tee_tg_check,
+		.me         = THIS_MODULE,
+	},
+#endif
+};
+
+/* Module init: register every entry of tee_tg_reg with Xtables. */
+static int __init tee_tg_init(void)
+{
+	return xt_register_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+/* Module exit: unregister every entry of tee_tg_reg from Xtables. */
+static void __exit tee_tg_exit(void)
+{
+	xt_unregister_targets(tee_tg_reg, ARRAY_SIZE(tee_tg_reg));
+}
+
+module_init(tee_tg_init);
+module_exit(tee_tg_exit);
+MODULE_AUTHOR("Sebastian Claßen <sebastian.classen@freenet.ag>");
+MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
+MODULE_DESCRIPTION("Xtables: Reroute packet copy");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ipt_TEE");
+MODULE_ALIAS("ip6t_TEE");
-- 
cgit v1.2.3


From f3c5c1bfd430858d3a05436f82c51e53104feb6b Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Mon, 19 Apr 2010 16:05:10 +0200
Subject: netfilter: xtables: make ip_tables reentrant

Currently, the table traverser stores return addresses in the ruleset
itself (struct ip6t_entry->comefrom). This has a well-known drawback:
the jumpstack is overwritten on reentry, making it necessary for
targets to return absolute verdicts. Also, the ruleset (which might
be heavy memory-wise) needs to be replicated for each CPU that can
possibly invoke ip6t_do_table.

This patch decouples the jumpstack from struct ip6t_entry and instead
puts it into xt_table_info. Not being restricted by 'comefrom'
anymore, we can set up a stack as needed. By default, there is room
allocated for two entries into the traverser.

arp_tables is not touched though, because there is just one/two
modules and further patches seek to collapse the table traverser
anyhow.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h |  7 ++++
 net/ipv4/netfilter/arp_tables.c    |  6 ++-
 net/ipv4/netfilter/ip_tables.c     | 65 +++++++++++++++++---------------
 net/ipv6/netfilter/ip6_tables.c    | 56 +++++++++++----------------
 net/netfilter/x_tables.c           | 77 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 145 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 26ced0c323a5..50c867256ca3 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -401,6 +401,13 @@ struct xt_table_info {
 	unsigned int hook_entry[NF_INET_NUMHOOKS];
 	unsigned int underflow[NF_INET_NUMHOOKS];
 
+	/*
+	 * Number of user chains. Since tables cannot have loops, at most
+	 * @stacksize jumps (number of user chains) can possibly be made.
+	 */
+	unsigned int stacksize;
+	unsigned int *stackptr;
+	void ***jumpstack;
 	/* ipt_entry tables: one per CPU */
 	/* Note : this field MUST be the last one, see XT_TABLE_INFO_SZ */
 	void *entries[1];
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index e8e363d90365..07a699059390 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -649,6 +649,9 @@ static int translate_table(struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			break;
 		++i;
+		if (strcmp(arpt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 	duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
 	if (ret != 0)
@@ -1774,8 +1777,7 @@ struct xt_table *arpt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 18c5b1573f3e..70900ecf88e2 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -321,8 +321,6 @@ ipt_do_table(struct sk_buff *skb,
 	     const struct net_device *out,
 	     struct xt_table *table)
 {
-#define tb_comefrom ((struct ipt_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
 	bool hotdrop = false;
@@ -330,7 +328,8 @@ ipt_do_table(struct sk_buff *skb,
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ipt_entry *e, *back;
+	struct ipt_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -356,19 +355,23 @@ ipt_do_table(struct sk_buff *skb,
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
+	pr_devel("Entering %s(hook %u); sp at %u (UF %p)\n",
+		 table->name, hook, origptr,
+		 get_entry(table_base, private->underflow[hook]));
 
 	do {
 		const struct ipt_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip_packet_match(ip, indev, outdev,
 		    &e->ip, mtpar.fragoff)) {
  no_match:
@@ -403,17 +406,28 @@ ipt_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0) {
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+					pr_devel("Underflow (this is normal) "
+						 "to %p\n", e);
+				} else {
+					e = jumpstack[--*stackptr];
+					pr_devel("Pulled %p out from pos %u\n",
+						 e, *stackptr);
+					e = ipt_next_entry(e);
+				}
 				continue;
 			}
 			if (table_base + v != ipt_next_entry(e) &&
 			    !(e->ip.flags & IPT_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ipt_entry *next = ipt_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
+				pr_devel("Pushed %p into pos %u\n",
+					 e, *stackptr - 1);
 			}
 
 			e = get_entry(table_base, v);
@@ -426,18 +440,7 @@ ipt_do_table(struct sk_buff *skb,
 		tgpar.targinfo = t->data;
 
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IPT_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		/* Target might have changed stuff. */
 		ip = ip_hdr(skb);
 		if (verdict == IPT_CONTINUE)
@@ -447,7 +450,9 @@ ipt_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 	xt_info_rdunlock_bh();
-
+	pr_devel("Exiting %s; resetting sp from %u to %u\n",
+		 __func__, *stackptr, origptr);
+	*stackptr = origptr;
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
@@ -455,8 +460,6 @@ ipt_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -838,6 +841,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ipt_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2086,8 +2092,7 @@ struct xt_table *ipt_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f2b815e72329..2a2770bcd640 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -351,15 +351,14 @@ ip6t_do_table(struct sk_buff *skb,
 	      const struct net_device *out,
 	      struct xt_table *table)
 {
-#define tb_comefrom ((struct ip6t_entry *)table_base)->comefrom
-
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
 	const void *table_base;
-	struct ip6t_entry *e, *back;
+	struct ip6t_entry *e, **jumpstack;
+	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
 	struct xt_match_param mtpar;
 	struct xt_target_param tgpar;
@@ -383,19 +382,19 @@ ip6t_do_table(struct sk_buff *skb,
 
 	xt_info_rdlock_bh();
 	private = table->private;
-	table_base = private->entries[smp_processor_id()];
+	cpu        = smp_processor_id();
+	table_base = private->entries[cpu];
+	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
+	stackptr   = &private->stackptr[cpu];
+	origptr    = *stackptr;
 
 	e = get_entry(table_base, private->hook_entry[hook]);
 
-	/* For return from builtin chain */
-	back = get_entry(table_base, private->underflow[hook]);
-
 	do {
 		const struct ip6t_entry_target *t;
 		const struct xt_entry_match *ematch;
 
 		IP_NF_ASSERT(e);
-		IP_NF_ASSERT(back);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
 		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
  no_match:
@@ -432,17 +431,20 @@ ip6t_do_table(struct sk_buff *skb,
 					verdict = (unsigned)(-v) - 1;
 					break;
 				}
-				e = back;
-				back = get_entry(table_base, back->comefrom);
+				if (*stackptr == 0)
+					e = get_entry(table_base,
+					    private->underflow[hook]);
+				else
+					e = ip6t_next_entry(jumpstack[--*stackptr]);
 				continue;
 			}
 			if (table_base + v != ip6t_next_entry(e) &&
 			    !(e->ipv6.flags & IP6T_F_GOTO)) {
-				/* Save old back ptr in next entry */
-				struct ip6t_entry *next = ip6t_next_entry(e);
-				next->comefrom = (void *)back - table_base;
-				/* set back pointer to next entry */
-				back = next;
+				if (*stackptr >= private->stacksize) {
+					verdict = NF_DROP;
+					break;
+				}
+				jumpstack[(*stackptr)++] = e;
 			}
 
 			e = get_entry(table_base, v);
@@ -454,19 +456,7 @@ ip6t_do_table(struct sk_buff *skb,
 		tgpar.target   = t->u.kernel.target;
 		tgpar.targinfo = t->data;
 
-#ifdef CONFIG_NETFILTER_DEBUG
-		tb_comefrom = 0xeeeeeeec;
-#endif
 		verdict = t->u.kernel.target->target(skb, &tgpar);
-
-#ifdef CONFIG_NETFILTER_DEBUG
-		if (tb_comefrom != 0xeeeeeeec && verdict == IP6T_CONTINUE) {
-			printk("Target %s reentered!\n",
-			       t->u.kernel.target->name);
-			verdict = NF_DROP;
-		}
-		tb_comefrom = 0x57acc001;
-#endif
 		if (verdict == IP6T_CONTINUE)
 			e = ip6t_next_entry(e);
 		else
@@ -474,10 +464,8 @@ ip6t_do_table(struct sk_buff *skb,
 			break;
 	} while (!hotdrop);
 
-#ifdef CONFIG_NETFILTER_DEBUG
-	tb_comefrom = NETFILTER_LINK_POISON;
-#endif
 	xt_info_rdunlock_bh();
+	*stackptr = origptr;
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -486,8 +474,6 @@ ip6t_do_table(struct sk_buff *skb,
 		return NF_DROP;
 	else return verdict;
 #endif
-
-#undef tb_comefrom
 }
 
 /* Figures out from what hook each rule can be called: returns 0 if
@@ -869,6 +855,9 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
 		if (ret != 0)
 			return ret;
 		++i;
+		if (strcmp(ip6t_get_target(iter)->u.user.name,
+		    XT_ERROR_TARGET) == 0)
+			++newinfo->stacksize;
 	}
 
 	if (i != repl->num_entries) {
@@ -2120,8 +2109,7 @@ struct xt_table *ip6t_register_table(struct net *net,
 {
 	int ret;
 	struct xt_table_info *newinfo;
-	struct xt_table_info bootstrap
-		= { 0, 0, 0, { 0 }, { 0 }, { } };
+	struct xt_table_info bootstrap = {0};
 	void *loc_cpu_entry;
 	struct xt_table *new_table;
 
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 8e23d8f68459..edde5c602890 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -62,6 +62,9 @@ static const char *const xt_prefix[NFPROTO_NUMPROTO] = {
 	[NFPROTO_IPV6]   = "ip6",
 };
 
+/* Allow this many total (re)entries. */
+static const unsigned int xt_jumpstack_multiplier = 2;
+
 /* Registration hooks for targets. */
 int
 xt_register_target(struct xt_target *target)
@@ -680,6 +683,26 @@ void xt_free_table_info(struct xt_table_info *info)
 		else
 			vfree(info->entries[cpu]);
 	}
+
+	if (info->jumpstack != NULL) {
+		if (sizeof(void *) * info->stacksize > PAGE_SIZE) {
+			for_each_possible_cpu(cpu)
+				vfree(info->jumpstack[cpu]);
+		} else {
+			for_each_possible_cpu(cpu)
+				kfree(info->jumpstack[cpu]);
+		}
+	}
+
+	if (sizeof(void **) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->jumpstack);
+	else
+		kfree(info->jumpstack);
+	if (sizeof(unsigned int) * nr_cpu_ids > PAGE_SIZE)
+		vfree(info->stackptr);
+	else
+		kfree(info->stackptr);
+
 	kfree(info);
 }
 EXPORT_SYMBOL(xt_free_table_info);
@@ -724,6 +747,49 @@ EXPORT_SYMBOL_GPL(xt_compat_unlock);
 DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
 EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
 
+static int xt_jumpstack_alloc(struct xt_table_info *i)
+{
+	unsigned int size;
+	int cpu;
+
+	size = sizeof(unsigned int) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->stackptr = vmalloc(size);
+	else
+		i->stackptr = kmalloc(size, GFP_KERNEL);
+	if (i->stackptr == NULL)
+		return -ENOMEM;
+	memset(i->stackptr, 0, size);
+
+	size = sizeof(void **) * nr_cpu_ids;
+	if (size > PAGE_SIZE)
+		i->jumpstack = vmalloc(size);
+	else
+		i->jumpstack = kmalloc(size, GFP_KERNEL);
+	if (i->jumpstack == NULL)
+		return -ENOMEM;
+	memset(i->jumpstack, 0, size);
+
+	i->stacksize *= xt_jumpstack_multiplier;
+	size = sizeof(void *) * i->stacksize;
+	for_each_possible_cpu(cpu) {
+		if (size > PAGE_SIZE)
+			i->jumpstack[cpu] = vmalloc_node(size,
+				cpu_to_node(cpu));
+		else
+			i->jumpstack[cpu] = kmalloc_node(size,
+				GFP_KERNEL, cpu_to_node(cpu));
+		if (i->jumpstack[cpu] == NULL)
+			/*
+			 * Freeing will be done later on by the callers. The
+			 * chain is: xt_replace_table -> __do_replace ->
+			 * do_replace -> xt_free_table_info.
+			 */
+			return -ENOMEM;
+	}
+
+	return 0;
+}
 
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
@@ -732,6 +798,7 @@ xt_replace_table(struct xt_table *table,
 	      int *error)
 {
 	struct xt_table_info *private;
+	int ret;
 
 	/* Do the substitution. */
 	local_bh_disable();
@@ -746,6 +813,12 @@ xt_replace_table(struct xt_table *table,
 		return NULL;
 	}
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0) {
+		*error = ret;
+		return NULL;
+	}
+
 	table->private = newinfo;
 	newinfo->initial_entries = private->initial_entries;
 
@@ -770,6 +843,10 @@ struct xt_table *xt_register_table(struct net *net,
 	struct xt_table_info *private;
 	struct xt_table *t, *table;
 
+	ret = xt_jumpstack_alloc(newinfo);
+	if (ret < 0)
+		return ERR_PTR(ret);
+
 	/* Don't add one object to multiple lists. */
 	table = kmemdup(input_table, sizeof(struct xt_table), GFP_KERNEL);
 	if (!table) {
-- 
cgit v1.2.3


From 88751275b8e867d756e4f86ae92afe0232de129f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 05:07:33 +0000
Subject: rps: shortcut net_rps_action()

net_rps_action() is a bit expensive on NR_CPUS=64..4096 kernels, even if
RPS is not active.

Tom Herbert used two bitmasks to hold the information needed to send IPIs,
but a single LIFO list seems more appropriate.

Move all RPS logic into net_rps_action() to clean up the net_rx_action() code
(removes two ifdefs).

Move rps_remote_softirq_cpus into softnet_data to share its first cache
line, filling an existing hole.

In a future patch, we could call net_rps_action() from process_backlog()
to make sure we send IPI before handling this cpu backlog.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  9 ++++--
 net/core/dev.c            | 79 +++++++++++++++++++----------------------------
 2 files changed, 38 insertions(+), 50 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 649a0252686e..83ab3da149ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1381,17 +1381,20 @@ static inline int unregister_gifconf(unsigned int family)
 }
 
 /*
- * Incoming packets are placed on per-cpu queues so that
- * no locking is needed.
+ * Incoming packets are placed on per-cpu queues
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
-	/* Elements below can be accessed between CPUs for RPS */
 #ifdef CONFIG_RPS
+	struct softnet_data	*rps_ipi_list;
+
+	/* Elements below can be accessed between CPUs for RPS */
 	struct call_single_data	csd ____cacheline_aligned_in_smp;
+	struct softnet_data	*rps_ipi_next;
+	unsigned int		cpu;
 	unsigned int		input_queue_head;
 #endif
 	struct sk_buff_head	input_pkt_queue;
diff --git a/net/core/dev.c b/net/core/dev.c
index 8eb50e2292fb..05a2b294906b 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2345,21 +2345,6 @@ done:
 	return cpu;
 }
 
-/*
- * This structure holds the per-CPU mask of CPUs for which IPIs are scheduled
- * to be sent to kick remote softirq processing.  There are two masks since
- * the sending of IPIs must be done with interrupts enabled.  The select field
- * indicates the current mask that enqueue_backlog uses to schedule IPIs.
- * select is flipped before net_rps_action is called while still under lock,
- * net_rps_action then uses the non-selected mask to send the IPIs and clears
- * it without conflicting with enqueue_backlog operation.
- */
-struct rps_remote_softirq_cpus {
-	cpumask_t mask[2];
-	int select;
-};
-static DEFINE_PER_CPU(struct rps_remote_softirq_cpus, rps_remote_softirq_cpus);
-
 /* Called from hardirq (IPI) context */
 static void trigger_softirq(void *data)
 {
@@ -2402,10 +2387,12 @@ enqueue:
 		if (napi_schedule_prep(&queue->backlog)) {
 #ifdef CONFIG_RPS
 			if (cpu != smp_processor_id()) {
-				struct rps_remote_softirq_cpus *rcpus =
-				    &__get_cpu_var(rps_remote_softirq_cpus);
+				struct softnet_data *myqueue;
+
+				myqueue = &__get_cpu_var(softnet_data);
+				queue->rps_ipi_next = myqueue->rps_ipi_list;
+				myqueue->rps_ipi_list = queue;
 
-				cpu_set(cpu, rcpus->mask[rcpus->select]);
 				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 				goto enqueue;
 			}
@@ -2910,7 +2897,9 @@ int netif_receive_skb(struct sk_buff *skb)
 }
 EXPORT_SYMBOL(netif_receive_skb);
 
-/* Network device is going away, flush any packets still pending  */
+/* Network device is going away, flush any packets still pending
+ * Called with irqs disabled.
+ */
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
@@ -3338,24 +3327,33 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
-#ifdef CONFIG_RPS
 /*
- * net_rps_action sends any pending IPI's for rps.  This is only called from
- * softirq and interrupts must be enabled.
+ * net_rps_action sends any pending IPI's for rps.
+ * Note: called with local irq disabled, but exits with local irq enabled.
  */
-static void net_rps_action(cpumask_t *mask)
+static void net_rps_action(void)
 {
-	int cpu;
+#ifdef CONFIG_RPS
+	struct softnet_data *locqueue = &__get_cpu_var(softnet_data);
+	struct softnet_data *remqueue = locqueue->rps_ipi_list;
 
-	/* Send pending IPI's to kick RPS processing on remote cpus. */
-	for_each_cpu_mask_nr(cpu, *mask) {
-		struct softnet_data *queue = &per_cpu(softnet_data, cpu);
-		if (cpu_online(cpu))
-			__smp_call_function_single(cpu, &queue->csd, 0);
-	}
-	cpus_clear(*mask);
-}
+	if (remqueue) {
+		locqueue->rps_ipi_list = NULL;
+
+		local_irq_enable();
+
+		/* Send pending IPI's to kick RPS processing on remote cpus. */
+		while (remqueue) {
+			struct softnet_data *next = remqueue->rps_ipi_next;
+			if (cpu_online(remqueue->cpu))
+				__smp_call_function_single(remqueue->cpu,
+							   &remqueue->csd, 0);
+			remqueue = next;
+		}
+	} else
 #endif
+		local_irq_enable();
+}
 
 static void net_rx_action(struct softirq_action *h)
 {
@@ -3363,10 +3361,6 @@ static void net_rx_action(struct softirq_action *h)
 	unsigned long time_limit = jiffies + 2;
 	int budget = netdev_budget;
 	void *have;
-#ifdef CONFIG_RPS
-	int select;
-	struct rps_remote_softirq_cpus *rcpus;
-#endif
 
 	local_irq_disable();
 
@@ -3429,17 +3423,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-#ifdef CONFIG_RPS
-	rcpus = &__get_cpu_var(rps_remote_softirq_cpus);
-	select = rcpus->select;
-	rcpus->select ^= 1;
-
-	local_irq_enable();
-
-	net_rps_action(&rcpus->mask[select]);
-#else
-	local_irq_enable();
-#endif
+	net_rps_action();
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -5839,6 +5823,7 @@ static int __init net_dev_init(void)
 		queue->csd.func = trigger_softirq;
 		queue->csd.info = queue;
 		queue->csd.flags = 0;
+		queue->cpu = i;
 #endif
 
 		queue->backlog.poll = process_backlog;
-- 
cgit v1.2.3


From dcf46b9443ad48a227a61713adea001228925adf Mon Sep 17 00:00:00 2001
From: "Zhang, Yanmin" <yanmin_zhang@linux.intel.com>
Date: Tue, 20 Apr 2010 10:13:58 +0800
Subject: perf & kvm: Clean up some of the guest profiling callback API details

Fix some build bugs and programming style issues:

 - use valid C
 - fix up various style details

Signed-off-by: Zhang Yanmin <yanmin_zhang@linux.intel.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sheng Yang <sheng@linux.intel.com>
Cc: Marcelo Tosatti <mtosatti@redhat.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Jes Sorensen <Jes.Sorensen@redhat.com>
Cc: Gleb Natapov <gleb@redhat.com>
Cc: Zachary Amsden <zamsden@redhat.com>
Cc: zhiteng.huang@intel.com
Cc: tim.c.chen@intel.com
Cc: Arnaldo Carvalho de Melo <acme@infradead.org>
LKML-Reference: <1271729638.2078.624.camel@ymzhang.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c | 20 ++++++++++++++------
 arch/x86/kvm/x86.c               |  4 ++++
 include/linux/perf_event.h       | 10 ++++------
 3 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 2ea78abf69d9..7de70613e6c3 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1752,23 +1752,31 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski
 unsigned long perf_instruction_pointer(struct pt_regs *regs)
 {
 	unsigned long ip;
+
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
 		ip = perf_guest_cbs->get_guest_ip();
 	else
 		ip = instruction_pointer(regs);
+
 	return ip;
 }
 
 unsigned long perf_misc_flags(struct pt_regs *regs)
 {
 	int misc = 0;
+
 	if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
-		misc |= perf_guest_cbs->is_user_mode() ?
-			PERF_RECORD_MISC_GUEST_USER :
-			PERF_RECORD_MISC_GUEST_KERNEL;
-	} else
-		misc |= user_mode(regs) ? PERF_RECORD_MISC_USER :
-			PERF_RECORD_MISC_KERNEL;
+		if (perf_guest_cbs->is_user_mode())
+			misc |= PERF_RECORD_MISC_GUEST_USER;
+		else
+			misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+	} else {
+		if (user_mode(regs))
+			misc |= PERF_RECORD_MISC_USER;
+		else
+			misc |= PERF_RECORD_MISC_KERNEL;
+	}
+
 	if (regs->flags & PERF_EFLAGS_EXACT)
 		misc |= PERF_RECORD_MISC_EXACT;
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c3a33b2bb169..21b9b6aa3e88 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3776,16 +3776,20 @@ static int kvm_is_in_guest(void)
 static int kvm_is_user_mode(void)
 {
 	int user_mode = 3;
+
 	if (percpu_read(current_vcpu))
 		user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu));
+
 	return user_mode != 0;
 }
 
 static unsigned long kvm_get_guest_ip(void)
 {
 	unsigned long ip = 0;
+
 	if (percpu_read(current_vcpu))
 		ip = kvm_rip_read(percpu_read(current_vcpu));
+
 	return ip;
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 24de5f181a41..ace31fbac513 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -941,10 +941,8 @@ static inline void perf_event_mmap(struct vm_area_struct *vma)
 }
 
 extern struct perf_guest_info_callbacks *perf_guest_cbs;
-extern int perf_register_guest_info_callbacks(
-		struct perf_guest_info_callbacks *);
-extern int perf_unregister_guest_info_callbacks(
-		struct perf_guest_info_callbacks *);
+extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
+extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
 
 extern void perf_event_comm(struct task_struct *tsk);
 extern void perf_event_fork(struct task_struct *tsk);
@@ -1016,9 +1014,9 @@ static inline void
 perf_bp_event(struct perf_event *event, void *data)			{ }
 
 static inline int perf_register_guest_info_callbacks
-(struct perf_guest_info_callbacks *) {return 0; }
+(struct perf_guest_info_callbacks *callbacks) { return 0; }
 static inline int perf_unregister_guest_info_callbacks
-(struct perf_guest_info_callbacks *) {return 0; }
+(struct perf_guest_info_callbacks *callbacks) { return 0; }
 
 static inline void perf_event_mmap(struct vm_area_struct *vma)		{ }
 static inline void perf_event_comm(struct task_struct *tsk)		{ }
-- 
cgit v1.2.3


From e36fa2f7e92f25aab2e3d787dcfe3590817f19d3 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 19 Apr 2010 21:17:14 +0000
Subject: rps: cleanups

struct softnet_data holds many queues, so consistently using the name "sd"
instead of "queue" is better.

Adds a rps_ipi_queued() helper to clean up enqueue_to_backlog().

Adds a _and_irq_disable suffix to net_rps_action() name, as David
suggested.

incr_input_queue_head() becomes input_queue_head_incr()

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |   4 +-
 net/core/dev.c            | 149 +++++++++++++++++++++++++---------------------
 2 files changed, 82 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 83ab3da149ad..3c5ed5f5274e 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1401,10 +1401,10 @@ struct softnet_data {
 	struct napi_struct	backlog;
 };
 
-static inline void incr_input_queue_head(struct softnet_data *queue)
+static inline void input_queue_head_incr(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	queue->input_queue_head++;
+	sd->input_queue_head++;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 05a2b294906b..7f5755b0a57c 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -208,17 +208,17 @@ static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
 	return &net->dev_index_head[ifindex & (NETDEV_HASHENTRIES - 1)];
 }
 
-static inline void rps_lock(struct softnet_data *queue)
+static inline void rps_lock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_lock(&queue->input_pkt_queue.lock);
+	spin_lock(&sd->input_pkt_queue.lock);
 #endif
 }
 
-static inline void rps_unlock(struct softnet_data *queue)
+static inline void rps_unlock(struct softnet_data *sd)
 {
 #ifdef CONFIG_RPS
-	spin_unlock(&queue->input_pkt_queue.lock);
+	spin_unlock(&sd->input_pkt_queue.lock);
 #endif
 }
 
@@ -2346,14 +2346,37 @@ done:
 }
 
 /* Called from hardirq (IPI) context */
-static void trigger_softirq(void *data)
+static void rps_trigger_softirq(void *data)
 {
-	struct softnet_data *queue = data;
-	__napi_schedule(&queue->backlog);
+	struct softnet_data *sd = data;
+
+	__napi_schedule(&sd->backlog);
 	__get_cpu_var(netdev_rx_stat).received_rps++;
 }
+
 #endif /* CONFIG_RPS */
 
+/*
+ * Check if this softnet_data structure is another cpu one
+ * If yes, queue it to our IPI list and return 1
+ * If no, return 0
+ */
+static int rps_ipi_queued(struct softnet_data *sd)
+{
+#ifdef CONFIG_RPS
+	struct softnet_data *mysd = &__get_cpu_var(softnet_data);
+
+	if (sd != mysd) {
+		sd->rps_ipi_next = mysd->rps_ipi_list;
+		mysd->rps_ipi_list = sd;
+
+		__raise_softirq_irqoff(NET_RX_SOFTIRQ);
+		return 1;
+	}
+#endif /* CONFIG_RPS */
+	return 0;
+}
+
 /*
  * enqueue_to_backlog is called to queue an skb to a per CPU backlog
  * queue (may be a remote CPU queue).
@@ -2361,48 +2384,36 @@ static void trigger_softirq(void *data)
 static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 			      unsigned int *qtail)
 {
-	struct softnet_data *queue;
+	struct softnet_data *sd;
 	unsigned long flags;
 
-	queue = &per_cpu(softnet_data, cpu);
+	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
 	__get_cpu_var(netdev_rx_stat).total++;
 
-	rps_lock(queue);
-	if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (queue->input_pkt_queue.qlen) {
+	rps_lock(sd);
+	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
+		if (sd->input_pkt_queue.qlen) {
 enqueue:
-			__skb_queue_tail(&queue->input_pkt_queue, skb);
+			__skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-			*qtail = queue->input_queue_head +
-			    queue->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
 #endif
-			rps_unlock(queue);
+			rps_unlock(sd);
 			local_irq_restore(flags);
 			return NET_RX_SUCCESS;
 		}
 
 		/* Schedule NAPI for backlog device */
-		if (napi_schedule_prep(&queue->backlog)) {
-#ifdef CONFIG_RPS
-			if (cpu != smp_processor_id()) {
-				struct softnet_data *myqueue;
-
-				myqueue = &__get_cpu_var(softnet_data);
-				queue->rps_ipi_next = myqueue->rps_ipi_list;
-				myqueue->rps_ipi_list = queue;
-
-				__raise_softirq_irqoff(NET_RX_SOFTIRQ);
-				goto enqueue;
-			}
-#endif
-			__napi_schedule(&queue->backlog);
+		if (napi_schedule_prep(&sd->backlog)) {
+			if (!rps_ipi_queued(sd))
+				__napi_schedule(&sd->backlog);
 		}
 		goto enqueue;
 	}
 
-	rps_unlock(queue);
+	rps_unlock(sd);
 
 	__get_cpu_var(netdev_rx_stat).dropped++;
 	local_irq_restore(flags);
@@ -2903,17 +2914,17 @@ EXPORT_SYMBOL(netif_receive_skb);
 static void flush_backlog(void *arg)
 {
 	struct net_device *dev = arg;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 	struct sk_buff *skb, *tmp;
 
-	rps_lock(queue);
-	skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+	rps_lock(sd);
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
 		if (skb->dev == dev) {
-			__skb_unlink(skb, &queue->input_pkt_queue);
+			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			incr_input_queue_head(queue);
+			input_queue_head_incr(sd);
 		}
-	rps_unlock(queue);
+	rps_unlock(sd);
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3219,23 +3230,23 @@ EXPORT_SYMBOL(napi_gro_frags);
 static int process_backlog(struct napi_struct *napi, int quota)
 {
 	int work = 0;
-	struct softnet_data *queue = &__get_cpu_var(softnet_data);
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
 
 	napi->weight = weight_p;
 	do {
 		struct sk_buff *skb;
 
 		local_irq_disable();
-		rps_lock(queue);
-		skb = __skb_dequeue(&queue->input_pkt_queue);
+		rps_lock(sd);
+		skb = __skb_dequeue(&sd->input_pkt_queue);
 		if (!skb) {
 			__napi_complete(napi);
-			rps_unlock(queue);
+			rps_unlock(sd);
 			local_irq_enable();
 			break;
 		}
-		incr_input_queue_head(queue);
-		rps_unlock(queue);
+		input_queue_head_incr(sd);
+		rps_unlock(sd);
 		local_irq_enable();
 
 		__netif_receive_skb(skb);
@@ -3331,24 +3342,25 @@ EXPORT_SYMBOL(netif_napi_del);
  * net_rps_action sends any pending IPI's for rps.
  * Note: called with local irq disabled, but exits with local irq enabled.
  */
-static void net_rps_action(void)
+static void net_rps_action_and_irq_disable(void)
 {
 #ifdef CONFIG_RPS
-	struct softnet_data *locqueue = &__get_cpu_var(softnet_data);
-	struct softnet_data *remqueue = locqueue->rps_ipi_list;
+	struct softnet_data *sd = &__get_cpu_var(softnet_data);
+	struct softnet_data *remsd = sd->rps_ipi_list;
 
-	if (remqueue) {
-		locqueue->rps_ipi_list = NULL;
+	if (remsd) {
+		sd->rps_ipi_list = NULL;
 
 		local_irq_enable();
 
 		/* Send pending IPI's to kick RPS processing on remote cpus. */
-		while (remqueue) {
-			struct softnet_data *next = remqueue->rps_ipi_next;
-			if (cpu_online(remqueue->cpu))
-				__smp_call_function_single(remqueue->cpu,
-							   &remqueue->csd, 0);
-			remqueue = next;
+		while (remsd) {
+			struct softnet_data *next = remsd->rps_ipi_next;
+
+			if (cpu_online(remsd->cpu))
+				__smp_call_function_single(remsd->cpu,
+							   &remsd->csd, 0);
+			remsd = next;
 		}
 	} else
 #endif
@@ -3423,7 +3435,7 @@ static void net_rx_action(struct softirq_action *h)
 		netpoll_poll_unlock(have);
 	}
 out:
-	net_rps_action();
+	net_rps_action_and_irq_disable();
 
 #ifdef CONFIG_NET_DMA
 	/*
@@ -5595,7 +5607,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
-		incr_input_queue_head(oldsd);
+		input_queue_head_incr(oldsd);
 	}
 
 	return NOTIFY_OK;
@@ -5812,24 +5824,23 @@ static int __init net_dev_init(void)
 	 */
 
 	for_each_possible_cpu(i) {
-		struct softnet_data *queue;
+		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
-		queue = &per_cpu(softnet_data, i);
-		skb_queue_head_init(&queue->input_pkt_queue);
-		queue->completion_queue = NULL;
-		INIT_LIST_HEAD(&queue->poll_list);
+		skb_queue_head_init(&sd->input_pkt_queue);
+		sd->completion_queue = NULL;
+		INIT_LIST_HEAD(&sd->poll_list);
 
 #ifdef CONFIG_RPS
-		queue->csd.func = trigger_softirq;
-		queue->csd.info = queue;
-		queue->csd.flags = 0;
-		queue->cpu = i;
+		sd->csd.func = rps_trigger_softirq;
+		sd->csd.info = sd;
+		sd->csd.flags = 0;
+		sd->cpu = i;
 #endif
 
-		queue->backlog.poll = process_backlog;
-		queue->backlog.weight = weight_p;
-		queue->backlog.gro_list = NULL;
-		queue->backlog.gro_count = 0;
+		sd->backlog.poll = process_backlog;
+		sd->backlog.weight = weight_p;
+		sd->backlog.gro_list = NULL;
+		sd->backlog.gro_count = 0;
 	}
 
 	dev_boot_phase = 0;
-- 
cgit v1.2.3


From 22265a5c3c103cf8c50be62e6c90d045eb649e6d Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 20 Apr 2010 15:07:32 +0200
Subject: netfilter: xt_TEE: resolve oif using netdevice notifiers

Replace the runtime oif name resolving by netdevice notifier based
resolving. When an oif is given, a netdevice notifier is registered
to resolve the name on NETDEV_REGISTER or NETDEV_CHANGENAME and unresolve
it again on NETDEV_UNREGISTER or NETDEV_CHANGENAME to a different name.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/xt_TEE.h |   3 ++
 net/netfilter/xt_TEE.c           | 103 ++++++++++++++++++++++++++++++---------
 2 files changed, 83 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/xt_TEE.h b/include/linux/netfilter/xt_TEE.h
index 55d4a5011523..5c21d5c829af 100644
--- a/include/linux/netfilter/xt_TEE.h
+++ b/include/linux/netfilter/xt_TEE.h
@@ -4,6 +4,9 @@
 struct xt_tee_tginfo {
 	union nf_inet_addr gw;
 	char oif[16];
+
+	/* used internally by the kernel */
+	struct xt_tee_priv *priv __attribute__((aligned(8)));
 };
 
 #endif /* _XT_TEE_TARGET_H */
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 842e7012eca7..49da6c05f4e0 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -15,6 +15,7 @@
 #include <linux/percpu.h>
 #include <linux/route.h>
 #include <linux/skbuff.h>
+#include <linux/notifier.h>
 #include <net/checksum.h>
 #include <net/icmp.h>
 #include <net/ip.h>
@@ -32,6 +33,12 @@
 #	define WITH_IPV6 1
 #endif
 
+struct xt_tee_priv {
+	struct notifier_block	notifier;
+	struct xt_tee_tginfo	*tginfo;
+	int			oif;
+};
+
 static const union nf_inet_addr tee_zero_address;
 static DEFINE_PER_CPU(bool, tee_active);
 
@@ -49,20 +56,6 @@ static struct net *pick_net(struct sk_buff *skb)
 	return &init_net;
 }
 
-static bool tee_tg_route_oif(struct flowi *f, struct net *net,
-			     const struct xt_tee_tginfo *info)
-{
-	const struct net_device *dev;
-
-	if (*info->oif != '\0')
-		return true;
-	dev = dev_get_by_name(net, info->oif);
-	if (dev == NULL)
-		return false;
-	f->oif = dev->ifindex;
-	return true;
-}
-
 static bool
 tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 {
@@ -72,8 +65,11 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
-	if (!tee_tg_route_oif(&fl, net, info))
-		return false;
+	if (info->priv) {
+		if (info->priv->oif == -1)
+			return false;
+		fl.oif = info->priv->oif;
+	}
 	fl.nl_u.ip4_u.daddr = info->gw.ip;
 	fl.nl_u.ip4_u.tos   = RT_TOS(iph->tos);
 	fl.nl_u.ip4_u.scope = RT_SCOPE_UNIVERSE;
@@ -149,8 +145,11 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 	struct flowi fl;
 
 	memset(&fl, 0, sizeof(fl));
-	if (!tee_tg_route_oif(&fl, net, info))
-		return false;
+	if (info->priv) {
+		if (info->priv->oif == -1)
+			return false;
+		fl.oif = info->priv->oif;
+	}
 	fl.nl_u.ip6_u.daddr = info->gw.in6;
 	fl.nl_u.ip6_u.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |
 				  (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];
@@ -198,15 +197,71 @@ tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
 }
 #endif /* WITH_IPV6 */
 
+static int tee_netdev_event(struct notifier_block *this, unsigned long event,
+			    void *ptr)
+{
+	struct net_device *dev = ptr;
+	struct xt_tee_priv *priv;
+
+	priv = container_of(this, struct xt_tee_priv, notifier);
+	switch (event) {
+	case NETDEV_REGISTER:
+		if (!strcmp(dev->name, priv->tginfo->oif))
+			priv->oif = dev->ifindex;
+		break;
+	case NETDEV_UNREGISTER:
+		if (dev->ifindex == priv->oif)
+			priv->oif = -1;
+		break;
+	case NETDEV_CHANGENAME:
+		if (!strcmp(dev->name, priv->tginfo->oif))
+			priv->oif = dev->ifindex;
+		else if (dev->ifindex == priv->oif)
+			priv->oif = -1;
+		break;
+	}
+
+	return NOTIFY_DONE;
+}
+
 static int tee_tg_check(const struct xt_tgchk_param *par)
 {
-	const struct xt_tee_tginfo *info = par->targinfo;
+	struct xt_tee_tginfo *info = par->targinfo;
+	struct xt_tee_priv *priv;
 
-	if (info->oif[sizeof(info->oif)-1] != '\0')
-		return -EINVAL;
 	/* 0.0.0.0 and :: not allowed */
-	return (memcmp(&info->gw, &tee_zero_address,
-	       sizeof(tee_zero_address)) == 0) ? -EINVAL : 0;
+	if (memcmp(&info->gw, &tee_zero_address,
+		   sizeof(tee_zero_address)) == 0)
+		return -EINVAL;
+
+	if (info->oif[0]) {
+		if (info->oif[sizeof(info->oif)-1] != '\0')
+			return -EINVAL;
+
+		priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+		if (priv == NULL)
+			return -ENOMEM;
+
+		priv->tginfo  = info;
+		priv->oif     = -1;
+		priv->notifier.notifier_call = tee_netdev_event;
+		info->priv    = priv;
+
+		register_netdevice_notifier(&priv->notifier);
+	} else
+		info->priv = NULL;
+
+	return 0;
+}
+
+static void tee_tg_destroy(const struct xt_tgdtor_param *par)
+{
+	struct xt_tee_tginfo *info = par->targinfo;
+
+	if (info->priv) {
+		unregister_netdevice_notifier(&info->priv->notifier);
+		kfree(info->priv);
+	}
 }
 
 static struct xt_target tee_tg_reg[] __read_mostly = {
@@ -217,6 +272,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.target     = tee_tg4,
 		.targetsize = sizeof(struct xt_tee_tginfo),
 		.checkentry = tee_tg_check,
+		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 #ifdef WITH_IPV6
@@ -227,6 +283,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
 		.target     = tee_tg6,
 		.targetsize = sizeof(struct xt_tee_tginfo),
 		.checkentry = tee_tg_check,
+		.destroy    = tee_tg_destroy,
 		.me         = THIS_MODULE,
 	},
 #endif
-- 
cgit v1.2.3


From 6c79bf0f2440fd250c8fce8d9b82fcf03d4e8350 Mon Sep 17 00:00:00 2001
From: Bart De Schuymer <bdschuym@pandora.be>
Date: Tue, 20 Apr 2010 16:22:01 +0200
Subject: netfilter: bridge-netfilter: fix refragmenting IP traffic
 encapsulated in PPPoE traffic

The MTU for IP traffic encapsulated inside PPPoE traffic is smaller
than the MTU of the Ethernet device (1500). Connection tracking
gathers all IP packets and sometimes will refragment them in
ip_fragment(). We then need to subtract the length of the
encapsulating header from the mtu used in ip_fragment(). The check in
br_nf_dev_queue_xmit() which determines if ip_fragment() has to be
called is also updated for the PPPoE-encapsulated packets.
nf_bridge_copy_header() is also updated to make sure the PPPoE data
length field has the correct value.

Signed-off-by: Bart De Schuymer <bdschuym@pandora.be>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter_bridge.h | 7 +++++++
 net/bridge/br_netfilter.c        | 2 +-
 net/ipv4/ip_output.c             | 4 ++++
 3 files changed, 12 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h
index ea0e44b90432..0ddd161f3b06 100644
--- a/include/linux/netfilter_bridge.h
+++ b/include/linux/netfilter_bridge.h
@@ -68,6 +68,13 @@ static inline unsigned int nf_bridge_encap_header_len(const struct sk_buff *skb)
 	}
 }
 
+static inline unsigned int nf_bridge_mtu_reduction(const struct sk_buff *skb)
+{
+	if (unlikely(skb->nf_bridge->mask & BRNF_PPPoE))
+		return PPPOE_SES_HLEN;
+	return 0;
+}
+
 extern int br_handle_frame_finish(struct sk_buff *skb);
 /* Only used in br_device.c */
 static inline int br_nf_pre_routing_finish_bridge_slow(struct sk_buff *skb)
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
index 6b80ebc37667..93f80fefa496 100644
--- a/net/bridge/br_netfilter.c
+++ b/net/bridge/br_netfilter.c
@@ -745,7 +745,7 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff *skb,
 static int br_nf_dev_queue_xmit(struct sk_buff *skb)
 {
 	if (skb->nfct != NULL && skb->protocol == htons(ETH_P_IP) &&
-	    skb->len > skb->dev->mtu &&
+	    skb->len + nf_bridge_mtu_reduction(skb) > skb->dev->mtu &&
 	    !skb_is_gso(skb))
 		return ip_fragment(skb, br_dev_queue_push_xmit);
 	else
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b0b2e3059f11..d979710684b2 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -469,6 +469,10 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 
 	hlen = iph->ihl * 4;
 	mtu = dst_mtu(&rt->u.dst) - hlen;	/* Size of data space */
+#ifdef CONFIG_BRIDGE_NETFILTER
+	if (skb->nf_bridge)
+		mtu -= nf_bridge_mtu_reduction(skb);
+#endif
 	IPCB(skb)->flags |= IPSKB_FRAG_COMPLETE;
 
 	/* When frag_list is given, use it. First, check its validity:
-- 
cgit v1.2.3


From f79d9bad37cb1e7ef23d60b1dd0b7859957ced9e Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Mon, 19 Apr 2010 19:57:35 +0200
Subject: mac80211: add flags for STBC (Space-Time Block Coding)

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ieee80211.h | 1 +
 include/net/mac80211.h    | 4 ++++
 2 files changed, 5 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 1252ba1fbff5..97b2eae6a22c 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -876,6 +876,7 @@ struct ieee80211_ht_cap {
 #define IEEE80211_HT_CAP_SGI_40			0x0040
 #define IEEE80211_HT_CAP_TX_STBC		0x0080
 #define IEEE80211_HT_CAP_RX_STBC		0x0300
+#define		IEEE80211_HT_CAP_RX_STBC_SHIFT	8
 #define IEEE80211_HT_CAP_DELAY_BA		0x0400
 #define IEEE80211_HT_CAP_MAX_AMSDU		0x0800
 #define IEEE80211_HT_CAP_DSSSCCK40		0x1000
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0e2a1a9b2b65..7dc5a6790397 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -275,6 +275,8 @@ struct ieee80211_bss_conf {
  *	MLME command (internal to mac80211 to figure out whether to send TX
  *	status to user space)
  * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame
+ * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this
+ *	frame and selects the maximum number of streams that it can use.
  */
 enum mac80211_tx_control_flags {
 	IEEE80211_TX_CTL_REQ_TX_STATUS		= BIT(0),
@@ -299,6 +301,8 @@ enum mac80211_tx_control_flags {
 	IEEE80211_TX_INTFL_HAS_RADIOTAP		= BIT(20),
 	IEEE80211_TX_INTFL_NL80211_FRAME_TX	= BIT(21),
 	IEEE80211_TX_CTL_LDPC			= BIT(22),
+	IEEE80211_TX_CTL_STBC			= BIT(23) | BIT(24),
+#define IEEE80211_TX_CTL_STBC_SHIFT		23
 };
 
 /**
-- 
cgit v1.2.3


From ccb7c7732e2ceb4e81a7806faf1670be9681ccd2 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 20 Apr 2010 22:39:53 -0700
Subject: net: Remove two unnecessary exports (skbuff).

There is no need to export skb_under_panic() and skb_over_panic() in
skbuff.c, since these functions are used only in skbuff.c; this patch
removes these two exports. It also marks these functions as 'static'
and removes their extern declarations from
include/linux/skbuff.h.

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 4 ----
 net/core/skbuff.c      | 6 ++----
 2 files changed, 2 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 38501d20650c..82f5116a89e4 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -470,10 +470,6 @@ extern int	       skb_cow_data(struct sk_buff *skb, int tailbits,
 				    struct sk_buff **trailer);
 extern int	       skb_pad(struct sk_buff *skb, int pad);
 #define dev_kfree_skb(a)	consume_skb(a)
-extern void	      skb_over_panic(struct sk_buff *skb, int len,
-				     void *here);
-extern void	      skb_under_panic(struct sk_buff *skb, int len,
-				      void *here);
 
 extern int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb,
 			int getfrag(void *from, char *to, int offset,
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index bdea0efdf8cb..4218ff49bf13 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -117,7 +117,7 @@ static const struct pipe_buf_operations sock_pipe_buf_ops = {
  *
  *	Out of line support code for skb_put(). Not user callable.
  */
-void skb_over_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_over_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -126,7 +126,6 @@ void skb_over_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_over_panic);
 
 /**
  *	skb_under_panic	- 	private function
@@ -137,7 +136,7 @@ EXPORT_SYMBOL(skb_over_panic);
  *	Out of line support code for skb_push(). Not user callable.
  */
 
-void skb_under_panic(struct sk_buff *skb, int sz, void *here)
+static void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 {
 	printk(KERN_EMERG "skb_under_panic: text:%p len:%d put:%d head:%p "
 			  "data:%p tail:%#lx end:%#lx dev:%s\n",
@@ -146,7 +145,6 @@ void skb_under_panic(struct sk_buff *skb, int sz, void *here)
 	       skb->dev ? skb->dev->name : "<NULL>");
 	BUG();
 }
-EXPORT_SYMBOL(skb_under_panic);
 
 /* 	Allocate a new skbuff. We do this ourselves so we can fill in a few
  *	'private' fields and also do memory statistics to find all the
-- 
cgit v1.2.3


From cecbca96da387428e220e307a9c945e37e2f4d9e Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 18 Apr 2010 19:08:41 +0200
Subject: tracing: Dump either the oops's cpu source or all cpus buffers

The ftrace_dump_on_oops kernel parameter, sysctl and sysrq let one
dump every cpu buffer when an oops or panic happens.

It's nice when you have few cpus but it may take ages if you have many,
plus you miss the real origin of the problem in all the cpu traces.

Sometimes, all you need is to dump the cpu buffer that triggered the
oops, most of the time it is our main interest.

This patch modifies ftrace_dump_on_oops to handle this choice.

The ftrace_dump_on_oops kernel parameter, when it comes alone, has
the same behaviour as before. But ftrace_dump_on_oops=orig_cpu
will only dump the buffer of the cpu that oops'ed.

Similarly, sysctl kernel.ftrace_dump_on_oops=1 and
echo 1 > /proc/sys/kernel/ftrace_dump_on_oops keep their previous
behaviour. But setting 2 jumps into cpu origin dump mode.

v2: Fix double setup
v3: Fix spelling issues reported by Randy Dunlap
v4: Also update __ftrace_dump in the selftests

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
---
 Documentation/kernel-parameters.txt |  6 ++++-
 Documentation/trace/ftrace.txt      |  6 +++--
 drivers/char/sysrq.c                |  2 +-
 include/linux/ftrace.h              |  4 ++-
 include/linux/kernel.h              | 11 ++++++--
 kernel/trace/trace.c                | 51 ++++++++++++++++++++++++++++---------
 kernel/trace/trace_selftest.c       |  5 ++--
 7 files changed, 64 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index e4cbca58536c..ab67b33300fb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -789,8 +789,12 @@ and is between 256 and 4096 characters. It is defined in the file
 			as early as possible in order to facilitate early
 			boot debugging.
 
-	ftrace_dump_on_oops
+	ftrace_dump_on_oops[=orig_cpu]
 			[FTRACE] will dump the trace buffers on oops.
+			If no parameter is passed, ftrace will dump
+			buffers of all CPUs, but if you pass orig_cpu, it will
+			dump only the buffer of the CPU that triggered the
+			oops.
 
 	ftrace_filter=[function-list]
 			[FTRACE] Limit the functions traced by the function
diff --git a/Documentation/trace/ftrace.txt b/Documentation/trace/ftrace.txt
index 03485bfbd797..52011815c905 100644
--- a/Documentation/trace/ftrace.txt
+++ b/Documentation/trace/ftrace.txt
@@ -1337,12 +1337,14 @@ ftrace_dump_on_oops must be set. To set ftrace_dump_on_oops, one
 can either use the sysctl function or set it via the proc system
 interface.
 
-  sysctl kernel.ftrace_dump_on_oops=1
+  sysctl kernel.ftrace_dump_on_oops=n
 
 or
 
-  echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
+  echo n > /proc/sys/kernel/ftrace_dump_on_oops
 
+If n = 1, ftrace will dump buffers of all CPUs, if n = 2 ftrace will
+only dump the buffer of the CPU that triggered the oops.
 
 Here's an example of such a dump after a null pointer
 dereference in a kernel module:
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
index 59de2525d303..d4e8b213a462 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
@@ -289,7 +289,7 @@ static struct sysrq_key_op sysrq_showstate_blocked_op = {
 
 static void sysrq_ftrace_dump(int key, struct tty_struct *tty)
 {
-	ftrace_dump();
+	ftrace_dump(DUMP_ALL);
 }
 static struct sysrq_key_op sysrq_ftrace_dump_op = {
 	.handler	= sysrq_ftrace_dump,
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 01e6adea07ec..ea5b1aae0e8b 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -492,7 +492,9 @@ static inline int test_tsk_trace_graph(struct task_struct *tsk)
 	return tsk->trace & TSK_TRACE_FL_GRAPH;
 }
 
-extern int ftrace_dump_on_oops;
+enum ftrace_dump_mode;
+
+extern enum ftrace_dump_mode ftrace_dump_on_oops;
 
 #ifdef CONFIG_PREEMPT
 #define INIT_TRACE_RECURSION		.trace_recursion = 0,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 9365227dbaf6..9fb1c1299032 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -490,6 +490,13 @@ static inline void tracing_off(void) { }
 static inline void tracing_off_permanent(void) { }
 static inline int tracing_is_on(void) { return 0; }
 #endif
+
+enum ftrace_dump_mode {
+	DUMP_NONE,
+	DUMP_ALL,
+	DUMP_ORIG,
+};
+
 #ifdef CONFIG_TRACING
 extern void tracing_start(void);
 extern void tracing_stop(void);
@@ -571,7 +578,7 @@ __ftrace_vbprintk(unsigned long ip, const char *fmt, va_list ap);
 extern int
 __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
 
-extern void ftrace_dump(void);
+extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
 #else
 static inline void
 ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
@@ -592,7 +599,7 @@ ftrace_vprintk(const char *fmt, va_list ap)
 {
 	return 0;
 }
-static inline void ftrace_dump(void) { }
+static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #endif /* CONFIG_TRACING */
 
 /*
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index bed83cab6da2..7b516c7ef9a0 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -117,9 +117,12 @@ static cpumask_var_t __read_mostly	tracing_buffer_mask;
  *
  * It is default off, but you can enable it with either specifying
  * "ftrace_dump_on_oops" in the kernel command line, or setting
- * /proc/sys/kernel/ftrace_dump_on_oops to true.
+ * /proc/sys/kernel/ftrace_dump_on_oops
+ * Set 1 if you want to dump buffers of all CPUs
+ * Set 2 if you want to dump the buffer of the CPU that triggered oops
  */
-int ftrace_dump_on_oops;
+
+enum ftrace_dump_mode ftrace_dump_on_oops;
 
 static int tracing_set_tracer(const char *buf);
 
@@ -139,8 +142,17 @@ __setup("ftrace=", set_cmdline_ftrace);
 
 static int __init set_ftrace_dump_on_oops(char *str)
 {
-	ftrace_dump_on_oops = 1;
-	return 1;
+	if (*str++ != '=' || !*str) {
+		ftrace_dump_on_oops = DUMP_ALL;
+		return 1;
+	}
+
+	if (!strcmp("orig_cpu", str)) {
+		ftrace_dump_on_oops = DUMP_ORIG;
+                return 1;
+        }
+
+        return 0;
 }
 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
 
@@ -4338,7 +4350,7 @@ static int trace_panic_handler(struct notifier_block *this,
 			       unsigned long event, void *unused)
 {
 	if (ftrace_dump_on_oops)
-		ftrace_dump();
+		ftrace_dump(ftrace_dump_on_oops);
 	return NOTIFY_OK;
 }
 
@@ -4355,7 +4367,7 @@ static int trace_die_handler(struct notifier_block *self,
 	switch (val) {
 	case DIE_OOPS:
 		if (ftrace_dump_on_oops)
-			ftrace_dump();
+			ftrace_dump(ftrace_dump_on_oops);
 		break;
 	default:
 		break;
@@ -4396,7 +4408,8 @@ trace_printk_seq(struct trace_seq *s)
 	trace_seq_init(s);
 }
 
-static void __ftrace_dump(bool disable_tracing)
+static void
+__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
 {
 	static arch_spinlock_t ftrace_dump_lock =
 		(arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
@@ -4429,12 +4442,25 @@ static void __ftrace_dump(bool disable_tracing)
 	/* don't look at user memory in panic mode */
 	trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
 
-	printk(KERN_TRACE "Dumping ftrace buffer:\n");
-
 	/* Simulate the iterator */
 	iter.tr = &global_trace;
 	iter.trace = current_trace;
-	iter.cpu_file = TRACE_PIPE_ALL_CPU;
+
+	switch (oops_dump_mode) {
+	case DUMP_ALL:
+		iter.cpu_file = TRACE_PIPE_ALL_CPU;
+		break;
+	case DUMP_ORIG:
+		iter.cpu_file = raw_smp_processor_id();
+		break;
+	case DUMP_NONE:
+		goto out_enable;
+	default:
+		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
+		iter.cpu_file = TRACE_PIPE_ALL_CPU;
+	}
+
+	printk(KERN_TRACE "Dumping ftrace buffer:\n");
 
 	/*
 	 * We need to stop all tracing on all CPUS to read the
@@ -4473,6 +4499,7 @@ static void __ftrace_dump(bool disable_tracing)
 	else
 		printk(KERN_TRACE "---------------------------------\n");
 
+ out_enable:
 	/* Re-enable tracing if requested */
 	if (!disable_tracing) {
 		trace_flags |= old_userobj;
@@ -4489,9 +4516,9 @@ static void __ftrace_dump(bool disable_tracing)
 }
 
 /* By default: disable tracing after the dump */
-void ftrace_dump(void)
+void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
 {
-	__ftrace_dump(true);
+	__ftrace_dump(true, oops_dump_mode);
 }
 
 __init static int tracer_alloc_buffers(void)
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 9398034f814a..6a9d36ddfcf2 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -256,7 +256,8 @@ trace_selftest_startup_function(struct tracer *trace, struct trace_array *tr)
 /* Maximum number of functions to trace before diagnosing a hang */
 #define GRAPH_MAX_FUNC_TEST	100000000
 
-static void __ftrace_dump(bool disable_tracing);
+static void
+__ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode);
 static unsigned int graph_hang_thresh;
 
 /* Wrap the real function entry probe to avoid possible hanging */
@@ -267,7 +268,7 @@ static int trace_graph_entry_watchdog(struct ftrace_graph_ent *trace)
 		ftrace_graph_stop();
 		printk(KERN_WARNING "BUG: Function graph tracer hang!\n");
 		if (ftrace_dump_on_oops)
-			__ftrace_dump(false);
+			__ftrace_dump(false, DUMP_ALL);
 		return 0;
 	}
 
-- 
cgit v1.2.3


From 989a2979205dd34269382b357e6d4b4b6956b889 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 14 Apr 2010 09:55:35 +0000
Subject: fasync: RCU and fine grained locking

kill_fasync() uses a central rwlock, candidate for RCU conversion, to
avoid cache line ping pongs on SMP.

fasync_remove_entry() and fasync_add_entry() can disable IRQs for a short
section instead of during the whole list scan.

Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesn't need its own implementation and can use fasync_helper(), to
reduce code size and complexity.

We can remove __kill_fasync() direct use in net/socket.c, and rename it
to kill_fasync_rcu().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/fcntl.c         | 66 ++++++++++++++++++++++++++++++------------------
 include/linux/fs.h | 12 ++++-----
 net/socket.c       | 73 ++++++++----------------------------------------------
 3 files changed, 59 insertions(+), 92 deletions(-)

(limited to 'include/linux')

diff --git a/fs/fcntl.c b/fs/fcntl.c
index 452d02f9075e..0a140741b39e 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -614,9 +614,15 @@ int send_sigurg(struct fown_struct *fown)
 	return ret;
 }
 
-static DEFINE_RWLOCK(fasync_lock);
+static DEFINE_SPINLOCK(fasync_lock);
 static struct kmem_cache *fasync_cache __read_mostly;
 
+static void fasync_free_rcu(struct rcu_head *head)
+{
+	kmem_cache_free(fasync_cache,
+			container_of(head, struct fasync_struct, fa_rcu));
+}
+
 /*
  * Remove a fasync entry. If successfully removed, return
  * positive and clear the FASYNC flag. If no entry exists,
@@ -625,8 +631,6 @@ static struct kmem_cache *fasync_cache __read_mostly;
  * NOTE! It is very important that the FASYNC flag always
  * match the state "is the filp on a fasync list".
  *
- * We always take the 'filp->f_lock', in since fasync_lock
- * needs to be irq-safe.
  */
 static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 {
@@ -634,17 +638,22 @@ static int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 	int result = 0;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
+		fa->fa_file = NULL;
+		spin_unlock_irq(&fa->fa_lock);
+
 		*fp = fa->fa_next;
-		kmem_cache_free(fasync_cache, fa);
+		call_rcu(&fa->fa_rcu, fasync_free_rcu);
 		filp->f_flags &= ~FASYNC;
 		result = 1;
 		break;
 	}
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -666,25 +675,30 @@ static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fa
 		return -ENOMEM;
 
 	spin_lock(&filp->f_lock);
-	write_lock_irq(&fasync_lock);
+	spin_lock(&fasync_lock);
 	for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 		if (fa->fa_file != filp)
 			continue;
+
+		spin_lock_irq(&fa->fa_lock);
 		fa->fa_fd = fd;
+		spin_unlock_irq(&fa->fa_lock);
+
 		kmem_cache_free(fasync_cache, new);
 		goto out;
 	}
 
+	spin_lock_init(&new->fa_lock);
 	new->magic = FASYNC_MAGIC;
 	new->fa_file = filp;
 	new->fa_fd = fd;
 	new->fa_next = *fapp;
-	*fapp = new;
+	rcu_assign_pointer(*fapp, new);
 	result = 1;
 	filp->f_flags |= FASYNC;
 
 out:
-	write_unlock_irq(&fasync_lock);
+	spin_unlock(&fasync_lock);
 	spin_unlock(&filp->f_lock);
 	return result;
 }
@@ -704,37 +718,41 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 
 EXPORT_SYMBOL(fasync_helper);
 
-void __kill_fasync(struct fasync_struct *fa, int sig, int band)
+/*
+ * rcu_read_lock() is held
+ */
+static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
 {
 	while (fa) {
-		struct fown_struct * fown;
+		struct fown_struct *fown;
 		if (fa->magic != FASYNC_MAGIC) {
 			printk(KERN_ERR "kill_fasync: bad magic number in "
 			       "fasync_struct!\n");
 			return;
 		}
-		fown = &fa->fa_file->f_owner;
-		/* Don't send SIGURG to processes which have not set a
-		   queued signum: SIGURG has its own default signalling
-		   mechanism. */
-		if (!(sig == SIGURG && fown->signum == 0))
-			send_sigio(fown, fa->fa_fd, band);
-		fa = fa->fa_next;
+		spin_lock(&fa->fa_lock);
+		if (fa->fa_file) {
+			fown = &fa->fa_file->f_owner;
+			/* Don't send SIGURG to processes which have not set a
+			   queued signum: SIGURG has its own default signalling
+			   mechanism. */
+			if (!(sig == SIGURG && fown->signum == 0))
+				send_sigio(fown, fa->fa_fd, band);
+		}
+		spin_unlock(&fa->fa_lock);
+		fa = rcu_dereference(fa->fa_next);
 	}
 }
 
-EXPORT_SYMBOL(__kill_fasync);
-
 void kill_fasync(struct fasync_struct **fp, int sig, int band)
 {
 	/* First a quick test without locking: usually
 	 * the list is empty.
 	 */
 	if (*fp) {
-		read_lock(&fasync_lock);
-		/* reread *fp after obtaining the lock */
-		__kill_fasync(*fp, sig, band);
-		read_unlock(&fasync_lock);
+		rcu_read_lock();
+		kill_fasync_rcu(rcu_dereference(*fp), sig, band);
+		rcu_read_unlock();
 	}
 }
 EXPORT_SYMBOL(kill_fasync);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 39d57bc6cc71..018d382f6f92 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1280,10 +1280,12 @@ static inline int lock_may_write(struct inode *inode, loff_t start,
 
 
 struct fasync_struct {
-	int	magic;
-	int	fa_fd;
-	struct	fasync_struct	*fa_next; /* singly linked list */
-	struct	file 		*fa_file;
+	spinlock_t		fa_lock;
+	int			magic;
+	int			fa_fd;
+	struct fasync_struct	*fa_next; /* singly linked list */
+	struct file		*fa_file;
+	struct rcu_head		fa_rcu;
 };
 
 #define FASYNC_MAGIC 0x4601
@@ -1292,8 +1294,6 @@ struct fasync_struct {
 extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
 /* can be called from interrupts */
 extern void kill_fasync(struct fasync_struct **, int, int);
-/* only for net: no internal synchronization */
-extern void __kill_fasync(struct fasync_struct *, int, int);
 
 extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force);
 extern int f_setown(struct file *filp, unsigned long arg, int force);
diff --git a/net/socket.c b/net/socket.c
index 35bc198bbf68..9822081eab38 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1067,78 +1067,27 @@ static int sock_close(struct inode *inode, struct file *filp)
  *	1. fasync_list is modified only under process context socket lock
  *	   i.e. under semaphore.
  *	2. fasync_list is used under read_lock(&sk->sk_callback_lock)
- *	   or under socket lock.
- *	3. fasync_list can be used from softirq context, so that
- *	   modification under socket lock have to be enhanced with
- *	   write_lock_bh(&sk->sk_callback_lock).
- *							--ANK (990710)
+ *	   or under socket lock
  */
 
 static int sock_fasync(int fd, struct file *filp, int on)
 {
-	struct fasync_struct *fa, *fna = NULL, **prev;
-	struct socket *sock;
-	struct sock *sk;
-
-	if (on) {
-		fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
-		if (fna == NULL)
-			return -ENOMEM;
-	}
-
-	sock = filp->private_data;
+	struct socket *sock = filp->private_data;
+	struct sock *sk = sock->sk;
 
-	sk = sock->sk;
-	if (sk == NULL) {
-		kfree(fna);
+	if (sk == NULL)
 		return -EINVAL;
-	}
 
 	lock_sock(sk);
 
-	spin_lock(&filp->f_lock);
-	if (on)
-		filp->f_flags |= FASYNC;
-	else
-		filp->f_flags &= ~FASYNC;
-	spin_unlock(&filp->f_lock);
-
-	prev = &(sock->fasync_list);
+	fasync_helper(fd, filp, on, &sock->fasync_list);
 
-	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
-		if (fa->fa_file == filp)
-			break;
-
-	if (on) {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			fa->fa_fd = fd;
-			write_unlock_bh(&sk->sk_callback_lock);
-
-			kfree(fna);
-			goto out;
-		}
-		fna->fa_file = filp;
-		fna->fa_fd = fd;
-		fna->magic = FASYNC_MAGIC;
-		fna->fa_next = sock->fasync_list;
-		write_lock_bh(&sk->sk_callback_lock);
-		sock->fasync_list = fna;
+	if (!sock->fasync_list)
+		sock_reset_flag(sk, SOCK_FASYNC);
+	else
 		sock_set_flag(sk, SOCK_FASYNC);
-		write_unlock_bh(&sk->sk_callback_lock);
-	} else {
-		if (fa != NULL) {
-			write_lock_bh(&sk->sk_callback_lock);
-			*prev = fa->fa_next;
-			if (!sock->fasync_list)
-				sock_reset_flag(sk, SOCK_FASYNC);
-			write_unlock_bh(&sk->sk_callback_lock);
-			kfree(fa);
-		}
-	}
 
-out:
-	release_sock(sock->sk);
+	release_sock(sk);
 	return 0;
 }
 
@@ -1159,10 +1108,10 @@ int sock_wake_async(struct socket *sock, int how, int band)
 		/* fall through */
 	case SOCK_WAKE_IO:
 call_kill:
-		__kill_fasync(sock->fasync_list, SIGIO, band);
+		kill_fasync(&sock->fasync_list, SIGIO, band);
 		break;
 	case SOCK_WAKE_URG:
-		__kill_fasync(sock->fasync_list, SIGURG, band);
+		kill_fasync(&sock->fasync_list, SIGURG, band);
 	}
 	return 0;
 }
-- 
cgit v1.2.3


From a1aa8822d577c8714f8d343eea028befbab3da9d Mon Sep 17 00:00:00 2001
From: Richard Röjfors <richard.rojfors@pelagicore.com>
Date: Wed, 21 Apr 2010 16:33:29 -0700
Subject: ks8842: Add platform data for setting mac address
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds platform data to the ks8842 driver.

Via the platform data a MAC address, to be used by the controller,
can be passed.

To ensure this MAC address is used, the MAC address is written
after each hardware reset.

Signed-off-by: Richard Röjfors <richard.rojfors@pelagicore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ks8842.c   | 53 +++++++++++++++++++++++++++++++++++++++-----------
 include/linux/ks8842.h | 34 ++++++++++++++++++++++++++++++++
 2 files changed, 76 insertions(+), 11 deletions(-)
 create mode 100644 include/linux/ks8842.h

(limited to 'include/linux')

diff --git a/drivers/net/ks8842.c b/drivers/net/ks8842.c
index b91492f4e48a..f852ab3ae9cf 100644
--- a/drivers/net/ks8842.c
+++ b/drivers/net/ks8842.c
@@ -1,5 +1,5 @@
 /*
- * ks8842_main.c timberdale KS8842 ethernet driver
+ * ks8842.c timberdale KS8842 ethernet driver
  * Copyright (c) 2009 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
@@ -28,6 +28,7 @@
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 #include <linux/ethtool.h>
+#include <linux/ks8842.h>
 
 #define DRV_NAME "ks8842"
 
@@ -304,6 +305,20 @@ static void ks8842_read_mac_addr(struct ks8842_adapter *adapter, u8 *dest)
 	ks8842_write16(adapter, 39, mac, REG_MACAR3);
 }
 
+static void ks8842_write_mac_addr(struct ks8842_adapter *adapter, u8 *mac)
+{
+	unsigned long flags;
+	unsigned i;
+
+	spin_lock_irqsave(&adapter->lock, flags);
+	for (i = 0; i < ETH_ALEN; i++) {
+		ks8842_write8(adapter, 2, mac[ETH_ALEN - i - 1], REG_MARL + i);
+		ks8842_write8(adapter, 39, mac[ETH_ALEN - i - 1],
+			REG_MACAR1 + i);
+	}
+	spin_unlock_irqrestore(&adapter->lock, flags);
+}
+
 static inline u16 ks8842_tx_fifo_space(struct ks8842_adapter *adapter)
 {
 	return ks8842_read16(adapter, 16, REG_TXMIR) & 0x1fff;
@@ -522,6 +537,8 @@ static int ks8842_open(struct net_device *netdev)
 	/* reset the HW */
 	ks8842_reset_hw(adapter);
 
+	ks8842_write_mac_addr(adapter, netdev->dev_addr);
+
 	ks8842_update_link_status(netdev, adapter);
 
 	err = request_irq(adapter->irq, ks8842_irq, IRQF_SHARED, DRV_NAME,
@@ -568,10 +585,8 @@ static netdev_tx_t ks8842_xmit_frame(struct sk_buff *skb,
 static int ks8842_set_mac(struct net_device *netdev, void *p)
 {
 	struct ks8842_adapter *adapter = netdev_priv(netdev);
-	unsigned long flags;
 	struct sockaddr *addr = p;
 	char *mac = (u8 *)addr->sa_data;
-	int i;
 
 	dev_dbg(&adapter->pdev->dev, "%s: entry\n", __func__);
 
@@ -580,13 +595,7 @@ static int ks8842_set_mac(struct net_device *netdev, void *p)
 
 	memcpy(netdev->dev_addr, mac, netdev->addr_len);
 
-	spin_lock_irqsave(&adapter->lock, flags);
-	for (i = 0; i < ETH_ALEN; i++) {
-		ks8842_write8(adapter, 2, mac[ETH_ALEN - i - 1], REG_MARL + i);
-		ks8842_write8(adapter, 39, mac[ETH_ALEN - i - 1],
-			REG_MACAR1 + i);
-	}
-	spin_unlock_irqrestore(&adapter->lock, flags);
+	ks8842_write_mac_addr(adapter, mac);
 	return 0;
 }
 
@@ -605,6 +614,8 @@ static void ks8842_tx_timeout(struct net_device *netdev)
 
 	ks8842_reset_hw(adapter);
 
+	ks8842_write_mac_addr(adapter, netdev->dev_addr);
+
 	ks8842_update_link_status(netdev, adapter);
 }
 
@@ -627,7 +638,9 @@ static int __devinit ks8842_probe(struct platform_device *pdev)
 	struct resource *iomem;
 	struct net_device *netdev;
 	struct ks8842_adapter *adapter;
+	struct ks8842_platform_data *pdata = pdev->dev.platform_data;
 	u16 id;
+	unsigned i;
 
 	iomem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!request_mem_region(iomem->start, resource_size(iomem), DRV_NAME))
@@ -658,7 +671,25 @@ static int __devinit ks8842_probe(struct platform_device *pdev)
 	netdev->netdev_ops = &ks8842_netdev_ops;
 	netdev->ethtool_ops = &ks8842_ethtool_ops;
 
-	ks8842_read_mac_addr(adapter, netdev->dev_addr);
+	/* Check if a mac address was given */
+	i = netdev->addr_len;
+	if (pdata) {
+		for (i = 0; i < netdev->addr_len; i++)
+			if (pdata->macaddr[i] != 0)
+				break;
+
+		if (i < netdev->addr_len)
+			/* an address was passed, use it */
+			memcpy(netdev->dev_addr, pdata->macaddr,
+				netdev->addr_len);
+	}
+
+	if (i == netdev->addr_len) {
+		ks8842_read_mac_addr(adapter, netdev->dev_addr);
+
+		if (!is_valid_ether_addr(netdev->dev_addr))
+			random_ether_addr(netdev->dev_addr);
+	}
 
 	id = ks8842_read16(adapter, 32, REG_SW_ID_AND_ENABLE);
 
diff --git a/include/linux/ks8842.h b/include/linux/ks8842.h
new file mode 100644
index 000000000000..da0341b8ca0a
--- /dev/null
+++ b/include/linux/ks8842.h
@@ -0,0 +1,34 @@
+/*
+ * ks8842.h KS8842 platform data struct definition
+ * Copyright (c) 2010 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_KS8842_H
+#define _LINUX_KS8842_H
+
+#include <linux/if_ether.h>
+
+/**
+ * struct ks8842_platform_data - Platform data of the KS8842 network driver
+ * @macaddr:	The MAC address of the device, set to all 0:s to use the one in
+ *		the chip.
+ *
+ */
+struct ks8842_platform_data {
+	u8 macaddr[ETH_ALEN];
+};
+
+#endif
-- 
cgit v1.2.3


From 6fa8f719844b8455033e295f720e739c1dc3804a Mon Sep 17 00:00:00 2001
From: Vladimir Sokolovsky <vlad@mellanox.co.il>
Date: Wed, 14 Apr 2010 17:23:39 +0300
Subject: IB/mlx4: Add support for masked atomic operations

Add support for masked atomic operations (masked compare and swap,
masked fetch and add).

Signed-off-by: Vladimir Sokolovsky <vlad@mellanox.co.il>
Signed-off-by: Roland Dreier <rolandd@cisco.com>
---
 drivers/infiniband/hw/mlx4/cq.c   |  8 +++++++
 drivers/infiniband/hw/mlx4/main.c |  1 +
 drivers/infiniband/hw/mlx4/qp.c   | 50 ++++++++++++++++++++++++++++++---------
 include/linux/mlx4/device.h       |  4 ++--
 include/linux/mlx4/qp.h           |  7 ++++++
 5 files changed, 57 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
index cc2ddd29ac57..5a219a2fdf16 100644
--- a/drivers/infiniband/hw/mlx4/cq.c
+++ b/drivers/infiniband/hw/mlx4/cq.c
@@ -661,6 +661,14 @@ repoll:
 			wc->opcode    = IB_WC_FETCH_ADD;
 			wc->byte_len  = 8;
 			break;
+		case MLX4_OPCODE_MASKED_ATOMIC_CS:
+			wc->opcode    = IB_WC_MASKED_COMP_SWAP;
+			wc->byte_len  = 8;
+			break;
+		case MLX4_OPCODE_MASKED_ATOMIC_FA:
+			wc->opcode    = IB_WC_MASKED_FETCH_ADD;
+			wc->byte_len  = 8;
+			break;
 		case MLX4_OPCODE_BIND_MW:
 			wc->opcode    = IB_WC_BIND_MW;
 			break;
diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
index 01f2a3f93355..39051417054c 100644
--- a/drivers/infiniband/hw/mlx4/main.c
+++ b/drivers/infiniband/hw/mlx4/main.c
@@ -139,6 +139,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev,
 	props->local_ca_ack_delay  = dev->dev->caps.local_ca_ack_delay;
 	props->atomic_cap	   = dev->dev->caps.flags & MLX4_DEV_CAP_FLAG_ATOMIC ?
 		IB_ATOMIC_HCA : IB_ATOMIC_NONE;
+	props->masked_atomic_cap   = IB_ATOMIC_HCA;
 	props->max_pkeys	   = dev->dev->caps.pkey_table_len[1];
 	props->max_mcast_grp	   = dev->dev->caps.num_mgms + dev->dev->caps.num_amgms;
 	props->max_mcast_qp_attach = dev->dev->caps.num_qp_per_mgm;
diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c
index 5643f4a8ffef..6a60827b2301 100644
--- a/drivers/infiniband/hw/mlx4/qp.c
+++ b/drivers/infiniband/hw/mlx4/qp.c
@@ -74,17 +74,19 @@ enum {
 };
 
 static const __be32 mlx4_ib_opcode[] = {
-	[IB_WR_SEND]			= cpu_to_be32(MLX4_OPCODE_SEND),
-	[IB_WR_LSO]			= cpu_to_be32(MLX4_OPCODE_LSO),
-	[IB_WR_SEND_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
-	[IB_WR_RDMA_WRITE]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
-	[IB_WR_RDMA_WRITE_WITH_IMM]	= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
-	[IB_WR_RDMA_READ]		= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
-	[IB_WR_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
-	[IB_WR_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
-	[IB_WR_SEND_WITH_INV]		= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
-	[IB_WR_LOCAL_INV]		= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
-	[IB_WR_FAST_REG_MR]		= cpu_to_be32(MLX4_OPCODE_FMR),
+	[IB_WR_SEND]				= cpu_to_be32(MLX4_OPCODE_SEND),
+	[IB_WR_LSO]				= cpu_to_be32(MLX4_OPCODE_LSO),
+	[IB_WR_SEND_WITH_IMM]			= cpu_to_be32(MLX4_OPCODE_SEND_IMM),
+	[IB_WR_RDMA_WRITE]			= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE),
+	[IB_WR_RDMA_WRITE_WITH_IMM]		= cpu_to_be32(MLX4_OPCODE_RDMA_WRITE_IMM),
+	[IB_WR_RDMA_READ]			= cpu_to_be32(MLX4_OPCODE_RDMA_READ),
+	[IB_WR_ATOMIC_CMP_AND_SWP]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_CS),
+	[IB_WR_ATOMIC_FETCH_AND_ADD]		= cpu_to_be32(MLX4_OPCODE_ATOMIC_FA),
+	[IB_WR_SEND_WITH_INV]			= cpu_to_be32(MLX4_OPCODE_SEND_INVAL),
+	[IB_WR_LOCAL_INV]			= cpu_to_be32(MLX4_OPCODE_LOCAL_INVAL),
+	[IB_WR_FAST_REG_MR]			= cpu_to_be32(MLX4_OPCODE_FMR),
+	[IB_WR_MASKED_ATOMIC_CMP_AND_SWP]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_CS),
+	[IB_WR_MASKED_ATOMIC_FETCH_AND_ADD]	= cpu_to_be32(MLX4_OPCODE_MASKED_ATOMIC_FA),
 };
 
 static struct mlx4_ib_sqp *to_msqp(struct mlx4_ib_qp *mqp)
@@ -1407,6 +1409,9 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
 	if (wr->opcode == IB_WR_ATOMIC_CMP_AND_SWP) {
 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.swap);
 		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add);
+	} else if (wr->opcode == IB_WR_MASKED_ATOMIC_FETCH_AND_ADD) {
+		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
+		aseg->compare  = cpu_to_be64(wr->wr.atomic.compare_add_mask);
 	} else {
 		aseg->swap_add = cpu_to_be64(wr->wr.atomic.compare_add);
 		aseg->compare  = 0;
@@ -1414,6 +1419,15 @@ static void set_atomic_seg(struct mlx4_wqe_atomic_seg *aseg, struct ib_send_wr *
 
 }
 
+static void set_masked_atomic_seg(struct mlx4_wqe_masked_atomic_seg *aseg,
+				  struct ib_send_wr *wr)
+{
+	aseg->swap_add		= cpu_to_be64(wr->wr.atomic.swap);
+	aseg->swap_add_mask	= cpu_to_be64(wr->wr.atomic.swap_mask);
+	aseg->compare		= cpu_to_be64(wr->wr.atomic.compare_add);
+	aseg->compare_mask	= cpu_to_be64(wr->wr.atomic.compare_add_mask);
+}
+
 static void set_datagram_seg(struct mlx4_wqe_datagram_seg *dseg,
 			     struct ib_send_wr *wr)
 {
@@ -1567,6 +1581,7 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 			switch (wr->opcode) {
 			case IB_WR_ATOMIC_CMP_AND_SWP:
 			case IB_WR_ATOMIC_FETCH_AND_ADD:
+			case IB_WR_MASKED_ATOMIC_FETCH_AND_ADD:
 				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
 					      wr->wr.atomic.rkey);
 				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
@@ -1579,6 +1594,19 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
 				break;
 
+			case IB_WR_MASKED_ATOMIC_CMP_AND_SWP:
+				set_raddr_seg(wqe, wr->wr.atomic.remote_addr,
+					      wr->wr.atomic.rkey);
+				wqe  += sizeof (struct mlx4_wqe_raddr_seg);
+
+				set_masked_atomic_seg(wqe, wr);
+				wqe  += sizeof (struct mlx4_wqe_masked_atomic_seg);
+
+				size += (sizeof (struct mlx4_wqe_raddr_seg) +
+					 sizeof (struct mlx4_wqe_masked_atomic_seg)) / 16;
+
+				break;
+
 			case IB_WR_RDMA_READ:
 			case IB_WR_RDMA_WRITE:
 			case IB_WR_RDMA_WRITE_WITH_IMM:
diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h
index e92d1bfdb330..7a7f9c1e679a 100644
--- a/include/linux/mlx4/device.h
+++ b/include/linux/mlx4/device.h
@@ -123,8 +123,8 @@ enum {
 	MLX4_OPCODE_RDMA_READ		= 0x10,
 	MLX4_OPCODE_ATOMIC_CS		= 0x11,
 	MLX4_OPCODE_ATOMIC_FA		= 0x12,
-	MLX4_OPCODE_ATOMIC_MASK_CS	= 0x14,
-	MLX4_OPCODE_ATOMIC_MASK_FA	= 0x15,
+	MLX4_OPCODE_MASKED_ATOMIC_CS	= 0x14,
+	MLX4_OPCODE_MASKED_ATOMIC_FA	= 0x15,
 	MLX4_OPCODE_BIND_MW		= 0x18,
 	MLX4_OPCODE_FMR			= 0x19,
 	MLX4_OPCODE_LOCAL_INVAL		= 0x1b,
diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 9f29d86e5dc9..7abe64326f72 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -285,6 +285,13 @@ struct mlx4_wqe_atomic_seg {
 	__be64			compare;
 };
 
+struct mlx4_wqe_masked_atomic_seg {
+	__be64			swap_add;
+	__be64			compare;
+	__be64			swap_add_mask;
+	__be64			compare_mask;
+};
+
 struct mlx4_wqe_data_seg {
 	__be32			byte_count;
 	__be32			lkey;
-- 
cgit v1.2.3


From 71d0a6112a363e703e383ae5b12c492485c39701 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 22 Apr 2010 15:35:57 -0400
Subject: NFS: Fix an unstable write data integrity race

Commit 2c61be0a9478258f77b66208a0c4b1f5f8161c3c (NFS: Ensure that the WRITE
and COMMIT RPC calls are always uninterruptible) exposed a race on file
close. In order to ensure correct close-to-open behaviour, we want to wait
for all outstanding background commit operations to complete.

This patch adds an inode flag that indicates if a commit operation is under
way, and provides a mechanism to allow ->write_inode() to wait for its
completion if this is a data integrity flush.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/write.c         | 36 ++++++++++++++++++++++++++++++++----
 include/linux/nfs_fs.h |  1 +
 2 files changed, 33 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index de38d63aa920..ccde2aeb3fec 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1201,6 +1201,25 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
 
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
+{
+	if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
+		return 1;
+	if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags,
+				NFS_INO_COMMIT, nfs_wait_bit_killable,
+				TASK_KILLABLE))
+		return 1;
+	return 0;
+}
+
+static void nfs_commit_clear_lock(struct nfs_inode *nfsi)
+{
+	clear_bit(NFS_INO_COMMIT, &nfsi->flags);
+	smp_mb__after_clear_bit();
+	wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
+}
+
+
 static void nfs_commitdata_release(void *data)
 {
 	struct nfs_write_data *wdata = data;
@@ -1262,8 +1281,6 @@ static int nfs_commit_rpcsetup(struct list_head *head,
 	task = rpc_run_task(&task_setup_data);
 	if (IS_ERR(task))
 		return PTR_ERR(task);
-	if (how & FLUSH_SYNC)
-		rpc_wait_for_completion_task(task);
 	rpc_put_task(task);
 	return 0;
 }
@@ -1294,6 +1311,7 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
 				BDI_RECLAIMABLE);
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(inode));
 	return -ENOMEM;
 }
 
@@ -1349,6 +1367,7 @@ static void nfs_commit_release(void *calldata)
 	next:
 		nfs_clear_page_tag_locked(req);
 	}
+	nfs_commit_clear_lock(NFS_I(data->inode));
 	nfs_commitdata_release(calldata);
 }
 
@@ -1363,8 +1382,11 @@ static const struct rpc_call_ops nfs_commit_ops = {
 static int nfs_commit_inode(struct inode *inode, int how)
 {
 	LIST_HEAD(head);
-	int res;
+	int may_wait = how & FLUSH_SYNC;
+	int res = 0;
 
+	if (!nfs_commit_set_lock(NFS_I(inode), may_wait))
+		goto out;
 	spin_lock(&inode->i_lock);
 	res = nfs_scan_commit(inode, &head, 0, 0);
 	spin_unlock(&inode->i_lock);
@@ -1372,7 +1394,13 @@ static int nfs_commit_inode(struct inode *inode, int how)
 		int error = nfs_commit_list(inode, &head, how);
 		if (error < 0)
 			return error;
-	}
+		if (may_wait)
+			wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
+					nfs_wait_bit_killable,
+					TASK_KILLABLE);
+	} else
+		nfs_commit_clear_lock(NFS_I(inode));
+out:
 	return res;
 }
 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 1a0b85aa151e..07ce4609fe50 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -209,6 +209,7 @@ struct nfs_inode {
 #define NFS_INO_FLUSHING	(4)		/* inode is flushing out data */
 #define NFS_INO_FSCACHE		(5)		/* inode can be cached by FS-Cache */
 #define NFS_INO_FSCACHE_LOCK	(6)		/* FS-Cache cookie management lock */
+#define NFS_INO_COMMIT		(7)		/* inode is committing unstable writes */
 
 static inline struct nfs_inode *NFS_I(const struct inode *inode)
 {
-- 
cgit v1.2.3


From e0c8233622cbd49d171bc57b60e725f2fb748750 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Thu, 22 Apr 2010 17:04:25 -0400
Subject: nfsd4: fix filehandle comment

Minor typos.

Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 65e333afaee4..45bb5a8102c1 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -40,8 +40,8 @@ struct nfs_fhbase_old {
  * This is the new flexible, extensible style NFSv2/v3 file handle.
  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
  *
- * The file handle is seens as a list of 4byte words.
- * The first word contains a version number (1) and four descriptor bytes
+ * The file handle is seens as a list of four-byte words.
+ * The first word contains a version number (1) and three descriptor bytes
  * that tell how the remaining 3 variable length fields should be handled.
  * These three bytes are auth_type, fsid_type and fileid_type.
  *
-- 
cgit v1.2.3


From e802af9cabb011f09b9c19a82faef3dd315f27eb Mon Sep 17 00:00:00 2001
From: Stephen Hemminger <shemminger@vyatta.com>
Date: Thu, 22 Apr 2010 15:24:53 -0700
Subject: IPv6: Generic TTL Security Mechanism (final version)

This patch adds IPv6 support for RFC5082 Generalized TTL Security Mechanism.

Note to users of mapped addresses: the IPv6 and IPv4 socket options are separate.
The server has to deal with both the IPv4 and IPv6 socket options,
and the client has to set the appropriate option for each address family.

On client:
	int ttl = 255;
	getaddrinfo(argv[1], argv[2], &hint, &result);

	for (rp = result; rp != NULL; rp = rp->ai_next) {
		s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (s < 0) continue;

		if (rp->ai_family == AF_INET) {
			setsockopt(s, IPPROTO_IP, IP_TTL, &ttl, sizeof(ttl));
		} else if (rp->ai_family == AF_INET6) {
			setsockopt(s, IPPROTO_IPV6,  IPV6_UNICAST_HOPS,
					&ttl, sizeof(ttl));
		}

		if (connect(s, rp->ai_addr, rp->ai_addrlen) == 0) {
		   ...

On server:
	int minttl = 255 - maxhops;

	getaddrinfo(NULL, port, &hints, &result);
	for (rp = result; rp != NULL; rp = rp->ai_next) {
		s = socket(rp->ai_family, rp->ai_socktype, rp->ai_protocol);
		if (s < 0) continue;

		if (rp->ai_family == AF_INET6)
			setsockopt(s, IPPROTO_IPV6,  IPV6_MINHOPCOUNT,
					&minttl, sizeof(minttl));
		setsockopt(s, IPPROTO_IP, IP_MINTTL, &minttl, sizeof(minttl));

		if (bind(s, rp->ai_addr, rp->ai_addrlen) == 0)
			break;
...

Signed-off-by: Stephen Hemminger <shemminger@vyatta.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      |  3 +++
 include/linux/ipv6.h     |  1 +
 net/ipv6/ipv6_sockglue.c | 12 ++++++++++++
 net/ipv6/tcp_ipv6.c      | 14 +++++++++++++-
 4 files changed, 29 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index bd55c6e46b2e..9b90cb296eb1 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -265,6 +265,9 @@ struct in6_flowlabel_req {
 #define IPV6_PREFER_SRC_CGA		0x0008
 #define IPV6_PREFER_SRC_NONCGA		0x0800
 
+/* RFC5082: Generalized Ttl Security Mechanism */
+#define IPV6_MINHOPCOUNT		73
+
 /*
  * Multicast Routing:
  * see include/linux/mroute6.h.
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index e0cc9a7db2b5..1bdbebf08d16 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -348,6 +348,7 @@ struct ipv6_pinfo {
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
+	__u8			min_hopcount;
 	__u8			tclass;
 
 	__u32			dst_cookie;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 1160400e9dbd..92295ad3487a 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -767,6 +767,14 @@ pref_skip_coa:
 
 		break;
 	    }
+	case IPV6_MINHOPCOUNT:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		if (val < 0 || val > 255)
+			goto e_inval;
+		np->min_hopcount = val;
+		retv = 0;
+		break;
 	}
 
 	release_sock(sk);
@@ -1116,6 +1124,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 			val |= IPV6_PREFER_SRC_HOME;
 		break;
 
+	case IPV6_MINHOPCOUNT:
+		val = np->min_hopcount;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 1ababbb41131..6603511e3673 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -353,6 +353,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	if (sk->sk_state == TCP_CLOSE)
 		goto out;
 
+	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto out;
+	}
+
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
 	if (sk->sk_state != TCP_LISTEN &&
@@ -1678,6 +1683,7 @@ ipv6_pktoptions:
 static int tcp_v6_rcv(struct sk_buff *skb)
 {
 	struct tcphdr *th;
+	struct ipv6hdr *hdr;
 	struct sock *sk;
 	int ret;
 	struct net *net = dev_net(skb->dev);
@@ -1704,12 +1710,13 @@ static int tcp_v6_rcv(struct sk_buff *skb)
 		goto bad_packet;
 
 	th = tcp_hdr(skb);
+	hdr = ipv6_hdr(skb);
 	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
 	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
 				    skb->len - th->doff*4);
 	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
 	TCP_SKB_CB(skb)->when = 0;
-	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(ipv6_hdr(skb));
+	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
 	TCP_SKB_CB(skb)->sacked = 0;
 
 	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
@@ -1720,6 +1727,11 @@ process:
 	if (sk->sk_state == TCP_TIME_WAIT)
 		goto do_time_wait;
 
+	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
+		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
+		goto discard_and_relse;
+	}
+
 	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
 		goto discard_and_relse;
 
-- 
cgit v1.2.3


From 40eaf96271526a9f71030dd1a199ce46c045752e Mon Sep 17 00:00:00 2001
From: Paul LeoNerd Evans <leonerd@leonerd.org.uk>
Date: Thu, 22 Apr 2010 03:32:22 +0000
Subject: net: Socket filter ancilliary data access for skb->dev->type

Add an SKF_AD_HATYPE field to the packet ancillary data area, giving
access to skb->dev->type, as reported in the sll_hatype field.

When capturing packets on a PF_PACKET/SOCK_RAW socket bound to all
interfaces, there doesn't appear to be a way for the filter program to
actually find out the underlying hardware type the packet was captured
on. This patch adds such ability.

This patch also handles the case where skb->dev can be NULL, such as on
netlink sockets.

Signed-off-by: Paul Evans <leonerd@leonerd.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/filter.h | 3 ++-
 net/core/filter.c      | 7 +++++++
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/filter.h b/include/linux/filter.h
index 29a0e3db9f43..151f5d703b7e 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -123,7 +123,8 @@ struct sock_fprog {	/* Required for SO_ATTACH_FILTER. */
 #define SKF_AD_NLATTR_NEST	16
 #define SKF_AD_MARK 	20
 #define SKF_AD_QUEUE	24
-#define SKF_AD_MAX	28
+#define SKF_AD_HATYPE	28
+#define SKF_AD_MAX	32
 #define SKF_NET_OFF   (-0x100000)
 #define SKF_LL_OFF    (-0x200000)
 
diff --git a/net/core/filter.c b/net/core/filter.c
index ff943bed21af..da69fb728d32 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -302,6 +302,8 @@ load_b:
 			A = skb->pkt_type;
 			continue;
 		case SKF_AD_IFINDEX:
+			if (!skb->dev)
+				return 0;
 			A = skb->dev->ifindex;
 			continue;
 		case SKF_AD_MARK:
@@ -310,6 +312,11 @@ load_b:
 		case SKF_AD_QUEUE:
 			A = skb->queue_mapping;
 			continue;
+		case SKF_AD_HATYPE:
+			if (!skb->dev)
+				return 0;
+			A = skb->dev->type;
+			continue;
 		case SKF_AD_NLATTR: {
 			struct nlattr *nla;
 
-- 
cgit v1.2.3


From 5ebfbc06aae941484326c9e7e9c4d85330f63591 Mon Sep 17 00:00:00 2001
From: Andrew Hendry <andrew.hendry@gmail.com>
Date: Thu, 22 Apr 2010 16:12:36 -0700
Subject: X25: Add if_x25.h and x25 to device identifiers

V2 Feedback from John Hughes.
- Add header for userspace implementations such as xot/xoe to use
- Use explicit values for interface stability
- No changes to driver patches

V1
- Use identifiers instead of magic numbers for X25 layer 3 to device interface.
- Also fixed checkpatch notes on updated code.

[ Add new user header to include/linux/Kbuild  -DaveM ]

Signed-off-by: Andrew Hendry <andrew.hendry@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/Kbuild    |  1 +
 include/linux/if_x25.h  | 26 ++++++++++++++++++++++++++
 include/net/x25device.h |  1 +
 net/x25/x25_dev.c       | 36 ++++++++++++++++++++----------------
 4 files changed, 48 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/if_x25.h

(limited to 'include/linux')

diff --git a/include/linux/Kbuild b/include/linux/Kbuild
index e2ea0b2159cd..2fc8e14cc24a 100644
--- a/include/linux/Kbuild
+++ b/include/linux/Kbuild
@@ -94,6 +94,7 @@ header-y += if_ppp.h
 header-y += if_slip.h
 header-y += if_strip.h
 header-y += if_tun.h
+header-y += if_x25.h
 header-y += in_route.h
 header-y += ioctl.h
 header-y += ip6_tunnel.h
diff --git a/include/linux/if_x25.h b/include/linux/if_x25.h
new file mode 100644
index 000000000000..897765f5feb8
--- /dev/null
+++ b/include/linux/if_x25.h
@@ -0,0 +1,26 @@
+/*
+ *  Linux X.25 packet to device interface
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ */
+
+#ifndef _IF_X25_H
+#define _IF_X25_H
+
+#include <linux/types.h>
+
+/* Documentation/networking/x25-iface.txt */
+#define X25_IFACE_DATA		0x00
+#define X25_IFACE_CONNECT	0x01
+#define X25_IFACE_DISCONNECT	0x02
+#define X25_IFACE_PARAMS	0x03
+
+#endif /* _IF_X25_H */
diff --git a/include/net/x25device.h b/include/net/x25device.h
index 1415bcf93980..1fa08b49f1c2 100644
--- a/include/net/x25device.h
+++ b/include/net/x25device.h
@@ -3,6 +3,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/if_x25.h>
 #include <linux/skbuff.h>
 
 static inline __be16 x25_type_trans(struct sk_buff *skb, struct net_device *dev)
diff --git a/net/x25/x25_dev.c b/net/x25/x25_dev.c
index b9ef682230a0..9005f6daeab5 100644
--- a/net/x25/x25_dev.c
+++ b/net/x25/x25_dev.c
@@ -24,6 +24,7 @@
 #include <net/sock.h>
 #include <linux/if_arp.h>
 #include <net/x25.h>
+#include <net/x25device.h>
 
 static int x25_receive_data(struct sk_buff *skb, struct x25_neigh *nb)
 {
@@ -115,19 +116,22 @@ int x25_lapb_receive_frame(struct sk_buff *skb, struct net_device *dev,
 	}
 
 	switch (skb->data[0]) {
-		case 0x00:
-			skb_pull(skb, 1);
-			if (x25_receive_data(skb, nb)) {
-				x25_neigh_put(nb);
-				goto out;
-			}
-			break;
-		case 0x01:
-			x25_link_established(nb);
-			break;
-		case 0x02:
-			x25_link_terminated(nb);
-			break;
+
+	case X25_IFACE_DATA:
+		skb_pull(skb, 1);
+		if (x25_receive_data(skb, nb)) {
+			x25_neigh_put(nb);
+			goto out;
+		}
+		break;
+
+	case X25_IFACE_CONNECT:
+		x25_link_established(nb);
+		break;
+
+	case X25_IFACE_DISCONNECT:
+		x25_link_terminated(nb);
+		break;
 	}
 	x25_neigh_put(nb);
 drop:
@@ -148,7 +152,7 @@ void x25_establish_link(struct x25_neigh *nb)
 				return;
 			}
 			ptr  = skb_put(skb, 1);
-			*ptr = 0x01;
+			*ptr = X25_IFACE_CONNECT;
 			break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
@@ -184,7 +188,7 @@ void x25_terminate_link(struct x25_neigh *nb)
 	}
 
 	ptr  = skb_put(skb, 1);
-	*ptr = 0x02;
+	*ptr = X25_IFACE_DISCONNECT;
 
 	skb->protocol = htons(ETH_P_X25);
 	skb->dev      = nb->dev;
@@ -200,7 +204,7 @@ void x25_send_frame(struct sk_buff *skb, struct x25_neigh *nb)
 	switch (nb->dev->type) {
 		case ARPHRD_X25:
 			dptr  = skb_push(skb, 1);
-			*dptr = 0x00;
+			*dptr = X25_IFACE_DATA;
 			break;
 
 #if defined(CONFIG_LLC) || defined(CONFIG_LLC_MODULE)
-- 
cgit v1.2.3


From 286d1e7f73320be063a5f6af25d3d61c741065c2 Mon Sep 17 00:00:00 2001
From: Scott Feldman <scofeldm@cisco.com>
Date: Thu, 22 Apr 2010 14:38:03 +0000
Subject: remove DCB_PROTO_VERSION as we don't do netlink versioning

remove DCB_PROTO_VERSION as we don't do netlink versioning

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dcbnl.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/dcbnl.h b/include/linux/dcbnl.h
index b7cdbb4373df..8723491f7dfd 100644
--- a/include/linux/dcbnl.h
+++ b/include/linux/dcbnl.h
@@ -22,8 +22,6 @@
 
 #include <linux/types.h>
 
-#define DCB_PROTO_VERSION 1
-
 struct dcbmsg {
 	__u8               dcb_family;
 	__u8               cmd;
-- 
cgit v1.2.3


From 669c55e9f99b90e46eaa0f98a67ec53d46dc969a Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri, 16 Apr 2010 14:59:29 +0200
Subject: sched: Pre-compute cpumask_weight(sched_domain_span(sd))

Dave reported that his large SPARC machines spend lots of time in
hweight64(). Try to optimize away some of those needless cpumask_weight()
invocations (especially with the large offstack cpumasks, these are very
expensive indeed).

Reported-by: David Miller <davem@davemloft.net>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/sched.h |  1 +
 kernel/sched.c        |  3 +++
 kernel/sched_fair.c   | 12 +++++-------
 3 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sched.h b/include/linux/sched.h
index e3e900f318d7..dfea40574b2a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -960,6 +960,7 @@ struct sched_domain {
 	char *name;
 #endif
 
+	unsigned int span_weight;
 	/*
 	 * Span of all CPUs in this domain.
 	 *
diff --git a/kernel/sched.c b/kernel/sched.c
index 0cc913a8554f..4956ed092838 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6271,6 +6271,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 	struct sched_domain *tmp;
 
+	for (tmp = sd; tmp; tmp = tmp->parent)
+		tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
+
 	/* Remove the sched domains which do not contribute to scheduling. */
 	for (tmp = sd; tmp; ) {
 		struct sched_domain *parent = tmp->parent;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 88d3053ac7c2..0a413c7e3ab8 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1508,9 +1508,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 		 * Pick the largest domain to update shares over
 		 */
 		tmp = sd;
-		if (affine_sd && (!tmp ||
-				  cpumask_weight(sched_domain_span(affine_sd)) >
-				  cpumask_weight(sched_domain_span(sd))))
+		if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
 			tmp = affine_sd;
 
 		if (tmp) {
@@ -1554,10 +1552,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
 
 		/* Now try balancing at a lower domain level of new_cpu */
 		cpu = new_cpu;
-		weight = cpumask_weight(sched_domain_span(sd));
+		weight = sd->span_weight;
 		sd = NULL;
 		for_each_domain(cpu, tmp) {
-			if (weight <= cpumask_weight(sched_domain_span(tmp)))
+			if (weight <= tmp->span_weight)
 				break;
 			if (tmp->flags & sd_flag)
 				sd = tmp;
@@ -2243,7 +2241,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
 
 unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
 {
-	unsigned long weight = cpumask_weight(sched_domain_span(sd));
+	unsigned long weight = sd->span_weight;
 	unsigned long smt_gain = sd->smt_gain;
 
 	smt_gain /= weight;
@@ -2276,7 +2274,7 @@ unsigned long scale_rt_power(int cpu)
 
 static void update_cpu_power(struct sched_domain *sd, int cpu)
 {
-	unsigned long weight = cpumask_weight(sched_domain_span(sd));
+	unsigned long weight = sd->span_weight;
 	unsigned long power = SCHED_LOAD_SCALE;
 	struct sched_group *sdg = sd->groups;
 
-- 
cgit v1.2.3


From af740b2c8f4521e2c45698ee6040941a82d6349d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <hawk@comx.dk>
Date: Fri, 23 Apr 2010 12:34:56 +0200
Subject: netfilter: nf_conntrack: extend with extra stat counter

I suspect an unfortunate series of events occurring under a DDoS
attack, in function __nf_conntrack_find() in nf_conntrack_core.c.

Adding a stats counter to see if the search is restarted too often.

Signed-off-by: Jesper Dangaard Brouer <hawk@comx.dk>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/nf_conntrack_common.h         | 1 +
 net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c | 7 ++++---
 net/netfilter/nf_conntrack_core.c                     | 4 +++-
 net/netfilter/nf_conntrack_standalone.c               | 7 ++++---
 4 files changed, 12 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h
index c608677dda60..14e6d32002c4 100644
--- a/include/linux/netfilter/nf_conntrack_common.h
+++ b/include/linux/netfilter/nf_conntrack_common.h
@@ -113,6 +113,7 @@ struct ip_conntrack_stat {
 	unsigned int expect_new;
 	unsigned int expect_create;
 	unsigned int expect_delete;
+	unsigned int search_restart;
 };
 
 /* call to create an explicit dependency on nf_conntrack. */
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 2fb7b76da94f..244f7cb08d68 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -336,12 +336,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -358,7 +358,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 
 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 0c9bbe93cc16..3907efb97a7c 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -319,8 +319,10 @@ begin:
 	 * not the expected one, we must restart lookup.
 	 * We probably met an item that was moved to another chain.
 	 */
-	if (get_nulls_value(n) != hash)
+	if (get_nulls_value(n) != hash) {
+		NF_CT_STAT_INC(net, search_restart);
 		goto begin;
+	}
 	local_bh_enable();
 
 	return NULL;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index faa8eb3722b9..ea4a8d384234 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -252,12 +252,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 	const struct ip_conntrack_stat *st = v;
 
 	if (v == SEQ_START_TOKEN) {
-		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete\n");
+		seq_printf(seq, "entries  searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error  expect_new expect_create expect_delete search_restart\n");
 		return 0;
 	}
 
 	seq_printf(seq, "%08x  %08x %08x %08x %08x %08x %08x %08x "
-			"%08x %08x %08x %08x %08x  %08x %08x %08x \n",
+			"%08x %08x %08x %08x %08x  %08x %08x %08x %08x\n",
 		   nr_conntracks,
 		   st->searched,
 		   st->found,
@@ -274,7 +274,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
 
 		   st->expect_new,
 		   st->expect_create,
-		   st->expect_delete
+		   st->expect_delete,
+		   st->search_restart
 		);
 	return 0;
 }
-- 
cgit v1.2.3


From 793b14731686595a741d9f47726ad8b9a235385a Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 23 Apr 2010 11:26:07 +0000
Subject: IPv6: data structure changes for new socket options

Add underlying data structure changes and basic setsockopt()
and getsockopt() support for IPV6_RECVPATHMTU, IPV6_PATHMTU,
and IPV6_DONTFRAG.  IPV6_PATHMTU is actually fully functional
at this point.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/in6.h      |  2 +-
 include/linux/ipv6.h     | 13 ++++++++++---
 net/ipv6/ipv6_sockglue.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 57 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/in6.h b/include/linux/in6.h
index 9b90cb296eb1..c4bf46f764bf 100644
--- a/include/linux/in6.h
+++ b/include/linux/in6.h
@@ -221,10 +221,10 @@ struct in6_flowlabel_req {
 #define IPV6_RTHDR		57
 #define IPV6_RECVDSTOPTS	58
 #define IPV6_DSTOPTS		59
-#if 0	/* not yet */
 #define IPV6_RECVPATHMTU	60
 #define IPV6_PATHMTU		61
 #define IPV6_DONTFRAG		62
+#if 0	/* not yet */
 #define IPV6_USE_MIN_MTU	63
 #endif
 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1bdbebf08d16..1976942cf6f9 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -21,6 +21,10 @@ struct in6_pktinfo {
 	int		ipi6_ifindex;
 };
 
+struct ip6_mtuinfo {
+	struct sockaddr_in6	ip6m_addr;
+	__u32			ip6m_mtu;
+};
 
 struct in6_ifreq {
 	struct in6_addr	ifr6_addr;
@@ -334,22 +338,25 @@ struct ipv6_pinfo {
 				dstopts:1,
 				odstopts:1,
                                 rxflow:1,
-				rxtclass:1;
+				rxtclass:1,
+				rxpmtu:1;
 		} bits;
 		__u16		all;
 	} rxopt;
 
 	/* sockopt flags */
-	__u8			recverr:1,
+	__u16			recverr:1,
 	                        sndflow:1,
 				pmtudisc:2,
 				ipv6only:1,
-				srcprefs:3;	/* 001: prefer temporary address
+				srcprefs:3,	/* 001: prefer temporary address
 						 * 010: prefer public address
 						 * 100: prefer care-of address
 						 */
+				dontfrag:1;
 	__u8			min_hopcount;
 	__u8			tclass;
+	__u8			padding;
 
 	__u32			dst_cookie;
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 92295ad3487a..2bf9eda72788 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -337,6 +337,13 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		retv = 0;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		if (optlen < sizeof(int))
+			goto e_inval;
+		np->rxopt.bits.rxpmtu = valbool;
+		retv = 0;
+		break;
+
 	case IPV6_HOPOPTS:
 	case IPV6_RTHDRDSTOPTS:
 	case IPV6_RTHDR:
@@ -773,6 +780,9 @@ pref_skip_coa:
 		if (val < 0 || val > 255)
 			goto e_inval;
 		np->min_hopcount = val;
+		break;
+	case IPV6_DONTFRAG:
+		np->dontfrag = valbool;
 		retv = 0;
 		break;
 	}
@@ -1063,6 +1073,38 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->rxopt.bits.rxflow;
 		break;
 
+	case IPV6_RECVPATHMTU:
+		val = np->rxopt.bits.rxpmtu;
+		break;
+
+	case IPV6_PATHMTU:
+	{
+		struct dst_entry *dst;
+		struct ip6_mtuinfo mtuinfo;
+
+		if (len < sizeof(mtuinfo))
+			return -EINVAL;
+
+		len = sizeof(mtuinfo);
+		memset(&mtuinfo, 0, sizeof(mtuinfo));
+
+		rcu_read_lock();
+		dst = __sk_dst_get(sk);
+		if (dst)
+			mtuinfo.ip6m_mtu = dst_mtu(dst);
+		rcu_read_unlock();
+		if (!mtuinfo.ip6m_mtu)
+			return -ENOTCONN;
+
+		if (put_user(len, optlen))
+			return -EFAULT;
+		if (copy_to_user(optval, &mtuinfo, len))
+			return -EFAULT;
+
+		return 0;
+		break;
+	}
+
 	case IPV6_UNICAST_HOPS:
 	case IPV6_MULTICAST_HOPS:
 	{
@@ -1128,6 +1170,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		val = np->min_hopcount;
 		break;
 
+	case IPV6_DONTFRAG:
+		val = np->dontfrag;
+		break;
+
 	default:
 		return -ENOPROTOOPT;
 	}
-- 
cgit v1.2.3


From 4b340ae20d0e2366792abe70f46629e576adaf5e Mon Sep 17 00:00:00 2001
From: Brian Haley <brian.haley@hp.com>
Date: Fri, 23 Apr 2010 11:26:09 +0000
Subject: IPv6: Complete IPV6_DONTFRAG support

Finally add support to detect a local IPV6_DONTFRAG event
and return the relevant data to the user if they've enabled
IPV6_RECVPATHMTU on the socket.  The next recvmsg() will
return no data, but have an IPV6_PATHMTU as ancillary data.

Signed-off-by: Brian Haley <brian.haley@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h  |  2 ++
 include/net/ipv6.h    |  2 ++
 net/ipv6/af_inet6.c   |  3 ++
 net/ipv6/datagram.c   | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ip6_output.c | 24 +++++++++-----
 net/ipv6/raw.c        |  3 ++
 net/ipv6/udp.c        |  3 ++
 7 files changed, 116 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 1976942cf6f9..2ab5509f6d49 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -257,6 +257,7 @@ struct inet6_skb_parm {
 };
 
 #define IP6CB(skb)	((struct inet6_skb_parm*)((skb)->cb))
+#define IP6CBMTU(skb)	((struct ip6_mtuinfo *)((skb)->cb))
 
 static inline int inet6_iif(const struct sk_buff *skb)
 {
@@ -366,6 +367,7 @@ struct ipv6_pinfo {
 
 	struct ipv6_txoptions	*opt;
 	struct sk_buff		*pktoptions;
+	struct sk_buff		*rxpmtu;
 	struct {
 		struct ipv6_txoptions *opt;
 		u8 hop_limit;
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 7ab6323e631e..eba5cc00325a 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -578,9 +578,11 @@ extern int			ip6_datagram_connect(struct sock *sk,
 						     struct sockaddr *addr, int addr_len);
 
 extern int 			ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len);
+extern int 			ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len);
 extern void			ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port,
 						u32 info, u8 *payload);
 extern void			ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info);
+extern void			ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu);
 
 extern int inet6_release(struct socket *sock);
 extern int inet6_bind(struct socket *sock, struct sockaddr *uaddr, 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 3192aa02ba5d..d2df3144429b 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -417,6 +417,9 @@ void inet6_destroy_sock(struct sock *sk)
 	if ((skb = xchg(&np->pktoptions, NULL)) != NULL)
 		kfree_skb(skb);
 
+	if ((skb = xchg(&np->rxpmtu, NULL)) != NULL)
+		kfree_skb(skb);
+
 	/* Free flowlabels */
 	fl6_free_socklist(sk);
 
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index f5076d349b18..5959230bc6c1 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -278,6 +278,45 @@ void ipv6_local_error(struct sock *sk, int err, struct flowi *fl, u32 info)
 		kfree_skb(skb);
 }
 
+void ipv6_local_rxpmtu(struct sock *sk, struct flowi *fl, u32 mtu)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct ipv6hdr *iph;
+	struct sk_buff *skb;
+	struct ip6_mtuinfo *mtu_info;
+
+	if (!np->rxopt.bits.rxpmtu)
+		return;
+
+	skb = alloc_skb(sizeof(struct ipv6hdr), GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	skb_put(skb, sizeof(struct ipv6hdr));
+	skb_reset_network_header(skb);
+	iph = ipv6_hdr(skb);
+	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+
+	mtu_info = IP6CBMTU(skb);
+	if (!mtu_info) {
+		kfree_skb(skb);
+		return;
+	}
+
+	mtu_info->ip6m_mtu = mtu;
+	mtu_info->ip6m_addr.sin6_family = AF_INET6;
+	mtu_info->ip6m_addr.sin6_port = 0;
+	mtu_info->ip6m_addr.sin6_flowinfo = 0;
+	mtu_info->ip6m_addr.sin6_scope_id = fl->oif;
+	ipv6_addr_copy(&mtu_info->ip6m_addr.sin6_addr, &ipv6_hdr(skb)->daddr);
+
+	__skb_pull(skb, skb_tail_pointer(skb) - skb->data);
+	skb_reset_transport_header(skb);
+
+	skb = xchg(&np->rxpmtu, skb);
+	kfree_skb(skb);
+}
+
 /*
  *	Handle MSG_ERRQUEUE
  */
@@ -381,6 +420,54 @@ out:
 	return err;
 }
 
+/*
+ *	Handle IPV6_RECVPATHMTU
+ */
+int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)
+{
+	struct ipv6_pinfo *np = inet6_sk(sk);
+	struct sk_buff *skb;
+	struct sockaddr_in6 *sin;
+	struct ip6_mtuinfo mtu_info;
+	int err;
+	int copied;
+
+	err = -EAGAIN;
+	skb = xchg(&np->rxpmtu, NULL);
+	if (skb == NULL)
+		goto out;
+
+	copied = skb->len;
+	if (copied > len) {
+		msg->msg_flags |= MSG_TRUNC;
+		copied = len;
+	}
+	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
+	if (err)
+		goto out_free_skb;
+
+	sock_recv_timestamp(msg, sk, skb);
+
+	memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info));
+
+	sin = (struct sockaddr_in6 *)msg->msg_name;
+	if (sin) {
+		sin->sin6_family = AF_INET6;
+		sin->sin6_flowinfo = 0;
+		sin->sin6_port = 0;
+		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;
+		ipv6_addr_copy(&sin->sin6_addr, &mtu_info.ip6m_addr.sin6_addr);
+	}
+
+	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info);
+
+	err = copied;
+
+out_free_skb:
+	kfree_skb(skb);
+out:
+	return err;
+}
 
 
 int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 54d43dd1f085..61e2bef56090 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1219,15 +1219,23 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
 	 */
 
 	inet->cork.length += length;
-	if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
-	    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+	if (length > mtu) {
+		int proto = sk->sk_protocol;
+		if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
+			ipv6_local_rxpmtu(sk, fl, mtu-exthdrlen);
+			return -EMSGSIZE;
+		}
 
-		err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
-					  fragheaderlen, transhdrlen, mtu,
-					  flags);
-		if (err)
-			goto error;
-		return 0;
+		if (proto == IPPROTO_UDP &&
+		    (rt->u.dst.dev->features & NETIF_F_UFO)) {
+
+			err = ip6_ufo_append_data(sk, getfrag, from, length,
+						  hh_len, fragheaderlen,
+						  transhdrlen, mtu, flags);
+			if (err)
+				goto error;
+			return 0;
+		}
 	}
 
 	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 44a84ea9b3e8..85627386cb02 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -461,6 +461,9 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 	skb = skb_recv_datagram(sk, flags, noblock, &err);
 	if (!skb)
 		goto out;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 39e3665d9460..2850e35cee3d 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -335,6 +335,9 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,
 	if (flags & MSG_ERRQUEUE)
 		return ipv6_recv_error(sk, msg, len);
 
+	if (np->rxpmtu && np->rxopt.bits.rxpmtu)
+		return ipv6_recv_rxpmtu(sk, msg, len);
+
 try_again:
 	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
 				  &peeked, &err);
-- 
cgit v1.2.3


From 5bac942db3d2c4738df04104240d65a5d1eaec6a Mon Sep 17 00:00:00 2001
From: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Date: Wed, 21 Apr 2010 15:36:49 +0000
Subject: SH: constify multiple DMA related objects and references to them

Lists of DMA channels and slaves are not changed, make them constant. Besides,
SH7724 channel and slave configuration of both DMA controllers is identical,
remove the extra copy of the configuration data.

Signed-off-by: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/kernel/cpu/sh4a/setup-sh7722.c |  6 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 54 +++++-----------------------------
 arch/sh/kernel/cpu/sh4a/setup-sh7780.c |  6 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7785.c |  6 ++--
 arch/sh/kernel/cpu/sh4a/setup-sh7786.c |  4 +--
 drivers/dma/shdma.c                    | 12 ++++----
 include/linux/sh_dma.h                 |  9 +++---
 7 files changed, 30 insertions(+), 67 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
index dc9b30d086a4..24c6167a7181 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7722.c
@@ -24,7 +24,7 @@
 #include <cpu/dma-register.h>
 #include <cpu/sh7722.h>
 
-static struct sh_dmae_slave_config sh7722_dmae_slaves[] = {
+static const struct sh_dmae_slave_config sh7722_dmae_slaves[] = {
 	{
 		.slave_id	= SHDMA_SLAVE_SCIF0_TX,
 		.addr		= 0xffe0000c,
@@ -78,7 +78,7 @@ static struct sh_dmae_slave_config sh7722_dmae_slaves[] = {
 	},
 };
 
-static struct sh_dmae_channel sh7722_dmae_channels[] = {
+static const struct sh_dmae_channel sh7722_dmae_channels[] = {
 	{
 		.offset = 0,
 		.dmars = 0,
@@ -106,7 +106,7 @@ static struct sh_dmae_channel sh7722_dmae_channels[] = {
 	}
 };
 
-static unsigned int ts_shift[] = TS_SHIFT;
+static const unsigned int ts_shift[] = TS_SHIFT;
 
 static struct sh_dmae_pdata dma_platform_data = {
 	.slave		= sh7722_dmae_slaves,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
index 8a0a4a99f86b..89fe16d20fdb 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c
@@ -31,7 +31,7 @@
 #include <cpu/sh7724.h>
 
 /* DMA */
-static struct sh_dmae_channel sh7724_dmae0_channels[] = {
+static const struct sh_dmae_channel sh7724_dmae_channels[] = {
 	{
 		.offset = 0,
 		.dmars = 0,
@@ -59,51 +59,11 @@ static struct sh_dmae_channel sh7724_dmae0_channels[] = {
 	}
 };
 
-static struct sh_dmae_channel sh7724_dmae1_channels[] = {
-	{
-		.offset = 0,
-		.dmars = 0,
-		.dmars_bit = 0,
-	}, {
-		.offset = 0x10,
-		.dmars = 0,
-		.dmars_bit = 8,
-	}, {
-		.offset = 0x20,
-		.dmars = 4,
-		.dmars_bit = 0,
-	}, {
-		.offset = 0x30,
-		.dmars = 4,
-		.dmars_bit = 8,
-	}, {
-		.offset = 0x50,
-		.dmars = 8,
-		.dmars_bit = 0,
-	}, {
-		.offset = 0x60,
-		.dmars = 8,
-		.dmars_bit = 8,
-	}
-};
-
-static unsigned int ts_shift[] = TS_SHIFT;
-
-static struct sh_dmae_pdata dma0_platform_data = {
-	.channel	= sh7724_dmae0_channels,
-	.channel_num	= ARRAY_SIZE(sh7724_dmae0_channels),
-	.ts_low_shift	= CHCR_TS_LOW_SHIFT,
-	.ts_low_mask	= CHCR_TS_LOW_MASK,
-	.ts_high_shift	= CHCR_TS_HIGH_SHIFT,
-	.ts_high_mask	= CHCR_TS_HIGH_MASK,
-	.ts_shift	= ts_shift,
-	.ts_shift_num	= ARRAY_SIZE(ts_shift),
-	.dmaor_init	= DMAOR_INIT,
-};
+static const unsigned int ts_shift[] = TS_SHIFT;
 
-static struct sh_dmae_pdata dma1_platform_data = {
-	.channel	= sh7724_dmae1_channels,
-	.channel_num	= ARRAY_SIZE(sh7724_dmae1_channels),
+static struct sh_dmae_pdata dma_platform_data = {
+	.channel	= sh7724_dmae_channels,
+	.channel_num	= ARRAY_SIZE(sh7724_dmae_channels),
 	.ts_low_shift	= CHCR_TS_LOW_SHIFT,
 	.ts_low_mask	= CHCR_TS_LOW_MASK,
 	.ts_high_shift	= CHCR_TS_HIGH_SHIFT,
@@ -187,7 +147,7 @@ static struct platform_device dma0_device = {
 	.resource	= sh7724_dmae0_resources,
 	.num_resources	= ARRAY_SIZE(sh7724_dmae0_resources),
 	.dev		= {
-		.platform_data	= &dma0_platform_data,
+		.platform_data	= &dma_platform_data,
 	},
 	.archdata = {
 		.hwblk_id = HWBLK_DMAC0,
@@ -200,7 +160,7 @@ static struct platform_device dma1_device = {
 	.resource	= sh7724_dmae1_resources,
 	.num_resources	= ARRAY_SIZE(sh7724_dmae1_resources),
 	.dev		= {
-		.platform_data	= &dma1_platform_data,
+		.platform_data	= &dma_platform_data,
 	},
 	.archdata = {
 		.hwblk_id = HWBLK_DMAC1,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
index 05fc38df1582..b12f537e4dde 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7780.c
@@ -233,7 +233,7 @@ static struct platform_device rtc_device = {
 };
 
 /* DMA */
-static struct sh_dmae_channel sh7780_dmae0_channels[] = {
+static const struct sh_dmae_channel sh7780_dmae0_channels[] = {
 	{
 		.offset = 0,
 		.dmars = 0,
@@ -261,7 +261,7 @@ static struct sh_dmae_channel sh7780_dmae0_channels[] = {
 	}
 };
 
-static struct sh_dmae_channel sh7780_dmae1_channels[] = {
+static const struct sh_dmae_channel sh7780_dmae1_channels[] = {
 	{
 		.offset = 0,
 	}, {
@@ -277,7 +277,7 @@ static struct sh_dmae_channel sh7780_dmae1_channels[] = {
 	}
 };
 
-static unsigned int ts_shift[] = TS_SHIFT;
+static const unsigned int ts_shift[] = TS_SHIFT;
 
 static struct sh_dmae_pdata dma0_platform_data = {
 	.channel	= sh7780_dmae0_channels,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
index 07bb2d4619f8..f3e3ea0ce050 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7785.c
@@ -275,7 +275,7 @@ static struct platform_device tmu5_device = {
 };
 
 /* DMA */
-static struct sh_dmae_channel sh7785_dmae0_channels[] = {
+static const struct sh_dmae_channel sh7785_dmae0_channels[] = {
 	{
 		.offset = 0,
 		.dmars = 0,
@@ -303,7 +303,7 @@ static struct sh_dmae_channel sh7785_dmae0_channels[] = {
 	}
 };
 
-static struct sh_dmae_channel sh7785_dmae1_channels[] = {
+static const struct sh_dmae_channel sh7785_dmae1_channels[] = {
 	{
 		.offset = 0,
 	}, {
@@ -319,7 +319,7 @@ static struct sh_dmae_channel sh7785_dmae1_channels[] = {
 	}
 };
 
-static unsigned int ts_shift[] = TS_SHIFT;
+static const unsigned int ts_shift[] = TS_SHIFT;
 
 static struct sh_dmae_pdata dma0_platform_data = {
 	.channel	= sh7785_dmae0_channels,
diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
index d7336036d04d..81657091da46 100644
--- a/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
+++ b/arch/sh/kernel/cpu/sh4a/setup-sh7786.c
@@ -445,7 +445,7 @@ static struct platform_device tmu11_device = {
 	.num_resources	= ARRAY_SIZE(tmu11_resources),
 };
 
-static struct sh_dmae_channel dmac0_channels[] = {
+static const struct sh_dmae_channel dmac0_channels[] = {
 	{
 		.offset = 0,
 		.dmars = 0,
@@ -473,7 +473,7 @@ static struct sh_dmae_channel dmac0_channels[] = {
 	}
 };
 
-static unsigned int ts_shift[] = TS_SHIFT;
+static const unsigned int ts_shift[] = TS_SHIFT;
 
 static struct sh_dmae_pdata dma0_platform_data = {
 	.channel	= dmac0_channels,
diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c
index 7a18b580f626..aab352a63a4a 100644
--- a/drivers/dma/shdma.c
+++ b/drivers/dma/shdma.c
@@ -188,7 +188,7 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val)
 	struct sh_dmae_device *shdev = container_of(sh_chan->common.device,
 						struct sh_dmae_device, common);
 	struct sh_dmae_pdata *pdata = shdev->pdata;
-	struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id];
+	const struct sh_dmae_channel *chan_pdata = &pdata->channel[sh_chan->id];
 	u16 __iomem *addr = shdev->dmars + chan_pdata->dmars / sizeof(u16);
 	int shift = chan_pdata->dmars_bit;
 
@@ -264,7 +264,7 @@ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan)
 	return NULL;
 }
 
-static struct sh_dmae_slave_config *sh_dmae_find_slave(
+static const struct sh_dmae_slave_config *sh_dmae_find_slave(
 	struct sh_dmae_chan *sh_chan, struct sh_dmae_slave *param)
 {
 	struct dma_device *dma_dev = sh_chan->common.device;
@@ -296,7 +296,7 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan)
 	 * never runs concurrently with itself or free_chan_resources.
 	 */
 	if (param) {
-		struct sh_dmae_slave_config *cfg;
+		const struct sh_dmae_slave_config *cfg;
 
 		cfg = sh_dmae_find_slave(sh_chan, param);
 		if (!cfg)
@@ -557,12 +557,14 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 {
 	struct sh_dmae_slave *param;
 	struct sh_dmae_chan *sh_chan;
+	dma_addr_t slave_addr;
 
 	if (!chan)
 		return NULL;
 
 	sh_chan = to_sh_chan(chan);
 	param = chan->private;
+	slave_addr = param->config->addr;
 
 	/* Someone calling slave DMA on a public channel? */
 	if (!param || !sg_len) {
@@ -575,7 +577,7 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_slave_sg(
 	 * if (param != NULL), this is a successfully requested slave channel,
 	 * therefore param->config != NULL too.
 	 */
-	return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &param->config->addr,
+	return sh_dmae_prep_sg(sh_chan, sgl, sg_len, &slave_addr,
 			       direction, flags);
 }
 
@@ -856,7 +858,7 @@ static int __devinit sh_dmae_chan_probe(struct sh_dmae_device *shdev, int id,
 					int irq, unsigned long flags)
 {
 	int err;
-	struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
+	const struct sh_dmae_channel *chan_pdata = &shdev->pdata->channel[id];
 	struct platform_device *pdev = to_platform_device(shdev->common.dev);
 	struct sh_dmae_chan *new_sh_chan;
 
diff --git a/include/linux/sh_dma.h b/include/linux/sh_dma.h
index cdaaff424211..b08cd4efa15c 100644
--- a/include/linux/sh_dma.h
+++ b/include/linux/sh_dma.h
@@ -17,7 +17,7 @@
 struct sh_dmae_slave {
 	unsigned int			slave_id; /* Set by the platform */
 	struct device			*dma_dev; /* Set by the platform */
-	struct sh_dmae_slave_config	*config;  /* Set by the driver */
+	const struct sh_dmae_slave_config	*config;  /* Set by the driver */
 };
 
 struct sh_dmae_regs {
@@ -36,6 +36,7 @@ struct sh_desc {
 	int chunks;
 	int mark;
 };
+
 struct sh_dmae_slave_config {
 	unsigned int			slave_id;
 	dma_addr_t			addr;
@@ -50,15 +51,15 @@ struct sh_dmae_channel {
 };
 
 struct sh_dmae_pdata {
-	struct sh_dmae_slave_config *slave;
+	const struct sh_dmae_slave_config *slave;
 	int slave_num;
-	struct sh_dmae_channel *channel;
+	const struct sh_dmae_channel *channel;
 	int channel_num;
 	unsigned int ts_low_shift;
 	unsigned int ts_low_mask;
 	unsigned int ts_high_shift;
 	unsigned int ts_high_mask;
-	unsigned int *ts_shift;
+	const unsigned int *ts_shift;
 	int ts_shift_num;
 	u16 dmaor_init;
 };
-- 
cgit v1.2.3


From 25239cee7e8732dbdc9f5d324f1c22a3bdec1d1f Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Mon, 26 Apr 2010 16:02:05 +0200
Subject: net: rtnetlink: decouple rtnetlink address families from real address
 families

Decouple rtnetlink address families from real address families in socket.h to
be able to add rtnetlink interfaces to code that is not a real address family
without increasing AF_MAX/NPROTO.

This will be used to add support for multicast route dumping from all tables
as the proc interface can't be extended to support anything but the main table
without breaking compatibility.

This partially undoes the patch to introduce independent families for routing
rules and converts ipmr routing rules to a new rtnetlink family. Similar to
that patch, values up to 127 are reserved for real address families, values
above that may be used arbitrarily.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/fib_rules.h |  8 --------
 include/linux/rtnetlink.h |  6 ++++++
 net/core/rtnetlink.c      | 14 +++++++-------
 net/decnet/dn_rules.c     |  2 +-
 net/ipv4/fib_rules.c      |  2 +-
 net/ipv4/ipmr.c           |  2 +-
 net/ipv6/fib6_rules.c     |  2 +-
 7 files changed, 17 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h
index 04a397619ebe..51da65b68b85 100644
--- a/include/linux/fib_rules.h
+++ b/include/linux/fib_rules.h
@@ -15,14 +15,6 @@
 /* try to find source address in routing lookups */
 #define FIB_RULE_FIND_SADDR	0x00010000
 
-/* fib_rules families. values up to 127 are reserved for real address
- * families, values above 128 may be used arbitrarily.
- */
-#define FIB_RULES_IPV4		AF_INET
-#define FIB_RULES_IPV6		AF_INET6
-#define FIB_RULES_DECNET	AF_DECnet
-#define FIB_RULES_IPMR		128
-
 struct fib_rule_hdr {
 	__u8		family;
 	__u8		dst_len;
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index d1c7c90e9cd4..5a42c36cb6aa 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -7,6 +7,12 @@
 #include <linux/if_addr.h>
 #include <linux/neighbour.h>
 
+/* rtnetlink families. Values up to 127 are reserved for real address
+ * families, values above 128 may be used arbitrarily.
+ */
+#define RTNL_FAMILY_IPMR		128
+#define RTNL_FAMILY_MAX			128
+
 /****
  *		Routing/neighbour discovery messages.
  ****/
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 78c85985cb30..fd781b62fa7f 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -98,7 +98,7 @@ int lockdep_rtnl_is_held(void)
 EXPORT_SYMBOL(lockdep_rtnl_is_held);
 #endif /* #ifdef CONFIG_PROVE_LOCKING */
 
-static struct rtnl_link *rtnl_msg_handlers[NPROTO];
+static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1];
 
 static inline int rtm_msgindex(int msgtype)
 {
@@ -118,7 +118,7 @@ static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	if (protocol < NPROTO)
+	if (protocol <= RTNL_FAMILY_MAX)
 		tab = rtnl_msg_handlers[protocol];
 	else
 		tab = NULL;
@@ -133,7 +133,7 @@ static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex)
 {
 	struct rtnl_link *tab;
 
-	if (protocol < NPROTO)
+	if (protocol <= RTNL_FAMILY_MAX)
 		tab = rtnl_msg_handlers[protocol];
 	else
 		tab = NULL;
@@ -167,7 +167,7 @@ int __rtnl_register(int protocol, int msgtype,
 	struct rtnl_link *tab;
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	tab = rtnl_msg_handlers[protocol];
@@ -219,7 +219,7 @@ int rtnl_unregister(int protocol, int msgtype)
 {
 	int msgindex;
 
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 	msgindex = rtm_msgindex(msgtype);
 
 	if (rtnl_msg_handlers[protocol] == NULL)
@@ -241,7 +241,7 @@ EXPORT_SYMBOL_GPL(rtnl_unregister);
  */
 void rtnl_unregister_all(int protocol)
 {
-	BUG_ON(protocol < 0 || protocol >= NPROTO);
+	BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX);
 
 	kfree(rtnl_msg_handlers[protocol]);
 	rtnl_msg_handlers[protocol] = NULL;
@@ -1384,7 +1384,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb)
 
 	if (s_idx == 0)
 		s_idx = 1;
-	for (idx = 1; idx < NPROTO; idx++) {
+	for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) {
 		int type = cb->nlh->nlmsg_type-RTM_BASE;
 		if (idx < s_idx || idx == PF_PACKET)
 			continue;
diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c
index 1226bcad776b..48fdf10be7a1 100644
--- a/net/decnet/dn_rules.c
+++ b/net/decnet/dn_rules.c
@@ -217,7 +217,7 @@ static void dn_fib_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static const struct fib_rules_ops __net_initdata dn_fib_rules_ops_template = {
-	.family		= FIB_RULES_DECNET,
+	.family		= AF_DECnet,
 	.rule_size	= sizeof(struct dn_fib_rule),
 	.addr_size	= sizeof(u16),
 	.action		= dn_fib_rule_action,
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 8ab62a56701c..76daeb5ff564 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -246,7 +246,7 @@ static void fib4_rule_flush_cache(struct fib_rules_ops *ops)
 }
 
 static const struct fib_rules_ops __net_initdata fib4_rules_ops_template = {
-	.family		= FIB_RULES_IPV4,
+	.family		= AF_INET,
 	.rule_size	= sizeof(struct fib4_rule),
 	.addr_size	= sizeof(u32),
 	.action		= fib4_rule_action,
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 7d3e382aed64..41e8fc0ce8b3 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -217,7 +217,7 @@ static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
 }
 
 static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
-	.family		= FIB_RULES_IPMR,
+	.family		= RTNL_FAMILY_IPMR,
 	.rule_size	= sizeof(struct ipmr_rule),
 	.addr_size	= sizeof(u32),
 	.action		= ipmr_rule_action,
diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c
index 35f6949446f0..8e44f8f9c188 100644
--- a/net/ipv6/fib6_rules.c
+++ b/net/ipv6/fib6_rules.c
@@ -238,7 +238,7 @@ static size_t fib6_rule_nlmsg_payload(struct fib_rule *rule)
 }
 
 static const struct fib_rules_ops __net_initdata fib6_rules_ops_template = {
-	.family			= FIB_RULES_IPV6,
+	.family			= AF_INET6,
 	.rule_size		= sizeof(struct fib6_rule),
 	.addr_size		= sizeof(struct in6_addr),
 	.action			= fib6_rule_action,
-- 
cgit v1.2.3


From d53cdbb94a52a920d5420ed64d986c3523a56743 Mon Sep 17 00:00:00 2001
From: "John W. Linville" <linville@tuxdriver.com>
Date: Wed, 31 Mar 2010 21:39:35 +0200
Subject: ssb: do not read SPROM if it does not exist
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Attempting to read registers that don't exist on the SSB bus can cause
hangs on some boxes.  At least some b43 devices are 'in the wild' that
don't have SPROMs at all.  When the SSB bus support loads, it attempts
to read these (non-existent) SPROMs and causes hard hangs on the box --
no console output, etc.

This patch adds some intelligence to determine whether or not the SPROM
is present before attempting to read it.  This avoids those hard hangs
on those devices with no SPROM attached to their SSB bus.  The
SSB-attached devices (e.g. b43, et al.) won't work, but at least the box
will survive to test further patches. :-)

Signed-off-by: John W. Linville <linville@tuxdriver.com>
Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Cc: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Michael Buesch <mb@bu3sch.de>
---
 drivers/ssb/driver_chipcommon.c           |  2 ++
 drivers/ssb/pci.c                         |  5 +++++
 drivers/ssb/sprom.c                       | 14 ++++++++++++++
 include/linux/ssb/ssb.h                   |  3 +++
 include/linux/ssb/ssb_driver_chipcommon.h | 15 +++++++++++++++
 5 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/ssb/driver_chipcommon.c b/drivers/ssb/driver_chipcommon.c
index 59c3c0fdbecd..59ae76bace14 100644
--- a/drivers/ssb/driver_chipcommon.c
+++ b/drivers/ssb/driver_chipcommon.c
@@ -233,6 +233,8 @@ void ssb_chipcommon_init(struct ssb_chipcommon *cc)
 {
 	if (!cc->dev)
 		return; /* We don't have a ChipCommon */
+	if (cc->dev->id.revision >= 11)
+		cc->status = chipco_read32(cc, SSB_CHIPCO_CHIPSTAT);
 	ssb_pmu_init(cc);
 	chipco_powercontrol_init(cc);
 	ssb_chipco_set_clockmode(cc, SSB_CLKMODE_FAST);
diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 9e50896233aa..a4b2b99f2c80 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -620,6 +620,11 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus,
 	int err = -ENOMEM;
 	u16 *buf;
 
+	if (!ssb_is_sprom_available(bus)) {
+		ssb_printk(KERN_ERR PFX "No SPROM available!\n");
+		return -ENODEV;
+	}
+
 	buf = kcalloc(SSB_SPROMSIZE_WORDS_R123, sizeof(u16), GFP_KERNEL);
 	if (!buf)
 		goto out;
diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c
index d0e6762fec50..83bc088b941d 100644
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -175,3 +175,17 @@ const struct ssb_sprom *ssb_get_fallback_sprom(void)
 {
 	return fallback_sprom;
 }
+
+/* http://bcm-v4.sipsolutions.net/802.11/IsSpromAvailable */
+bool ssb_is_sprom_available(struct ssb_bus *bus)
+{
+	/* status register only exists on chipcomon rev >= 11 and we need check
+	   for >= 31 only */
+	/* this routine differs from specs as we do not access SPROM directly
+	   on PCMCIA */
+	if (bus->bustype == SSB_BUSTYPE_PCI &&
+	    bus->chipco.dev->id.revision >= 31)
+		return bus->chipco.capabilities & SSB_CHIPCO_CAP_SPROM;
+
+	return true;
+}
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 24f988547361..3b4da233e31d 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -394,6 +394,9 @@ extern int ssb_bus_sdiobus_register(struct ssb_bus *bus,
 
 extern void ssb_bus_unregister(struct ssb_bus *bus);
 
+/* Does the device have an SPROM? */
+extern bool ssb_is_sprom_available(struct ssb_bus *bus);
+
 /* Set a fallback SPROM.
  * See kdoc at the function definition for complete documentation. */
 extern int ssb_arch_set_fallback_sprom(const struct ssb_sprom *sprom);
diff --git a/include/linux/ssb/ssb_driver_chipcommon.h b/include/linux/ssb/ssb_driver_chipcommon.h
index 4e27acf0a92f..2cdf249b4e5f 100644
--- a/include/linux/ssb/ssb_driver_chipcommon.h
+++ b/include/linux/ssb/ssb_driver_chipcommon.h
@@ -53,6 +53,7 @@
 #define  SSB_CHIPCO_CAP_64BIT		0x08000000	/* 64-bit Backplane */
 #define  SSB_CHIPCO_CAP_PMU		0x10000000	/* PMU available (rev >= 20) */
 #define  SSB_CHIPCO_CAP_ECI		0x20000000	/* ECI available (rev >= 20) */
+#define  SSB_CHIPCO_CAP_SPROM		0x40000000	/* SPROM present */
 #define SSB_CHIPCO_CORECTL		0x0008
 #define  SSB_CHIPCO_CORECTL_UARTCLK0	0x00000001	/* Drive UART with internal clock */
 #define	 SSB_CHIPCO_CORECTL_SE		0x00000002	/* sync clk out enable (corerev >= 3) */
@@ -385,6 +386,7 @@
 
 
 /** Chip specific Chip-Status register contents. */
+#define SSB_CHIPCO_CHST_4322_SPROM_EXISTS	0x00000040 /* SPROM present */
 #define SSB_CHIPCO_CHST_4325_SPROM_OTP_SEL	0x00000003
 #define SSB_CHIPCO_CHST_4325_DEFCIS_SEL		0 /* OTP is powered up, use def. CIS, no SPROM */
 #define SSB_CHIPCO_CHST_4325_SPROM_SEL		1 /* OTP is powered up, SPROM is present */
@@ -398,6 +400,18 @@
 #define SSB_CHIPCO_CHST_4325_RCAL_VALUE_SHIFT	4
 #define SSB_CHIPCO_CHST_4325_PMUTOP_2B 		0x00000200 /* 1 for 2b, 0 for to 2a */
 
+/** Macros to determine SPROM presence based on Chip-Status register. */
+#define SSB_CHIPCO_CHST_4312_SPROM_PRESENT(status) \
+	((status & SSB_CHIPCO_CHST_4325_SPROM_OTP_SEL) != \
+		SSB_CHIPCO_CHST_4325_OTP_SEL)
+#define SSB_CHIPCO_CHST_4322_SPROM_PRESENT(status) \
+	(status & SSB_CHIPCO_CHST_4322_SPROM_EXISTS)
+#define SSB_CHIPCO_CHST_4325_SPROM_PRESENT(status) \
+	(((status & SSB_CHIPCO_CHST_4325_SPROM_OTP_SEL) != \
+		SSB_CHIPCO_CHST_4325_DEFCIS_SEL) && \
+	 ((status & SSB_CHIPCO_CHST_4325_SPROM_OTP_SEL) != \
+		SSB_CHIPCO_CHST_4325_OTP_SEL))
+
 
 
 /** Clockcontrol masks and values **/
@@ -564,6 +578,7 @@ struct ssb_chipcommon_pmu {
 struct ssb_chipcommon {
 	struct ssb_device *dev;
 	u32 capabilities;
+	u32 status;
 	/* Fast Powerup Delay constant */
 	u16 fast_pwrup_delay;
 	struct ssb_chipcommon_pmu pmu;
-- 
cgit v1.2.3


From ea2db495f92ad2cf3301623e60cb95b4062bc484 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 31 Mar 2010 21:59:21 +0200
Subject: ssb: Look for SPROM at different offset on higher rev CC
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Our offset handling becomes even a little more hackish now. For some reason I
do not understand, all offsets are defined as non-relative. The code assumes the
base offset is 0x1000, but it will work for now as we make offsets relative
anyway by subtracting the base 0x1000. It should be cleaner, however.

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c            | 9 ++++++---
 include/linux/ssb/ssb.h      | 1 +
 include/linux/ssb/ssb_regs.h | 3 ++-
 3 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index a4b2b99f2c80..5bb1278759c1 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -167,7 +167,7 @@ err_pci:
 }
 
 /* Get the word-offset for a SSB_SPROM_XXX define. */
-#define SPOFF(offset)	(((offset) - SSB_SPROM_BASE) / sizeof(u16))
+#define SPOFF(offset)	(((offset) - SSB_SPROM_BASE1) / sizeof(u16))
 /* Helper to extract some _offset, which is one of the SSB_SPROM_XXX defines. */
 #define SPEX16(_outvar, _offset, _mask, _shift)	\
 	out->_outvar = ((in[SPOFF(_offset)] & (_mask)) >> (_shift))
@@ -253,7 +253,7 @@ static int sprom_do_read(struct ssb_bus *bus, u16 *sprom)
 	int i;
 
 	for (i = 0; i < bus->sprom_size; i++)
-		sprom[i] = ioread16(bus->mmio + SSB_SPROM_BASE + (i * 2));
+		sprom[i] = ioread16(bus->mmio + bus->sprom_offset + (i * 2));
 
 	return 0;
 }
@@ -284,7 +284,7 @@ static int sprom_do_write(struct ssb_bus *bus, const u16 *sprom)
 			ssb_printk("75%%");
 		else if (i % 2)
 			ssb_printk(".");
-		writew(sprom[i], bus->mmio + SSB_SPROM_BASE + (i * 2));
+		writew(sprom[i], bus->mmio + bus->sprom_offset + (i * 2));
 		mmiowb();
 		msleep(20);
 	}
@@ -625,6 +625,9 @@ static int ssb_pci_sprom_get(struct ssb_bus *bus,
 		return -ENODEV;
 	}
 
+	bus->sprom_offset = (bus->chipco.dev->id.revision < 31) ?
+		SSB_SPROM_BASE1 : SSB_SPROM_BASE31;
+
 	buf = kcalloc(SSB_SPROMSIZE_WORDS_R123, sizeof(u16), GFP_KERNEL);
 	if (!buf)
 		goto out;
diff --git a/include/linux/ssb/ssb.h b/include/linux/ssb/ssb.h
index 3b4da233e31d..a2608bff9c78 100644
--- a/include/linux/ssb/ssb.h
+++ b/include/linux/ssb/ssb.h
@@ -305,6 +305,7 @@ struct ssb_bus {
 	/* ID information about the Chip. */
 	u16 chip_id;
 	u16 chip_rev;
+	u16 sprom_offset;
 	u16 sprom_size;		/* number of words in sprom */
 	u8 chip_package;
 
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 9ae9082eaeb4..b8be23ce1915 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -170,7 +170,8 @@
 #define SSB_SPROMSIZE_WORDS_R4		220
 #define SSB_SPROMSIZE_BYTES_R123	(SSB_SPROMSIZE_WORDS_R123 * sizeof(u16))
 #define SSB_SPROMSIZE_BYTES_R4		(SSB_SPROMSIZE_WORDS_R4 * sizeof(u16))
-#define SSB_SPROM_BASE			0x1000
+#define SSB_SPROM_BASE1			0x1000
+#define SSB_SPROM_BASE31		0x0800
 #define SSB_SPROM_REVISION		0x107E
 #define  SSB_SPROM_REVISION_REV		0x00FF	/* SPROM Revision number */
 #define  SSB_SPROM_REVISION_CRC		0xFF00	/* SPROM CRC8 value */
-- 
cgit v1.2.3


From 0a182fd88f8180b342f753f04c7d5507b5891c96 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 31 Mar 2010 22:54:18 +0200
Subject: ssb: Use relative offsets for SPROM
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/ssb/pci.c            |   2 +-
 include/linux/ssb/ssb_regs.h | 196 +++++++++++++++++++++----------------------
 2 files changed, 99 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ssb/pci.c b/drivers/ssb/pci.c
index 5bb1278759c1..3f556d6ec4c2 100644
--- a/drivers/ssb/pci.c
+++ b/drivers/ssb/pci.c
@@ -167,7 +167,7 @@ err_pci:
 }
 
 /* Get the word-offset for a SSB_SPROM_XXX define. */
-#define SPOFF(offset)	(((offset) - SSB_SPROM_BASE1) / sizeof(u16))
+#define SPOFF(offset)	((offset) / sizeof(u16))
 /* Helper to extract some _offset, which is one of the SSB_SPROM_XXX defines. */
 #define SPEX16(_outvar, _offset, _mask, _shift)	\
 	out->_outvar = ((in[SPOFF(_offset)] & (_mask)) >> (_shift))
diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index b8be23ce1915..8990e30c657a 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -172,25 +172,25 @@
 #define SSB_SPROMSIZE_BYTES_R4		(SSB_SPROMSIZE_WORDS_R4 * sizeof(u16))
 #define SSB_SPROM_BASE1			0x1000
 #define SSB_SPROM_BASE31		0x0800
-#define SSB_SPROM_REVISION		0x107E
+#define SSB_SPROM_REVISION		0x007E
 #define  SSB_SPROM_REVISION_REV		0x00FF	/* SPROM Revision number */
 #define  SSB_SPROM_REVISION_CRC		0xFF00	/* SPROM CRC8 value */
 #define  SSB_SPROM_REVISION_CRC_SHIFT	8
 
 /* SPROM Revision 1 */
-#define SSB_SPROM1_SPID			0x1004	/* Subsystem Product ID for PCI */
-#define SSB_SPROM1_SVID			0x1006	/* Subsystem Vendor ID for PCI */
-#define SSB_SPROM1_PID			0x1008	/* Product ID for PCI */
-#define SSB_SPROM1_IL0MAC		0x1048	/* 6 bytes MAC address for 802.11b/g */
-#define SSB_SPROM1_ET0MAC		0x104E	/* 6 bytes MAC address for Ethernet */
-#define SSB_SPROM1_ET1MAC		0x1054	/* 6 bytes MAC address for 802.11a */
-#define SSB_SPROM1_ETHPHY		0x105A	/* Ethernet PHY settings */
+#define SSB_SPROM1_SPID			0x0004	/* Subsystem Product ID for PCI */
+#define SSB_SPROM1_SVID			0x0006	/* Subsystem Vendor ID for PCI */
+#define SSB_SPROM1_PID			0x0008	/* Product ID for PCI */
+#define SSB_SPROM1_IL0MAC		0x0048	/* 6 bytes MAC address for 802.11b/g */
+#define SSB_SPROM1_ET0MAC		0x004E	/* 6 bytes MAC address for Ethernet */
+#define SSB_SPROM1_ET1MAC		0x0054	/* 6 bytes MAC address for 802.11a */
+#define SSB_SPROM1_ETHPHY		0x005A	/* Ethernet PHY settings */
 #define  SSB_SPROM1_ETHPHY_ET0A		0x001F	/* MII Address for enet0 */
 #define  SSB_SPROM1_ETHPHY_ET1A		0x03E0	/* MII Address for enet1 */
 #define  SSB_SPROM1_ETHPHY_ET1A_SHIFT	5
 #define  SSB_SPROM1_ETHPHY_ET0M		(1<<14)	/* MDIO for enet0 */
 #define  SSB_SPROM1_ETHPHY_ET1M		(1<<15)	/* MDIO for enet1 */
-#define SSB_SPROM1_BINF			0x105C	/* Board info */
+#define SSB_SPROM1_BINF			0x005C	/* Board info */
 #define  SSB_SPROM1_BINF_BREV		0x00FF	/* Board Revision */
 #define  SSB_SPROM1_BINF_CCODE		0x0F00	/* Country Code */
 #define  SSB_SPROM1_BINF_CCODE_SHIFT	8
@@ -198,63 +198,63 @@
 #define  SSB_SPROM1_BINF_ANTBG_SHIFT	12
 #define  SSB_SPROM1_BINF_ANTA		0xC000	/* Available A-PHY antennas */
 #define  SSB_SPROM1_BINF_ANTA_SHIFT	14
-#define SSB_SPROM1_PA0B0		0x105E
-#define SSB_SPROM1_PA0B1		0x1060
-#define SSB_SPROM1_PA0B2		0x1062
-#define SSB_SPROM1_GPIOA		0x1064	/* General Purpose IO pins 0 and 1 */
+#define SSB_SPROM1_PA0B0		0x005E
+#define SSB_SPROM1_PA0B1		0x0060
+#define SSB_SPROM1_PA0B2		0x0062
+#define SSB_SPROM1_GPIOA		0x0064	/* General Purpose IO pins 0 and 1 */
 #define  SSB_SPROM1_GPIOA_P0		0x00FF	/* Pin 0 */
 #define  SSB_SPROM1_GPIOA_P1		0xFF00	/* Pin 1 */
 #define  SSB_SPROM1_GPIOA_P1_SHIFT	8
-#define SSB_SPROM1_GPIOB		0x1066	/* General Purpuse IO pins 2 and 3 */
+#define SSB_SPROM1_GPIOB		0x0066	/* General Purpuse IO pins 2 and 3 */
 #define  SSB_SPROM1_GPIOB_P2		0x00FF	/* Pin 2 */
 #define  SSB_SPROM1_GPIOB_P3		0xFF00	/* Pin 3 */
 #define  SSB_SPROM1_GPIOB_P3_SHIFT	8
-#define SSB_SPROM1_MAXPWR		0x1068	/* Power Amplifier Max Power */
+#define SSB_SPROM1_MAXPWR		0x0068	/* Power Amplifier Max Power */
 #define  SSB_SPROM1_MAXPWR_BG		0x00FF	/* B-PHY and G-PHY (in dBm Q5.2) */
 #define  SSB_SPROM1_MAXPWR_A		0xFF00	/* A-PHY (in dBm Q5.2) */
 #define  SSB_SPROM1_MAXPWR_A_SHIFT	8
-#define SSB_SPROM1_PA1B0		0x106A
-#define SSB_SPROM1_PA1B1		0x106C
-#define SSB_SPROM1_PA1B2		0x106E
-#define SSB_SPROM1_ITSSI		0x1070	/* Idle TSSI Target */
+#define SSB_SPROM1_PA1B0		0x006A
+#define SSB_SPROM1_PA1B1		0x006C
+#define SSB_SPROM1_PA1B2		0x006E
+#define SSB_SPROM1_ITSSI		0x0070	/* Idle TSSI Target */
 #define  SSB_SPROM1_ITSSI_BG		0x00FF	/* B-PHY and G-PHY*/
 #define  SSB_SPROM1_ITSSI_A		0xFF00	/* A-PHY */
 #define  SSB_SPROM1_ITSSI_A_SHIFT	8
-#define SSB_SPROM1_BFLLO		0x1072	/* Boardflags (low 16 bits) */
-#define SSB_SPROM1_AGAIN		0x1074	/* Antenna Gain (in dBm Q5.2) */
+#define SSB_SPROM1_BFLLO		0x0072	/* Boardflags (low 16 bits) */
+#define SSB_SPROM1_AGAIN		0x0074	/* Antenna Gain (in dBm Q5.2) */
 #define  SSB_SPROM1_AGAIN_BG		0x00FF	/* B-PHY and G-PHY */
 #define  SSB_SPROM1_AGAIN_BG_SHIFT	0
 #define  SSB_SPROM1_AGAIN_A		0xFF00	/* A-PHY */
 #define  SSB_SPROM1_AGAIN_A_SHIFT	8
 
 /* SPROM Revision 2 (inherits from rev 1) */
-#define SSB_SPROM2_BFLHI		0x1038	/* Boardflags (high 16 bits) */
-#define SSB_SPROM2_MAXP_A		0x103A	/* A-PHY Max Power */
+#define SSB_SPROM2_BFLHI		0x0038	/* Boardflags (high 16 bits) */
+#define SSB_SPROM2_MAXP_A		0x003A	/* A-PHY Max Power */
 #define  SSB_SPROM2_MAXP_A_HI		0x00FF	/* Max Power High */
 #define  SSB_SPROM2_MAXP_A_LO		0xFF00	/* Max Power Low */
 #define  SSB_SPROM2_MAXP_A_LO_SHIFT	8
-#define SSB_SPROM2_PA1LOB0		0x103C	/* A-PHY PowerAmplifier Low Settings */
-#define SSB_SPROM2_PA1LOB1		0x103E	/* A-PHY PowerAmplifier Low Settings */
-#define SSB_SPROM2_PA1LOB2		0x1040	/* A-PHY PowerAmplifier Low Settings */
-#define SSB_SPROM2_PA1HIB0		0x1042	/* A-PHY PowerAmplifier High Settings */
-#define SSB_SPROM2_PA1HIB1		0x1044	/* A-PHY PowerAmplifier High Settings */
-#define SSB_SPROM2_PA1HIB2		0x1046	/* A-PHY PowerAmplifier High Settings */
-#define SSB_SPROM2_OPO			0x1078	/* OFDM Power Offset from CCK Level */
+#define SSB_SPROM2_PA1LOB0		0x003C	/* A-PHY PowerAmplifier Low Settings */
+#define SSB_SPROM2_PA1LOB1		0x003E	/* A-PHY PowerAmplifier Low Settings */
+#define SSB_SPROM2_PA1LOB2		0x0040	/* A-PHY PowerAmplifier Low Settings */
+#define SSB_SPROM2_PA1HIB0		0x0042	/* A-PHY PowerAmplifier High Settings */
+#define SSB_SPROM2_PA1HIB1		0x0044	/* A-PHY PowerAmplifier High Settings */
+#define SSB_SPROM2_PA1HIB2		0x0046	/* A-PHY PowerAmplifier High Settings */
+#define SSB_SPROM2_OPO			0x0078	/* OFDM Power Offset from CCK Level */
 #define  SSB_SPROM2_OPO_VALUE		0x00FF
 #define  SSB_SPROM2_OPO_UNUSED		0xFF00
-#define SSB_SPROM2_CCODE		0x107C	/* Two char Country Code */
+#define SSB_SPROM2_CCODE		0x007C	/* Two char Country Code */
 
 /* SPROM Revision 3 (inherits most data from rev 2) */
-#define SSB_SPROM3_IL0MAC		0x104A	/* 6 bytes MAC address for 802.11b/g */
-#define SSB_SPROM3_OFDMAPO		0x102C	/* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */
-#define SSB_SPROM3_OFDMALPO		0x1030	/* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */
-#define SSB_SPROM3_OFDMAHPO		0x1034	/* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */
-#define SSB_SPROM3_GPIOLDC		0x1042	/* GPIO LED Powersave Duty Cycle (4 bytes, BigEndian) */
+#define SSB_SPROM3_IL0MAC		0x004A	/* 6 bytes MAC address for 802.11b/g */
+#define SSB_SPROM3_OFDMAPO		0x002C	/* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */
+#define SSB_SPROM3_OFDMALPO		0x0030	/* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */
+#define SSB_SPROM3_OFDMAHPO		0x0034	/* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */
+#define SSB_SPROM3_GPIOLDC		0x0042	/* GPIO LED Powersave Duty Cycle (4 bytes, BigEndian) */
 #define  SSB_SPROM3_GPIOLDC_OFF		0x0000FF00	/* Off Count */
 #define  SSB_SPROM3_GPIOLDC_OFF_SHIFT	8
 #define  SSB_SPROM3_GPIOLDC_ON		0x00FF0000	/* On Count */
 #define  SSB_SPROM3_GPIOLDC_ON_SHIFT	16
-#define SSB_SPROM3_CCKPO		0x1078	/* CCK Power Offset */
+#define SSB_SPROM3_CCKPO		0x0078	/* CCK Power Offset */
 #define  SSB_SPROM3_CCKPO_1M		0x000F	/* 1M Rate PO */
 #define  SSB_SPROM3_CCKPO_2M		0x00F0	/* 2M Rate PO */
 #define  SSB_SPROM3_CCKPO_2M_SHIFT	4
@@ -265,100 +265,100 @@
 #define  SSB_SPROM3_OFDMGPO		0x107A	/* G-PHY OFDM Power Offset (4 bytes, BigEndian) */
 
 /* SPROM Revision 4 */
-#define SSB_SPROM4_IL0MAC		0x104C	/* 6 byte MAC address for a/b/g/n */
-#define SSB_SPROM4_ETHPHY		0x105A	/* Ethernet PHY settings ?? */
+#define SSB_SPROM4_IL0MAC		0x004C	/* 6 byte MAC address for a/b/g/n */
+#define SSB_SPROM4_ETHPHY		0x005A	/* Ethernet PHY settings ?? */
 #define  SSB_SPROM4_ETHPHY_ET0A		0x001F	/* MII Address for enet0 */
 #define  SSB_SPROM4_ETHPHY_ET1A		0x03E0	/* MII Address for enet1 */
 #define  SSB_SPROM4_ETHPHY_ET1A_SHIFT	5
 #define  SSB_SPROM4_ETHPHY_ET0M		(1<<14)	/* MDIO for enet0 */
 #define  SSB_SPROM4_ETHPHY_ET1M		(1<<15)	/* MDIO for enet1 */
-#define SSB_SPROM4_CCODE		0x1052	/* Country Code (2 bytes) */
-#define SSB_SPROM4_ANTAVAIL		0x105D  /* Antenna available bitfields */
+#define SSB_SPROM4_CCODE		0x0052	/* Country Code (2 bytes) */
+#define SSB_SPROM4_ANTAVAIL		0x005D  /* Antenna available bitfields */
 #define SSB_SPROM4_ANTAVAIL_A		0x00FF	/* A-PHY bitfield */
 #define SSB_SPROM4_ANTAVAIL_A_SHIFT	0
 #define SSB_SPROM4_ANTAVAIL_BG		0xFF00	/* B-PHY and G-PHY bitfield */
 #define SSB_SPROM4_ANTAVAIL_BG_SHIFT	8
-#define SSB_SPROM4_BFLLO		0x1044	/* Boardflags (low 16 bits) */
-#define SSB_SPROM4_AGAIN01		0x105E	/* Antenna Gain (in dBm Q5.2) */
+#define SSB_SPROM4_BFLLO		0x0044	/* Boardflags (low 16 bits) */
+#define SSB_SPROM4_AGAIN01		0x005E	/* Antenna Gain (in dBm Q5.2) */
 #define  SSB_SPROM4_AGAIN0		0x00FF	/* Antenna 0 */
 #define  SSB_SPROM4_AGAIN0_SHIFT	0
 #define  SSB_SPROM4_AGAIN1		0xFF00	/* Antenna 1 */
 #define  SSB_SPROM4_AGAIN1_SHIFT	8
-#define SSB_SPROM4_AGAIN23		0x1060
+#define SSB_SPROM4_AGAIN23		0x0060
 #define  SSB_SPROM4_AGAIN2		0x00FF	/* Antenna 2 */
 #define  SSB_SPROM4_AGAIN2_SHIFT	0
 #define  SSB_SPROM4_AGAIN3		0xFF00	/* Antenna 3 */
 #define  SSB_SPROM4_AGAIN3_SHIFT	8
-#define SSB_SPROM4_BFLHI		0x1046  /* Board Flags Hi */
-#define SSB_SPROM4_MAXP_BG		0x1080  /* Max Power BG in path 1 */
+#define SSB_SPROM4_BFLHI		0x0046  /* Board Flags Hi */
+#define SSB_SPROM4_MAXP_BG		0x0080  /* Max Power BG in path 1 */
 #define  SSB_SPROM4_MAXP_BG_MASK	0x00FF  /* Mask for Max Power BG */
 #define  SSB_SPROM4_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
 #define  SSB_SPROM4_ITSSI_BG_SHIFT	8
-#define SSB_SPROM4_MAXP_A		0x108A  /* Max Power A in path 1 */
+#define SSB_SPROM4_MAXP_A		0x008A  /* Max Power A in path 1 */
 #define  SSB_SPROM4_MAXP_A_MASK		0x00FF  /* Mask for Max Power A */
 #define  SSB_SPROM4_ITSSI_A		0xFF00	/* Mask for path 1 itssi_a */
 #define  SSB_SPROM4_ITSSI_A_SHIFT	8
-#define SSB_SPROM4_GPIOA		0x1056	/* Gen. Purpose IO # 0 and 1 */
+#define SSB_SPROM4_GPIOA		0x0056	/* Gen. Purpose IO # 0 and 1 */
 #define  SSB_SPROM4_GPIOA_P0		0x00FF	/* Pin 0 */
 #define  SSB_SPROM4_GPIOA_P1		0xFF00	/* Pin 1 */
 #define  SSB_SPROM4_GPIOA_P1_SHIFT	8
-#define SSB_SPROM4_GPIOB		0x1058	/* Gen. Purpose IO # 2 and 3 */
+#define SSB_SPROM4_GPIOB		0x0058	/* Gen. Purpose IO # 2 and 3 */
 #define  SSB_SPROM4_GPIOB_P2		0x00FF	/* Pin 2 */
 #define  SSB_SPROM4_GPIOB_P3		0xFF00	/* Pin 3 */
 #define  SSB_SPROM4_GPIOB_P3_SHIFT	8
-#define SSB_SPROM4_PA0B0		0x1082	/* The paXbY locations are */
-#define SSB_SPROM4_PA0B1		0x1084	/*   only guesses */
-#define SSB_SPROM4_PA0B2		0x1086
-#define SSB_SPROM4_PA1B0		0x108E
-#define SSB_SPROM4_PA1B1		0x1090
-#define SSB_SPROM4_PA1B2		0x1092
+#define SSB_SPROM4_PA0B0		0x0082	/* The paXbY locations are */
+#define SSB_SPROM4_PA0B1		0x0084	/*   only guesses */
+#define SSB_SPROM4_PA0B2		0x0086
+#define SSB_SPROM4_PA1B0		0x008E
+#define SSB_SPROM4_PA1B1		0x0090
+#define SSB_SPROM4_PA1B2		0x0092
 
 /* SPROM Revision 5 (inherits most data from rev 4) */
-#define SSB_SPROM5_BFLLO		0x104A	/* Boardflags (low 16 bits) */
-#define SSB_SPROM5_BFLHI		0x104C  /* Board Flags Hi */
-#define SSB_SPROM5_IL0MAC		0x1052	/* 6 byte MAC address for a/b/g/n */
-#define SSB_SPROM5_CCODE		0x1044	/* Country Code (2 bytes) */
-#define SSB_SPROM5_GPIOA		0x1076	/* Gen. Purpose IO # 0 and 1 */
+#define SSB_SPROM5_BFLLO		0x004A	/* Boardflags (low 16 bits) */
+#define SSB_SPROM5_BFLHI		0x004C  /* Board Flags Hi */
+#define SSB_SPROM5_IL0MAC		0x0052	/* 6 byte MAC address for a/b/g/n */
+#define SSB_SPROM5_CCODE		0x0044	/* Country Code (2 bytes) */
+#define SSB_SPROM5_GPIOA		0x0076	/* Gen. Purpose IO # 0 and 1 */
 #define  SSB_SPROM5_GPIOA_P0		0x00FF	/* Pin 0 */
 #define  SSB_SPROM5_GPIOA_P1		0xFF00	/* Pin 1 */
 #define  SSB_SPROM5_GPIOA_P1_SHIFT	8
-#define SSB_SPROM5_GPIOB		0x1078	/* Gen. Purpose IO # 2 and 3 */
+#define SSB_SPROM5_GPIOB		0x0078	/* Gen. Purpose IO # 2 and 3 */
 #define  SSB_SPROM5_GPIOB_P2		0x00FF	/* Pin 2 */
 #define  SSB_SPROM5_GPIOB_P3		0xFF00	/* Pin 3 */
 #define  SSB_SPROM5_GPIOB_P3_SHIFT	8
 
 /* SPROM Revision 8 */
-#define SSB_SPROM8_BOARDREV		0x1082	/* Board revision */
-#define SSB_SPROM8_BFLLO		0x1084	/* Board flags (bits 0-15) */
-#define SSB_SPROM8_BFLHI		0x1086	/* Board flags (bits 16-31) */
-#define SSB_SPROM8_BFL2LO		0x1088	/* Board flags (bits 32-47) */
-#define SSB_SPROM8_BFL2HI		0x108A	/* Board flags (bits 48-63) */
-#define SSB_SPROM8_IL0MAC		0x108C	/* 6 byte MAC address */
-#define SSB_SPROM8_CCODE		0x1092	/* 2 byte country code */
-#define SSB_SPROM8_ANTAVAIL		0x109C  /* Antenna available bitfields*/
+#define SSB_SPROM8_BOARDREV		0x0082	/* Board revision */
+#define SSB_SPROM8_BFLLO		0x0084	/* Board flags (bits 0-15) */
+#define SSB_SPROM8_BFLHI		0x0086	/* Board flags (bits 16-31) */
+#define SSB_SPROM8_BFL2LO		0x0088	/* Board flags (bits 32-47) */
+#define SSB_SPROM8_BFL2HI		0x008A	/* Board flags (bits 48-63) */
+#define SSB_SPROM8_IL0MAC		0x008C	/* 6 byte MAC address */
+#define SSB_SPROM8_CCODE		0x0092	/* 2 byte country code */
+#define SSB_SPROM8_ANTAVAIL		0x009C  /* Antenna available bitfields*/
 #define SSB_SPROM8_ANTAVAIL_A		0xFF00	/* A-PHY bitfield */
 #define SSB_SPROM8_ANTAVAIL_A_SHIFT	8
 #define SSB_SPROM8_ANTAVAIL_BG		0x00FF	/* B-PHY and G-PHY bitfield */
 #define SSB_SPROM8_ANTAVAIL_BG_SHIFT	0
-#define SSB_SPROM8_AGAIN01		0x109E	/* Antenna Gain (in dBm Q5.2) */
+#define SSB_SPROM8_AGAIN01		0x009E	/* Antenna Gain (in dBm Q5.2) */
 #define  SSB_SPROM8_AGAIN0		0x00FF	/* Antenna 0 */
 #define  SSB_SPROM8_AGAIN0_SHIFT	0
 #define  SSB_SPROM8_AGAIN1		0xFF00	/* Antenna 1 */
 #define  SSB_SPROM8_AGAIN1_SHIFT	8
-#define SSB_SPROM8_AGAIN23		0x10A0
+#define SSB_SPROM8_AGAIN23		0x00A0
 #define  SSB_SPROM8_AGAIN2		0x00FF	/* Antenna 2 */
 #define  SSB_SPROM8_AGAIN2_SHIFT	0
 #define  SSB_SPROM8_AGAIN3		0xFF00	/* Antenna 3 */
 #define  SSB_SPROM8_AGAIN3_SHIFT	8
-#define SSB_SPROM8_GPIOA		0x1096	/*Gen. Purpose IO # 0 and 1 */
+#define SSB_SPROM8_GPIOA		0x0096	/*Gen. Purpose IO # 0 and 1 */
 #define  SSB_SPROM8_GPIOA_P0		0x00FF	/* Pin 0 */
 #define  SSB_SPROM8_GPIOA_P1		0xFF00	/* Pin 1 */
 #define  SSB_SPROM8_GPIOA_P1_SHIFT	8
-#define SSB_SPROM8_GPIOB		0x1098	/* Gen. Purpose IO # 2 and 3 */
+#define SSB_SPROM8_GPIOB		0x0098	/* Gen. Purpose IO # 2 and 3 */
 #define  SSB_SPROM8_GPIOB_P2		0x00FF	/* Pin 2 */
 #define  SSB_SPROM8_GPIOB_P3		0xFF00	/* Pin 3 */
 #define  SSB_SPROM8_GPIOB_P3_SHIFT	8
-#define SSB_SPROM8_RSSIPARM2G		0x10A4	/* RSSI params for 2GHz */
+#define SSB_SPROM8_RSSIPARM2G		0x00A4	/* RSSI params for 2GHz */
 #define  SSB_SPROM8_RSSISMF2G		0x000F
 #define  SSB_SPROM8_RSSISMC2G		0x00F0
 #define  SSB_SPROM8_RSSISMC2G_SHIFT	4
@@ -366,7 +366,7 @@
 #define  SSB_SPROM8_RSSISAV2G_SHIFT	8
 #define  SSB_SPROM8_BXA2G		0x1800
 #define  SSB_SPROM8_BXA2G_SHIFT		11
-#define SSB_SPROM8_RSSIPARM5G		0x10A6	/* RSSI params for 5GHz */
+#define SSB_SPROM8_RSSIPARM5G		0x00A6	/* RSSI params for 5GHz */
 #define  SSB_SPROM8_RSSISMF5G		0x000F
 #define  SSB_SPROM8_RSSISMC5G		0x00F0
 #define  SSB_SPROM8_RSSISMC5G_SHIFT	4
@@ -374,47 +374,47 @@
 #define  SSB_SPROM8_RSSISAV5G_SHIFT	8
 #define  SSB_SPROM8_BXA5G		0x1800
 #define  SSB_SPROM8_BXA5G_SHIFT		11
-#define SSB_SPROM8_TRI25G		0x10A8	/* TX isolation 2.4&5.3GHz */
+#define SSB_SPROM8_TRI25G		0x00A8	/* TX isolation 2.4&5.3GHz */
 #define  SSB_SPROM8_TRI2G		0x00FF	/* TX isolation 2.4GHz */
 #define  SSB_SPROM8_TRI5G		0xFF00	/* TX isolation 5.3GHz */
 #define  SSB_SPROM8_TRI5G_SHIFT		8
-#define SSB_SPROM8_TRI5GHL		0x10AA	/* TX isolation 5.2/5.8GHz */
+#define SSB_SPROM8_TRI5GHL		0x00AA	/* TX isolation 5.2/5.8GHz */
 #define  SSB_SPROM8_TRI5GL		0x00FF	/* TX isolation 5.2GHz */
 #define  SSB_SPROM8_TRI5GH		0xFF00	/* TX isolation 5.8GHz */
 #define  SSB_SPROM8_TRI5GH_SHIFT	8
-#define SSB_SPROM8_RXPO			0x10AC  /* RX power offsets */
+#define SSB_SPROM8_RXPO			0x00AC  /* RX power offsets */
 #define  SSB_SPROM8_RXPO2G		0x00FF	/* 2GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G		0xFF00	/* 5GHz RX power offset */
 #define  SSB_SPROM8_RXPO5G_SHIFT	8
-#define SSB_SPROM8_MAXP_BG		0x10C0  /* Max Power 2GHz in path 1 */
+#define SSB_SPROM8_MAXP_BG		0x00C0  /* Max Power 2GHz in path 1 */
 #define  SSB_SPROM8_MAXP_BG_MASK	0x00FF  /* Mask for Max Power 2GHz */
 #define  SSB_SPROM8_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
 #define  SSB_SPROM8_ITSSI_BG_SHIFT	8
-#define SSB_SPROM8_PA0B0		0x10C2	/* 2GHz power amp settings */
-#define SSB_SPROM8_PA0B1		0x10C4
-#define SSB_SPROM8_PA0B2		0x10C6
-#define SSB_SPROM8_MAXP_A		0x10C8  /* Max Power 5.3GHz */
+#define SSB_SPROM8_PA0B0		0x00C2	/* 2GHz power amp settings */
+#define SSB_SPROM8_PA0B1		0x00C4
+#define SSB_SPROM8_PA0B2		0x00C6
+#define SSB_SPROM8_MAXP_A		0x00C8  /* Max Power 5.3GHz */
 #define  SSB_SPROM8_MAXP_A_MASK		0x00FF  /* Mask for Max Power 5.3GHz */
 #define  SSB_SPROM8_ITSSI_A		0xFF00	/* Mask for path 1 itssi_a */
 #define  SSB_SPROM8_ITSSI_A_SHIFT	8
-#define SSB_SPROM8_MAXP_AHL		0x10CA  /* Max Power 5.2/5.8GHz */
+#define SSB_SPROM8_MAXP_AHL		0x00CA  /* Max Power 5.2/5.8GHz */
 #define  SSB_SPROM8_MAXP_AH_MASK	0x00FF  /* Mask for Max Power 5.8GHz */
 #define  SSB_SPROM8_MAXP_AL_MASK	0xFF00  /* Mask for Max Power 5.2GHz */
 #define  SSB_SPROM8_MAXP_AL_SHIFT	8
-#define SSB_SPROM8_PA1B0		0x10CC	/* 5.3GHz power amp settings */
-#define SSB_SPROM8_PA1B1		0x10CE
-#define SSB_SPROM8_PA1B2		0x10D0
-#define SSB_SPROM8_PA1LOB0		0x10D2	/* 5.2GHz power amp settings */
-#define SSB_SPROM8_PA1LOB1		0x10D4
-#define SSB_SPROM8_PA1LOB2		0x10D6
-#define SSB_SPROM8_PA1HIB0		0x10D8	/* 5.8GHz power amp settings */
-#define SSB_SPROM8_PA1HIB1		0x10DA
-#define SSB_SPROM8_PA1HIB2		0x10DC
-#define SSB_SPROM8_CCK2GPO		0x1140	/* CCK power offset */
-#define SSB_SPROM8_OFDM2GPO		0x1142	/* 2.4GHz OFDM power offset */
-#define SSB_SPROM8_OFDM5GPO		0x1146	/* 5.3GHz OFDM power offset */
-#define SSB_SPROM8_OFDM5GLPO		0x114A	/* 5.2GHz OFDM power offset */
-#define SSB_SPROM8_OFDM5GHPO		0x114E	/* 5.8GHz OFDM power offset */
+#define SSB_SPROM8_PA1B0		0x00CC	/* 5.3GHz power amp settings */
+#define SSB_SPROM8_PA1B1		0x00CE
+#define SSB_SPROM8_PA1B2		0x00D0
+#define SSB_SPROM8_PA1LOB0		0x00D2	/* 5.2GHz power amp settings */
+#define SSB_SPROM8_PA1LOB1		0x00D4
+#define SSB_SPROM8_PA1LOB2		0x00D6
+#define SSB_SPROM8_PA1HIB0		0x00D8	/* 5.8GHz power amp settings */
+#define SSB_SPROM8_PA1HIB1		0x00DA
+#define SSB_SPROM8_PA1HIB2		0x00DC
+#define SSB_SPROM8_CCK2GPO		0x0140	/* CCK power offset */
+#define SSB_SPROM8_OFDM2GPO		0x0142	/* 2.4GHz OFDM power offset */
+#define SSB_SPROM8_OFDM5GPO		0x0146	/* 5.3GHz OFDM power offset */
+#define SSB_SPROM8_OFDM5GLPO		0x014A	/* 5.2GHz OFDM power offset */
+#define SSB_SPROM8_OFDM5GHPO		0x014E	/* 5.8GHz OFDM power offset */
 
 /* Values for SSB_SPROM1_BINF_CCODE */
 enum {
-- 
cgit v1.2.3


From 5af55428858a45d94893fd6124d60988e89c0d59 Mon Sep 17 00:00:00 2001
From: Rafał Miłecki <zajec5@gmail.com>
Date: Wed, 31 Mar 2010 22:54:19 +0200
Subject: ssb: Fix order of definitions and some text space indents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Rafał Miłecki <zajec5@gmail.com>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/ssb/ssb_regs.h | 58 ++++++++++++++++++++++----------------------
 1 file changed, 29 insertions(+), 29 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h
index 8990e30c657a..a6d5225b9275 100644
--- a/include/linux/ssb/ssb_regs.h
+++ b/include/linux/ssb/ssb_regs.h
@@ -245,7 +245,6 @@
 #define SSB_SPROM2_CCODE		0x007C	/* Two char Country Code */
 
 /* SPROM Revision 3 (inherits most data from rev 2) */
-#define SSB_SPROM3_IL0MAC		0x004A	/* 6 bytes MAC address for 802.11b/g */
 #define SSB_SPROM3_OFDMAPO		0x002C	/* A-PHY OFDM Mid Power Offset (4 bytes, BigEndian) */
 #define SSB_SPROM3_OFDMALPO		0x0030	/* A-PHY OFDM Low Power Offset (4 bytes, BigEndian) */
 #define SSB_SPROM3_OFDMAHPO		0x0034	/* A-PHY OFDM High Power Offset (4 bytes, BigEndian) */
@@ -254,6 +253,7 @@
 #define  SSB_SPROM3_GPIOLDC_OFF_SHIFT	8
 #define  SSB_SPROM3_GPIOLDC_ON		0x00FF0000	/* On Count */
 #define  SSB_SPROM3_GPIOLDC_ON_SHIFT	16
+#define SSB_SPROM3_IL0MAC		0x004A	/* 6 bytes MAC address for 802.11b/g */
 #define SSB_SPROM3_CCKPO		0x0078	/* CCK Power Offset */
 #define  SSB_SPROM3_CCKPO_1M		0x000F	/* 1M Rate PO */
 #define  SSB_SPROM3_CCKPO_2M		0x00F0	/* 2M Rate PO */
@@ -265,20 +265,29 @@
 #define  SSB_SPROM3_OFDMGPO		0x107A	/* G-PHY OFDM Power Offset (4 bytes, BigEndian) */
 
 /* SPROM Revision 4 */
+#define SSB_SPROM4_BFLLO		0x0044	/* Boardflags (low 16 bits) */
+#define SSB_SPROM4_BFLHI		0x0046  /* Board Flags Hi */
 #define SSB_SPROM4_IL0MAC		0x004C	/* 6 byte MAC address for a/b/g/n */
+#define SSB_SPROM4_CCODE		0x0052	/* Country Code (2 bytes) */
+#define SSB_SPROM4_GPIOA		0x0056	/* Gen. Purpose IO # 0 and 1 */
+#define  SSB_SPROM4_GPIOA_P0		0x00FF	/* Pin 0 */
+#define  SSB_SPROM4_GPIOA_P1		0xFF00	/* Pin 1 */
+#define  SSB_SPROM4_GPIOA_P1_SHIFT	8
+#define SSB_SPROM4_GPIOB		0x0058	/* Gen. Purpose IO # 2 and 3 */
+#define  SSB_SPROM4_GPIOB_P2		0x00FF	/* Pin 2 */
+#define  SSB_SPROM4_GPIOB_P3		0xFF00	/* Pin 3 */
+#define  SSB_SPROM4_GPIOB_P3_SHIFT	8
 #define SSB_SPROM4_ETHPHY		0x005A	/* Ethernet PHY settings ?? */
 #define  SSB_SPROM4_ETHPHY_ET0A		0x001F	/* MII Address for enet0 */
 #define  SSB_SPROM4_ETHPHY_ET1A		0x03E0	/* MII Address for enet1 */
 #define  SSB_SPROM4_ETHPHY_ET1A_SHIFT	5
 #define  SSB_SPROM4_ETHPHY_ET0M		(1<<14)	/* MDIO for enet0 */
 #define  SSB_SPROM4_ETHPHY_ET1M		(1<<15)	/* MDIO for enet1 */
-#define SSB_SPROM4_CCODE		0x0052	/* Country Code (2 bytes) */
 #define SSB_SPROM4_ANTAVAIL		0x005D  /* Antenna available bitfields */
-#define SSB_SPROM4_ANTAVAIL_A		0x00FF	/* A-PHY bitfield */
-#define SSB_SPROM4_ANTAVAIL_A_SHIFT	0
-#define SSB_SPROM4_ANTAVAIL_BG		0xFF00	/* B-PHY and G-PHY bitfield */
-#define SSB_SPROM4_ANTAVAIL_BG_SHIFT	8
-#define SSB_SPROM4_BFLLO		0x0044	/* Boardflags (low 16 bits) */
+#define  SSB_SPROM4_ANTAVAIL_A		0x00FF	/* A-PHY bitfield */
+#define  SSB_SPROM4_ANTAVAIL_A_SHIFT	0
+#define  SSB_SPROM4_ANTAVAIL_BG		0xFF00	/* B-PHY and G-PHY bitfield */
+#define  SSB_SPROM4_ANTAVAIL_BG_SHIFT	8
 #define SSB_SPROM4_AGAIN01		0x005E	/* Antenna Gain (in dBm Q5.2) */
 #define  SSB_SPROM4_AGAIN0		0x00FF	/* Antenna 0 */
 #define  SSB_SPROM4_AGAIN0_SHIFT	0
@@ -289,7 +298,6 @@
 #define  SSB_SPROM4_AGAIN2_SHIFT	0
 #define  SSB_SPROM4_AGAIN3		0xFF00	/* Antenna 3 */
 #define  SSB_SPROM4_AGAIN3_SHIFT	8
-#define SSB_SPROM4_BFLHI		0x0046  /* Board Flags Hi */
 #define SSB_SPROM4_MAXP_BG		0x0080  /* Max Power BG in path 1 */
 #define  SSB_SPROM4_MAXP_BG_MASK	0x00FF  /* Mask for Max Power BG */
 #define  SSB_SPROM4_ITSSI_BG		0xFF00	/* Mask for path 1 itssi_bg */
@@ -298,14 +306,6 @@
 #define  SSB_SPROM4_MAXP_A_MASK		0x00FF  /* Mask for Max Power A */
 #define  SSB_SPROM4_ITSSI_A		0xFF00	/* Mask for path 1 itssi_a */
 #define  SSB_SPROM4_ITSSI_A_SHIFT	8
-#define SSB_SPROM4_GPIOA		0x0056	/* Gen. Purpose IO # 0 and 1 */
-#define  SSB_SPROM4_GPIOA_P0		0x00FF	/* Pin 0 */
-#define  SSB_SPROM4_GPIOA_P1		0xFF00	/* Pin 1 */
-#define  SSB_SPROM4_GPIOA_P1_SHIFT	8
-#define SSB_SPROM4_GPIOB		0x0058	/* Gen. Purpose IO # 2 and 3 */
-#define  SSB_SPROM4_GPIOB_P2		0x00FF	/* Pin 2 */
-#define  SSB_SPROM4_GPIOB_P3		0xFF00	/* Pin 3 */
-#define  SSB_SPROM4_GPIOB_P3_SHIFT	8
 #define SSB_SPROM4_PA0B0		0x0082	/* The paXbY locations are */
 #define SSB_SPROM4_PA0B1		0x0084	/*   only guesses */
 #define SSB_SPROM4_PA0B2		0x0086
@@ -314,10 +314,10 @@
 #define SSB_SPROM4_PA1B2		0x0092
 
 /* SPROM Revision 5 (inherits most data from rev 4) */
+#define SSB_SPROM5_CCODE		0x0044	/* Country Code (2 bytes) */
 #define SSB_SPROM5_BFLLO		0x004A	/* Boardflags (low 16 bits) */
 #define SSB_SPROM5_BFLHI		0x004C  /* Board Flags Hi */
 #define SSB_SPROM5_IL0MAC		0x0052	/* 6 byte MAC address for a/b/g/n */
-#define SSB_SPROM5_CCODE		0x0044	/* Country Code (2 bytes) */
 #define SSB_SPROM5_GPIOA		0x0076	/* Gen. Purpose IO # 0 and 1 */
 #define  SSB_SPROM5_GPIOA_P0		0x00FF	/* Pin 0 */
 #define  SSB_SPROM5_GPIOA_P1		0xFF00	/* Pin 1 */
@@ -335,11 +335,19 @@
 #define SSB_SPROM8_BFL2HI		0x008A	/* Board flags (bits 48-63) */
 #define SSB_SPROM8_IL0MAC		0x008C	/* 6 byte MAC address */
 #define SSB_SPROM8_CCODE		0x0092	/* 2 byte country code */
+#define SSB_SPROM8_GPIOA		0x0096	/*Gen. Purpose IO # 0 and 1 */
+#define  SSB_SPROM8_GPIOA_P0		0x00FF	/* Pin 0 */
+#define  SSB_SPROM8_GPIOA_P1		0xFF00	/* Pin 1 */
+#define  SSB_SPROM8_GPIOA_P1_SHIFT	8
+#define SSB_SPROM8_GPIOB		0x0098	/* Gen. Purpose IO # 2 and 3 */
+#define  SSB_SPROM8_GPIOB_P2		0x00FF	/* Pin 2 */
+#define  SSB_SPROM8_GPIOB_P3		0xFF00	/* Pin 3 */
+#define  SSB_SPROM8_GPIOB_P3_SHIFT	8
 #define SSB_SPROM8_ANTAVAIL		0x009C  /* Antenna available bitfields*/
-#define SSB_SPROM8_ANTAVAIL_A		0xFF00	/* A-PHY bitfield */
-#define SSB_SPROM8_ANTAVAIL_A_SHIFT	8
-#define SSB_SPROM8_ANTAVAIL_BG		0x00FF	/* B-PHY and G-PHY bitfield */
-#define SSB_SPROM8_ANTAVAIL_BG_SHIFT	0
+#define  SSB_SPROM8_ANTAVAIL_A		0xFF00	/* A-PHY bitfield */
+#define  SSB_SPROM8_ANTAVAIL_A_SHIFT	8
+#define  SSB_SPROM8_ANTAVAIL_BG		0x00FF	/* B-PHY and G-PHY bitfield */
+#define  SSB_SPROM8_ANTAVAIL_BG_SHIFT	0
 #define SSB_SPROM8_AGAIN01		0x009E	/* Antenna Gain (in dBm Q5.2) */
 #define  SSB_SPROM8_AGAIN0		0x00FF	/* Antenna 0 */
 #define  SSB_SPROM8_AGAIN0_SHIFT	0
@@ -350,14 +358,6 @@
 #define  SSB_SPROM8_AGAIN2_SHIFT	0
 #define  SSB_SPROM8_AGAIN3		0xFF00	/* Antenna 3 */
 #define  SSB_SPROM8_AGAIN3_SHIFT	8
-#define SSB_SPROM8_GPIOA		0x0096	/*Gen. Purpose IO # 0 and 1 */
-#define  SSB_SPROM8_GPIOA_P0		0x00FF	/* Pin 0 */
-#define  SSB_SPROM8_GPIOA_P1		0xFF00	/* Pin 1 */
-#define  SSB_SPROM8_GPIOA_P1_SHIFT	8
-#define SSB_SPROM8_GPIOB		0x0098	/* Gen. Purpose IO # 2 and 3 */
-#define  SSB_SPROM8_GPIOB_P2		0x00FF	/* Pin 2 */
-#define  SSB_SPROM8_GPIOB_P3		0xFF00	/* Pin 3 */
-#define  SSB_SPROM8_GPIOB_P3_SHIFT	8
 #define SSB_SPROM8_RSSIPARM2G		0x00A4	/* RSSI params for 2GHz */
 #define  SSB_SPROM8_RSSISMF2G		0x000F
 #define  SSB_SPROM8_RSSISMC2G		0x00F0
-- 
cgit v1.2.3


From 6a740aa4f47b9f29bad5292cf51f008f3edad9b1 Mon Sep 17 00:00:00 2001
From: Bruno Prémont <bonbons@linux-vserver.org>
Date: Sun, 25 Apr 2010 21:40:03 +0200
Subject: HID: add suspend/resume hooks for hid drivers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add suspend/resume hooks for HID drivers so these can do some
additional state adjustment when the device gets suspended/resumed.

Signed-off-by: Bruno Prémont <bonbons@linux-vserver.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 drivers/hid/usbhid/hid-core.c | 24 +++++++++++++++++++++++-
 include/linux/hid.h           |  8 ++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
index 56d06cd8075b..14a67fba590e 100644
--- a/drivers/hid/usbhid/hid-core.c
+++ b/drivers/hid/usbhid/hid-core.c
@@ -1290,6 +1290,11 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 		{
 			set_bit(HID_REPORTED_IDLE, &usbhid->iofl);
 			spin_unlock_irq(&usbhid->lock);
+			if (hid->driver && hid->driver->suspend) {
+				status = hid->driver->suspend(hid, message);
+				if (status < 0)
+					return status;
+			}
 		} else {
 			usbhid_mark_busy(usbhid);
 			spin_unlock_irq(&usbhid->lock);
@@ -1297,6 +1302,11 @@ static int hid_suspend(struct usb_interface *intf, pm_message_t message)
 		}
 
 	} else {
+		if (hid->driver && hid->driver->suspend) {
+			status = hid->driver->suspend(hid, message);
+			if (status < 0)
+				return status;
+		}
 		spin_lock_irq(&usbhid->lock);
 		set_bit(HID_REPORTED_IDLE, &usbhid->iofl);
 		spin_unlock_irq(&usbhid->lock);
@@ -1351,6 +1361,11 @@ static int hid_resume(struct usb_interface *intf)
 		hid_io_error(hid);
 	usbhid_restart_queues(usbhid);
 
+	if (status >= 0 && hid->driver && hid->driver->resume) {
+		int ret = hid->driver->resume(hid);
+		if (ret < 0)
+			status = ret;
+	}
 	dev_dbg(&intf->dev, "resume status %d\n", status);
 	return 0;
 }
@@ -1359,9 +1374,16 @@ static int hid_reset_resume(struct usb_interface *intf)
 {
 	struct hid_device *hid = usb_get_intfdata(intf);
 	struct usbhid_device *usbhid = hid->driver_data;
+	int status;
 
 	clear_bit(HID_REPORTED_IDLE, &usbhid->iofl);
-	return hid_post_reset(intf);
+	status = hid_post_reset(intf);
+	if (status >= 0 && hid->driver && hid->driver->reset_resume) {
+		int ret = hid->driver->reset_resume(hid);
+		if (ret < 0)
+			status = ret;
+	}
+	return status;
 }
 
 #endif /* CONFIG_PM */
diff --git a/include/linux/hid.h b/include/linux/hid.h
index b1344ec4b7fc..069e587ae8e6 100644
--- a/include/linux/hid.h
+++ b/include/linux/hid.h
@@ -589,6 +589,9 @@ struct hid_usage_id {
  * @report_fixup: called before report descriptor parsing (NULL means nop)
  * @input_mapping: invoked on input registering before mapping an usage
  * @input_mapped: invoked on input registering after mapping an usage
+ * @suspend: invoked on suspend (NULL means nop)
+ * @resume: invoked on resume if device was not reset (NULL means nop)
+ * @reset_resume: invoked on resume if device was reset (NULL means nop)
  *
  * raw_event and event should return 0 on no action performed, 1 when no
  * further processing should be done and negative on error
@@ -629,6 +632,11 @@ struct hid_driver {
 	int (*input_mapped)(struct hid_device *hdev,
 			struct hid_input *hidinput, struct hid_field *field,
 			struct hid_usage *usage, unsigned long **bit, int *max);
+#ifdef CONFIG_PM
+	int (*suspend)(struct hid_device *hdev, pm_message_t message);
+	int (*resume)(struct hid_device *hdev);
+	int (*reset_resume)(struct hid_device *hdev);
+#endif
 /* private: */
 	struct device_driver driver;
 };
-- 
cgit v1.2.3


From 4b2cbd42bef5a22bb681acd607a7c3fbca1eeb3c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 27 Apr 2010 15:34:34 +0200
Subject: netfilter: x_tables: rectify XT_FUNCTION_MAXNAMELEN usage

There has been quite some confusion in userspace about
XT_FUNCTION_MAXNAMELEN; because struct xt_entry_match used MAX-1,
userspace would have to do an awkward MAX-2 for maximum length
checking (due to '\0'). This patch adds a new define that matches the
definition of XT_TABLE_MAXNAMELEN - being the size of the actual
struct member, not one off.

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/netfilter/x_tables.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 50c867256ca3..eeb4884c30be 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 
 #define XT_FUNCTION_MAXNAMELEN 30
+#define XT_EXTENSION_MAXNAMELEN 29
 #define XT_TABLE_MAXNAMELEN 32
 
 struct xt_entry_match {
@@ -12,8 +13,7 @@ struct xt_entry_match {
 			__u16 match_size;
 
 			/* Used by userspace */
-			char name[XT_FUNCTION_MAXNAMELEN-1];
-
+			char name[XT_EXTENSION_MAXNAMELEN];
 			__u8 revision;
 		} user;
 		struct {
@@ -36,8 +36,7 @@ struct xt_entry_target {
 			__u16 target_size;
 
 			/* Used by userspace */
-			char name[XT_FUNCTION_MAXNAMELEN-1];
-
+			char name[XT_EXTENSION_MAXNAMELEN];
 			__u8 revision;
 		} user;
 		struct {
@@ -70,8 +69,7 @@ struct xt_standard_target {
 /* The argument to IPT_SO_GET_REVISION_*.  Returns highest revision
  * kernel supports, if >= revision. */
 struct xt_get_revision {
-	char name[XT_FUNCTION_MAXNAMELEN-1];
-
+	char name[XT_EXTENSION_MAXNAMELEN];
 	__u8 revision;
 };
 
@@ -291,7 +289,7 @@ struct xt_tgdtor_param {
 struct xt_match {
 	struct list_head list;
 
-	const char name[XT_FUNCTION_MAXNAMELEN-1];
+	const char name[XT_EXTENSION_MAXNAMELEN];
 	u_int8_t revision;
 
 	/* Return true or false: return FALSE and set *hotdrop = 1 to
@@ -330,7 +328,7 @@ struct xt_match {
 struct xt_target {
 	struct list_head list;
 
-	const char name[XT_FUNCTION_MAXNAMELEN-1];
+	const char name[XT_EXTENSION_MAXNAMELEN];
 	u_int8_t revision;
 
 	/* Returns verdict. Argument order changed since 2.6.9, as this
-- 
cgit v1.2.3


From 62b915f1060996a8e1f69be50e3b8e9e43b710cb Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Fri, 2 Apr 2010 19:01:22 +0200
Subject: tracing: Add graph output support for irqsoff tracer

Add function graph output to irqsoff tracer.

The graph output is enabled by setting new 'display-graph' trace option.

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
LKML-Reference: <1270227683-14631-4-git-send-email-jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h               |  15 +-
 kernel/trace/trace.c                 |  35 +++--
 kernel/trace/trace.h                 |  21 +++
 kernel/trace/trace_functions_graph.c |  15 +-
 kernel/trace/trace_irqsoff.c         | 271 +++++++++++++++++++++++++++++++++--
 5 files changed, 324 insertions(+), 33 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index ea5b1aae0e8b..8415a522f430 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -352,6 +352,10 @@ struct ftrace_graph_ret {
 	int depth;
 };
 
+/* Type of the callback handlers for tracing function graph*/
+typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
+typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
+
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
 
 /* for init task */
@@ -400,10 +404,6 @@ extern char __irqentry_text_end[];
 
 #define FTRACE_RETFUNC_DEPTH 50
 #define FTRACE_RETSTACK_ALLOC_SIZE 32
-/* Type of the callback handlers for tracing function graph*/
-typedef void (*trace_func_graph_ret_t)(struct ftrace_graph_ret *); /* return */
-typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
-
 extern int register_ftrace_graph(trace_func_graph_ret_t retfunc,
 				trace_func_graph_ent_t entryfunc);
 
@@ -441,6 +441,13 @@ static inline void unpause_graph_tracing(void)
 static inline void ftrace_graph_init_task(struct task_struct *t) { }
 static inline void ftrace_graph_exit_task(struct task_struct *t) { }
 
+static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
+			  trace_func_graph_ent_t entryfunc)
+{
+	return -1;
+}
+static inline void unregister_ftrace_graph(void) { }
+
 static inline int task_curr_ret_stack(struct task_struct *tsk)
 {
 	return -1;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 7b516c7ef9a0..8b9ba41ec146 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1808,7 +1808,7 @@ static void print_func_help_header(struct seq_file *m)
 }
 
 
-static void
+void
 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
 {
 	unsigned long sym_flags = (trace_flags & TRACE_ITER_SYM_MASK);
@@ -2017,7 +2017,7 @@ static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
 	return event ? event->binary(iter, 0) : TRACE_TYPE_HANDLED;
 }
 
-static int trace_empty(struct trace_iterator *iter)
+int trace_empty(struct trace_iterator *iter)
 {
 	int cpu;
 
@@ -2084,6 +2084,23 @@ static enum print_line_t print_trace_line(struct trace_iterator *iter)
 	return print_trace_fmt(iter);
 }
 
+void trace_default_header(struct seq_file *m)
+{
+	struct trace_iterator *iter = m->private;
+
+	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
+		/* print nothing if the buffers are empty */
+		if (trace_empty(iter))
+			return;
+		print_trace_header(m, iter);
+		if (!(trace_flags & TRACE_ITER_VERBOSE))
+			print_lat_help_header(m);
+	} else {
+		if (!(trace_flags & TRACE_ITER_VERBOSE))
+			print_func_help_header(m);
+	}
+}
+
 static int s_show(struct seq_file *m, void *v)
 {
 	struct trace_iterator *iter = v;
@@ -2096,17 +2113,9 @@ static int s_show(struct seq_file *m, void *v)
 		}
 		if (iter->trace && iter->trace->print_header)
 			iter->trace->print_header(m);
-		else if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
-			/* print nothing if the buffers are empty */
-			if (trace_empty(iter))
-				return 0;
-			print_trace_header(m, iter);
-			if (!(trace_flags & TRACE_ITER_VERBOSE))
-				print_lat_help_header(m);
-		} else {
-			if (!(trace_flags & TRACE_ITER_VERBOSE))
-				print_func_help_header(m);
-		}
+		else
+			trace_default_header(m);
+
 	} else if (iter->leftover) {
 		/*
 		 * If we filled the seq_file buffer earlier, we
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 970004c5fa79..911e9864e94a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -378,6 +378,9 @@ void trace_function(struct trace_array *tr,
 		    unsigned long ip,
 		    unsigned long parent_ip,
 		    unsigned long flags, int pc);
+void trace_default_header(struct seq_file *m);
+void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
+int trace_empty(struct trace_iterator *iter);
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
 int trace_graph_entry(struct ftrace_graph_ent *trace);
@@ -491,11 +494,29 @@ extern int trace_clock_id;
 
 /* Standard output formatting function used for function return traces */
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+/* Flag options */
+#define TRACE_GRAPH_PRINT_OVERRUN       0x1
+#define TRACE_GRAPH_PRINT_CPU           0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD      0x4
+#define TRACE_GRAPH_PRINT_PROC          0x8
+#define TRACE_GRAPH_PRINT_DURATION      0x10
+#define TRACE_GRAPH_PRINT_ABS_TIME      0x20
+
 extern enum print_line_t
 print_graph_function_flags(struct trace_iterator *iter, u32 flags);
 extern void print_graph_headers_flags(struct seq_file *s, u32 flags);
 extern enum print_line_t
 trace_print_graph_duration(unsigned long long duration, struct trace_seq *s);
+extern void graph_trace_open(struct trace_iterator *iter);
+extern void graph_trace_close(struct trace_iterator *iter);
+extern int __trace_graph_entry(struct trace_array *tr,
+			       struct ftrace_graph_ent *trace,
+			       unsigned long flags, int pc);
+extern void __trace_graph_return(struct trace_array *tr,
+				 struct ftrace_graph_ret *trace,
+				 unsigned long flags, int pc);
+
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 /* TODO: make this variable */
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index de5f6518aba0..dd11c830eb84 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -179,7 +179,7 @@ unsigned long ftrace_return_to_handler(unsigned long frame_pointer)
 	return ret;
 }
 
-static int __trace_graph_entry(struct trace_array *tr,
+int __trace_graph_entry(struct trace_array *tr,
 				struct ftrace_graph_ent *trace,
 				unsigned long flags,
 				int pc)
@@ -246,7 +246,7 @@ int trace_graph_thresh_entry(struct ftrace_graph_ent *trace)
 		return trace_graph_entry(trace);
 }
 
-static void __trace_graph_return(struct trace_array *tr,
+void __trace_graph_return(struct trace_array *tr,
 				struct ftrace_graph_ret *trace,
 				unsigned long flags,
 				int pc)
@@ -1093,6 +1093,11 @@ print_graph_function_flags(struct trace_iterator *iter, u32 flags)
 		trace_assign_type(field, entry);
 		return print_graph_return(&field->ret, s, entry, iter, flags);
 	}
+	case TRACE_STACK:
+	case TRACE_FN:
+		/* dont trace stack and functions as comments */
+		return TRACE_TYPE_UNHANDLED;
+
 	default:
 		return print_graph_comment(s, entry, iter, flags);
 	}
@@ -1170,12 +1175,12 @@ void print_graph_headers_flags(struct seq_file *s, u32 flags)
 	seq_printf(s, "               |   |   |   |\n");
 }
 
-static void print_graph_headers(struct seq_file *s)
+void print_graph_headers(struct seq_file *s)
 {
 	print_graph_headers_flags(s, tracer_flags.val);
 }
 
-static void graph_trace_open(struct trace_iterator *iter)
+void graph_trace_open(struct trace_iterator *iter)
 {
 	/* pid and depth on the last trace processed */
 	struct fgraph_data *data;
@@ -1210,7 +1215,7 @@ static void graph_trace_open(struct trace_iterator *iter)
 	pr_warning("function graph tracer: not enough memory\n");
 }
 
-static void graph_trace_close(struct trace_iterator *iter)
+void graph_trace_close(struct trace_iterator *iter)
 {
 	struct fgraph_data *data = iter->private;
 
diff --git a/kernel/trace/trace_irqsoff.c b/kernel/trace/trace_irqsoff.c
index 2974bc7538c7..6fd486e0cef4 100644
--- a/kernel/trace/trace_irqsoff.c
+++ b/kernel/trace/trace_irqsoff.c
@@ -34,6 +34,9 @@ static int trace_type __read_mostly;
 
 static int save_lat_flag;
 
+static void stop_irqsoff_tracer(struct trace_array *tr, int graph);
+static int start_irqsoff_tracer(struct trace_array *tr, int graph);
+
 #ifdef CONFIG_PREEMPT_TRACER
 static inline int
 preempt_trace(void)
@@ -55,6 +58,23 @@ irq_trace(void)
 # define irq_trace() (0)
 #endif
 
+#define TRACE_DISPLAY_GRAPH	1
+
+static struct tracer_opt trace_opts[] = {
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	/* display latency trace as call graph */
+	{ TRACER_OPT(display-graph, TRACE_DISPLAY_GRAPH) },
+#endif
+	{ } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+	.val  = 0,
+	.opts = trace_opts,
+};
+
+#define is_graph() (tracer_flags.val & TRACE_DISPLAY_GRAPH)
+
 /*
  * Sequence count - we record it when starting a measurement and
  * skip the latency if the sequence has changed - some other section
@@ -108,6 +128,202 @@ static struct ftrace_ops trace_ops __read_mostly =
 };
 #endif /* CONFIG_FUNCTION_TRACER */
 
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
+{
+	int cpu;
+
+	if (!(bit & TRACE_DISPLAY_GRAPH))
+		return -EINVAL;
+
+	if (!(is_graph() ^ set))
+		return 0;
+
+	stop_irqsoff_tracer(irqsoff_trace, !set);
+
+	for_each_possible_cpu(cpu)
+		per_cpu(tracing_cpu, cpu) = 0;
+
+	tracing_max_latency = 0;
+	tracing_reset_online_cpus(irqsoff_trace);
+
+	return start_irqsoff_tracer(irqsoff_trace, set);
+}
+
+static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
+{
+	struct trace_array *tr = irqsoff_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int ret;
+	int cpu;
+	int pc;
+
+	cpu = raw_smp_processor_id();
+	if (likely(!per_cpu(tracing_cpu, cpu)))
+		return 0;
+
+	local_save_flags(flags);
+	/* slight chance to get a false positive on tracing_cpu */
+	if (!irqs_disabled_flags(flags))
+		return 0;
+
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1)) {
+		pc = preempt_count();
+		ret = __trace_graph_entry(tr, trace, flags, pc);
+	} else
+		ret = 0;
+
+	atomic_dec(&data->disabled);
+	return ret;
+}
+
+static void irqsoff_graph_return(struct ftrace_graph_ret *trace)
+{
+	struct trace_array *tr = irqsoff_trace;
+	struct trace_array_cpu *data;
+	unsigned long flags;
+	long disabled;
+	int cpu;
+	int pc;
+
+	cpu = raw_smp_processor_id();
+	if (likely(!per_cpu(tracing_cpu, cpu)))
+		return;
+
+	local_save_flags(flags);
+	/* slight chance to get a false positive on tracing_cpu */
+	if (!irqs_disabled_flags(flags))
+		return;
+
+	data = tr->data[cpu];
+	disabled = atomic_inc_return(&data->disabled);
+
+	if (likely(disabled == 1)) {
+		pc = preempt_count();
+		__trace_graph_return(tr, trace, flags, pc);
+	}
+
+	atomic_dec(&data->disabled);
+}
+
+static void irqsoff_trace_open(struct trace_iterator *iter)
+{
+	if (is_graph())
+		graph_trace_open(iter);
+
+}
+
+static void irqsoff_trace_close(struct trace_iterator *iter)
+{
+	if (iter->private)
+		graph_trace_close(iter);
+}
+
+#define GRAPH_TRACER_FLAGS (TRACE_GRAPH_PRINT_CPU | \
+			    TRACE_GRAPH_PRINT_PROC)
+
+static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
+{
+	u32 flags = GRAPH_TRACER_FLAGS;
+
+	if (trace_flags & TRACE_ITER_LATENCY_FMT)
+		flags |= TRACE_GRAPH_PRINT_DURATION;
+	else
+		flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+	/*
+	 * In graph mode call the graph tracer output function,
+	 * otherwise go with the TRACE_FN event handler
+	 */
+	if (is_graph())
+		return print_graph_function_flags(iter, flags);
+
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static void irqsoff_print_header(struct seq_file *s)
+{
+	if (is_graph()) {
+		struct trace_iterator *iter = s->private;
+		u32 flags = GRAPH_TRACER_FLAGS;
+
+		if (trace_flags & TRACE_ITER_LATENCY_FMT) {
+			/* print nothing if the buffers are empty */
+			if (trace_empty(iter))
+				return;
+
+			print_trace_header(s, iter);
+			flags |= TRACE_GRAPH_PRINT_DURATION;
+		} else
+			flags |= TRACE_GRAPH_PRINT_ABS_TIME;
+
+		print_graph_headers_flags(s, flags);
+	} else
+		trace_default_header(s);
+}
+
+static void
+trace_graph_function(struct trace_array *tr,
+		 unsigned long ip, unsigned long flags, int pc)
+{
+	u64 time = trace_clock_local();
+	struct ftrace_graph_ent ent = {
+		.func  = ip,
+		.depth = 0,
+	};
+	struct ftrace_graph_ret ret = {
+		.func     = ip,
+		.depth    = 0,
+		.calltime = time,
+		.rettime  = time,
+	};
+
+	__trace_graph_entry(tr, &ent, flags, pc);
+	__trace_graph_return(tr, &ret, flags, pc);
+}
+
+static void
+__trace_function(struct trace_array *tr,
+		 unsigned long ip, unsigned long parent_ip,
+		 unsigned long flags, int pc)
+{
+	if (!is_graph())
+		trace_function(tr, ip, parent_ip, flags, pc);
+	else {
+		trace_graph_function(tr, parent_ip, flags, pc);
+		trace_graph_function(tr, ip, flags, pc);
+	}
+}
+
+#else
+#define __trace_function trace_function
+
+static int irqsoff_set_flag(u32 old_flags, u32 bit, int set)
+{
+	return -EINVAL;
+}
+
+static int irqsoff_graph_entry(struct ftrace_graph_ent *trace)
+{
+	return -1;
+}
+
+static enum print_line_t irqsoff_print_line(struct trace_iterator *iter)
+{
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static void irqsoff_graph_return(struct ftrace_graph_ret *trace) { }
+static void irqsoff_print_header(struct seq_file *s) { }
+static void irqsoff_trace_open(struct trace_iterator *iter) { }
+static void irqsoff_trace_close(struct trace_iterator *iter) { }
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
+
 /*
  * Should this new latency be reported/recorded?
  */
@@ -150,7 +366,7 @@ check_critical_timing(struct trace_array *tr,
 	if (!report_latency(delta))
 		goto out_unlock;
 
-	trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
+	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
 	/* Skip 5 functions to get to the irq/preempt enable function */
 	__trace_stack(tr, flags, 5, pc);
 
@@ -172,7 +388,7 @@ out_unlock:
 out:
 	data->critical_sequence = max_sequence;
 	data->preempt_timestamp = ftrace_now(cpu);
-	trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
+	__trace_function(tr, CALLER_ADDR0, parent_ip, flags, pc);
 }
 
 static inline void
@@ -204,7 +420,7 @@ start_critical_timing(unsigned long ip, unsigned long parent_ip)
 
 	local_save_flags(flags);
 
-	trace_function(tr, ip, parent_ip, flags, preempt_count());
+	__trace_function(tr, ip, parent_ip, flags, preempt_count());
 
 	per_cpu(tracing_cpu, cpu) = 1;
 
@@ -238,7 +454,7 @@ stop_critical_timing(unsigned long ip, unsigned long parent_ip)
 	atomic_inc(&data->disabled);
 
 	local_save_flags(flags);
-	trace_function(tr, ip, parent_ip, flags, preempt_count());
+	__trace_function(tr, ip, parent_ip, flags, preempt_count());
 	check_critical_timing(tr, data, parent_ip ? : ip, cpu);
 	data->critical_start = 0;
 	atomic_dec(&data->disabled);
@@ -347,19 +563,32 @@ void trace_preempt_off(unsigned long a0, unsigned long a1)
 }
 #endif /* CONFIG_PREEMPT_TRACER */
 
-static void start_irqsoff_tracer(struct trace_array *tr)
+static int start_irqsoff_tracer(struct trace_array *tr, int graph)
 {
-	register_ftrace_function(&trace_ops);
-	if (tracing_is_enabled())
+	int ret = 0;
+
+	if (!graph)
+		ret = register_ftrace_function(&trace_ops);
+	else
+		ret = register_ftrace_graph(&irqsoff_graph_return,
+					    &irqsoff_graph_entry);
+
+	if (!ret && tracing_is_enabled())
 		tracer_enabled = 1;
 	else
 		tracer_enabled = 0;
+
+	return ret;
 }
 
-static void stop_irqsoff_tracer(struct trace_array *tr)
+static void stop_irqsoff_tracer(struct trace_array *tr, int graph)
 {
 	tracer_enabled = 0;
-	unregister_ftrace_function(&trace_ops);
+
+	if (!graph)
+		unregister_ftrace_function(&trace_ops);
+	else
+		unregister_ftrace_graph();
 }
 
 static void __irqsoff_tracer_init(struct trace_array *tr)
@@ -372,12 +601,14 @@ static void __irqsoff_tracer_init(struct trace_array *tr)
 	/* make sure that the tracer is visible */
 	smp_wmb();
 	tracing_reset_online_cpus(tr);
-	start_irqsoff_tracer(tr);
+
+	if (start_irqsoff_tracer(tr, is_graph()))
+		printk(KERN_ERR "failed to start irqsoff tracer\n");
 }
 
 static void irqsoff_tracer_reset(struct trace_array *tr)
 {
-	stop_irqsoff_tracer(tr);
+	stop_irqsoff_tracer(tr, is_graph());
 
 	if (!save_lat_flag)
 		trace_flags &= ~TRACE_ITER_LATENCY_FMT;
@@ -409,9 +640,15 @@ static struct tracer irqsoff_tracer __read_mostly =
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
 	.print_max	= 1,
+	.print_header   = irqsoff_print_header,
+	.print_line     = irqsoff_print_line,
+	.flags		= &tracer_flags,
+	.set_flag	= irqsoff_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest    = trace_selftest_startup_irqsoff,
 #endif
+	.open           = irqsoff_trace_open,
+	.close          = irqsoff_trace_close,
 };
 # define register_irqsoff(trace) register_tracer(&trace)
 #else
@@ -435,9 +672,15 @@ static struct tracer preemptoff_tracer __read_mostly =
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
 	.print_max	= 1,
+	.print_header   = irqsoff_print_header,
+	.print_line     = irqsoff_print_line,
+	.flags		= &tracer_flags,
+	.set_flag	= irqsoff_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest    = trace_selftest_startup_preemptoff,
 #endif
+	.open		= irqsoff_trace_open,
+	.close		= irqsoff_trace_close,
 };
 # define register_preemptoff(trace) register_tracer(&trace)
 #else
@@ -463,9 +706,15 @@ static struct tracer preemptirqsoff_tracer __read_mostly =
 	.start		= irqsoff_tracer_start,
 	.stop		= irqsoff_tracer_stop,
 	.print_max	= 1,
+	.print_header   = irqsoff_print_header,
+	.print_line     = irqsoff_print_line,
+	.flags		= &tracer_flags,
+	.set_flag	= irqsoff_set_flag,
 #ifdef CONFIG_FTRACE_SELFTEST
 	.selftest    = trace_selftest_startup_preemptirqsoff,
 #endif
+	.open		= irqsoff_trace_open,
+	.close		= irqsoff_trace_close,
 };
 
 # define register_preemptirqsoff(trace) register_tracer(&trace)
-- 
cgit v1.2.3


From 72c9ddfd4c5bf54ef03cfdf57026416cb678eeba Mon Sep 17 00:00:00 2001
From: David Miller <davem@davemloft.net>
Date: Tue, 20 Apr 2010 15:47:11 -0700
Subject: ring-buffer: Make non-consuming read less expensive with lots of
 cpus.

When performing a non-consuming read, a synchronize_sched() is
performed once for every cpu which is actively tracing.

This is very expensive, and can make it take several seconds to open
up the 'trace' file with lots of cpus.

Only one synchronize_sched() call is actually necessary.  What is
desired is for all cpus to see the disabling state change.  So we
transform the existing sequence:

	for_each_cpu() {
		ring_buffer_read_start();
	}

where each ring_buffer_read_start() call performs a synchronize_sched(),
into the following:

	for_each_cpu() {
		ring_buffer_read_prepare();
	}
	ring_buffer_read_prepare_sync();
	for_each_cpu() {
		ring_buffer_read_start();
	}

wherein only the single ring_buffer_read_prepare_sync() call needs to
do the synchronize_sched().

The first phase, via ring_buffer_read_prepare(), allocates the 'iter'
memory and increments ->record_disabled.

In the second phase, ring_buffer_read_prepare_sync() makes sure this
->record_disabled state is visible fully to all cpus.

And in the final third phase, the ring_buffer_read_start() calls reset
the 'iter' objects allocated in the first phase since we now know that
none of the cpus are adding trace entries any more.

This makes opening the 'trace' file nearly instantaneous on a
sparc64 Niagara2 box with 128 cpus tracing.

Signed-off-by: David S. Miller <davem@davemloft.net>
LKML-Reference: <20100420.154711.11246950.davem@davemloft.net>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ring_buffer.h |  4 ++-
 kernel/trace/ring_buffer.c  | 64 ++++++++++++++++++++++++++++++++++++++-------
 kernel/trace/trace.c        | 11 +++++---
 3 files changed, 65 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h
index c8297761e414..25b4f686d918 100644
--- a/include/linux/ring_buffer.h
+++ b/include/linux/ring_buffer.h
@@ -127,7 +127,9 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		    unsigned long *lost_events);
 
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu);
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu);
+void ring_buffer_read_prepare_sync(void);
+void ring_buffer_read_start(struct ring_buffer_iter *iter);
 void ring_buffer_read_finish(struct ring_buffer_iter *iter);
 
 struct ring_buffer_event *
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 5885cdfc41f3..2a090448ef6b 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -3332,23 +3332,30 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 EXPORT_SYMBOL_GPL(ring_buffer_consume);
 
 /**
- * ring_buffer_read_start - start a non consuming read of the buffer
+ * ring_buffer_read_prepare - Prepare for a non consuming read of the buffer
  * @buffer: The ring buffer to read from
  * @cpu: The cpu buffer to iterate over
  *
- * This starts up an iteration through the buffer. It also disables
- * the recording to the buffer until the reading is finished.
- * This prevents the reading from being corrupted. This is not
- * a consuming read, so a producer is not expected.
+ * This performs the initial preparations necessary to iterate
+ * through the buffer.  Memory is allocated, buffer recording
+ * is disabled, and the iterator pointer is returned to the caller.
  *
- * Must be paired with ring_buffer_finish.
+ * Disabling buffer recordng prevents the reading from being
+ * corrupted. This is not a consuming read, so a producer is not
+ * expected.
+ *
+ * After a sequence of ring_buffer_read_prepare calls, the user is
+ * expected to make at least one call to ring_buffer_prepare_sync.
+ * Afterwards, ring_buffer_read_start is invoked to get things going
+ * for real.
+ *
+ * This overall must be paired with ring_buffer_finish.
  */
 struct ring_buffer_iter *
-ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
+ring_buffer_read_prepare(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_iter *iter;
-	unsigned long flags;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
@@ -3362,15 +3369,52 @@ ring_buffer_read_start(struct ring_buffer *buffer, int cpu)
 	iter->cpu_buffer = cpu_buffer;
 
 	atomic_inc(&cpu_buffer->record_disabled);
+
+	return iter;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare);
+
+/**
+ * ring_buffer_read_prepare_sync - Synchronize a set of prepare calls
+ *
+ * All previously invoked ring_buffer_read_prepare calls to prepare
+ * iterators will be synchronized.  Afterwards, read_buffer_read_start
+ * calls on those iterators are allowed.
+ */
+void
+ring_buffer_read_prepare_sync(void)
+{
 	synchronize_sched();
+}
+EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync);
+
+/**
+ * ring_buffer_read_start - start a non consuming read of the buffer
+ * @iter: The iterator returned by ring_buffer_read_prepare
+ *
+ * This finalizes the startup of an iteration through the buffer.
+ * The iterator comes from a call to ring_buffer_read_prepare and
+ * an intervening ring_buffer_read_prepare_sync must have been
+ * performed.
+ *
+ * Must be paired with ring_buffer_finish.
+ */
+void
+ring_buffer_read_start(struct ring_buffer_iter *iter)
+{
+	struct ring_buffer_per_cpu *cpu_buffer;
+	unsigned long flags;
+
+	if (!iter)
+		return;
+
+	cpu_buffer = iter->cpu_buffer;
 
 	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
 	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
-
-	return iter;
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8b9ba41ec146..756d7283318b 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2201,15 +2201,20 @@ __tracing_open(struct inode *inode, struct file *file)
 
 	if (iter->cpu_file == TRACE_PIPE_ALL_CPU) {
 		for_each_tracing_cpu(cpu) {
-
 			iter->buffer_iter[cpu] =
-				ring_buffer_read_start(iter->tr->buffer, cpu);
+				ring_buffer_read_prepare(iter->tr->buffer, cpu);
+		}
+		ring_buffer_read_prepare_sync();
+		for_each_tracing_cpu(cpu) {
+			ring_buffer_read_start(iter->buffer_iter[cpu]);
 			tracing_iter_reset(iter, cpu);
 		}
 	} else {
 		cpu = iter->cpu_file;
 		iter->buffer_iter[cpu] =
-				ring_buffer_read_start(iter->tr->buffer, cpu);
+			ring_buffer_read_prepare(iter->tr->buffer, cpu);
+		ring_buffer_read_prepare_sync();
+		ring_buffer_read_start(iter->buffer_iter[cpu]);
 		tracing_iter_reset(iter, cpu);
 	}
 
-- 
cgit v1.2.3


From ff846f52935e6c8dfb0c97df7c2c1bf777454684 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@intel.com>
Date: Tue, 27 Apr 2010 01:02:40 +0000
Subject: igb: add support for reporting 5GT/s during probe on PCIe Gen2

This change corrects the fact that we were not reporting Gen2 link speeds
when we were in fact connected at Gen2 rates.

Signed-off-by: Alexander Duyck <alexander.h.duyck@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/igb/e1000_defines.h |  4 ----
 drivers/net/igb/e1000_mac.c     | 27 ++++++++++++++++++++-------
 drivers/net/igb/igb_main.c      |  1 +
 include/linux/pci_regs.h        |  3 +++
 4 files changed, 24 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/igb/e1000_defines.h b/drivers/net/igb/e1000_defines.h
index 31d24e0e76de..24d9be64342f 100644
--- a/drivers/net/igb/e1000_defines.h
+++ b/drivers/net/igb/e1000_defines.h
@@ -610,11 +610,7 @@
 #define IGP_LED3_MODE           0x07000000
 
 /* PCI/PCI-X/PCI-EX Config space */
-#define PCIE_LINK_STATUS             0x12
 #define PCIE_DEVICE_CONTROL2         0x28
-
-#define PCIE_LINK_WIDTH_MASK         0x3F0
-#define PCIE_LINK_WIDTH_SHIFT        4
 #define PCIE_DEVICE_CONTROL2_16ms    0x0005
 
 #define PHY_REVISION_MASK      0xFFFFFFF0
diff --git a/drivers/net/igb/e1000_mac.c b/drivers/net/igb/e1000_mac.c
index be8d010e4021..90c5e01e9235 100644
--- a/drivers/net/igb/e1000_mac.c
+++ b/drivers/net/igb/e1000_mac.c
@@ -53,17 +53,30 @@ s32 igb_get_bus_info_pcie(struct e1000_hw *hw)
 	u16 pcie_link_status;
 
 	bus->type = e1000_bus_type_pci_express;
-	bus->speed = e1000_bus_speed_2500;
 
 	ret_val = igb_read_pcie_cap_reg(hw,
-					  PCIE_LINK_STATUS,
-					  &pcie_link_status);
-	if (ret_val)
+					PCI_EXP_LNKSTA,
+					&pcie_link_status);
+	if (ret_val) {
 		bus->width = e1000_bus_width_unknown;
-	else
+		bus->speed = e1000_bus_speed_unknown;
+	} else {
+		switch (pcie_link_status & PCI_EXP_LNKSTA_CLS) {
+		case PCI_EXP_LNKSTA_CLS_2_5GB:
+			bus->speed = e1000_bus_speed_2500;
+			break;
+		case PCI_EXP_LNKSTA_CLS_5_0GB:
+			bus->speed = e1000_bus_speed_5000;
+			break;
+		default:
+			bus->speed = e1000_bus_speed_unknown;
+			break;
+		}
+
 		bus->width = (enum e1000_bus_width)((pcie_link_status &
-						     PCIE_LINK_WIDTH_MASK) >>
-						     PCIE_LINK_WIDTH_SHIFT);
+						     PCI_EXP_LNKSTA_NLW) >>
+						     PCI_EXP_LNKSTA_NLW_SHIFT);
+	}
 
 	reg = rd32(E1000_STATUS);
 	bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT;
diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c
index a14303a0ad7b..919e36386675 100644
--- a/drivers/net/igb/igb_main.c
+++ b/drivers/net/igb/igb_main.c
@@ -1638,6 +1638,7 @@ static int __devinit igb_probe(struct pci_dev *pdev,
 	dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
 		 netdev->name,
 		 ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
+		  (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
 		                                            "unknown"),
 		 ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
 		  (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h
index c8f302991b66..c4c3d68be19a 100644
--- a/include/linux/pci_regs.h
+++ b/include/linux/pci_regs.h
@@ -442,7 +442,10 @@
 #define  PCI_EXP_LNKCTL_LABIE	0x0800	/* Lnk Autonomous Bandwidth Interrupt Enable */
 #define PCI_EXP_LNKSTA		18	/* Link Status */
 #define  PCI_EXP_LNKSTA_CLS	0x000f	/* Current Link Speed */
+#define  PCI_EXP_LNKSTA_CLS_2_5GB 0x01	/* Current Link Speed 2.5GT/s */
+#define  PCI_EXP_LNKSTA_CLS_5_0GB 0x02	/* Current Link Speed 5.0GT/s */
 #define  PCI_EXP_LNKSTA_NLW	0x03f0	/* Nogotiated Link Width */
+#define  PCI_EXP_LNKSTA_NLW_SHIFT 4	/* start of NLW mask in link status */
 #define  PCI_EXP_LNKSTA_LT	0x0800	/* Link Training */
 #define  PCI_EXP_LNKSTA_SLC	0x1000	/* Slot Clock Configuration */
 #define  PCI_EXP_LNKSTA_DLLLA	0x2000	/* Data Link Layer Link Active */
-- 
cgit v1.2.3


From fd8aaaf3519f3fd3c82594e90bc6808072b94d54 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@openwrt.org>
Date: Tue, 27 Apr 2010 01:23:35 +0200
Subject: cfg80211: add ap isolation support

This is used to configure APs to not bridge traffic between connected stations.

Signed-off-by: Felix Fietkau <nbd@openwrt.org>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/linux/nl80211.h | 5 +++++
 include/net/cfg80211.h  | 2 ++
 net/wireless/nl80211.c  | 4 ++++
 3 files changed, 11 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index 2ea3edeee7aa..f8750f9a65b8 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -709,6 +709,9 @@ enum nl80211_commands {
  *	NL80211_CMD_AUTHENTICATE, NL80211_CMD_DEAUTHENTICATE,
  *	NL80211_CMD_DISASSOCIATE.
  *
+ * @NL80211_ATTR_AP_ISOLATE: (AP mode) Do not forward traffic between stations
+ *	connected to this BSS.
+ *
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
  */
@@ -864,6 +867,8 @@ enum nl80211_attrs {
 
 	NL80211_ATTR_LOCAL_STATE_CHANGE,
 
+	NL80211_ATTR_AP_ISOLATE,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5a4efe54cffd..f6b29bf925d0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -511,6 +511,7 @@ struct mpath_info {
  * @basic_rates: basic rates in IEEE 802.11 format
  *	(or NULL for no change)
  * @basic_rates_len: number of basic rates
+ * @ap_isolate: do not forward packets between connected stations
  */
 struct bss_parameters {
 	int use_cts_prot;
@@ -518,6 +519,7 @@ struct bss_parameters {
 	int use_short_slot_time;
 	u8 *basic_rates;
 	u8 basic_rates_len;
+	int ap_isolate;
 };
 
 struct mesh_config {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index df5505b3930c..c27bef8e0c11 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -151,6 +151,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = {
 	[NL80211_ATTR_PS_STATE] = { .type = NLA_U32 },
 	[NL80211_ATTR_CQM] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_LOCAL_STATE_CHANGE] = { .type = NLA_FLAG },
+	[NL80211_ATTR_AP_ISOLATE] = { .type = NLA_U8 },
 };
 
 /* policy for the attributes */
@@ -2441,6 +2442,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 	params.use_cts_prot = -1;
 	params.use_short_preamble = -1;
 	params.use_short_slot_time = -1;
+	params.ap_isolate = -1;
 
 	if (info->attrs[NL80211_ATTR_BSS_CTS_PROT])
 		params.use_cts_prot =
@@ -2457,6 +2459,8 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info)
 		params.basic_rates_len =
 			nla_len(info->attrs[NL80211_ATTR_BSS_BASIC_RATES]);
 	}
+	if (info->attrs[NL80211_ATTR_AP_ISOLATE])
+		params.ap_isolate = !!nla_get_u8(info->attrs[NL80211_ATTR_AP_ISOLATE]);
 
 	rtnl_lock();
 
-- 
cgit v1.2.3


From a9cbd588fdb71ea415754c885e2f9f03e6bf1ba0 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Mon, 26 Apr 2010 23:06:24 +0000
Subject: net: reimplement softnet_data.output_queue as a FIFO queue

Reimplement softnet_data.output_queue as a FIFO queue to keep
fairness among the rescheduled qdiscs.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
----
 include/linux/netdevice.h |    1 +
 net/core/dev.c            |   22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  1 +
 net/core/dev.c            | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 3c5ed5f5274e..c04ca246395d 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1385,6 +1385,7 @@ static inline int unregister_gifconf(unsigned int family)
  */
 struct softnet_data {
 	struct Qdisc		*output_queue;
+	struct Qdisc		**output_queue_tailp;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 4d43f1a80f74..3d314919a2cf 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1557,8 +1557,9 @@ static inline void __netif_reschedule(struct Qdisc *q)
 
 	local_irq_save(flags);
 	sd = &__get_cpu_var(softnet_data);
-	q->next_sched = sd->output_queue;
-	sd->output_queue = q;
+	q->next_sched = NULL;
+	*sd->output_queue_tailp = q;
+	sd->output_queue_tailp = &q->next_sched;
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_restore(flags);
 }
@@ -2529,6 +2530,7 @@ static void net_tx_action(struct softirq_action *h)
 		local_irq_disable();
 		head = sd->output_queue;
 		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 		local_irq_enable();
 
 		while (head) {
@@ -5594,7 +5596,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 			    void *ocpu)
 {
 	struct sk_buff **list_skb;
-	struct Qdisc **list_net;
 	struct sk_buff *skb;
 	unsigned int cpu, oldcpu = (unsigned long)ocpu;
 	struct softnet_data *sd, *oldsd;
@@ -5615,13 +5616,13 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	*list_skb = oldsd->completion_queue;
 	oldsd->completion_queue = NULL;
 
-	/* Find end of our output_queue. */
-	list_net = &sd->output_queue;
-	while (*list_net)
-		list_net = &(*list_net)->next_sched;
 	/* Append output queue from offline CPU. */
-	*list_net = oldsd->output_queue;
-	oldsd->output_queue = NULL;
+	if (oldsd->output_queue) {
+		*sd->output_queue_tailp = oldsd->output_queue;
+		sd->output_queue_tailp = oldsd->output_queue_tailp;
+		oldsd->output_queue = NULL;
+		oldsd->output_queue_tailp = &oldsd->output_queue;
+	}
 
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
 	local_irq_enable();
@@ -5851,7 +5852,8 @@ static int __init net_dev_init(void)
 		skb_queue_head_init(&sd->input_pkt_queue);
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
-
+		sd->output_queue = NULL;
+		sd->output_queue_tailp = &sd->output_queue;
 #ifdef CONFIG_RPS
 		sd->csd.func = rps_trigger_softirq;
 		sd->csd.info = sd;
-- 
cgit v1.2.3


From 6e7676c1a76aed6e957611d8d7a9e5592e23aeba Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Tue, 27 Apr 2010 15:07:33 -0700
Subject: net: batch skb dequeueing from softnet input_pkt_queue

Batch skb dequeueing from softnet input_pkt_queue to reduce potential lock
contention when RPS is enabled.

Note: in the worst case, the number of packets held in a softnet_data may
be double netdev_max_backlog.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 +++--
 net/core/dev.c            | 57 ++++++++++++++++++++++++++++++++---------------
 2 files changed, 43 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c04ca246395d..40d4c20d034b 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1388,6 +1388,7 @@ struct softnet_data {
 	struct Qdisc		**output_queue_tailp;
 	struct list_head	poll_list;
 	struct sk_buff		*completion_queue;
+	struct sk_buff_head	process_queue;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
@@ -1402,10 +1403,11 @@ struct softnet_data {
 	struct napi_struct	backlog;
 };
 
-static inline void input_queue_head_incr(struct softnet_data *sd)
+static inline void input_queue_head_add(struct softnet_data *sd,
+					unsigned int len)
 {
 #ifdef CONFIG_RPS
-	sd->input_queue_head++;
+	sd->input_queue_head += len;
 #endif
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index 3d314919a2cf..100dcbd29739 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2408,12 +2408,13 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
-	if (sd->input_pkt_queue.qlen <= netdev_max_backlog) {
-		if (sd->input_pkt_queue.qlen) {
+	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
+		if (skb_queue_len(&sd->input_pkt_queue)) {
 enqueue:
 			__skb_queue_tail(&sd->input_pkt_queue, skb);
 #ifdef CONFIG_RPS
-			*qtail = sd->input_queue_head + sd->input_pkt_queue.qlen;
+			*qtail = sd->input_queue_head +
+					skb_queue_len(&sd->input_pkt_queue);
 #endif
 			rps_unlock(sd);
 			local_irq_restore(flags);
@@ -2934,13 +2935,21 @@ static void flush_backlog(void *arg)
 	struct sk_buff *skb, *tmp;
 
 	rps_lock(sd);
-	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp)
+	skb_queue_walk_safe(&sd->input_pkt_queue, skb, tmp) {
 		if (skb->dev == dev) {
 			__skb_unlink(skb, &sd->input_pkt_queue);
 			kfree_skb(skb);
-			input_queue_head_incr(sd);
+			input_queue_head_add(sd, 1);
 		}
+	}
 	rps_unlock(sd);
+
+	skb_queue_walk_safe(&sd->process_queue, skb, tmp) {
+		if (skb->dev == dev) {
+			__skb_unlink(skb, &sd->process_queue);
+			kfree_skb(skb);
+		}
+	}
 }
 
 static int napi_gro_complete(struct sk_buff *skb)
@@ -3286,24 +3295,33 @@ static int process_backlog(struct napi_struct *napi, int quota)
 	}
 #endif
 	napi->weight = weight_p;
-	do {
+	local_irq_disable();
+	while (work < quota) {
 		struct sk_buff *skb;
+		unsigned int qlen;
+
+		while ((skb = __skb_dequeue(&sd->process_queue))) {
+			local_irq_enable();
+			__netif_receive_skb(skb);
+			if (++work >= quota)
+				return work;
+			local_irq_disable();
+		}
 
-		local_irq_disable();
 		rps_lock(sd);
-		skb = __skb_dequeue(&sd->input_pkt_queue);
-		if (!skb) {
+		qlen = skb_queue_len(&sd->input_pkt_queue);
+		if (qlen) {
+			input_queue_head_add(sd, qlen);
+			skb_queue_splice_tail_init(&sd->input_pkt_queue,
+						   &sd->process_queue);
+		}
+		if (qlen < quota - work) {
 			__napi_complete(napi);
-			rps_unlock(sd);
-			local_irq_enable();
-			break;
+			quota = work + qlen;
 		}
-		input_queue_head_incr(sd);
 		rps_unlock(sd);
-		local_irq_enable();
-
-		__netif_receive_skb(skb);
-	} while (++work < quota);
+	}
+	local_irq_enable();
 
 	return work;
 }
@@ -5630,8 +5648,10 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 	/* Process offline CPU's input_pkt_queue */
 	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx(skb);
-		input_queue_head_incr(oldsd);
+		input_queue_head_add(oldsd, 1);
 	}
+	while ((skb = __skb_dequeue(&oldsd->process_queue)))
+		netif_rx(skb);
 
 	return NOTIFY_OK;
 }
@@ -5850,6 +5870,7 @@ static int __init net_dev_init(void)
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
 		skb_queue_head_init(&sd->input_pkt_queue);
+		skb_queue_head_init(&sd->process_queue);
 		sd->completion_queue = NULL;
 		INIT_LIST_HEAD(&sd->poll_list);
 		sd->output_queue = NULL;
-- 
cgit v1.2.3


From cb84aa9b42b506299e5aea1ba4da26c03ab12877 Mon Sep 17 00:00:00 2001
From: Eric Paris <eparis@redhat.com>
Date: Tue, 27 Apr 2010 17:20:38 -0400
Subject: LSM Audit: rename LSM_AUDIT_NO_AUDIT to LSM_AUDIT_DATA_NONE

Most of the LSM common audit work uses LSM_AUDIT_DATA_* for the naming.
This was not so for LSM_AUDIT_NO_AUDIT which means the generic initializer
cannot be used.  This patch just renames the flag so the generic
initializer can be used.

Signed-off-by: Eric Paris <eparis@redhat.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/lsm_audit.h | 2 +-
 security/lsm_audit.c      | 2 +-
 security/selinux/avc.c    | 3 +--
 3 files changed, 3 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/lsm_audit.h b/include/linux/lsm_audit.h
index f78f83d7663f..6907251d5200 100644
--- a/include/linux/lsm_audit.h
+++ b/include/linux/lsm_audit.h
@@ -33,7 +33,7 @@ struct common_audit_data {
 #define LSM_AUDIT_DATA_IPC	4
 #define LSM_AUDIT_DATA_TASK	5
 #define LSM_AUDIT_DATA_KEY	6
-#define LSM_AUDIT_NO_AUDIT	7
+#define LSM_AUDIT_DATA_NONE	7
 #define LSM_AUDIT_DATA_KMOD	8
 	struct task_struct *tsk;
 	union 	{
diff --git a/security/lsm_audit.c b/security/lsm_audit.c
index acba3dfc8d29..8c3650672e23 100644
--- a/security/lsm_audit.c
+++ b/security/lsm_audit.c
@@ -220,7 +220,7 @@ static void dump_common_audit_data(struct audit_buffer *ab,
 	}
 
 	switch (a->type) {
-	case LSM_AUDIT_NO_AUDIT:
+	case LSM_AUDIT_DATA_NONE:
 		return;
 	case LSM_AUDIT_DATA_IPC:
 		audit_log_format(ab, " key=%d ", a->u.ipc_id);
diff --git a/security/selinux/avc.c b/security/selinux/avc.c
index 989fef82563a..7f1a304712a9 100644
--- a/security/selinux/avc.c
+++ b/security/selinux/avc.c
@@ -499,8 +499,7 @@ void avc_audit(u32 ssid, u32 tsid,
 		return;
 	if (!a) {
 		a = &stack_data;
-		memset(a, 0, sizeof(*a));
-		a->type = LSM_AUDIT_NO_AUDIT;
+		COMMON_AUDIT_DATA_INIT(a, NONE);
 	}
 	a->selinux_audit_data.tclass = tclass;
 	a->selinux_audit_data.requested = requested;
-- 
cgit v1.2.3


From bece7b2398d073d11b2e352405a3ecd3a1e39c60 Mon Sep 17 00:00:00 2001
From: Sjur Braendeland <sjur.brandeland@stericsson.com>
Date: Wed, 28 Apr 2010 08:54:38 +0000
Subject: caif: Rewritten socket implementation

Changes:
 This is a complete rewrite of the socket layer, making the socket
 implementation more aligned with the other socket layers and using more
 of the support functions available in sock.c. Lots of code is copied
 from af_unix (and some from af_irda).
 Non-blocking mode should be working as well.

Signed-off-by: Sjur Braendeland <sjur.brandeland@stericsson.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/caif/caif_socket.h |    5 +-
 net/caif/caif_socket.c           | 1795 ++++++++++++++++++--------------------
 2 files changed, 831 insertions(+), 969 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/caif/caif_socket.h b/include/linux/caif/caif_socket.h
index 8e5c8444a3f4..2a61eb1beb85 100644
--- a/include/linux/caif/caif_socket.h
+++ b/include/linux/caif/caif_socket.h
@@ -16,7 +16,6 @@
 #include <sys/socket.h>
 #endif
 
-
 /**
  * enum caif_link_selector -    Physical Link Selection.
  * @CAIF_LINK_HIGH_BANDW:	Physical interface for high-bandwidth
@@ -59,7 +58,7 @@ enum caif_channel_priority {
 /**
  * enum caif_protocol_type  -	CAIF Channel type.
  * @CAIFPROTO_AT:		Classic AT channel.
- * @CAIFPROTO_DATAGRAM:		Datagram channel.
+ * @CAIFPROTO_DATAGRAM:	Datagram channel.
  * @CAIFPROTO_DATAGRAM_LOOP:	Datagram loopback channel, used for testing.
  * @CAIFPROTO_UTIL:		Utility (Psock) channel.
  * @CAIFPROTO_RFM:		Remote File Manager
@@ -87,6 +86,7 @@ enum caif_at_type {
 
 /**
  * struct sockaddr_caif - the sockaddr structure for CAIF sockets.
+ * @family:		     Address family number, must be AF_CAIF.
  * @u:			     Union of address data 'switched' by family.
  * :
  * @u.at:                    Applies when family = CAIFPROTO_AT.
@@ -153,6 +153,7 @@ struct sockaddr_caif {
  *
  *
  * This enum defines the CAIF Socket options to be used on a socket
+ * of type PF_CAIF.
  *
  */
 enum caif_socket_opts {
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index d455375789fb..c3a70c5c893a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -1,7 +1,6 @@
 /*
  * Copyright (C) ST-Ericsson AB 2010
  * Author:	Sjur Brendeland sjur.brandeland@stericsson.com
- *		Per Sigmond per.sigmond@stericsson.com
  * License terms: GNU General Public License (GPL) version 2
  */
 
@@ -16,91 +15,52 @@
 #include <linux/poll.h>
 #include <linux/tcp.h>
 #include <linux/uaccess.h>
-#include <asm/atomic.h>
-
+#include <linux/mutex.h>
+#include <linux/debugfs.h>
 #include <linux/caif/caif_socket.h>
+#include <asm/atomic.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
 #include <net/caif/caif_layer.h>
 #include <net/caif/caif_dev.h>
 #include <net/caif/cfpkt.h>
 
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NETPROTO(AF_CAIF);
+
+#define CAIF_DEF_SNDBUF (CAIF_MAX_PAYLOAD_SIZE*10)
+#define CAIF_DEF_RCVBUF (CAIF_MAX_PAYLOAD_SIZE*100)
+
+/*
+ * CAIF state is re-using the TCP socket states.
+ * caif_states stored in sk_state reflect the state as reported by
+ * the CAIF stack, while sk_socket->state is the state of the socket.
+ */
+enum caif_states {
+	CAIF_CONNECTED		= TCP_ESTABLISHED,
+	CAIF_CONNECTING	= TCP_SYN_SENT,
+	CAIF_DISCONNECTED	= TCP_CLOSE
+};
+
+#define TX_FLOW_ON_BIT	1
+#define RX_FLOW_ON_BIT	2
 
-#define CHNL_SKT_READ_QUEUE_HIGH 200
-#define CHNL_SKT_READ_QUEUE_LOW 100
-
-static int caif_sockbuf_size = 40000;
-static atomic_t caif_nr_socks = ATOMIC_INIT(0);
-
-#define CONN_STATE_OPEN_BIT	      1
-#define CONN_STATE_PENDING_BIT	      2
-#define CONN_STATE_PEND_DESTROY_BIT   3
-#define CONN_REMOTE_SHUTDOWN_BIT      4
-
-#define TX_FLOW_ON_BIT		      1
-#define RX_FLOW_ON_BIT		      2
-
-#define STATE_IS_OPEN(cf_sk) test_bit(CONN_STATE_OPEN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define STATE_IS_REMOTE_SHUTDOWN(cf_sk) test_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define STATE_IS_PENDING(cf_sk) test_bit(CONN_STATE_PENDING_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-#define STATE_IS_PENDING_DESTROY(cf_sk) test_bit(CONN_STATE_PEND_DESTROY_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-
-#define SET_STATE_PENDING_DESTROY(cf_sk) set_bit(CONN_STATE_PEND_DESTROY_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_STATE_OPEN(cf_sk) set_bit(CONN_STATE_OPEN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_STATE_CLOSED(cf_sk) clear_bit(CONN_STATE_OPEN_BIT,\
-					(void *) &(cf_sk)->conn_state)
-#define SET_PENDING_ON(cf_sk) set_bit(CONN_STATE_PENDING_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-#define SET_PENDING_OFF(cf_sk) clear_bit(CONN_STATE_PENDING_BIT,\
-				       (void *) &(cf_sk)->conn_state)
-#define SET_REMOTE_SHUTDOWN(cf_sk) set_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(cf_sk)->conn_state)
-
-#define SET_REMOTE_SHUTDOWN_OFF(dev) clear_bit(CONN_REMOTE_SHUTDOWN_BIT,\
-				    (void *) &(dev)->conn_state)
-#define RX_FLOW_IS_ON(cf_sk) test_bit(RX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-#define TX_FLOW_IS_ON(cf_sk) test_bit(TX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-
-#define SET_RX_FLOW_OFF(cf_sk) clear_bit(RX_FLOW_ON_BIT,\
-				       (void *) &(cf_sk)->flow_state)
-#define SET_RX_FLOW_ON(cf_sk) set_bit(RX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-#define SET_TX_FLOW_OFF(cf_sk) clear_bit(TX_FLOW_ON_BIT,\
-				       (void *) &(cf_sk)->flow_state)
-#define SET_TX_FLOW_ON(cf_sk) set_bit(TX_FLOW_ON_BIT,\
-				    (void *) &(cf_sk)->flow_state)
-
-#define SKT_READ_FLAG 0x01
-#define SKT_WRITE_FLAG 0x02
 static struct dentry *debugfsdir;
-#include <linux/debugfs.h>
 
 #ifdef CONFIG_DEBUG_FS
 struct debug_fs_counter {
-	atomic_t num_open;
-	atomic_t num_close;
-	atomic_t num_init;
-	atomic_t num_init_resp;
-	atomic_t num_init_fail_resp;
-	atomic_t num_deinit;
-	atomic_t num_deinit_resp;
+	atomic_t caif_nr_socks;
+	atomic_t num_connect_req;
+	atomic_t num_connect_resp;
+	atomic_t num_connect_fail_resp;
+	atomic_t num_disconnect;
 	atomic_t num_remote_shutdown_ind;
 	atomic_t num_tx_flow_off_ind;
 	atomic_t num_tx_flow_on_ind;
 	atomic_t num_rx_flow_off;
 	atomic_t num_rx_flow_on;
-	atomic_t skb_in_use;
-	atomic_t skb_alloc;
-	atomic_t skb_free;
 };
-static struct debug_fs_counter cnt;
+struct debug_fs_counter cnt;
 #define	dbfs_atomic_inc(v) atomic_inc(v)
 #define	dbfs_atomic_dec(v) atomic_dec(v)
 #else
@@ -108,624 +68,666 @@ static struct debug_fs_counter cnt;
 #define	dbfs_atomic_dec(v)
 #endif
 
-/* The AF_CAIF socket */
 struct caifsock {
-	/* NOTE: sk has to be the first member */
-	struct sock sk;
+	struct sock sk; /* must be first member */
 	struct cflayer layer;
-	char name[CAIF_LAYER_NAME_SZ];
-	u32 conn_state;
+	char name[CAIF_LAYER_NAME_SZ]; /* Used for debugging */
 	u32 flow_state;
-	struct cfpktq *pktq;
-	int file_mode;
 	struct caif_connect_request conn_req;
-	int read_queue_len;
-	/* protect updates of read_queue_len */
-	spinlock_t read_queue_len_lock;
+	struct mutex readlock;
 	struct dentry *debugfs_socket_dir;
 };
 
-static void drain_queue(struct caifsock *cf_sk);
+static int rx_flow_is_on(struct caifsock *cf_sk)
+{
+	return test_bit(RX_FLOW_ON_BIT,
+			(void *) &cf_sk->flow_state);
+}
+
+static int tx_flow_is_on(struct caifsock *cf_sk)
+{
+	return test_bit(TX_FLOW_ON_BIT,
+			(void *) &cf_sk->flow_state);
+}
 
-/* Packet Receive Callback function called from CAIF Stack */
-static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
+static void set_rx_flow_off(struct caifsock *cf_sk)
 {
-	struct caifsock *cf_sk;
-	int read_queue_high;
-	cf_sk = container_of(layr, struct caifsock, layer);
+	 clear_bit(RX_FLOW_ON_BIT,
+		 (void *) &cf_sk->flow_state);
+}
 
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/*FIXME: This should be allowed finally!*/
-		pr_debug("CAIF: %s(): called after close request\n", __func__);
-		cfpkt_destroy(pkt);
-		return 0;
-	}
-	/* NOTE: This function may be called in Tasklet context! */
+static void set_rx_flow_on(struct caifsock *cf_sk)
+{
+	 set_bit(RX_FLOW_ON_BIT,
+			(void *) &cf_sk->flow_state);
+}
 
-	/* The queue has its own lock */
-	cfpkt_queue(cf_sk->pktq, pkt, 0);
+static void set_tx_flow_off(struct caifsock *cf_sk)
+{
+	 clear_bit(TX_FLOW_ON_BIT,
+		(void *) &cf_sk->flow_state);
+}
 
-	spin_lock(&cf_sk->read_queue_len_lock);
-	cf_sk->read_queue_len++;
+static void set_tx_flow_on(struct caifsock *cf_sk)
+{
+	 set_bit(TX_FLOW_ON_BIT,
+		(void *) &cf_sk->flow_state);
+}
 
-	read_queue_high = (cf_sk->read_queue_len > CHNL_SKT_READ_QUEUE_HIGH);
-	spin_unlock(&cf_sk->read_queue_len_lock);
+static void caif_read_lock(struct sock *sk)
+{
+	struct caifsock *cf_sk;
+	cf_sk = container_of(sk, struct caifsock, sk);
+	mutex_lock(&cf_sk->readlock);
+}
 
-	if (RX_FLOW_IS_ON(cf_sk) && read_queue_high) {
-		dbfs_atomic_inc(&cnt.num_rx_flow_off);
-		SET_RX_FLOW_OFF(cf_sk);
+static void caif_read_unlock(struct sock *sk)
+{
+	struct caifsock *cf_sk;
+	cf_sk = container_of(sk, struct caifsock, sk);
+	mutex_unlock(&cf_sk->readlock);
+}
 
-		/* Send flow off (NOTE: must not sleep) */
-		pr_debug("CAIF: %s():"
-			" sending flow OFF (queue len = %d)\n",
-			__func__,
-		     cf_sk->read_queue_len);
-		caif_assert(cf_sk->layer.dn);
-		caif_assert(cf_sk->layer.dn->ctrlcmd);
+int sk_rcvbuf_lowwater(struct caifsock *cf_sk)
+{
+	/* A quarter of the full buffer is used as the low water mark */
+	return cf_sk->sk.sk_rcvbuf / 4;
+}
 
-		(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-					       CAIF_MODEMCMD_FLOW_OFF_REQ);
-	}
+void caif_flow_ctrl(struct sock *sk, int mode)
+{
+	struct caifsock *cf_sk;
+	cf_sk = container_of(sk, struct caifsock, sk);
+	if (cf_sk->layer.dn)
+		cf_sk->layer.dn->modemcmd(cf_sk->layer.dn, mode);
+}
 
-	/* Signal reader that data is available. */
+/*
+ * Copied from sock.c:sock_queue_rcv_skb(), but changed so packets are
+ * not dropped, but CAIF is sending flow off instead.
+ */
+int caif_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
+{
+	int err;
+	int skb_len;
+	unsigned long flags;
+	struct sk_buff_head *list = &sk->sk_receive_queue;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	wake_up_interruptible(sk_sleep(&cf_sk->sk));
+	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
+		(unsigned)sk->sk_rcvbuf && rx_flow_is_on(cf_sk)) {
+		trace_printk("CAIF: %s():"
+			" sending flow OFF (queue len = %d %d)\n",
+			__func__,
+			atomic_read(&cf_sk->sk.sk_rmem_alloc),
+			sk_rcvbuf_lowwater(cf_sk));
+		set_rx_flow_off(cf_sk);
+		if (cf_sk->layer.dn)
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_OFF_REQ);
+	}
 
+	err = sk_filter(sk, skb);
+	if (err)
+		return err;
+	if (!sk_rmem_schedule(sk, skb->truesize) && rx_flow_is_on(cf_sk)) {
+		set_rx_flow_off(cf_sk);
+		trace_printk("CAIF: %s():"
+			" sending flow OFF due to rmem_schedule\n",
+			__func__);
+		if (cf_sk->layer.dn)
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_OFF_REQ);
+	}
+	skb->dev = NULL;
+	skb_set_owner_r(skb, sk);
+	/* Cache the SKB length before we tack it onto the receive
+	 * queue. Once it is added it no longer belongs to us and
+	 * may be freed by other threads of control pulling packets
+	 * from the queue.
+	 */
+	skb_len = skb->len;
+	spin_lock_irqsave(&list->lock, flags);
+	if (!sock_flag(sk, SOCK_DEAD))
+		__skb_queue_tail(list, skb);
+	spin_unlock_irqrestore(&list->lock, flags);
+
+	if (!sock_flag(sk, SOCK_DEAD))
+		sk->sk_data_ready(sk, skb_len);
+	else
+		kfree_skb(skb);
 	return 0;
 }
 
-/* Packet Flow Control Callback function called from CAIF */
-static void caif_sktflowctrl_cb(struct cflayer *layr,
-				enum caif_ctrlcmd flow,
-				int phyid)
+/* Packet Receive Callback function called from CAIF Stack */
+static int caif_sktrecv_cb(struct cflayer *layr, struct cfpkt *pkt)
 {
 	struct caifsock *cf_sk;
-
-	/* NOTE: This function may be called in Tasklet context! */
-	pr_debug("CAIF: %s(): flowctrl func called: %s.\n",
-		      __func__,
-		      flow == CAIF_CTRLCMD_FLOW_ON_IND ? "ON" :
-		      flow == CAIF_CTRLCMD_FLOW_OFF_IND ? "OFF" :
-		      flow == CAIF_CTRLCMD_INIT_RSP ? "INIT_RSP" :
-		      flow == CAIF_CTRLCMD_DEINIT_RSP ? "DEINIT_RSP" :
-		      flow == CAIF_CTRLCMD_INIT_FAIL_RSP ? "INIT_FAIL_RSP" :
-		      flow ==
-		      CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND ? "REMOTE_SHUTDOWN" :
-		      "UKNOWN CTRL COMMAND");
-
-	if (layr == NULL)
-		return;
+	struct sk_buff *skb;
 
 	cf_sk = container_of(layr, struct caifsock, layer);
+	skb = cfpkt_tonative(pkt);
+
+	if (unlikely(cf_sk->sk.sk_state != CAIF_CONNECTED)) {
+		cfpkt_destroy(pkt);
+		return 0;
+	}
+	caif_queue_rcv_skb(&cf_sk->sk, skb);
+	return 0;
+}
 
+/* Packet Control Callback function called from CAIF */
+static void caif_ctrl_cb(struct cflayer *layr,
+				enum caif_ctrlcmd flow,
+				int phyid)
+{
+	struct caifsock *cf_sk = container_of(layr, struct caifsock, layer);
 	switch (flow) {
 	case CAIF_CTRLCMD_FLOW_ON_IND:
+		/* OK from modem to start sending again */
 		dbfs_atomic_inc(&cnt.num_tx_flow_on_ind);
-		/* Signal reader that data is available. */
-		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_FLOW_OFF_IND:
+		/* Modem asks us to shut up */
 		dbfs_atomic_inc(&cnt.num_tx_flow_off_ind);
-		SET_TX_FLOW_OFF(cf_sk);
+		set_tx_flow_off(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_INIT_RSP:
-		dbfs_atomic_inc(&cnt.num_init_resp);
-		/* Signal reader that data is available. */
-		caif_assert(STATE_IS_OPEN(cf_sk));
-		SET_PENDING_OFF(cf_sk);
-		SET_TX_FLOW_ON(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		/* We're now connected */
+		dbfs_atomic_inc(&cnt.num_connect_resp);
+		cf_sk->sk.sk_state = CAIF_CONNECTED;
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_DEINIT_RSP:
-		dbfs_atomic_inc(&cnt.num_deinit_resp);
-		caif_assert(!STATE_IS_OPEN(cf_sk));
-		SET_PENDING_OFF(cf_sk);
-		if (!STATE_IS_PENDING_DESTROY(cf_sk)) {
-			if (sk_sleep(&cf_sk->sk) != NULL)
-				wake_up_interruptible(sk_sleep(&cf_sk->sk));
-		}
-		dbfs_atomic_inc(&cnt.num_deinit);
-		sock_put(&cf_sk->sk);
+		/* We're now disconnected */
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
+		cfcnfg_release_adap_layer(&cf_sk->layer);
 		break;
 
 	case CAIF_CTRLCMD_INIT_FAIL_RSP:
-		dbfs_atomic_inc(&cnt.num_init_fail_resp);
-		caif_assert(STATE_IS_OPEN(cf_sk));
-		SET_STATE_CLOSED(cf_sk);
-		SET_PENDING_OFF(cf_sk);
-		SET_TX_FLOW_OFF(cf_sk);
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		/* Connect request failed */
+		dbfs_atomic_inc(&cnt.num_connect_fail_resp);
+		cf_sk->sk.sk_err = ECONNREFUSED;
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
+		/*
+		 * Socket "standards" seems to require POLLOUT to
+		 * be set at connect failure.
+		 */
+		set_tx_flow_on(cf_sk);
+		cf_sk->sk.sk_state_change(&cf_sk->sk);
 		break;
 
 	case CAIF_CTRLCMD_REMOTE_SHUTDOWN_IND:
+		/* Modem has closed this connection, or device is down. */
 		dbfs_atomic_inc(&cnt.num_remote_shutdown_ind);
-		SET_REMOTE_SHUTDOWN(cf_sk);
-		/* Use sk_shutdown to indicate remote shutdown indication */
-		cf_sk->sk.sk_shutdown |= RCV_SHUTDOWN;
-		cf_sk->file_mode = 0;
-		wake_up_interruptible(sk_sleep(&cf_sk->sk));
+		cf_sk->sk.sk_shutdown = SHUTDOWN_MASK;
+		cf_sk->sk.sk_err = ECONNRESET;
+		set_rx_flow_on(cf_sk);
+		cf_sk->sk.sk_error_report(&cf_sk->sk);
 		break;
 
 	default:
 		pr_debug("CAIF: %s(): Unexpected flow command %d\n",
-			      __func__, flow);
+				__func__, flow);
 	}
 }
 
-static void skb_destructor(struct sk_buff *skb)
+static void caif_check_flow_release(struct sock *sk)
 {
-	dbfs_atomic_inc(&cnt.skb_free);
-	dbfs_atomic_dec(&cnt.skb_in_use);
-}
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
+	if (cf_sk->layer.dn == NULL || cf_sk->layer.dn->modemcmd == NULL)
+		return;
+	if (rx_flow_is_on(cf_sk))
+		return;
 
-static int caif_recvmsg(struct kiocb *iocb, struct socket *sock,
+	if (atomic_read(&sk->sk_rmem_alloc) <= sk_rcvbuf_lowwater(cf_sk)) {
+			dbfs_atomic_inc(&cnt.num_rx_flow_on);
+			set_rx_flow_on(cf_sk);
+			cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
+						CAIF_MODEMCMD_FLOW_ON_REQ);
+	}
+}
+/*
+ * Copied from sock.c:sock_queue_rcv_skb(), and added check that user buffer
+ * has sufficient size.
+ */
+
+static int caif_seqpkt_recvmsg(struct kiocb *iocb, struct socket *sock,
 				struct msghdr *m, size_t buf_len, int flags)
 
 {
 	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	struct cfpkt *pkt = NULL;
-	size_t len;
-	int result;
 	struct sk_buff *skb;
-	ssize_t ret = -EIO;
-	int read_queue_low;
-
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): private_data not set!\n",
-			      __func__);
-		ret = -EBADFD;
-		goto read_error;
-	}
-
-	/* Don't do multiple iovec entries yet */
-	if (m->msg_iovlen != 1)
-		return -EOPNOTSUPP;
+	int ret = 0;
+	int len;
 
 	if (unlikely(!buf_len))
 		return -EINVAL;
 
-	lock_sock(&(cf_sk->sk));
-
-	caif_assert(cf_sk->pktq);
-
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Socket is closed or closing. */
-		if (!STATE_IS_PENDING(cf_sk)) {
-			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
-				 __func__);
-			ret = -EPIPE;
-		} else {
-			pr_debug("CAIF: %s(): socket is closing..\n", __func__);
-			ret = -EBADF;
-		}
+	skb = skb_recv_datagram(sk, flags, 0 , &ret);
+	if (!skb)
 		goto read_error;
-	}
-	/* Socket is open or opening. */
-	if (STATE_IS_PENDING(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
-
-		if (flags & MSG_DONTWAIT) {
-			/* We can't block. */
-			pr_debug("CAIF: %s():state pending and MSG_DONTWAIT\n",
-				 __func__);
-			ret = -EAGAIN;
-			goto read_error;
-		}
 
+	len = skb->len;
+
+	if (skb && skb->len > buf_len && !(flags & MSG_PEEK)) {
+		len = buf_len;
 		/*
-		 * Blocking mode; state is pending and we need to wait
-		 * for its conclusion.
+		 * Push skb back on receive queue if buffer too small.
+		 * This has a built-in race where multi-threaded receive
+		 * may get packet in wrong order, but multiple read does
+		 * not really guarantee ordered delivery anyway.
+		 * Let's optimize for speed without taking locks.
 		 */
-		release_sock(&cf_sk->sk);
-
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
 
-		lock_sock(&(cf_sk->sk));
-
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal (1)", __func__);
-			ret = -ERESTARTSYS;
-			goto read_error;
-		}
+		skb_queue_head(&sk->sk_receive_queue, skb);
+		ret = -EMSGSIZE;
+		goto read_error;
 	}
 
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
-		!STATE_IS_OPEN(cf_sk) ||
-		STATE_IS_PENDING(cf_sk)) {
-
-		pr_debug("CAIF: %s(): socket closed\n",
-			__func__);
-		ret = -ESHUTDOWN;
+	ret = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
+	if (ret)
 		goto read_error;
-	}
 
-	/*
-	 * Block if we don't have any received buffers.
-	 * The queue has its own lock.
-	 */
-	while ((pkt = cfpkt_qpeek(cf_sk->pktq)) == NULL) {
+	skb_free_datagram(sk, skb);
 
-		if (flags & MSG_DONTWAIT) {
-			pr_debug("CAIF: %s(): MSG_DONTWAIT\n", __func__);
-			ret = -EAGAIN;
-			goto read_error;
-		}
-		trace_printk("CAIF: %s() wait_event\n", __func__);
+	caif_check_flow_release(sk);
 
-		/* Let writers in. */
-		release_sock(&cf_sk->sk);
+	return len;
 
-		/* Block reader until data arrives or socket is closed. */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					cfpkt_qpeek(cf_sk->pktq)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)) ==
-		    -ERESTARTSYS) {
-			pr_debug("CAIF: %s():"
-				" wait_event_interruptible woken by "
-				"a signal, signal_pending(current) = %d\n",
-				__func__,
-				signal_pending(current));
-			return -ERESTARTSYS;
-		}
+read_error:
+	return ret;
+}
 
-		trace_printk("CAIF: %s() awake\n", __func__);
-		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-			pr_debug("CAIF: %s(): "
-				 "received remote_shutdown indication\n",
-				 __func__);
-			ret = -ESHUTDOWN;
-			goto read_error_no_unlock;
-		}
 
-		/* I want to be alone on cf_sk (except status and queue). */
-		lock_sock(&(cf_sk->sk));
+/* Copied from unix_stream_wait_data, identical except for lock call. */
+static long caif_stream_data_wait(struct sock *sk, long timeo)
+{
+	DEFINE_WAIT(wait);
+	lock_sock(sk);
+
+	for (;;) {
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+
+		if (!skb_queue_empty(&sk->sk_receive_queue) ||
+			sk->sk_err ||
+			sk->sk_state != CAIF_CONNECTED ||
+			sock_flag(sk, SOCK_DEAD) ||
+			(sk->sk_shutdown & RCV_SHUTDOWN) ||
+			signal_pending(current) ||
+			!timeo)
+			break;
 
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* Someone closed the link, report error. */
-			pr_debug("CAIF: %s(): remote end shutdown!\n",
-				      __func__);
-			ret = -EPIPE;
-			goto read_error;
-		}
+		set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
+		release_sock(sk);
+		timeo = schedule_timeout(timeo);
+		lock_sock(sk);
+		clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags);
 	}
 
-	/* The queue has its own lock. */
-	len = cfpkt_getlen(pkt);
-
-	/* Check max length that can be copied. */
-	if (len <= buf_len)
-		pkt = cfpkt_dequeue(cf_sk->pktq);
-	else {
-		pr_debug("CAIF: %s(): user buffer too small (%ld,%ld)\n",
-			 __func__, (long) len, (long) buf_len);
-		if (sock->type == SOCK_SEQPACKET) {
-			ret = -EMSGSIZE;
-			goto read_error;
-		}
-		len = buf_len;
-	}
+	finish_wait(sk_sleep(sk), &wait);
+	release_sock(sk);
+	return timeo;
+}
 
 
-	spin_lock(&cf_sk->read_queue_len_lock);
-	cf_sk->read_queue_len--;
-	read_queue_low = (cf_sk->read_queue_len < CHNL_SKT_READ_QUEUE_LOW);
-	spin_unlock(&cf_sk->read_queue_len_lock);
+/*
+ * Copied from unix_stream_recvmsg, but removed credit checks,
+ * changed locking calls, changed address handling.
+ */
+static int caif_stream_recvmsg(struct kiocb *iocb, struct socket *sock,
+				struct msghdr *msg, size_t size,
+				int flags)
+{
+	struct sock *sk = sock->sk;
+	int copied = 0;
+	int target;
+	int err = 0;
+	long timeo;
 
-	if (!RX_FLOW_IS_ON(cf_sk) && read_queue_low) {
-		dbfs_atomic_inc(&cnt.num_rx_flow_on);
-		SET_RX_FLOW_ON(cf_sk);
+	err = -EOPNOTSUPP;
+	if (flags&MSG_OOB)
+		goto out;
 
-		/* Send flow on. */
-		pr_debug("CAIF: %s(): sending flow ON (queue len = %d)\n",
-			 __func__, cf_sk->read_queue_len);
-		caif_assert(cf_sk->layer.dn);
-		caif_assert(cf_sk->layer.dn->ctrlcmd);
-		(void) cf_sk->layer.dn->modemcmd(cf_sk->layer.dn,
-					       CAIF_MODEMCMD_FLOW_ON_REQ);
+	msg->msg_namelen = 0;
 
-		caif_assert(cf_sk->read_queue_len >= 0);
-	}
+	/*
+	 * Lock the socket to prevent queue disordering
+	 * while sleeping in memcpy_toiovec
+	 */
+	err = -EAGAIN;
+	if (sk->sk_state == CAIF_CONNECTING)
+		goto out;
 
-	skb = cfpkt_tonative(pkt);
-	result = skb_copy_datagram_iovec(skb, 0, m->msg_iov, len);
-	skb_pull(skb, len);
+	caif_read_lock(sk);
+	target = sock_rcvlowat(sk, flags&MSG_WAITALL, size);
+	timeo = sock_rcvtimeo(sk, flags&MSG_DONTWAIT);
 
-	if (result) {
-		pr_debug("CAIF: %s(): copy to_iovec failed\n", __func__);
-		cfpkt_destroy(pkt);
-		ret = -EFAULT;
-		goto read_error;
-	}
+	do {
+		int chunk;
+		struct sk_buff *skb;
 
-	/* Free packet and remove from queue */
-	if (skb->len == 0)
-		skb_free_datagram(sk, skb);
+		lock_sock(sk);
+		skb = skb_dequeue(&sk->sk_receive_queue);
+		caif_check_flow_release(sk);
 
-	/* Let the others in. */
-	release_sock(&cf_sk->sk);
-	return len;
+		if (skb == NULL) {
+			if (copied >= target)
+				goto unlock;
+			/*
+			 *	POSIX 1003.1g mandates this order.
+			 */
+			err = sock_error(sk);
+			if (err)
+				goto unlock;
+			err = -ECONNRESET;
+			if (sk->sk_shutdown & RCV_SHUTDOWN)
+				goto unlock;
 
-read_error:
-	release_sock(&cf_sk->sk);
-read_error_no_unlock:
-	return ret;
-}
+			err = -EPIPE;
+			if (sk->sk_state != CAIF_CONNECTED)
+				goto unlock;
+			if (sock_flag(sk, SOCK_DEAD))
+				goto unlock;
 
-/* Send a signal as a consequence of sendmsg, sendto or caif_sendmsg. */
-static int caif_sendmsg(struct kiocb *kiocb, struct socket *sock,
-			struct msghdr *msg, size_t len)
-{
+			release_sock(sk);
 
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	size_t payload_size = msg->msg_iov->iov_len;
-	struct cfpkt *pkt = NULL;
-	struct caif_payload_info info;
-	unsigned char *txbuf;
-	ssize_t ret = -EIO;
-	int result;
-	struct sk_buff *skb;
-	caif_assert(msg->msg_iovlen == 1);
+			err = -EAGAIN;
+			if (!timeo)
+				break;
 
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): private_data not set!\n",
-			      __func__);
-		ret = -EBADFD;
-		goto write_error_no_unlock;
-	}
+			caif_read_unlock(sk);
 
-	if (unlikely(msg->msg_iov->iov_base == NULL)) {
-		pr_warning("CAIF: %s(): Buffer is NULL.\n", __func__);
-		ret = -EINVAL;
-		goto write_error_no_unlock;
-	}
+			timeo = caif_stream_data_wait(sk, timeo);
 
-	if (payload_size > CAIF_MAX_PAYLOAD_SIZE) {
-		pr_debug("CAIF: %s(): buffer too long\n", __func__);
-		if (sock->type == SOCK_SEQPACKET) {
-			ret = -EINVAL;
-			goto write_error_no_unlock;
+			if (signal_pending(current)) {
+				err = sock_intr_errno(timeo);
+				goto out;
+			}
+			caif_read_lock(sk);
+			continue;
+unlock:
+			release_sock(sk);
+			break;
 		}
-		payload_size = CAIF_MAX_PAYLOAD_SIZE;
-	}
+		release_sock(sk);
+		chunk = min_t(unsigned int, skb->len, size);
+		if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) {
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			if (copied == 0)
+				copied = -EFAULT;
+			break;
+		}
+		copied += chunk;
+		size -= chunk;
 
-	/* I want to be alone on cf_sk (except status and queue) */
-	lock_sock(&(cf_sk->sk));
+		/* Mark read part of skb as used */
+		if (!(flags & MSG_PEEK)) {
+			skb_pull(skb, chunk);
 
-	caif_assert(cf_sk->pktq);
+			/* put the skb back if we didn't use it up. */
+			if (skb->len) {
+				skb_queue_head(&sk->sk_receive_queue, skb);
+				break;
+			}
+			kfree_skb(skb);
 
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Socket is closed or closing */
-		if (!STATE_IS_PENDING(cf_sk)) {
-			pr_debug("CAIF: %s(): socket is closed (by remote)\n",
-				 __func__);
-			ret = -EPIPE;
 		} else {
-			pr_debug("CAIF: %s(): socket is closing...\n",
-				 __func__);
-			ret = -EBADF;
-		}
-		goto write_error;
-	}
-
-	/* Socket is open or opening */
-	if (STATE_IS_PENDING(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opening...\n", __func__);
-
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			/* We can't block */
-			trace_printk("CAIF: %s():state pending:"
-				     "state=MSG_DONTWAIT\n", __func__);
-			ret = -EAGAIN;
-			goto write_error;
+			/*
+			 * It is questionable, see note in unix_dgram_recvmsg.
+			 */
+			/* put message back and return */
+			skb_queue_head(&sk->sk_receive_queue, skb);
+			break;
 		}
-		/* Let readers in */
-		release_sock(&cf_sk->sk);
-
-		/*
-		 * Blocking mode; state is pending and we need to wait
-		 * for its conclusion.
-		 */
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
+	} while (size);
+	caif_read_unlock(sk);
 
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal (1)", __func__);
-			ret = -ERESTARTSYS;
-			goto write_error;
-		}
-	}
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk) ||
-		!STATE_IS_OPEN(cf_sk) ||
-		STATE_IS_PENDING(cf_sk)) {
+out:
+	return copied ? : err;
+}
 
-		pr_debug("CAIF: %s(): socket closed\n",
-			__func__);
-		ret = -ESHUTDOWN;
-		goto write_error;
+/*
+ * Copied from sock.c:sock_wait_for_wmem, but change to wait for
+ * CAIF flow-on and sock_writable.
+ */
+static long caif_wait_for_flow_on(struct caifsock *cf_sk,
+				int wait_writeable, long timeo, int *err)
+{
+	struct sock *sk = &cf_sk->sk;
+	DEFINE_WAIT(wait);
+	for (;;) {
+		*err = 0;
+		if (tx_flow_is_on(cf_sk) &&
+			(!wait_writeable || sock_writeable(&cf_sk->sk)))
+			break;
+		*err = -ETIMEDOUT;
+		if (!timeo)
+			break;
+		*err = -ERESTARTSYS;
+		if (signal_pending(current))
+			break;
+		prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
+		*err = -ECONNRESET;
+		if (sk->sk_shutdown & SHUTDOWN_MASK)
+			break;
+		*err = -sk->sk_err;
+		if (sk->sk_err)
+			break;
+		*err = -EPIPE;
+		if (cf_sk->sk.sk_state != CAIF_CONNECTED)
+			break;
+		timeo = schedule_timeout(timeo);
 	}
+	finish_wait(sk_sleep(sk), &wait);
+	return timeo;
+}
 
-	if (!TX_FLOW_IS_ON(cf_sk)) {
+/*
+ * Transmit a SKB. The device may temporarily request re-transmission
+ * by returning EAGAIN.
+ */
+static int transmit_skb(struct sk_buff *skb, struct caifsock *cf_sk,
+			int noblock, long timeo)
+{
+	struct cfpkt *pkt;
+	int ret, loopcnt = 0;
 
-		/* Flow is off. Check non-block flag */
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			trace_printk("CAIF: %s(): MSG_DONTWAIT and tx flow off",
-				 __func__);
-			ret = -EAGAIN;
-			goto write_error;
-		}
+	pkt = cfpkt_fromnative(CAIF_DIR_OUT, skb);
+	memset(cfpkt_info(pkt), 0, sizeof(struct caif_payload_info));
+	do {
 
-		/* release lock before waiting */
-		release_sock(&cf_sk->sk);
+		ret = -ETIMEDOUT;
 
-		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					TX_FLOW_IS_ON(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					) == -ERESTARTSYS) {
-			pr_debug("CAIF: %s():"
-				 " wait_event_interruptible woken by a signal",
-				 __func__);
-			ret = -ERESTARTSYS;
-			goto write_error_no_unlock;
+		/* Slight paranoia, probably not needed. */
+		if (unlikely(loopcnt++ > 1000)) {
+			pr_warning("CAIF: %s(): transmit retries failed,"
+				" error = %d\n", __func__, ret);
+			break;
 		}
 
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
-
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* someone closed the link, report error */
-			pr_debug("CAIF: %s(): remote end shutdown!\n",
-				      __func__);
-			ret = -EPIPE;
-			goto write_error;
+		if (cf_sk->layer.dn != NULL)
+			ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
+		if (likely(ret >= 0))
+			break;
+		/* If transmit returns -EAGAIN, retry unless non-blocking */
+		if (noblock && ret == -EAGAIN)
+			break;
+		timeo = caif_wait_for_flow_on(cf_sk, 0, timeo, &ret);
+		if (signal_pending(current)) {
+			ret = sock_intr_errno(timeo);
+			break;
 		}
-
-		if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-			pr_debug("CAIF: %s(): "
-				 "received remote_shutdown indication\n",
-				 __func__);
-			ret = -ESHUTDOWN;
-			goto write_error;
+		if (ret)
+			break;
+		if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
+			sock_flag(&cf_sk->sk, SOCK_DEAD) ||
+			(cf_sk->sk.sk_shutdown & RCV_SHUTDOWN)) {
+			ret = -EPIPE;
+			cf_sk->sk.sk_err = EPIPE;
+			break;
 		}
-	}
+	} while (ret == -EAGAIN);
+	return ret;
+}
 
-	pkt = cfpkt_create(payload_size);
-	skb = (struct sk_buff *)pkt;
-	skb->destructor = skb_destructor;
-	skb->sk = sk;
-	dbfs_atomic_inc(&cnt.skb_alloc);
-	dbfs_atomic_inc(&cnt.skb_in_use);
-	if (cfpkt_raw_append(pkt, (void **) &txbuf, payload_size) < 0) {
-		pr_debug("CAIF: %s(): cfpkt_raw_append failed\n", __func__);
-		cfpkt_destroy(pkt);
-		ret = -EINVAL;
-		goto write_error;
-	}
+/* Copied from af_unix:unix_dgram_sendmsg, and adapted to CAIF */
+static int caif_seqpkt_sendmsg(struct kiocb *kiocb, struct socket *sock,
+			struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int buffer_size;
+	int ret = 0;
+	struct sk_buff *skb = NULL;
+	int noblock;
+	long timeo;
+	caif_assert(cf_sk);
+	ret = sock_error(sk);
+	if (ret)
+		goto err;
+
+	ret = -EOPNOTSUPP;
+	if (msg->msg_flags&MSG_OOB)
+		goto err;
+
+	ret = -EOPNOTSUPP;
+	if (msg->msg_namelen)
+		goto err;
+
+	ret = -EINVAL;
+	if (unlikely(msg->msg_iov->iov_base == NULL))
+		goto err;
+	noblock = msg->msg_flags & MSG_DONTWAIT;
+
+	buffer_size = len + CAIF_NEEDED_HEADROOM + CAIF_NEEDED_TAILROOM;
+
+	ret = -EMSGSIZE;
+	if (buffer_size > CAIF_MAX_PAYLOAD_SIZE)
+		goto err;
+
+	timeo = sock_sndtimeo(sk, noblock);
+	timeo = caif_wait_for_flow_on(container_of(sk, struct caifsock, sk),
+				1, timeo, &ret);
+
+	ret = -EPIPE;
+	if (cf_sk->sk.sk_state != CAIF_CONNECTED ||
+		sock_flag(sk, SOCK_DEAD) ||
+		(sk->sk_shutdown & RCV_SHUTDOWN))
+		goto err;
+
+	ret = -ENOMEM;
+	skb = sock_alloc_send_skb(sk, buffer_size, noblock, &ret);
+	if (!skb)
+		goto err;
+	skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+
+	ret = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+
+	if (ret)
+		goto err;
+	ret = transmit_skb(skb, cf_sk, noblock, timeo);
+	if (ret < 0)
+		goto err;
+	return len;
+err:
+	kfree_skb(skb);
+	return ret;
+}
 
-	/* Copy data into buffer. */
-	if (copy_from_user(txbuf, msg->msg_iov->iov_base, payload_size)) {
-		pr_debug("CAIF: %s(): copy_from_user returned non zero.\n",
-			 __func__);
-		cfpkt_destroy(pkt);
-		ret = -EINVAL;
-		goto write_error;
-	}
-	memset(&info, 0, sizeof(info));
+/*
+ * Copied from unix_stream_sendmsg and adapted to CAIF:
+ * removed permission handling, added waiting for flow on,
+ * and other minor adaptations.
+ */
+static int caif_stream_sendmsg(struct kiocb *kiocb, struct socket *sock,
+				struct msghdr *msg, size_t len)
+{
+	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int err, size;
+	struct sk_buff *skb;
+	int sent = 0;
+	long timeo;
 
-	/* Send the packet down the stack. */
-	caif_assert(cf_sk->layer.dn);
-	caif_assert(cf_sk->layer.dn->transmit);
+	err = -EOPNOTSUPP;
 
-	do {
-		ret = cf_sk->layer.dn->transmit(cf_sk->layer.dn, pkt);
+	if (unlikely(msg->msg_flags&MSG_OOB))
+		goto out_err;
 
-		if (likely((ret >= 0) || (ret != -EAGAIN)))
-			break;
+	if (unlikely(msg->msg_namelen))
+		goto out_err;
 
-		/* EAGAIN - retry */
-		if (msg->msg_flags & MSG_DONTWAIT) {
-			pr_debug("CAIF: %s(): NONBLOCK and transmit failed,"
-				 " error = %ld\n", __func__, (long) ret);
-			ret = -EAGAIN;
-			goto write_error;
-		}
+	timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+	timeo = caif_wait_for_flow_on(cf_sk, 1, timeo, &err);
 
-		/* Let readers in */
-		release_sock(&cf_sk->sk);
+	if (unlikely(sk->sk_shutdown & SEND_SHUTDOWN))
+		goto pipe_err;
 
-		/* Wait until flow is on or socket is closed */
-		if (wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					TX_FLOW_IS_ON(cf_sk)
-					|| !STATE_IS_OPEN(cf_sk)
-					|| STATE_IS_REMOTE_SHUTDOWN(cf_sk)
-					) == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 " woken by a signal", __func__);
-			ret = -ERESTARTSYS;
-			goto write_error_no_unlock;
-		}
+	while (sent < len) {
 
-		/* I want to be alone on cf_sk (except status and queue) */
-		lock_sock(&(cf_sk->sk));
+		size = len-sent;
 
-	} while (ret == -EAGAIN);
+		if (size > CAIF_MAX_PAYLOAD_SIZE)
+			size = CAIF_MAX_PAYLOAD_SIZE;
 
-	if (ret < 0) {
-		cfpkt_destroy(pkt);
-		pr_debug("CAIF: %s(): transmit failed, error = %ld\n",
-			 __func__, (long) ret);
+		/* If size is more than half of sndbuf, chop up message */
+		if (size > ((sk->sk_sndbuf >> 1) - 64))
+			size = (sk->sk_sndbuf >> 1) - 64;
 
-		goto write_error;
-	}
+		if (size > SKB_MAX_ALLOC)
+			size = SKB_MAX_ALLOC;
 
-	release_sock(&cf_sk->sk);
-	return payload_size;
+		skb = sock_alloc_send_skb(sk,
+					size + CAIF_NEEDED_HEADROOM
+					+ CAIF_NEEDED_TAILROOM,
+					msg->msg_flags&MSG_DONTWAIT,
+					&err);
+		if (skb == NULL)
+			goto out_err;
 
-write_error:
-	release_sock(&cf_sk->sk);
-write_error_no_unlock:
-	return ret;
-}
+		skb_reserve(skb, CAIF_NEEDED_HEADROOM);
+		/*
+		 *	If you pass two values to the sock_alloc_send_skb
+		 *	it tries to grab the large buffer with GFP_NOFS
+		 *	(which can fail easily), and if it fails grab the
+		 *	fallback size buffer which is under a page and will
+		 *	succeed. [Alan]
+		 */
+		size = min_t(int, size, skb_tailroom(skb));
 
-static unsigned int caif_poll(struct file *file, struct socket *sock,
-						poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-	u32 mask = 0;
-	poll_wait(file, sk_sleep(sk), wait);
-	lock_sock(&(cf_sk->sk));
-	if (!STATE_IS_OPEN(cf_sk)) {
-		if (!STATE_IS_PENDING(cf_sk))
-			mask |= POLLHUP;
-	} else {
-		if (cfpkt_qpeek(cf_sk->pktq) != NULL)
-			mask |= (POLLIN | POLLRDNORM);
-		if (TX_FLOW_IS_ON(cf_sk))
-			mask |= (POLLOUT | POLLWRNORM);
+		err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size);
+		if (err) {
+			kfree_skb(skb);
+			goto out_err;
+		}
+		err = transmit_skb(skb, cf_sk,
+				msg->msg_flags&MSG_DONTWAIT, timeo);
+		if (err < 0) {
+			kfree_skb(skb);
+			goto pipe_err;
+		}
+		sent += size;
 	}
-	release_sock(&cf_sk->sk);
-	trace_printk("CAIF: %s(): poll mask=0x%04x\n",
-		      __func__, mask);
-	return mask;
-}
-
-static void drain_queue(struct caifsock *cf_sk)
-{
-	struct cfpkt *pkt = NULL;
-
-	/* Empty the queue */
-	do {
-		/* The queue has its own lock */
-		if (!cf_sk->pktq)
-			break;
-
-		pkt = cfpkt_dequeue(cf_sk->pktq);
-		if (!pkt)
-			break;
-		pr_debug("CAIF: %s(): freeing packet from read queue\n",
-			 __func__);
-		cfpkt_destroy(pkt);
 
-	} while (1);
+	return sent;
 
-	cf_sk->read_queue_len = 0;
+pipe_err:
+	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
+		send_sig(SIGPIPE, current, 0);
+	err = -EPIPE;
+out_err:
+	return sent ? : err;
 }
 
 static int setsockopt(struct socket *sock,
@@ -736,19 +738,13 @@ static int setsockopt(struct socket *sock,
 	int prio, linksel;
 	struct ifreq ifreq;
 
-	if (STATE_IS_OPEN(cf_sk)) {
-		pr_debug("CAIF: %s(): setsockopt "
-			 "cannot be done on a connected socket\n",
-			 __func__);
+	if (cf_sk->sk.sk_socket->state != SS_UNCONNECTED)
 		return -ENOPROTOOPT;
-	}
+
 	switch (opt) {
 	case CAIFSO_LINK_SELECT:
-		if (ol < sizeof(int)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
+		if (ol < sizeof(int))
 			return -EINVAL;
-		}
 		if (lvl != SOL_CAIF)
 			goto bad_sol;
 		if (copy_from_user(&linksel, ov, sizeof(int)))
@@ -761,28 +757,20 @@ static int setsockopt(struct socket *sock,
 	case SO_PRIORITY:
 		if (lvl != SOL_SOCKET)
 			goto bad_sol;
-		if (ol < sizeof(int)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " SO_PRIORITY bad size\n", __func__);
+		if (ol < sizeof(int))
 			return -EINVAL;
-		}
 		if (copy_from_user(&prio, ov, sizeof(int)))
 			return -EINVAL;
 		lock_sock(&(cf_sk->sk));
 		cf_sk->conn_req.priority = prio;
-		pr_debug("CAIF: %s(): Setting sockopt priority=%d\n", __func__,
-			cf_sk->conn_req.priority);
 		release_sock(&cf_sk->sk);
 		return 0;
 
 	case SO_BINDTODEVICE:
 		if (lvl != SOL_SOCKET)
 			goto bad_sol;
-		if (ol < sizeof(struct ifreq)) {
-			pr_debug("CAIF: %s(): setsockopt"
-				 " SO_PRIORITY bad size\n", __func__);
+		if (ol < sizeof(struct ifreq))
 			return -EINVAL;
-		}
 		if (copy_from_user(&ifreq, ov, sizeof(ifreq)))
 			return -EFAULT;
 		lock_sock(&(cf_sk->sk));
@@ -798,359 +786,275 @@ static int setsockopt(struct socket *sock,
 			goto bad_sol;
 		if (cf_sk->sk.sk_protocol != CAIFPROTO_UTIL)
 			return -ENOPROTOOPT;
-		if (ol > sizeof(cf_sk->conn_req.param.data))
-			goto req_param_bad_size;
-
 		lock_sock(&(cf_sk->sk));
 		cf_sk->conn_req.param.size = ol;
-		if (copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
+		if (ol > sizeof(cf_sk->conn_req.param.data) ||
+			copy_from_user(&cf_sk->conn_req.param.data, ov, ol)) {
 			release_sock(&cf_sk->sk);
-req_param_bad_size:
-			pr_debug("CAIF: %s(): setsockopt"
-				 " CAIFSO_CHANNEL_CONFIG bad size\n", __func__);
 			return -EINVAL;
 		}
-
 		release_sock(&cf_sk->sk);
 		return 0;
 
 	default:
-		pr_debug("CAIF: %s(): unhandled option %d\n", __func__, opt);
-		return -EINVAL;
+		return -ENOPROTOOPT;
 	}
 
 	return 0;
 bad_sol:
-	pr_debug("CAIF: %s(): setsockopt bad level\n", __func__);
 	return -ENOPROTOOPT;
 
 }
 
-static int caif_connect(struct socket *sock, struct sockaddr *uservaddr,
-	       int sockaddr_len, int flags)
+/*
+ * caif_connect() - Connect a CAIF Socket
+ * Copied and modified from af_irda.c:irda_connect().
+ *
+ * Note : by consulting "errno", the user space caller may learn the cause
+ * of the failure. Most of them are visible in the function, others may come
+ * from subroutines called and are listed here :
+ *  o -EAFNOSUPPORT: bad socket family or type.
+ *  o -ESOCKTNOSUPPORT: bad socket type or protocol
+ *  o -EINVAL: bad socket address, or CAIF link type
+ *  o -ECONNREFUSED: remote end refused the connection.
+ *  o -EINPROGRESS: connect request sent but timed out (or non-blocking)
+ *  o -EISCONN: already connected.
+ *  o -ETIMEDOUT: Connection timed out (send timeout)
+ *  o -ENODEV: No link layer to send request
+ *  o -ECONNRESET: Received Shutdown indication or lost link layer
+ *  o -ENOMEM: Out of memory
+ *
+ *  State Strategy:
+ *  o sk_state: holds the CAIF_* protocol state, it's updated by
+ *	caif_ctrl_cb.
+ *  o sock->state: holds the SS_* socket state and is updated by connect and
+ *	disconnect.
+ */
+static int caif_connect(struct socket *sock, struct sockaddr *uaddr,
+			int addr_len, int flags)
 {
-	struct caifsock *cf_sk = NULL;
-	int result = -1;
-	int mode = 0;
-	int ret = -EIO;
 	struct sock *sk = sock->sk;
-	BUG_ON(sk == NULL);
-
-	cf_sk = container_of(sk, struct caifsock, sk);
-
-	trace_printk("CAIF: %s(): cf_sk=%p OPEN=%d, TX_FLOW=%d, RX_FLOW=%d\n",
-		 __func__, cf_sk,
-		STATE_IS_OPEN(cf_sk),
-		TX_FLOW_IS_ON(cf_sk), RX_FLOW_IS_ON(cf_sk));
-
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	long timeo;
+	int err;
+	lock_sock(sk);
 
-	if (sock->type == SOCK_SEQPACKET || sock->type == SOCK_STREAM)
-		sock->state	= SS_CONNECTING;
-	else
+	err = -EAFNOSUPPORT;
+	if (uaddr->sa_family != AF_CAIF)
 		goto out;
 
-	/* I want to be alone on cf_sk (except status and queue) */
-	lock_sock(&(cf_sk->sk));
-
-	if (sockaddr_len != sizeof(struct sockaddr_caif)) {
-		pr_debug("CAIF: %s(): Bad address len (%ld,%lu)\n",
-			 __func__, (long) sockaddr_len,
-			(long unsigned) sizeof(struct sockaddr_caif));
-		ret = -EINVAL;
-		goto open_error;
+	err = -ESOCKTNOSUPPORT;
+	if (unlikely(!(sk->sk_type == SOCK_STREAM &&
+		       cf_sk->sk.sk_protocol == CAIFPROTO_AT) &&
+		       sk->sk_type != SOCK_SEQPACKET))
+		goto out;
+	switch (sock->state) {
+	case SS_UNCONNECTED:
+		/* Normal case, a fresh connect */
+		caif_assert(sk->sk_state == CAIF_DISCONNECTED);
+		break;
+	case SS_CONNECTING:
+		switch (sk->sk_state) {
+		case CAIF_CONNECTED:
+			sock->state = SS_CONNECTED;
+			err = -EISCONN;
+			goto out;
+		case CAIF_DISCONNECTED:
+			/* Reconnect allowed */
+			break;
+		case CAIF_CONNECTING:
+			err = -EALREADY;
+			if (flags & O_NONBLOCK)
+				goto out;
+			goto wait_connect;
+		}
+		break;
+	case SS_CONNECTED:
+		caif_assert(sk->sk_state == CAIF_CONNECTED ||
+				sk->sk_state == CAIF_DISCONNECTED);
+		if (sk->sk_shutdown & SHUTDOWN_MASK) {
+			/* Allow re-connect after SHUTDOWN_IND */
+			caif_disconnect_client(&cf_sk->layer);
+			break;
+		}
+		/* No reconnect on a seqpacket socket */
+		err = -EISCONN;
+		goto out;
+	case SS_DISCONNECTING:
+	case SS_FREE:
+		caif_assert(1); /*Should never happen */
+		break;
 	}
+	sk->sk_state = CAIF_DISCONNECTED;
+	sock->state = SS_UNCONNECTED;
+	sk_stream_kill_queues(&cf_sk->sk);
 
-	if (uservaddr->sa_family != AF_CAIF) {
-		pr_debug("CAIF: %s(): Bad address family (%d)\n",
-			 __func__, uservaddr->sa_family);
-		ret = -EAFNOSUPPORT;
-		goto open_error;
-	}
+	err = -EINVAL;
+	if (addr_len != sizeof(struct sockaddr_caif) ||
+		!uaddr)
+		goto out;
 
-	memcpy(&cf_sk->conn_req.sockaddr, uservaddr,
+	memcpy(&cf_sk->conn_req.sockaddr, uaddr,
 		sizeof(struct sockaddr_caif));
 
-	dbfs_atomic_inc(&cnt.num_open);
-	mode = SKT_READ_FLAG | SKT_WRITE_FLAG;
-
-	/* If socket is not open, make sure socket is in fully closed state */
-	if (!STATE_IS_OPEN(cf_sk)) {
-		/* Has link close response been received (if we ever sent it)?*/
-		if (STATE_IS_PENDING(cf_sk)) {
-			/*
-			 * Still waiting for close response from remote.
-			 * If opened non-blocking, report "would block"
-			 */
-			if (flags & O_NONBLOCK) {
-				pr_debug("CAIF: %s(): O_NONBLOCK"
-					" && close pending\n", __func__);
-				ret = -EAGAIN;
-				goto open_error;
-			}
-
-			pr_debug("CAIF: %s(): Wait for close response"
-				 " from remote...\n", __func__);
-
-			release_sock(&cf_sk->sk);
-
-			/*
-			 * Blocking mode; close is pending and we need to wait
-			 * for its conclusion.
-			 */
-			result =
-			    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-						     !STATE_IS_PENDING(cf_sk));
-
-			lock_sock(&(cf_sk->sk));
-			if (result == -ERESTARTSYS) {
-				pr_debug("CAIF: %s(): wait_event_interruptible"
-					 "woken by a signal (1)", __func__);
-				ret = -ERESTARTSYS;
-				goto open_error;
-			}
-		}
+	/* Move to connecting socket, start sending Connect Requests */
+	sock->state = SS_CONNECTING;
+	sk->sk_state = CAIF_CONNECTING;
+
+	dbfs_atomic_inc(&cnt.num_connect_req);
+	cf_sk->layer.receive = caif_sktrecv_cb;
+	err = caif_connect_client(&cf_sk->conn_req,
+				&cf_sk->layer);
+	if (err < 0) {
+		cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
+		cf_sk->sk.sk_state = CAIF_DISCONNECTED;
+		goto out;
 	}
 
-	/* socket is now either closed, pending open or open */
-	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
-		/* Open */
-		pr_debug("CAIF: %s(): Socket is already opened (cf_sk=%p)"
-			" check access f_flags = 0x%x file_mode = 0x%x\n",
-			 __func__, cf_sk, mode, cf_sk->file_mode);
-
-	} else {
-		/* We are closed or pending open.
-		 * If closed:	    send link setup
-		 * If pending open: link setup already sent (we could have been
-		 *		    interrupted by a signal last time)
-		 */
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* First opening of file; connect lower layers: */
-			/* Drain queue (very unlikely) */
-			drain_queue(cf_sk);
-
-			cf_sk->layer.receive = caif_sktrecv_cb;
-			SET_STATE_OPEN(cf_sk);
-			SET_PENDING_ON(cf_sk);
-
-			/* Register this channel. */
-			result =
-				caif_connect_client(&cf_sk->conn_req,
-							&cf_sk->layer);
-			if (result < 0) {
-				pr_debug("CAIF: %s(): can't register channel\n",
-					__func__);
-				ret = -EIO;
-				SET_STATE_CLOSED(cf_sk);
-				SET_PENDING_OFF(cf_sk);
-				goto open_error;
-			}
-			dbfs_atomic_inc(&cnt.num_init);
-		}
-
-		/* If opened non-blocking, report "success".
-		 */
-		if (flags & O_NONBLOCK) {
-			pr_debug("CAIF: %s(): O_NONBLOCK success\n",
-				 __func__);
-			ret = -EINPROGRESS;
-			cf_sk->sk.sk_err = -EINPROGRESS;
-			goto open_error;
-		}
-
-		trace_printk("CAIF: %s(): Wait for connect response\n",
-			     __func__);
+	err = -EINPROGRESS;
+wait_connect:
 
-		/* release lock before waiting */
-		release_sock(&cf_sk->sk);
-
-		result =
-		    wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-					     !STATE_IS_PENDING(cf_sk));
-
-		lock_sock(&(cf_sk->sk));
-
-		if (result == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				 "woken by a signal (2)", __func__);
-			ret = -ERESTARTSYS;
-			goto open_error;
-		}
-
-		if (!STATE_IS_OPEN(cf_sk)) {
-			/* Lower layers said "no" */
-			pr_debug("CAIF: %s(): Closed received\n", __func__);
-			ret = -EPIPE;
-			goto open_error;
-		}
+	if (sk->sk_state != CAIF_CONNECTED && (flags & O_NONBLOCK))
+		goto out;
 
-		trace_printk("CAIF: %s(): Connect received\n", __func__);
+	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
+
+	release_sock(sk);
+	err = wait_event_interruptible_timeout(*sk_sleep(sk),
+			sk->sk_state != CAIF_CONNECTING,
+			timeo);
+	lock_sock(sk);
+	if (err < 0)
+		goto out; /* -ERESTARTSYS */
+	if (err == 0 && sk->sk_state != CAIF_CONNECTED) {
+		err = -ETIMEDOUT;
+		goto out;
 	}
-	/* Open is ok */
-	cf_sk->file_mode |= mode;
 
-	trace_printk("CAIF: %s(): Connected - file mode = %x\n",
-		  __func__, cf_sk->file_mode);
-
-	release_sock(&cf_sk->sk);
-	return 0;
-open_error:
-	sock->state	= SS_UNCONNECTED;
-	release_sock(&cf_sk->sk);
+	if (sk->sk_state != CAIF_CONNECTED) {
+		sock->state = SS_UNCONNECTED;
+		err = sock_error(sk);
+		if (!err)
+			err = -ECONNREFUSED;
+		goto out;
+	}
+	sock->state = SS_CONNECTED;
+	err = 0;
 out:
-	return ret;
+	release_sock(sk);
+	return err;
 }
 
-static int caif_shutdown(struct socket *sock, int how)
+
+/*
+ * caif_release() - Disconnect a CAIF Socket
+ * Copied and modified from af_irda.c:irda_release().
+ */
+static int caif_release(struct socket *sock)
 {
-	struct caifsock *cf_sk = NULL;
-	int result = 0;
-	int tx_flow_state_was_on;
 	struct sock *sk = sock->sk;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
+	int res = 0;
 
-	trace_printk("CAIF: %s(): enter\n", __func__);
-	pr_debug("f_flags=%x\n", sock->file->f_flags);
-
-	if (how != SHUT_RDWR)
-		return -EOPNOTSUPP;
-
-	cf_sk = container_of(sk, struct caifsock, sk);
-	if (cf_sk == NULL) {
-		pr_debug("CAIF: %s(): COULD NOT FIND SOCKET\n", __func__);
-		return -EBADF;
-	}
-
-	/* I want to be alone on cf_sk (except status queue) */
-	lock_sock(&(cf_sk->sk));
-	sock_hold(&cf_sk->sk);
-
-	/* IS_CLOSED have double meaning:
-	 * 1) Spontanous Remote Shutdown Request.
-	 * 2) Ack on a channel teardown(disconnect)
-	 * Must clear bit in case we previously received
-	 * remote shudown request.
-	 */
-	if (STATE_IS_OPEN(cf_sk) && !STATE_IS_PENDING(cf_sk)) {
-		SET_STATE_CLOSED(cf_sk);
-		SET_PENDING_ON(cf_sk);
-		tx_flow_state_was_on = TX_FLOW_IS_ON(cf_sk);
-		SET_TX_FLOW_OFF(cf_sk);
-
-		/* Hold the socket until DEINIT_RSP is received */
-		sock_hold(&cf_sk->sk);
-		result = caif_disconnect_client(&cf_sk->layer);
-
-		if (result < 0) {
-			pr_debug("CAIF: %s(): "
-					"caif_disconnect_client() failed\n",
-					 __func__);
-			SET_STATE_CLOSED(cf_sk);
-			SET_PENDING_OFF(cf_sk);
-			SET_TX_FLOW_OFF(cf_sk);
-			release_sock(&cf_sk->sk);
-			sock_put(&cf_sk->sk);
-			return -EIO;
-		}
+	if (!sk)
+		return 0;
 
-	}
-	if (STATE_IS_REMOTE_SHUTDOWN(cf_sk)) {
-		SET_PENDING_OFF(cf_sk);
-		SET_REMOTE_SHUTDOWN_OFF(cf_sk);
-	}
+	set_tx_flow_off(cf_sk);
 
 	/*
-	 * Socket is no longer in state pending close,
-	 * and we can release the reference.
+	 * Ensure that packets are not queued after this point in time.
+	 * caif_queue_rcv_skb checks SOCK_DEAD holding the queue lock,
+	 * this ensures no packets when sock is dead.
 	 */
+	spin_lock(&sk->sk_receive_queue.lock);
+	sock_set_flag(sk, SOCK_DEAD);
+	spin_unlock(&sk->sk_receive_queue.lock);
+	sock->sk = NULL;
 
-	dbfs_atomic_inc(&cnt.num_close);
-	drain_queue(cf_sk);
-	SET_RX_FLOW_ON(cf_sk);
-	cf_sk->file_mode = 0;
-	sock_put(&cf_sk->sk);
-	release_sock(&cf_sk->sk);
-	if (!result && (sock->file->f_flags & O_NONBLOCK)) {
-		pr_debug("nonblocking shutdown returing -EAGAIN\n");
-		return -EAGAIN;
-	} else
-		return result;
-}
-
-static ssize_t caif_sock_no_sendpage(struct socket *sock,
-				     struct page *page,
-				     int offset, size_t size, int flags)
-{
-	return -EOPNOTSUPP;
-}
-
-/* This function is called as part of close. */
-static int caif_release(struct socket *sock)
-{
-	struct sock *sk = sock->sk;
-	struct caifsock *cf_sk = NULL;
-	int res;
-	caif_assert(sk != NULL);
-	cf_sk = container_of(sk, struct caifsock, sk);
+	dbfs_atomic_inc(&cnt.num_disconnect);
 
 	if (cf_sk->debugfs_socket_dir != NULL)
 		debugfs_remove_recursive(cf_sk->debugfs_socket_dir);
 
-	res = caif_shutdown(sock, SHUT_RDWR);
-	if (res && res != -EINPROGRESS)
-		return res;
-
-	/*
-	 * FIXME: Shutdown should probably be possible to do async
-	 * without flushing queues, allowing reception of frames while
-	 * waiting for DEINIT_IND.
-	 * Release should always block, to allow secure decoupling of
-	 * CAIF stack.
-	 */
-	if (!(sock->file->f_flags & O_NONBLOCK)) {
-		res = wait_event_interruptible(*sk_sleep(&cf_sk->sk),
-						!STATE_IS_PENDING(cf_sk));
-
-		if (res == -ERESTARTSYS) {
-			pr_debug("CAIF: %s(): wait_event_interruptible"
-				"woken by a signal (1)", __func__);
-		}
-	}
 	lock_sock(&(cf_sk->sk));
+	sk->sk_state = CAIF_DISCONNECTED;
+	sk->sk_shutdown = SHUTDOWN_MASK;
 
-	sock->sk = NULL;
+	if (cf_sk->sk.sk_socket->state == SS_CONNECTED ||
+		cf_sk->sk.sk_socket->state == SS_CONNECTING)
+		res = caif_disconnect_client(&cf_sk->layer);
 
-	/* Detach the socket from its process context by making it orphan. */
-	sock_orphan(sk);
+	cf_sk->sk.sk_socket->state = SS_DISCONNECTING;
+	wake_up_interruptible_poll(sk_sleep(sk), POLLERR|POLLHUP);
 
-	/*
-	 * Setting SHUTDOWN_MASK means that both send and receive are shutdown
-	 * for the socket.
-	 */
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	sock_orphan(sk);
+	cf_sk->layer.dn = NULL;
+	sk_stream_kill_queues(&cf_sk->sk);
+	release_sock(sk);
+	sock_put(sk);
+	return res;
+}
 
-	/*
-	 * Set the socket state to closed, the TCP_CLOSE macro is used when
-	 * closing any socket.
-	 */
+/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
+static unsigned int caif_poll(struct file *file,
+				struct socket *sock, poll_table *wait)
+{
+	struct sock *sk = sock->sk;
+	unsigned int mask;
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 
-	/* Flush out this sockets receive queue. */
-	drain_queue(cf_sk);
+	sock_poll_wait(file, sk_sleep(sk), wait);
+	mask = 0;
 
-	/* Finally release the socket. */
-	SET_STATE_PENDING_DESTROY(cf_sk);
+	/* exceptional events? */
+	if (sk->sk_err)
+		mask |= POLLERR;
+	if (sk->sk_shutdown == SHUTDOWN_MASK)
+		mask |= POLLHUP;
+	if (sk->sk_shutdown & RCV_SHUTDOWN)
+		mask |= POLLRDHUP;
 
-	release_sock(&cf_sk->sk);
+	/* readable? */
+	if (!skb_queue_empty(&sk->sk_receive_queue) ||
+		(sk->sk_shutdown & RCV_SHUTDOWN))
+		mask |= POLLIN | POLLRDNORM;
 
-	sock_put(sk);
+	/* Connection-based need to check for termination and startup */
+	if (sk->sk_state == CAIF_DISCONNECTED)
+		mask |= POLLHUP;
 
 	/*
-	 * The rest of the cleanup will be handled from the
-	 * caif_sock_destructor
+	 * we set writable also when the other side has shut down the
+	 * connection. This prevents stuck sockets.
 	 */
-	return res;
+	if (sock_writeable(sk) && tx_flow_is_on(cf_sk))
+		mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
+
+	return mask;
 }
 
-static const struct proto_ops caif_ops = {
+static const struct proto_ops caif_seqpacket_ops = {
+	.family = PF_CAIF,
+	.owner = THIS_MODULE,
+	.release = caif_release,
+	.bind = sock_no_bind,
+	.connect = caif_connect,
+	.socketpair = sock_no_socketpair,
+	.accept = sock_no_accept,
+	.getname = sock_no_getname,
+	.poll = caif_poll,
+	.ioctl = sock_no_ioctl,
+	.listen = sock_no_listen,
+	.shutdown = sock_no_shutdown,
+	.setsockopt = setsockopt,
+	.getsockopt = sock_no_getsockopt,
+	.sendmsg = caif_seqpkt_sendmsg,
+	.recvmsg = caif_seqpkt_recvmsg,
+	.mmap = sock_no_mmap,
+	.sendpage = sock_no_sendpage,
+};
+
+static const struct proto_ops caif_stream_ops = {
 	.family = PF_CAIF,
 	.owner = THIS_MODULE,
 	.release = caif_release,
@@ -1162,73 +1066,62 @@ static const struct proto_ops caif_ops = {
 	.poll = caif_poll,
 	.ioctl = sock_no_ioctl,
 	.listen = sock_no_listen,
-	.shutdown = caif_shutdown,
+	.shutdown = sock_no_shutdown,
 	.setsockopt = setsockopt,
 	.getsockopt = sock_no_getsockopt,
-	.sendmsg = caif_sendmsg,
-	.recvmsg = caif_recvmsg,
+	.sendmsg = caif_stream_sendmsg,
+	.recvmsg = caif_stream_recvmsg,
 	.mmap = sock_no_mmap,
-	.sendpage = caif_sock_no_sendpage,
+	.sendpage = sock_no_sendpage,
 };
 
 /* This function is called when a socket is finally destroyed. */
 static void caif_sock_destructor(struct sock *sk)
 {
-	struct caifsock *cf_sk = NULL;
-	cf_sk = container_of(sk, struct caifsock, sk);
-	/* Error checks. */
+	struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
 	caif_assert(!atomic_read(&sk->sk_wmem_alloc));
 	caif_assert(sk_unhashed(sk));
 	caif_assert(!sk->sk_socket);
 	if (!sock_flag(sk, SOCK_DEAD)) {
-		pr_debug("CAIF: %s(): 0x%p", __func__, sk);
+		pr_info("Attempt to release alive CAIF socket: %p\n", sk);
 		return;
 	}
-
-	if (STATE_IS_OPEN(cf_sk)) {
-		pr_debug("CAIF: %s(): socket is opened (cf_sk=%p)"
-			 " file_mode = 0x%x\n", __func__,
-			 cf_sk, cf_sk->file_mode);
-		return;
-	}
-	drain_queue(cf_sk);
-	kfree(cf_sk->pktq);
-
-	trace_printk("CAIF: %s(): caif_sock_destructor: Removing socket %s\n",
-		__func__, cf_sk->name);
-	atomic_dec(&caif_nr_socks);
+	sk_stream_kill_queues(&cf_sk->sk);
+	dbfs_atomic_dec(&cnt.caif_nr_socks);
 }
 
 static int caif_create(struct net *net, struct socket *sock, int protocol,
-		       int kern)
+			int kern)
 {
 	struct sock *sk = NULL;
 	struct caifsock *cf_sk = NULL;
-	int result = 0;
 	static struct proto prot = {.name = "PF_CAIF",
 		.owner = THIS_MODULE,
 		.obj_size = sizeof(struct caifsock),
 	};
 
+	if (!capable(CAP_SYS_ADMIN) && !capable(CAP_NET_ADMIN))
+		return -EPERM;
 	/*
 	 * The sock->type specifies the socket type to use.
-	 * in SEQPACKET mode packet boundaries are enforced.
+	 * The CAIF socket is a packet stream in the sense
+	 * that it is packet based. CAIF trusts the reliability
+	 * of the link, no resending is implemented.
 	 */
-	if (sock->type != SOCK_SEQPACKET && sock->type != SOCK_STREAM)
+	if (sock->type == SOCK_SEQPACKET)
+		sock->ops = &caif_seqpacket_ops;
+	else if (sock->type == SOCK_STREAM)
+		sock->ops = &caif_stream_ops;
+	else
 		return -ESOCKTNOSUPPORT;
 
-	if (net != &init_net)
-		return -EAFNOSUPPORT;
-
 	if (protocol < 0 || protocol >= CAIFPROTO_MAX)
 		return -EPROTONOSUPPORT;
 	/*
-	 * Set the socket state to unconnected.	 The socket state is really
-	 * not used at all in the net/core or socket.c but the
+	 * Set the socket state to unconnected.	 The socket state
+	 * is really not used at all in the net/core or socket.c but the
 	 * initialization makes sure that sock->state is not uninitialized.
 	 */
-	sock->state = SS_UNCONNECTED;
-
 	sk = sk_alloc(net, PF_CAIF, GFP_KERNEL, &prot);
 	if (!sk)
 		return -ENOMEM;
@@ -1238,11 +1131,9 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 	/* Store the protocol */
 	sk->sk_protocol = (unsigned char) protocol;
 
-	spin_lock_init(&cf_sk->read_queue_len_lock);
-
-	/* Fill in some information concerning the misc socket. */
-	snprintf(cf_sk->name, sizeof(cf_sk->name), "cf_sk%d",
-		atomic_read(&caif_nr_socks));
+	/* Sendbuf dictates the amount of outbound packets not yet sent */
+	sk->sk_sndbuf = CAIF_DEF_SNDBUF;
+	sk->sk_rcvbuf = CAIF_DEF_RCVBUF;
 
 	/*
 	 * Lock in order to try to stop someone from opening the socket
@@ -1252,108 +1143,85 @@ static int caif_create(struct net *net, struct socket *sock, int protocol,
 
 	/* Initialize the nozero default sock structure data. */
 	sock_init_data(sock, sk);
-	sock->ops = &caif_ops;
 	sk->sk_destruct = caif_sock_destructor;
-	sk->sk_sndbuf = caif_sockbuf_size;
-	sk->sk_rcvbuf = caif_sockbuf_size;
 
-	cf_sk->pktq = cfpktq_create();
+	mutex_init(&cf_sk->readlock); /* single task reading lock */
+	cf_sk->layer.ctrlcmd = caif_ctrl_cb;
+	cf_sk->sk.sk_socket->state = SS_UNCONNECTED;
+	cf_sk->sk.sk_state = CAIF_DISCONNECTED;
 
-	if (!cf_sk->pktq) {
-		pr_err("CAIF: %s(): queue create failed.\n", __func__);
-		result = -ENOMEM;
-		release_sock(&cf_sk->sk);
-		goto err_failed;
-	}
-	cf_sk->layer.ctrlcmd = caif_sktflowctrl_cb;
-	SET_STATE_CLOSED(cf_sk);
-	SET_PENDING_OFF(cf_sk);
-	SET_TX_FLOW_OFF(cf_sk);
-	SET_RX_FLOW_ON(cf_sk);
+	set_tx_flow_off(cf_sk);
+	set_rx_flow_on(cf_sk);
 
 	/* Set default options on configuration */
 	cf_sk->conn_req.priority = CAIF_PRIO_NORMAL;
-	cf_sk->conn_req.link_selector = CAIF_LINK_HIGH_BANDW;
+	cf_sk->conn_req.link_selector = CAIF_LINK_LOW_LATENCY;
 	cf_sk->conn_req.protocol = protocol;
 	/* Increase the number of sockets created. */
-	atomic_inc(&caif_nr_socks);
+	dbfs_atomic_inc(&cnt.caif_nr_socks);
+#ifdef CONFIG_DEBUG_FS
 	if (!IS_ERR(debugfsdir)) {
+		/* Fill in some information concerning the misc socket. */
+		snprintf(cf_sk->name, sizeof(cf_sk->name), "cfsk%d",
+				atomic_read(&cnt.caif_nr_socks));
+
 		cf_sk->debugfs_socket_dir =
 			debugfs_create_dir(cf_sk->name, debugfsdir);
-		debugfs_create_u32("conn_state", S_IRUSR | S_IWUSR,
-				cf_sk->debugfs_socket_dir, &cf_sk->conn_state);
+		debugfs_create_u32("sk_state", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->sk.sk_state);
 		debugfs_create_u32("flow_state", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir, &cf_sk->flow_state);
-		debugfs_create_u32("read_queue_len", S_IRUSR | S_IWUSR,
+		debugfs_create_u32("sk_rmem_alloc", S_IRUSR | S_IWUSR,
+				cf_sk->debugfs_socket_dir,
+				(u32 *) &cf_sk->sk.sk_rmem_alloc);
+		debugfs_create_u32("sk_wmem_alloc", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir,
-				(u32 *) &cf_sk->read_queue_len);
+				(u32 *) &cf_sk->sk.sk_wmem_alloc);
 		debugfs_create_u32("identity", S_IRUSR | S_IWUSR,
 				cf_sk->debugfs_socket_dir,
 				(u32 *) &cf_sk->layer.id);
 	}
+#endif
 	release_sock(&cf_sk->sk);
 	return 0;
-err_failed:
-	sk_free(sk);
-	return result;
 }
 
+
 static struct net_proto_family caif_family_ops = {
 	.family = PF_CAIF,
 	.create = caif_create,
 	.owner = THIS_MODULE,
 };
 
-static int af_caif_init(void)
+int af_caif_init(void)
 {
-	int err;
-	err = sock_register(&caif_family_ops);
-
+	int err = sock_register(&caif_family_ops);
 	if (!err)
 		return err;
-
 	return 0;
 }
 
 static int __init caif_sktinit_module(void)
 {
-	int stat;
 #ifdef CONFIG_DEBUG_FS
-	debugfsdir = debugfs_create_dir("chnl_skt", NULL);
+	debugfsdir = debugfs_create_dir("caif_sk", NULL);
 	if (!IS_ERR(debugfsdir)) {
-		debugfs_create_u32("skb_inuse", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_in_use);
-		debugfs_create_u32("skb_alloc", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_alloc);
-		debugfs_create_u32("skb_free", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.skb_free);
 		debugfs_create_u32("num_sockets", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &caif_nr_socks);
-		debugfs_create_u32("num_open", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.caif_nr_socks);
+		debugfs_create_u32("num_connect_req", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_open);
-		debugfs_create_u32("num_close", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_req);
+		debugfs_create_u32("num_connect_resp", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_close);
-		debugfs_create_u32("num_init", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_resp);
+		debugfs_create_u32("num_connect_fail_resp", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_init);
-		debugfs_create_u32("num_init_resp", S_IRUSR | S_IWUSR,
+				(u32 *) &cnt.num_connect_fail_resp);
+		debugfs_create_u32("num_disconnect", S_IRUSR | S_IWUSR,
 				debugfsdir,
-				(u32 *) &cnt.num_init_resp);
-		debugfs_create_u32("num_init_fail_resp", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_init_fail_resp);
-		debugfs_create_u32("num_deinit", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_deinit);
-		debugfs_create_u32("num_deinit_resp", S_IRUSR | S_IWUSR,
-				debugfsdir,
-				(u32 *) &cnt.num_deinit_resp);
+				(u32 *) &cnt.num_disconnect);
 		debugfs_create_u32("num_remote_shutdown_ind",
 				S_IRUSR | S_IWUSR, debugfsdir,
 				(u32 *) &cnt.num_remote_shutdown_ind);
@@ -1371,13 +1239,7 @@ static int __init caif_sktinit_module(void)
 				(u32 *) &cnt.num_rx_flow_on);
 	}
 #endif
-	stat = af_caif_init();
-	if (stat) {
-		pr_err("CAIF: %s(): Failed to initialize CAIF socket layer.",
-		       __func__);
-		return stat;
-	}
-	return 0;
+	return af_caif_init();
 }
 
 static void __exit caif_sktexit_module(void)
@@ -1386,6 +1248,5 @@ static void __exit caif_sktexit_module(void)
 	if (debugfsdir != NULL)
 		debugfs_remove_recursive(debugfsdir);
 }
-
 module_init(caif_sktinit_module);
 module_exit(caif_sktexit_module);
-- 
cgit v1.2.3


From 073900a28d95c75a706bf40ebf092ea048c7b236 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Mon, 12 Apr 2010 13:17:25 +0200
Subject: USB: rename usb_buffer_alloc() and usb_buffer_free()

To make it clearer what the functions actually do,

  usb_buffer_alloc() is renamed to usb_alloc_coherent()
  usb_buffer_free()  is renamed to usb_free_coherent()

They should only be used in code which really needs DMA coherency.

[added compatibility macros so we can convert things easier - gregkh]

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Alan Stern <stern@rowland.harvard.edu>
Cc: Pedro Ribeiro <pedrib@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 20 ++++++++++----------
 include/linux/usb.h    | 18 +++++++++++++++---
 2 files changed, 25 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 1297e9b16a51..0561430f2ede 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -718,7 +718,7 @@ int __usb_get_extra_descriptor(char *buffer, unsigned size,
 EXPORT_SYMBOL_GPL(__usb_get_extra_descriptor);
 
 /**
- * usb_buffer_alloc - allocate dma-consistent buffer for URB_NO_xxx_DMA_MAP
+ * usb_alloc_coherent - allocate dma-consistent buffer for URB_NO_xxx_DMA_MAP
  * @dev: device the buffer will be used with
  * @size: requested buffer size
  * @mem_flags: affect whether allocation may block
@@ -737,30 +737,30 @@ EXPORT_SYMBOL_GPL(__usb_get_extra_descriptor);
  * architectures where CPU caches are not DMA-coherent.  On systems without
  * bus-snooping caches, these buffers are uncached.
  *
- * When the buffer is no longer used, free it with usb_buffer_free().
+ * When the buffer is no longer used, free it with usb_free_coherent().
  */
-void *usb_buffer_alloc(struct usb_device *dev, size_t size, gfp_t mem_flags,
-		       dma_addr_t *dma)
+void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags,
+			 dma_addr_t *dma)
 {
 	if (!dev || !dev->bus)
 		return NULL;
 	return hcd_buffer_alloc(dev->bus, size, mem_flags, dma);
 }
-EXPORT_SYMBOL_GPL(usb_buffer_alloc);
+EXPORT_SYMBOL_GPL(usb_alloc_coherent);
 
 /**
- * usb_buffer_free - free memory allocated with usb_buffer_alloc()
+ * usb_free_coherent - free memory allocated with usb_alloc_coherent()
  * @dev: device the buffer was used with
  * @size: requested buffer size
  * @addr: CPU address of buffer
  * @dma: DMA address of buffer
  *
  * This reclaims an I/O buffer, letting it be reused.  The memory must have
- * been allocated using usb_buffer_alloc(), and the parameters must match
+ * been allocated using usb_alloc_coherent(), and the parameters must match
  * those provided in that allocation request.
  */
-void usb_buffer_free(struct usb_device *dev, size_t size, void *addr,
-		     dma_addr_t dma)
+void usb_free_coherent(struct usb_device *dev, size_t size, void *addr,
+		       dma_addr_t dma)
 {
 	if (!dev || !dev->bus)
 		return;
@@ -768,7 +768,7 @@ void usb_buffer_free(struct usb_device *dev, size_t size, void *addr,
 		return;
 	hcd_buffer_free(dev->bus, size, addr, dma);
 }
-EXPORT_SYMBOL_GPL(usb_buffer_free);
+EXPORT_SYMBOL_GPL(usb_free_coherent);
 
 /**
  * usb_buffer_map - create DMA mapping(s) for an urb
diff --git a/include/linux/usb.h b/include/linux/usb.h
index ce1323c4e47c..739f1fd1cc15 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1085,7 +1085,7 @@ typedef void (*usb_complete_t)(struct urb *);
  * Alternatively, drivers may pass the URB_NO_xxx_DMA_MAP transfer flags,
  * which tell the host controller driver that no such mapping is needed since
  * the device driver is DMA-aware.  For example, a device driver might
- * allocate a DMA buffer with usb_buffer_alloc() or call usb_buffer_map().
+ * allocate a DMA buffer with usb_alloc_coherent() or call usb_buffer_map().
  * When these transfer flags are provided, host controller drivers will
  * attempt to use the dma addresses found in the transfer_dma and/or
  * setup_dma fields rather than determining a dma address themselves.
@@ -1366,11 +1366,23 @@ static inline int usb_urb_dir_out(struct urb *urb)
 	return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT;
 }
 
-void *usb_buffer_alloc(struct usb_device *dev, size_t size,
+void *usb_alloc_coherent(struct usb_device *dev, size_t size,
 	gfp_t mem_flags, dma_addr_t *dma);
-void usb_buffer_free(struct usb_device *dev, size_t size,
+void usb_free_coherent(struct usb_device *dev, size_t size,
 	void *addr, dma_addr_t dma);
 
+/* Compatible macros while we switch over */
+static inline void *usb_buffer_alloc(struct usb_device *dev, size_t size,
+				     gfp_t mem_flags, dma_addr_t *dma)
+{
+	return usb_alloc_coherent(dev, size, mem_flags, dma);
+}
+static inline void usb_buffer_free(struct usb_device *dev, size_t size,
+				   void *addr, dma_addr_t dma)
+{
+	return usb_free_coherent(dev, size, addr, dma);
+}
+
 #if 0
 struct urb *usb_buffer_map(struct urb *urb);
 void usb_buffer_dmasync(struct urb *urb);
-- 
cgit v1.2.3


From 5a2e3995951176e1aaa63d17ae2e1d26ac99003d Mon Sep 17 00:00:00 2001
From: Kei Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Date: Thu, 1 Apr 2010 20:40:58 +0900
Subject: [SCSI] ftrace: add __print_hex()

__print_hex() prints values in an array in hex (w/o '0x') (space separated)
EX) 92 33 32 f3 ee 4d

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Signed-off-by: Tomohiro Kusumi <kusumi.tomohiro@jp.fujitsu.com>
Signed-off-by: Kei Tokunaga <tokunaga.keiich@jp.fujitsu.com>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: James Bottomley <James.Bottomley@suse.de>
---
 include/linux/ftrace_event.h |  3 +++
 include/trace/ftrace.h       |  3 +++
 kernel/trace/trace_output.c  | 15 +++++++++++++++
 3 files changed, 21 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c0f4b364c711..c3c5aaaae53a 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -25,6 +25,9 @@ const char *ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 const char *ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 				     const struct trace_print_flags *symbol_array);
 
+const char *ftrace_print_hex_seq(struct trace_seq *p,
+				 const unsigned char *buf, int len);
+
 /*
  * The trace entry - the most basic unit of tracing. This is what
  * is printed in the end as a single line in the trace output, such as:
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index ea6f9d4a20e9..c48320b3dabd 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -198,6 +198,9 @@
 		ftrace_print_symbols_seq(p, value, symbols);		\
 	})
 
+#undef __print_hex
+#define __print_hex(buf, buf_len) ftrace_print_hex_seq(p, buf, buf_len)
+
 #undef DECLARE_EVENT_CLASS
 #define DECLARE_EVENT_CLASS(call, proto, args, tstruct, assign, print)	\
 static notrace enum print_line_t					\
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 8e46b3323cdc..7c4a0ca650b5 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -355,6 +355,21 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 }
 EXPORT_SYMBOL(ftrace_print_symbols_seq);
 
+const char *
+ftrace_print_hex_seq(struct trace_seq *p, const unsigned char *buf, int buf_len)
+{
+	int i;
+	const char *ret = p->buffer + p->len;
+
+	for (i = 0; i < buf_len; i++)
+		trace_seq_printf(p, "%s%2.2x", i == 0 ? "" : " ", buf[i]);
+
+	trace_seq_putc(p, 0);
+
+	return ret;
+}
+EXPORT_SYMBOL(ftrace_print_hex_seq);
+
 #ifdef CONFIG_KRETPROBES
 static inline const char *kretprobed(const char *name)
 {
-- 
cgit v1.2.3


From 73266fc1df2f94cf72b3beba3eee3b88ed0b0664 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Thu, 22 Apr 2010 05:05:45 +0200
Subject: hw-breakpoints: Tag ptrace breakpoint as exclude_kernel

Tag ptrace breakpoints with the exclude_kernel attribute set. This
will make it easier to set generic policies on breakpoints when it
comes to ensuring nobody unprivileged tries to breakpoint the kernel.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: K. Prasad <prasad@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/sh/kernel/ptrace_32.c    | 2 +-
 arch/x86/kernel/ptrace.c      | 2 +-
 include/linux/hw_breakpoint.h | 6 ++++++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c
index 7759a9a93211..d4104ce9fe53 100644
--- a/arch/sh/kernel/ptrace_32.c
+++ b/arch/sh/kernel/ptrace_32.c
@@ -85,7 +85,7 @@ static int set_single_step(struct task_struct *tsk, unsigned long addr)
 
 	bp = thread->ptrace_bps[0];
 	if (!bp) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 
 		attr.bp_addr = addr;
 		attr.bp_len = HW_BREAKPOINT_LEN_2;
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 055be0afd330..70c4872cd8aa 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -688,7 +688,7 @@ static int ptrace_set_breakpoint_addr(struct task_struct *tsk, int nr,
 	struct perf_event_attr attr;
 
 	if (!t->ptrace_bps[nr]) {
-		hw_breakpoint_init(&attr);
+		ptrace_breakpoint_init(&attr);
 		/*
 		 * Put stub len and type to register (reserve) an inactive but
 		 * correct bp
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index c70d27af03f9..a0aa5a9cfb0e 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -34,6 +34,12 @@ static inline void hw_breakpoint_init(struct perf_event_attr *attr)
 	attr->sample_period = 1;
 }
 
+static inline void ptrace_breakpoint_init(struct perf_event_attr *attr)
+{
+	hw_breakpoint_init(attr);
+	attr->exclude_kernel = 1;
+}
+
 static inline unsigned long hw_breakpoint_addr(struct perf_event *bp)
 {
 	return bp->attr.bp_addr;
-- 
cgit v1.2.3


From 0102752e4c9e0655b39734550d4c35327954f7f9 Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Sun, 11 Apr 2010 18:55:56 +0200
Subject: hw-breakpoints: Separate constraint space for data and instruction
 breakpoints

There are two prevailing ways for archs to implement hardware
breakpoints.

The first is to separate breakpoint address pattern definition
space between data and instruction breakpoints. We then have
typically distinct instruction address breakpoint registers
and data address breakpoint registers, delivered with
separate control registers for data and instruction breakpoints
as well. This is the case for PowerPC and ARM, for example.

The second consists of having a merged breakpoint address space
definition for data and instruction breakpoints. Address
registers can host either an instruction or a data address, and
the access mode for the breakpoint is defined in a control
register. This is the case for x86 and SuperH.

This patch adds a new CONFIG_HAVE_MIXED_BREAKPOINTS_REGS config
that archs can select if they belong to the second case. Those
will have their slot allocation merged for instructions and
data breakpoints.

The others will have a separate slot tracking between data and
instruction breakpoints.

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: K. Prasad <prasad@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/Kconfig                  | 11 ++++++
 arch/sh/Kconfig               |  1 +
 arch/x86/Kconfig              |  1 +
 include/linux/hw_breakpoint.h |  9 +++--
 kernel/hw_breakpoint.c        | 86 +++++++++++++++++++++++++++++--------------
 5 files changed, 78 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/arch/Kconfig b/arch/Kconfig
index f06010fb4838..acda512da2e2 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -137,6 +137,17 @@ config HAVE_HW_BREAKPOINT
 	bool
 	depends on PERF_EVENTS
 
+config HAVE_MIXED_BREAKPOINTS_REGS
+	bool
+	depends on HAVE_HW_BREAKPOINT
+	help
+	  Depending on the arch implementation of hardware breakpoints,
+	  some of them have separate registers for data and instruction
+	  breakpoints addresses, others have mixed registers to store
+	  them but define the access type in a control register.
+	  Select this option if your arch implements breakpoints under the
+	  latter fashion.
+
 config HAVE_USER_RETURN_NOTIFIER
 	bool
 
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index 8d90564c2bcf..e6d8ab5cfa9d 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -44,6 +44,7 @@ config SUPERH32
 	select HAVE_FUNCTION_GRAPH_TRACER
 	select HAVE_ARCH_KGDB
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS if HAVE_HW_BREAKPOINT
 	select ARCH_HIBERNATION_POSSIBLE if MMU
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 97a95dfd1181..01177dcbe261 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -53,6 +53,7 @@ config X86
 	select HAVE_KERNEL_LZMA
 	select HAVE_KERNEL_LZO
 	select HAVE_HW_BREAKPOINT
+	select HAVE_MIXED_BREAKPOINTS_REGS
 	select PERF_EVENTS
 	select ANON_INODES
 	select HAVE_ARCH_KMEMCHECK
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index a0aa5a9cfb0e..7e8899093098 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -9,9 +9,12 @@ enum {
 };
 
 enum {
-	HW_BREAKPOINT_R = 1,
-	HW_BREAKPOINT_W = 2,
-	HW_BREAKPOINT_X = 4,
+	HW_BREAKPOINT_EMPTY	= 0,
+	HW_BREAKPOINT_R		= 1,
+	HW_BREAKPOINT_W		= 2,
+	HW_BREAKPOINT_RW	= HW_BREAKPOINT_R | HW_BREAKPOINT_W,
+	HW_BREAKPOINT_X		= 4,
+	HW_BREAKPOINT_INVALID   = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
 };
 
 #ifdef __KERNEL__
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 89e8a050c43a..8ead1345e33b 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -45,18 +45,28 @@
 
 #include <linux/hw_breakpoint.h>
 
+enum bp_type_idx {
+	TYPE_INST 	= 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+	TYPE_DATA	= 0,
+#else
+	TYPE_DATA	= 1,
+#endif
+	TYPE_MAX
+};
+
 /*
  * Constraints data
  */
 
 /* Number of pinned cpu breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned);
+static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[TYPE_MAX][HBP_NUM]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_bp_flexible);
+static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
@@ -67,14 +77,22 @@ struct bp_busy_slots {
 /* Serialize accesses to the above constraints */
 static DEFINE_MUTEX(nr_bp_mutex);
 
+static inline enum bp_type_idx find_slot_idx(struct perf_event *bp)
+{
+	if (bp->attr.bp_type & HW_BREAKPOINT_RW)
+		return TYPE_DATA;
+
+	return TYPE_INST;
+}
+
 /*
  * Report the maximum number of pinned breakpoints a task
  * have in this cpu
  */
-static unsigned int max_task_bp_pinned(int cpu)
+static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 {
 	int i;
-	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
 	for (i = HBP_NUM -1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
@@ -84,7 +102,7 @@ static unsigned int max_task_bp_pinned(int cpu)
 	return 0;
 }
 
-static int task_bp_pinned(struct task_struct *tsk)
+static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
 {
 	struct perf_event_context *ctx = tsk->perf_event_ctxp;
 	struct list_head *list;
@@ -105,7 +123,8 @@ static int task_bp_pinned(struct task_struct *tsk)
 	 */
 	list_for_each_entry(bp, list, event_entry) {
 		if (bp->attr.type == PERF_TYPE_BREAKPOINT)
-			count++;
+			if (find_slot_idx(bp) == type)
+				count++;
 	}
 
 	raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -118,18 +137,19 @@ static int task_bp_pinned(struct task_struct *tsk)
  * a given cpu (cpu > -1) or in all of them (cpu = -1).
  */
 static void
-fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
+fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
+		    enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
 
 	if (cpu >= 0) {
-		slots->pinned = per_cpu(nr_cpu_bp_pinned, cpu);
+		slots->pinned = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			slots->pinned += max_task_bp_pinned(cpu);
+			slots->pinned += max_task_bp_pinned(cpu, type);
 		else
-			slots->pinned += task_bp_pinned(tsk);
-		slots->flexible = per_cpu(nr_bp_flexible, cpu);
+			slots->pinned += task_bp_pinned(tsk, type);
+		slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
 
 		return;
 	}
@@ -137,16 +157,16 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 	for_each_online_cpu(cpu) {
 		unsigned int nr;
 
-		nr = per_cpu(nr_cpu_bp_pinned, cpu);
+		nr = per_cpu(nr_cpu_bp_pinned[type], cpu);
 		if (!tsk)
-			nr += max_task_bp_pinned(cpu);
+			nr += max_task_bp_pinned(cpu, type);
 		else
-			nr += task_bp_pinned(tsk);
+			nr += task_bp_pinned(tsk, type);
 
 		if (nr > slots->pinned)
 			slots->pinned = nr;
 
-		nr = per_cpu(nr_bp_flexible, cpu);
+		nr = per_cpu(nr_bp_flexible[type], cpu);
 
 		if (nr > slots->flexible)
 			slots->flexible = nr;
@@ -156,14 +176,15 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp)
 /*
  * Add a pinned breakpoint for the given task in our constraint table
  */
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
+static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+				enum bp_type_idx type)
 {
 	unsigned int *tsk_pinned;
 	int count = 0;
 
-	count = task_bp_pinned(tsk);
+	count = task_bp_pinned(tsk, type);
 
-	tsk_pinned = per_cpu(nr_task_bp_pinned, cpu);
+	tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 	if (enable) {
 		tsk_pinned[count]++;
 		if (count > 0)
@@ -178,7 +199,8 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable)
 /*
  * Add/remove the given breakpoint in our constraint table
  */
-static void toggle_bp_slot(struct perf_event *bp, bool enable)
+static void
+toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type)
 {
 	int cpu = bp->cpu;
 	struct task_struct *tsk = bp->ctx->task;
@@ -186,20 +208,20 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 	/* Pinned counter task profiling */
 	if (tsk) {
 		if (cpu >= 0) {
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type);
 			return;
 		}
 
 		for_each_online_cpu(cpu)
-			toggle_bp_task_slot(tsk, cpu, enable);
+			toggle_bp_task_slot(tsk, cpu, enable, type);
 		return;
 	}
 
 	/* Pinned counter cpu profiling */
 	if (enable)
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)++;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu)++;
 	else
-		per_cpu(nr_cpu_bp_pinned, bp->cpu)--;
+		per_cpu(nr_cpu_bp_pinned[type], bp->cpu)--;
 }
 
 /*
@@ -246,14 +268,21 @@ static void toggle_bp_slot(struct perf_event *bp, bool enable)
 static int __reserve_bp_slot(struct perf_event *bp)
 {
 	struct bp_busy_slots slots = {0};
+	enum bp_type_idx type;
 
-	fetch_bp_busy_slots(&slots, bp);
+	/* Basic checks */
+	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
+	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
+		return -EINVAL;
+
+	type = find_slot_idx(bp);
+	fetch_bp_busy_slots(&slots, bp, type);
 
 	/* Flexible counters need to keep at least one slot */
 	if (slots.pinned + (!!slots.flexible) == HBP_NUM)
 		return -ENOSPC;
 
-	toggle_bp_slot(bp, true);
+	toggle_bp_slot(bp, true, type);
 
 	return 0;
 }
@@ -273,7 +302,10 @@ int reserve_bp_slot(struct perf_event *bp)
 
 static void __release_bp_slot(struct perf_event *bp)
 {
-	toggle_bp_slot(bp, false);
+	enum bp_type_idx type;
+
+	type = find_slot_idx(bp);
+	toggle_bp_slot(bp, false, type);
 }
 
 void release_bp_slot(struct perf_event *bp)
-- 
cgit v1.2.3


From feef47d0cb530e8419dfa0b48141b538b89b1b1a Mon Sep 17 00:00:00 2001
From: Frederic Weisbecker <fweisbec@gmail.com>
Date: Fri, 23 Apr 2010 05:59:55 +0200
Subject: hw-breakpoints: Get the number of available registers on boot
 dynamically

The breakpoint generic layer assumes that archs always know in advance
the static number of address registers available to host breakpoints
through the HBP_NUM macro.

However, this is not true for every arch. For example, ARM needs to get
this information dynamically to handle compatibility between
different versions.

To solve this, this patch proposes to drop the static HBP_NUM macro
and let the arch provide the number of available slots through a
new hw_breakpoint_slots() function. For archs that have
CONFIG_HAVE_MIXED_BREAKPOINTS_REGS selected, it will be called once
as the number of registers fits for instruction and data breakpoints
together.
For the others it will be called once to get the number of
instruction breakpoint registers and a second time to get the
number of data breakpoint registers; the targeted type is given
as a parameter of hw_breakpoint_slots().

Reported-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Acked-by: Paul Mundt <lethal@linux-sh.org>
Cc: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
Cc: K. Prasad <prasad@linux.vnet.ibm.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Jason Wessel <jason.wessel@windriver.com>
Cc: Ingo Molnar <mingo@elte.hu>
---
 arch/sh/include/asm/hw_breakpoint.h  |  5 ++++
 arch/x86/include/asm/hw_breakpoint.h |  5 ++++
 include/linux/hw_breakpoint.h        | 10 +++++++
 kernel/hw_breakpoint.c               | 53 ++++++++++++++++++++++++++++--------
 kernel/trace/trace_ksym.c            | 26 +++++-------------
 5 files changed, 68 insertions(+), 31 deletions(-)

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/hw_breakpoint.h b/arch/sh/include/asm/hw_breakpoint.h
index 382bad937dcc..e14cad96798f 100644
--- a/arch/sh/include/asm/hw_breakpoint.h
+++ b/arch/sh/include/asm/hw_breakpoint.h
@@ -46,6 +46,11 @@ struct pmu;
 /* Maximum number of UBC channels */
 #define HBP_NUM		2
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 /* arch/sh/kernel/hw_breakpoint.c */
 extern int arch_check_bp_in_kernelspace(struct perf_event *bp);
 extern int arch_validate_hwbkpt_settings(struct perf_event *bp);
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index c77a5a6fab9d..942255310e6a 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -41,6 +41,11 @@ struct arch_hw_breakpoint {
 /* Total number of available HW breakpoint registers */
 #define HBP_NUM 4
 
+static inline int hw_breakpoint_slots(int type)
+{
+	return HBP_NUM;
+}
+
 struct perf_event;
 struct pmu;
 
diff --git a/include/linux/hw_breakpoint.h b/include/linux/hw_breakpoint.h
index 7e8899093098..a2d6ea49ec56 100644
--- a/include/linux/hw_breakpoint.h
+++ b/include/linux/hw_breakpoint.h
@@ -17,6 +17,16 @@ enum {
 	HW_BREAKPOINT_INVALID   = HW_BREAKPOINT_RW | HW_BREAKPOINT_X,
 };
 
+enum bp_type_idx {
+	TYPE_INST 	= 0,
+#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
+	TYPE_DATA	= 0,
+#else
+	TYPE_DATA	= 1,
+#endif
+	TYPE_MAX
+};
+
 #ifdef __KERNEL__
 
 #include <linux/perf_event.h>
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 974498b858fc..684b710cbb91 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -40,20 +40,12 @@
 #include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/init.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/smp.h>
 
 #include <linux/hw_breakpoint.h>
 
-enum bp_type_idx {
-	TYPE_INST 	= 0,
-#ifdef CONFIG_HAVE_MIXED_BREAKPOINTS_REGS
-	TYPE_DATA	= 0,
-#else
-	TYPE_DATA	= 1,
-#endif
-	TYPE_MAX
-};
 
 /*
  * Constraints data
@@ -63,11 +55,15 @@ enum bp_type_idx {
 static DEFINE_PER_CPU(unsigned int, nr_cpu_bp_pinned[TYPE_MAX]);
 
 /* Number of pinned task breakpoints in a cpu */
-static DEFINE_PER_CPU(unsigned int, nr_task_bp_pinned[TYPE_MAX][HBP_NUM]);
+static DEFINE_PER_CPU(unsigned int, *nr_task_bp_pinned[TYPE_MAX]);
 
 /* Number of non-pinned cpu/task breakpoints in a cpu */
 static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
 
+static int nr_slots[TYPE_MAX];
+
+static int constraints_initialized;
+
 /* Gather the number of total pinned and un-pinned bp in a cpuset */
 struct bp_busy_slots {
 	unsigned int pinned;
@@ -99,7 +95,7 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
 	int i;
 	unsigned int *tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
 
-	for (i = HBP_NUM -1; i >= 0; i--) {
+	for (i = nr_slots[type] - 1; i >= 0; i--) {
 		if (tsk_pinned[i] > 0)
 			return i + 1;
 	}
@@ -292,6 +288,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
 	enum bp_type_idx type;
 	int weight;
 
+	/* We couldn't initialize breakpoint constraints on boot */
+	if (!constraints_initialized)
+		return -ENOMEM;
+
 	/* Basic checks */
 	if (bp->attr.bp_type == HW_BREAKPOINT_EMPTY ||
 	    bp->attr.bp_type == HW_BREAKPOINT_INVALID)
@@ -304,7 +304,7 @@ static int __reserve_bp_slot(struct perf_event *bp)
 	fetch_this_slot(&slots, weight);
 
 	/* Flexible counters need to keep at least one slot */
-	if (slots.pinned + (!!slots.flexible) > HBP_NUM)
+	if (slots.pinned + (!!slots.flexible) > nr_slots[type])
 		return -ENOSPC;
 
 	toggle_bp_slot(bp, true, type, weight);
@@ -551,7 +551,43 @@ static struct notifier_block hw_breakpoint_exceptions_nb = {
 
 static int __init init_hw_breakpoint(void)
 {
+	unsigned int **task_bp_pinned;
+	int cpu, err_cpu;
+	int i;
+
+	/* Ask the arch how many slots exist for each breakpoint type */
+	for (i = 0; i < TYPE_MAX; i++)
+		nr_slots[i] = hw_breakpoint_slots(i);
+
+	/* One per-cpu counter array per type, sized by that type's slots */
+	for_each_possible_cpu(cpu) {
+		for (i = 0; i < TYPE_MAX; i++) {
+			task_bp_pinned = &per_cpu(nr_task_bp_pinned[i], cpu);
+			*task_bp_pinned = kzalloc(sizeof(int) * nr_slots[i],
+						  GFP_KERNEL);
+			if (!*task_bp_pinned)
+				goto err_alloc;
+		}
+	}
+
+	constraints_initialized = 1;
+
 	return register_die_notifier(&hw_breakpoint_exceptions_nb);
+
+ err_alloc:
+	/*
+	 * Free what was allocated for every cpu up to and including the
+	 * one that failed (it may be partially populated). Entries that
+	 * were never allocated are still NULL, which kfree() accepts.
+	 */
+	for_each_possible_cpu(err_cpu) {
+		for (i = 0; i < TYPE_MAX; i++)
+			kfree(per_cpu(nr_task_bp_pinned[i], err_cpu));
+		if (err_cpu == cpu)
+			break;
+	}
+
+	return -ENOMEM;
 }
 core_initcall(init_hw_breakpoint);
 
diff --git a/kernel/trace/trace_ksym.c b/kernel/trace/trace_ksym.c
index d59cd6879477..8eaf00749b65 100644
--- a/kernel/trace/trace_ksym.c
+++ b/kernel/trace/trace_ksym.c
@@ -34,12 +34,6 @@
 
 #include <asm/atomic.h>
 
-/*
- * For now, let us restrict the no. of symbols traced simultaneously to number
- * of available hardware breakpoint registers.
- */
-#define KSYM_TRACER_MAX HBP_NUM
-
 #define KSYM_TRACER_OP_LEN 3 /* rw- */
 
 struct trace_ksym {
@@ -53,7 +47,6 @@ struct trace_ksym {
 
 static struct trace_array *ksym_trace_array;
 
-static unsigned int ksym_filter_entry_count;
 static unsigned int ksym_tracing_enabled;
 
 static HLIST_HEAD(ksym_filter_head);
@@ -181,13 +174,6 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 	struct trace_ksym *entry;
 	int ret = -ENOMEM;
 
-	if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
-		printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
-		" new requests for tracing can be accepted now.\n",
-			KSYM_TRACER_MAX);
-		return -ENOSPC;
-	}
-
 	entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
 	if (!entry)
 		return -ENOMEM;
@@ -203,13 +189,17 @@ int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
 
 	if (IS_ERR(entry->ksym_hbp)) {
 		ret = PTR_ERR(entry->ksym_hbp);
-		printk(KERN_INFO "ksym_tracer request failed. Try again"
-					" later!!\n");
+		if (ret == -ENOSPC) {
+			printk(KERN_ERR "ksym_tracer: Maximum limit reached."
+			" No new requests for tracing can be accepted now.\n");
+		} else {
+			printk(KERN_INFO "ksym_tracer request failed. Try again"
+					 " later!!\n");
+		}
 		goto err;
 	}
 
 	hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
-	ksym_filter_entry_count++;
 
 	return 0;
 
@@ -265,7 +255,6 @@ static void __ksym_trace_reset(void)
 	hlist_for_each_entry_safe(entry, node, node1, &ksym_filter_head,
 								ksym_hlist) {
 		unregister_wide_hw_breakpoint(entry->ksym_hbp);
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
@@ -338,7 +327,6 @@ static ssize_t ksym_trace_filter_write(struct file *file,
 				goto out_unlock;
 		}
 		/* Error or "symbol:---" case: drop it */
-		ksym_filter_entry_count--;
 		hlist_del_rcu(&(entry->ksym_hlist));
 		synchronize_rcu();
 		kfree(entry);
-- 
cgit v1.2.3


From 43815482370c510c569fd18edb57afcb0fa8cab6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 29 Apr 2010 11:01:49 +0000
Subject: net: sock_def_readable() and friends RCU conversion

sk_callback_lock rwlock actually protects sk->sk_sleep pointer, so we
need two atomic operations (and associated dirtying) per incoming
packet.

RCU conversion is pretty much needed :

1) Add a new structure, called "struct socket_wq" to hold all fields
that will need rcu_read_lock() protection (currently: a
wait_queue_head_t and a struct fasync_struct pointer).

[Future patch will add a list anchor for wakeup coalescing]

2) Attach one such structure to each "struct socket" created in
sock_alloc_inode().

3) Respect RCU grace period when freeing a "struct socket_wq"

4) Replace the sk_sleep pointer in "struct sock" with sk_wq, a pointer
to "struct socket_wq"

5) Change sk_sleep() function to use new sk->sk_wq instead of
sk->sk_sleep

6) Change sk_has_sleeper() to wq_has_sleeper() that must be used inside
a rcu_read_lock() section.

7) Change all sk_has_sleeper() callers to :
  - Use rcu_read_lock() instead of read_lock(&sk->sk_callback_lock)
  - Use wq_has_sleeper() to decide whether to wake up tasks.
  - Use rcu_read_unlock() instead of read_unlock(&sk->sk_callback_lock)

8) sock_wake_async() is modified to use rcu protection as well.

9) Exceptions:
  macvtap, drivers/net/tun.c, af_unix use integrated "struct socket_wq"
instead of dynamically allocated ones. They don't need RCU freeing.

Some cleanups or followups are probably needed, (possible
sk_callback_lock conversion to a spinlock for example...).

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvtap.c | 13 +++++++++----
 drivers/net/tun.c     | 21 ++++++++++++---------
 include/linux/net.h   | 14 +++++++++-----
 include/net/af_unix.h | 20 +++++++++++---------
 include/net/sock.h    | 38 +++++++++++++++++++-------------------
 net/atm/common.c      | 22 ++++++++++++++--------
 net/core/sock.c       | 50 +++++++++++++++++++++++++++++++-------------------
 net/core/stream.c     | 10 +++++++---
 net/dccp/output.c     | 10 ++++++----
 net/iucv/af_iucv.c    | 11 +++++++----
 net/phonet/pep.c      |  8 ++++----
 net/phonet/socket.c   |  2 +-
 net/rxrpc/af_rxrpc.c  | 10 ++++++----
 net/sctp/socket.c     |  2 +-
 net/socket.c          | 47 ++++++++++++++++++++++++++++++++++++-----------
 net/unix/af_unix.c    | 17 ++++++++---------
 16 files changed, 181 insertions(+), 114 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index d97e1fd234ba..1c4110df343e 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -37,6 +37,7 @@
 struct macvtap_queue {
 	struct sock sk;
 	struct socket sock;
+	struct socket_wq wq;
 	struct macvlan_dev *vlan;
 	struct file *file;
 	unsigned int flags;
@@ -242,12 +243,15 @@ static struct rtnl_link_ops macvtap_link_ops __read_mostly = {
 
 static void macvtap_sock_write_space(struct sock *sk)
 {
+	wait_queue_head_t *wqueue;
+
 	if (!sock_writeable(sk) ||
 	    !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_poll(sk_sleep(sk), POLLOUT | POLLWRNORM | POLLWRBAND);
+	wqueue = sk_sleep(sk);
+	if (wqueue && waitqueue_active(wqueue))
+		wake_up_interruptible_poll(wqueue, POLLOUT | POLLWRNORM | POLLWRBAND);
 }
 
 static int macvtap_open(struct inode *inode, struct file *file)
@@ -272,7 +276,8 @@ static int macvtap_open(struct inode *inode, struct file *file)
 	if (!q)
 		goto out;
 
-	init_waitqueue_head(&q->sock.wait);
+	q->sock.wq = &q->wq;
+	init_waitqueue_head(&q->wq.wait);
 	q->sock.type = SOCK_RAW;
 	q->sock.state = SS_CONNECTED;
 	q->sock.file = file;
@@ -308,7 +313,7 @@ static unsigned int macvtap_poll(struct file *file, poll_table * wait)
 		goto out;
 
 	mask = 0;
-	poll_wait(file, &q->sock.wait, wait);
+	poll_wait(file, &q->wq.wait, wait);
 
 	if (!skb_queue_empty(&q->sk.sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 20a17938c62b..e525a6cf5587 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -109,7 +109,7 @@ struct tun_struct {
 
 	struct tap_filter       txflt;
 	struct socket		socket;
-
+	struct socket_wq	wq;
 #ifdef TUN_DEBUG
 	int debug;
 #endif
@@ -323,7 +323,7 @@ static void tun_net_uninit(struct net_device *dev)
 	/* Inform the methods they need to stop using the dev.
 	 */
 	if (tfile) {
-		wake_up_all(&tun->socket.wait);
+		wake_up_all(&tun->wq.wait);
 		if (atomic_dec_and_test(&tfile->count))
 			__tun_detach(tun);
 	}
@@ -398,7 +398,7 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	/* Notify and wake up reader process */
 	if (tun->flags & TUN_FASYNC)
 		kill_fasync(&tun->fasync, SIGIO, POLL_IN);
-	wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
+	wake_up_interruptible_poll(&tun->wq.wait, POLLIN |
 				   POLLRDNORM | POLLRDBAND);
 	return NETDEV_TX_OK;
 
@@ -498,7 +498,7 @@ static unsigned int tun_chr_poll(struct file *file, poll_table * wait)
 
 	DBG(KERN_INFO "%s: tun_chr_poll\n", tun->dev->name);
 
-	poll_wait(file, &tun->socket.wait, wait);
+	poll_wait(file, &tun->wq.wait, wait);
 
 	if (!skb_queue_empty(&sk->sk_receive_queue))
 		mask |= POLLIN | POLLRDNORM;
@@ -773,7 +773,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 
 	DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
 
-	add_wait_queue(&tun->socket.wait, &wait);
+	add_wait_queue(&tun->wq.wait, &wait);
 	while (len) {
 		current->state = TASK_INTERRUPTIBLE;
 
@@ -804,7 +804,7 @@ static ssize_t tun_do_read(struct tun_struct *tun,
 	}
 
 	current->state = TASK_RUNNING;
-	remove_wait_queue(&tun->socket.wait, &wait);
+	remove_wait_queue(&tun->wq.wait, &wait);
 
 	return ret;
 }
@@ -861,6 +861,7 @@ static struct rtnl_link_ops tun_link_ops __read_mostly = {
 static void tun_sock_write_space(struct sock *sk)
 {
 	struct tun_struct *tun;
+	wait_queue_head_t *wqueue;
 
 	if (!sock_writeable(sk))
 		return;
@@ -868,8 +869,9 @@ static void tun_sock_write_space(struct sock *sk)
 	if (!test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags))
 		return;
 
-	if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+	wqueue = sk_sleep(sk);
+	if (wqueue && waitqueue_active(wqueue))
+		wake_up_interruptible_sync_poll(wqueue, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 	tun = tun_sk(sk)->tun;
@@ -1039,7 +1041,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		if (!sk)
 			goto err_free_dev;
 
-		init_waitqueue_head(&tun->socket.wait);
+		tun->socket.wq = &tun->wq;
+		init_waitqueue_head(&tun->wq.wait);
 		tun->socket.ops = &tun_socket_ops;
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
diff --git a/include/linux/net.h b/include/linux/net.h
index 4157b5d42bd6..2b4deeeb8646 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -59,6 +59,7 @@ typedef enum {
 #include <linux/wait.h>
 #include <linux/fcntl.h>	/* For O_CLOEXEC and O_NONBLOCK */
 #include <linux/kmemcheck.h>
+#include <linux/rcupdate.h>
 
 struct poll_table_struct;
 struct pipe_inode_info;
@@ -116,6 +117,12 @@ enum sock_shutdown_cmd {
 	SHUT_RDWR	= 2,
 };
 
+struct socket_wq {
+	wait_queue_head_t	wait;
+	struct fasync_struct	*fasync_list;
+	struct rcu_head		rcu;
+} ____cacheline_aligned_in_smp;
+
 /**
  *  struct socket - general BSD socket
  *  @state: socket state (%SS_CONNECTED, etc)
@@ -135,11 +142,8 @@ struct socket {
 	kmemcheck_bitfield_end(type);
 
 	unsigned long		flags;
-	/*
-	 * Please keep fasync_list & wait fields in the same cache line
-	 */
-	struct fasync_struct	*fasync_list;
-	wait_queue_head_t	wait;
+
+	struct socket_wq	*wq;
 
 	struct file		*file;
 	struct sock		*sk;
diff --git a/include/net/af_unix.h b/include/net/af_unix.h
index 1614d78c60ed..20725e213aee 100644
--- a/include/net/af_unix.h
+++ b/include/net/af_unix.h
@@ -30,7 +30,7 @@ struct unix_skb_parms {
 #endif
 };
 
-#define UNIXCB(skb) 	(*(struct unix_skb_parms*)&((skb)->cb))
+#define UNIXCB(skb) 	(*(struct unix_skb_parms *)&((skb)->cb))
 #define UNIXCREDS(skb)	(&UNIXCB((skb)).creds)
 #define UNIXSID(skb)	(&UNIXCB((skb)).secid)
 
@@ -45,21 +45,23 @@ struct unix_skb_parms {
 struct unix_sock {
 	/* WARNING: sk has to be the first member */
 	struct sock		sk;
-        struct unix_address     *addr;
-        struct dentry		*dentry;
-        struct vfsmount		*mnt;
+	struct unix_address     *addr;
+	struct dentry		*dentry;
+	struct vfsmount		*mnt;
 	struct mutex		readlock;
-        struct sock		*peer;
-        struct sock		*other;
+	struct sock		*peer;
+	struct sock		*other;
 	struct list_head	link;
-        atomic_long_t           inflight;
-        spinlock_t		lock;
+	atomic_long_t		inflight;
+	spinlock_t		lock;
 	unsigned int		gc_candidate : 1;
 	unsigned int		gc_maybe_cycle : 1;
-        wait_queue_head_t       peer_wait;
+	struct socket_wq	peer_wq;
 };
 #define unix_sk(__sk) ((struct unix_sock *)__sk)
 
+#define peer_wait peer_wq.wait
+
 #ifdef CONFIG_SYSCTL
 extern int unix_sysctl_register(struct net *net);
 extern void unix_sysctl_unregister(struct net *net);
diff --git a/include/net/sock.h b/include/net/sock.h
index e1777db5b9ab..cc7f91ec972c 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -159,7 +159,7 @@ struct sock_common {
   *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
   *	@sk_lock:	synchronizer
   *	@sk_rcvbuf: size of receive buffer in bytes
-  *	@sk_sleep: sock wait queue
+  *	@sk_wq: sock wait queue and async head
   *	@sk_dst_cache: destination cache
   *	@sk_dst_lock: destination cache lock
   *	@sk_policy: flow policy
@@ -257,7 +257,7 @@ struct sock {
 		struct sk_buff *tail;
 		int len;
 	} sk_backlog;
-	wait_queue_head_t	*sk_sleep;
+	struct socket_wq	*sk_wq;
 	struct dst_entry	*sk_dst_cache;
 #ifdef CONFIG_XFRM
 	struct xfrm_policy	*sk_policy[2];
@@ -1219,7 +1219,7 @@ static inline void sk_set_socket(struct sock *sk, struct socket *sock)
 
 static inline wait_queue_head_t *sk_sleep(struct sock *sk)
 {
-	return sk->sk_sleep;
+	return &sk->sk_wq->wait;
 }
 /* Detach socket from process context.
  * Announce socket dead, detach it from wait queue and inode.
@@ -1233,14 +1233,14 @@ static inline void sock_orphan(struct sock *sk)
 	write_lock_bh(&sk->sk_callback_lock);
 	sock_set_flag(sk, SOCK_DEAD);
 	sk_set_socket(sk, NULL);
-	sk->sk_sleep  = NULL;
+	sk->sk_wq  = NULL;
 	write_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void sock_graft(struct sock *sk, struct socket *parent)
 {
 	write_lock_bh(&sk->sk_callback_lock);
-	sk->sk_sleep = &parent->wait;
+	rcu_assign_pointer(sk->sk_wq, parent->wq);
 	parent->sk = sk;
 	sk_set_socket(sk, parent);
 	security_sock_graft(sk, parent);
@@ -1392,12 +1392,12 @@ static inline int sk_has_allocations(const struct sock *sk)
 }
 
 /**
- * sk_has_sleeper - check if there are any waiting processes
- * @sk: socket
+ * wq_has_sleeper - check if there are any waiting processes
+ * @wq: struct socket_wq
  *
- * Returns true if socket has waiting processes
+ * Returns true if socket_wq has waiting processes
  *
- * The purpose of the sk_has_sleeper and sock_poll_wait is to wrap the memory
+ * The purpose of the wq_has_sleeper and sock_poll_wait is to wrap the memory
  * barrier call. They were added due to the race found within the tcp code.
  *
  * Consider following tcp code paths:
@@ -1410,9 +1410,10 @@ static inline int sk_has_allocations(const struct sock *sk)
  *   ...                 ...
  *   tp->rcv_nxt check   sock_def_readable
  *   ...                 {
- *   schedule               ...
- *                          if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
- *                              wake_up_interruptible(sk_sleep(sk))
+ *   schedule               rcu_read_lock();
+ *                          wq = rcu_dereference(sk->sk_wq);
+ *                          if (wq && waitqueue_active(&wq->wait))
+ *                              wake_up_interruptible(&wq->wait)
  *                          ...
  *                       }
  *
@@ -1421,19 +1422,18 @@ static inline int sk_has_allocations(const struct sock *sk)
  * could then endup calling schedule and sleep forever if there are no more
  * data on the socket.
  *
- * The sk_has_sleeper is always called right after a call to read_lock, so we
- * can use smp_mb__after_lock barrier.
  */
-static inline int sk_has_sleeper(struct sock *sk)
+static inline bool wq_has_sleeper(struct socket_wq *wq)
 {
+
 	/*
 	 * We need to be sure we are in sync with the
 	 * add_wait_queue modifications to the wait queue.
 	 *
 	 * This memory barrier is paired in the sock_poll_wait.
 	 */
-	smp_mb__after_lock();
-	return sk_sleep(sk) && waitqueue_active(sk_sleep(sk));
+	smp_mb();
+	return wq && waitqueue_active(&wq->wait);
 }
 
 /**
@@ -1442,7 +1442,7 @@ static inline int sk_has_sleeper(struct sock *sk)
  * @wait_address:   socket wait queue
  * @p:              poll_table
  *
- * See the comments in the sk_has_sleeper function.
+ * See the comments in the wq_has_sleeper function.
  */
 static inline void sock_poll_wait(struct file *filp,
 		wait_queue_head_t *wait_address, poll_table *p)
@@ -1453,7 +1453,7 @@ static inline void sock_poll_wait(struct file *filp,
 		 * We need to be sure we are in sync with the
 		 * socket flags modification.
 		 *
-		 * This memory barrier is paired in the sk_has_sleeper.
+		 * This memory barrier is paired in the wq_has_sleeper.
 		*/
 		smp_mb();
 	}
diff --git a/net/atm/common.c b/net/atm/common.c
index e3e10e6f8628..b43feb1a3995 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -90,10 +90,13 @@ static void vcc_sock_destruct(struct sock *sk)
 
 static void vcc_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up(sk_sleep(sk));
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up(&wq->wait);
+	rcu_read_unlock();
 }
 
 static inline int vcc_writable(struct sock *sk)
@@ -106,16 +109,19 @@ static inline int vcc_writable(struct sock *sk)
 
 static void vcc_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	if (vcc_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk_sleep(sk));
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible(&wq->wait);
 
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static struct proto vcc_proto = {
diff --git a/net/core/sock.c b/net/core/sock.c
index 51041759517e..94c4affdda9b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1211,7 +1211,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority)
 		 */
 		sk_refcnt_debug_inc(newsk);
 		sk_set_socket(newsk, NULL);
-		newsk->sk_sleep	 = NULL;
+		newsk->sk_wq = NULL;
 
 		if (newsk->sk_prot->sockets_allocated)
 			percpu_counter_inc(newsk->sk_prot->sockets_allocated);
@@ -1800,41 +1800,53 @@ EXPORT_SYMBOL(sock_no_sendpage);
 
 static void sock_def_wakeup(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk_sleep(sk));
-	read_unlock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
+	rcu_read_unlock();
 }
 
 static void sock_def_error_report(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_poll(sk_sleep(sk), POLLERR);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_poll(&wq->wait, POLLERR);
 	sk_wake_async(sk, SOCK_WAKE_IO, POLL_ERR);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_readable(struct sock *sk, int len)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_sync_poll(sk_sleep(sk), POLLIN |
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_sync_poll(&wq->wait, POLLIN |
 						POLLRDNORM | POLLRDBAND);
 	sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 
 	/* Do not wake up a writer until he can make "significant"
 	 * progress.  --DaveM
 	 */
 	if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync_poll(sk_sleep(sk), POLLOUT |
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
 
 		/* Should agree with poll, otherwise some programs break */
@@ -1842,7 +1854,7 @@ static void sock_def_write_space(struct sock *sk)
 			sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 static void sock_def_destruct(struct sock *sk)
@@ -1896,10 +1908,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 
 	if (sock) {
 		sk->sk_type	=	sock->type;
-		sk->sk_sleep	=	&sock->wait;
+		sk->sk_wq	=	sock->wq;
 		sock->sk	=	sk;
 	} else
-		sk->sk_sleep	=	NULL;
+		sk->sk_wq	=	NULL;
 
 	spin_lock_init(&sk->sk_dst_lock);
 	rwlock_init(&sk->sk_callback_lock);
diff --git a/net/core/stream.c b/net/core/stream.c
index 7b3c3f30b107..cc196f42b8d8 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -28,15 +28,19 @@
 void sk_stream_write_space(struct sock *sk)
 {
 	struct socket *sock = sk->sk_socket;
+	struct socket_wq *wq;
 
 	if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk) && sock) {
 		clear_bit(SOCK_NOSPACE, &sock->flags);
 
-		if (sk_sleep(sk) && waitqueue_active(sk_sleep(sk)))
-			wake_up_interruptible_poll(sk_sleep(sk), POLLOUT |
+		rcu_read_lock();
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_poll(&wq->wait, POLLOUT |
 						POLLWRNORM | POLLWRBAND);
-		if (sock->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
+		if (wq && wq->fasync_list && !(sk->sk_shutdown & SEND_SHUTDOWN))
 			sock_wake_async(sock, SOCK_WAKE_SPACE, POLL_OUT);
+		rcu_read_unlock();
 	}
 }
 
diff --git a/net/dccp/output.c b/net/dccp/output.c
index 2d3dcb39851f..aadbdb58758b 100644
--- a/net/dccp/output.c
+++ b/net/dccp/output.c
@@ -195,15 +195,17 @@ EXPORT_SYMBOL_GPL(dccp_sync_mss);
 
 void dccp_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
 
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible(sk_sleep(sk));
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible(&wq->wait);
 	/* Should agree with poll, otherwise some programs break */
 	if (sock_writeable(sk))
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /**
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 9636b7d27b48..8be324fe08b9 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -305,11 +305,14 @@ static inline int iucv_below_msglim(struct sock *sk)
  */
 static void iucv_sock_wake_msglim(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
-	if (sk_has_sleeper(sk))
-		wake_up_interruptible_all(sk_sleep(sk));
+	struct socket_wq *wq;
+
+	rcu_read_lock();
+	wq = rcu_dereference(sk->sk_wq);
+	if (wq_has_sleeper(wq))
+		wake_up_interruptible_all(&wq->wait);
 	sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /* Timers */
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index e2a95762abd3..af4d38bc3b22 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -664,12 +664,12 @@ static int pep_wait_connreq(struct sock *sk, int noblock)
 		if (signal_pending(tsk))
 			return sock_intr_errno(timeo);
 
-		prepare_to_wait_exclusive(&sk->sk_socket->wait, &wait,
+		prepare_to_wait_exclusive(sk_sleep(sk), &wait,
 						TASK_INTERRUPTIBLE);
 		release_sock(sk);
 		timeo = schedule_timeout(timeo);
 		lock_sock(sk);
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 	}
 
 	return 0;
@@ -910,10 +910,10 @@ disabled:
 			goto out;
 		}
 
-		prepare_to_wait(&sk->sk_socket->wait, &wait,
+		prepare_to_wait(sk_sleep(sk), &wait,
 				TASK_INTERRUPTIBLE);
 		done = sk_wait_event(sk, &timeo, atomic_read(&pn->tx_credits));
-		finish_wait(&sk->sk_socket->wait, &wait);
+		finish_wait(sk_sleep(sk), &wait);
 
 		if (sk->sk_state != TCP_ESTABLISHED)
 			goto disabled;
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index c785bfd0744f..6e9848bf0370 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -265,7 +265,7 @@ static unsigned int pn_socket_poll(struct file *file, struct socket *sock,
 	struct pep_sock *pn = pep_sk(sk);
 	unsigned int mask = 0;
 
-	poll_wait(file, &sock->wait, wait);
+	poll_wait(file, sk_sleep(sk), wait);
 
 	switch (sk->sk_state) {
 	case TCP_LISTEN:
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index c432d76f415e..0b9bb2085ce4 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -62,13 +62,15 @@ static inline int rxrpc_writable(struct sock *sk)
 static void rxrpc_write_space(struct sock *sk)
 {
 	_enter("%p", sk);
-	read_lock(&sk->sk_callback_lock);
+	rcu_read_lock();
 	if (rxrpc_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible(sk_sleep(sk));
+		struct socket_wq *wq = rcu_dereference(sk->sk_wq);
+
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible(&wq->wait);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /*
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 13d8229f3a9c..d54700af927a 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -6065,7 +6065,7 @@ static void __sctp_write_space(struct sctp_association *asoc)
 			 * here by modeling from the current TCP/UDP code.
 			 * We have not tested with it yet.
 			 */
-			if (sock->fasync_list &&
+			if (sock->wq->fasync_list &&
 			    !(sk->sk_shutdown & SEND_SHUTDOWN))
 				sock_wake_async(sock,
 						SOCK_WAKE_SPACE, POLL_OUT);
diff --git a/net/socket.c b/net/socket.c
index cb7c1f6c0d6e..dae8c6b84a09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -252,9 +252,14 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	ei = kmem_cache_alloc(sock_inode_cachep, GFP_KERNEL);
 	if (!ei)
 		return NULL;
-	init_waitqueue_head(&ei->socket.wait);
+	ei->socket.wq = kmalloc(sizeof(struct socket_wq), GFP_KERNEL);
+	if (!ei->socket.wq) {
+		kmem_cache_free(sock_inode_cachep, ei);
+		return NULL;
+	}
+	init_waitqueue_head(&ei->socket.wq->wait);
+	ei->socket.wq->fasync_list = NULL;
 
-	ei->socket.fasync_list = NULL;
 	ei->socket.state = SS_UNCONNECTED;
 	ei->socket.flags = 0;
 	ei->socket.ops = NULL;
@@ -264,10 +269,21 @@ static struct inode *sock_alloc_inode(struct super_block *sb)
 	return &ei->vfs_inode;
 }
 
+
+static void wq_free_rcu(struct rcu_head *head)
+{
+	struct socket_wq *wq = container_of(head, struct socket_wq, rcu);
+
+	kfree(wq);
+}
+
 static void sock_destroy_inode(struct inode *inode)
 {
-	kmem_cache_free(sock_inode_cachep,
-			container_of(inode, struct socket_alloc, vfs_inode));
+	struct socket_alloc *ei;
+
+	ei = container_of(inode, struct socket_alloc, vfs_inode);
+	call_rcu(&ei->socket.wq->rcu, wq_free_rcu);
+	kmem_cache_free(sock_inode_cachep, ei);
 }
 
 static void init_once(void *foo)
@@ -513,7 +529,7 @@ void sock_release(struct socket *sock)
 		module_put(owner);
 	}
 
-	if (sock->fasync_list)
+	if (sock->wq->fasync_list)
 		printk(KERN_ERR "sock_release: fasync list not empty!\n");
 
 	percpu_sub(sockets_in_use, 1);
@@ -1080,9 +1096,9 @@ static int sock_fasync(int fd, struct file *filp, int on)
 
 	lock_sock(sk);
 
-	fasync_helper(fd, filp, on, &sock->fasync_list);
+	fasync_helper(fd, filp, on, &sock->wq->fasync_list);
 
-	if (!sock->fasync_list)
+	if (!sock->wq->fasync_list)
 		sock_reset_flag(sk, SOCK_FASYNC);
 	else
 		sock_set_flag(sk, SOCK_FASYNC);
@@ -1091,12 +1107,20 @@ static int sock_fasync(int fd, struct file *filp, int on)
 	return 0;
 }
 
-/* This function may be called only under socket lock or callback_lock */
+/* This function may be called only under socket lock or callback_lock or rcu_lock */
 
 int sock_wake_async(struct socket *sock, int how, int band)
 {
-	if (!sock || !sock->fasync_list)
+	struct socket_wq *wq;
+
+	if (!sock)
 		return -1;
+	rcu_read_lock();
+	wq = rcu_dereference(sock->wq);
+	if (!wq || !wq->fasync_list) {
+		rcu_read_unlock();
+		return -1;
+	}
 	switch (how) {
 	case SOCK_WAKE_WAITD:
 		if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags))
@@ -1108,11 +1132,12 @@ int sock_wake_async(struct socket *sock, int how, int band)
 		/* fall through */
 	case SOCK_WAKE_IO:
 call_kill:
-		kill_fasync(&sock->fasync_list, SIGIO, band);
+		kill_fasync(&wq->fasync_list, SIGIO, band);
 		break;
 	case SOCK_WAKE_URG:
-		kill_fasync(&sock->fasync_list, SIGURG, band);
+		kill_fasync(&wq->fasync_list, SIGURG, band);
 	}
+	rcu_read_unlock();
 	return 0;
 }
 
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 87c0360eaa25..fef2cc5e9d2b 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -313,13 +313,16 @@ static inline int unix_writable(struct sock *sk)
 
 static void unix_write_space(struct sock *sk)
 {
-	read_lock(&sk->sk_callback_lock);
+	struct socket_wq *wq;
+
+	rcu_read_lock();
 	if (unix_writable(sk)) {
-		if (sk_has_sleeper(sk))
-			wake_up_interruptible_sync(sk_sleep(sk));
+		wq = rcu_dereference(sk->sk_wq);
+		if (wq_has_sleeper(wq))
+			wake_up_interruptible_sync(&wq->wait);
 		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 	}
-	read_unlock(&sk->sk_callback_lock);
+	rcu_read_unlock();
 }
 
 /* When dgram socket disconnects (or changes its peer), we clear its receive
@@ -406,9 +409,7 @@ static int unix_release_sock(struct sock *sk, int embrion)
 				skpair->sk_err = ECONNRESET;
 			unix_state_unlock(skpair);
 			skpair->sk_state_change(skpair);
-			read_lock(&skpair->sk_callback_lock);
 			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
-			read_unlock(&skpair->sk_callback_lock);
 		}
 		sock_put(skpair); /* It may now die */
 		unix_peer(sk) = NULL;
@@ -1142,7 +1143,7 @@ restart:
 	newsk->sk_peercred.pid	= task_tgid_vnr(current);
 	current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid);
 	newu = unix_sk(newsk);
-	newsk->sk_sleep		= &newu->peer_wait;
+	newsk->sk_wq		= &newu->peer_wq;
 	otheru = unix_sk(other);
 
 	/* copy address information from listening to new sock*/
@@ -1931,12 +1932,10 @@ static int unix_shutdown(struct socket *sock, int mode)
 			other->sk_shutdown |= peer_mode;
 			unix_state_unlock(other);
 			other->sk_state_change(other);
-			read_lock(&other->sk_callback_lock);
 			if (peer_mode == SHUTDOWN_MASK)
 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
 			else if (peer_mode & RCV_SHUTDOWN)
 				sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
-			read_unlock(&other->sk_callback_lock);
 		}
 		if (other)
 			sock_put(other);
-- 
cgit v1.2.3


From 47d29646a2c1c147d8a7598aeac2c87dd71ed638 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 2 May 2010 02:21:44 -0700
Subject: net: Inline skb_pull() in eth_type_trans().

In commit 6be8ac2f ("[NET]: uninline skb_pull, de-bloats a lot")
we uninlined skb_pull.

But in some critical paths it makes sense to inline this thing
and it helps performance significantly.

Create an skb_pull_inline() so that we can do this in a way that
serves also as annotation.

Based upon a patch by Eric Dumazet.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 +++++
 net/core/skbuff.c      | 2 +-
 net/ethernet/eth.c     | 2 +-
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 82f5116a89e4..746a652b9f6f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1128,6 +1128,11 @@ static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len)
 	return skb->data += len;
 }
 
+static inline unsigned char *skb_pull_inline(struct sk_buff *skb, unsigned int len)
+{
+	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+}
+
 extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta);
 
 static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len)
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4218ff49bf13..8b9c109166a7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1051,7 +1051,7 @@ EXPORT_SYMBOL(skb_push);
  */
 unsigned char *skb_pull(struct sk_buff *skb, unsigned int len)
 {
-	return unlikely(len > skb->len) ? NULL : __skb_pull(skb, len);
+	return skb_pull_inline(skb, len);
 }
 EXPORT_SYMBOL(skb_pull);
 
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index 0c0d272a9888..61ec0329316c 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -162,7 +162,7 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev)
 
 	skb->dev = dev;
 	skb_reset_mac_header(skb);
-	skb_pull(skb, ETH_HLEN);
+	skb_pull_inline(skb, ETH_HLEN);
 	eth = eth_hdr(skb);
 
 	if (unlikely(is_multicast_ether_addr(eth->h_dest))) {
-- 
cgit v1.2.3


From dee42870a423ad485129f43cddfe7275479f11d8 Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Sun, 2 May 2010 05:42:16 +0000
Subject: net: fix softnet_stat

Per cpu variable softnet_data.total was shared between IRQ and SoftIRQ context
without any protection. And enqueue_to_backlog should update the netdev_rx_stat
of the target CPU.

This patch renames softnet_data.total to softnet_data.processed: the number of
packets processed in upper levels (IP stacks).

softnet_stat data is moved into softnet_data.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
----
 include/linux/netdevice.h |   17 +++++++----------
 net/core/dev.c            |   26 ++++++++++++--------------
 net/sched/sch_generic.c   |    2 +-
 3 files changed, 20 insertions(+), 25 deletions(-)
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 17 +++++++----------
 net/core/dev.c            | 26 ++++++++++++--------------
 net/sched/sch_generic.c   |  2 +-
 3 files changed, 20 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 40d4c20d034b..c39938f8a8d8 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -218,16 +218,6 @@ struct neighbour;
 struct neigh_parms;
 struct sk_buff;
 
-struct netif_rx_stats {
-	unsigned total;
-	unsigned dropped;
-	unsigned time_squeeze;
-	unsigned cpu_collision;
-	unsigned received_rps;
-};
-
-DECLARE_PER_CPU(struct netif_rx_stats, netdev_rx_stat);
-
 struct netdev_hw_addr {
 	struct list_head	list;
 	unsigned char		addr[MAX_ADDR_LEN];
@@ -1390,6 +1380,12 @@ struct softnet_data {
 	struct sk_buff		*completion_queue;
 	struct sk_buff_head	process_queue;
 
+	/* stats */
+	unsigned		processed;
+	unsigned		time_squeeze;
+	unsigned		cpu_collision;
+	unsigned		received_rps;
+
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
 
@@ -1399,6 +1395,7 @@ struct softnet_data {
 	unsigned int		cpu;
 	unsigned int		input_queue_head;
 #endif
+	unsigned		dropped;
 	struct sk_buff_head	input_pkt_queue;
 	struct napi_struct	backlog;
 };
diff --git a/net/core/dev.c b/net/core/dev.c
index 100dcbd29739..36d53be4fca6 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2205,8 +2205,6 @@ int netdev_max_backlog __read_mostly = 1000;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
-DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
-
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
@@ -2366,7 +2364,7 @@ static void rps_trigger_softirq(void *data)
 	struct softnet_data *sd = data;
 
 	__napi_schedule(&sd->backlog);
-	__get_cpu_var(netdev_rx_stat).received_rps++;
+	sd->received_rps++;
 }
 
 #endif /* CONFIG_RPS */
@@ -2405,7 +2403,6 @@ static int enqueue_to_backlog(struct sk_buff *skb, int cpu,
 	sd = &per_cpu(softnet_data, cpu);
 
 	local_irq_save(flags);
-	__get_cpu_var(netdev_rx_stat).total++;
 
 	rps_lock(sd);
 	if (skb_queue_len(&sd->input_pkt_queue) <= netdev_max_backlog) {
@@ -2429,9 +2426,9 @@ enqueue:
 		goto enqueue;
 	}
 
+	sd->dropped++;
 	rps_unlock(sd);
 
-	__get_cpu_var(netdev_rx_stat).dropped++;
 	local_irq_restore(flags);
 
 	kfree_skb(skb);
@@ -2806,7 +2803,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
 			skb->dev = master;
 	}
 
-	__get_cpu_var(netdev_rx_stat).total++;
+	__get_cpu_var(softnet_data).processed++;
 
 	skb_reset_network_header(skb);
 	skb_reset_transport_header(skb);
@@ -3490,7 +3487,7 @@ out:
 	return;
 
 softnet_break:
-	__get_cpu_var(netdev_rx_stat).time_squeeze++;
+	sd->time_squeeze++;
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 	goto out;
 }
@@ -3691,17 +3688,17 @@ static int dev_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
-static struct netif_rx_stats *softnet_get_online(loff_t *pos)
+static struct softnet_data *softnet_get_online(loff_t *pos)
 {
-	struct netif_rx_stats *rc = NULL;
+	struct softnet_data *sd = NULL;
 
 	while (*pos < nr_cpu_ids)
 		if (cpu_online(*pos)) {
-			rc = &per_cpu(netdev_rx_stat, *pos);
+			sd = &per_cpu(softnet_data, *pos);
 			break;
 		} else
 			++*pos;
-	return rc;
+	return sd;
 }
 
 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
@@ -3721,12 +3718,12 @@ static void softnet_seq_stop(struct seq_file *seq, void *v)
 
 static int softnet_seq_show(struct seq_file *seq, void *v)
 {
-	struct netif_rx_stats *s = v;
+	struct softnet_data *sd = v;
 
 	seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
-		   s->total, s->dropped, s->time_squeeze, 0,
+		   sd->processed, sd->dropped, sd->time_squeeze, 0,
 		   0, 0, 0, 0, /* was fastroute */
-		   s->cpu_collision, s->received_rps);
+		   sd->cpu_collision, sd->received_rps);
 	return 0;
 }
 
@@ -5869,6 +5866,7 @@ static int __init net_dev_init(void)
 	for_each_possible_cpu(i) {
 		struct softnet_data *sd = &per_cpu(softnet_data, i);
 
+		memset(sd, 0, sizeof(*sd));
 		skb_queue_head_init(&sd->input_pkt_queue);
 		skb_queue_head_init(&sd->process_queue);
 		sd->completion_queue = NULL;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index aeddabfb8e4e..a969b111bd76 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -94,7 +94,7 @@ static inline int handle_dev_cpu_collision(struct sk_buff *skb,
 		 * Another cpu is holding lock, requeue & delay xmits for
 		 * some time.
 		 */
-		__get_cpu_var(netdev_rx_stat).cpu_collision++;
+		__get_cpu_var(softnet_data).cpu_collision++;
 		ret = dev_requeue_skb(skb, q);
 	}
 
-- 
cgit v1.2.3


From cd7b5396e7e4d10c51116f59f414ff90312af8d4 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sun, 2 May 2010 22:27:59 -0700
Subject: net: Use explicit "unsigned int" instead of plain "unsigned" in
 netdevice.h

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c39938f8a8d8..98112fbddefd 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -878,7 +878,7 @@ struct net_device {
 	unsigned char		operstate; /* RFC2863 operstate */
 	unsigned char		link_mode; /* mapping policy to operstate */
 
-	unsigned		mtu;	/* interface MTU value		*/
+	unsigned int		mtu;	/* interface MTU value		*/
 	unsigned short		type;	/* interface hardware type	*/
 	unsigned short		hard_header_len;	/* hardware hdr length	*/
 
@@ -1381,10 +1381,10 @@ struct softnet_data {
 	struct sk_buff_head	process_queue;
 
 	/* stats */
-	unsigned		processed;
-	unsigned		time_squeeze;
-	unsigned		cpu_collision;
-	unsigned		received_rps;
+	unsigned int		processed;
+	unsigned int		time_squeeze;
+	unsigned int		cpu_collision;
+	unsigned int		received_rps;
 
 #ifdef CONFIG_RPS
 	struct softnet_data	*rps_ipi_list;
-- 
cgit v1.2.3


From d9d52b5178af586d679c1052fb161ee05ea2e83f Mon Sep 17 00:00:00 2001
From: "Michael S. Tsirkin" <mst@redhat.com>
Date: Wed, 17 Mar 2010 17:45:01 +0200
Subject: tun: add ioctl to modify vnet header size

virtio added mergeable buffers mode where 2 bytes of extra info is put
after vnet header but before actual data (tun does not need this data).
In hindsight, it would have been better to add the new info *before* the
packet: as it is, users need a lot of tricky code to skip the extra 2
bytes in the middle of the iovec, and in fact applications seem to get
it wrong, and only work with specific iovec layout.  The fact we might
need to split iovec also means we might in theory overflow iovec max
size.

This patch adds a simpler way for applications to handle this,
and future proofs the interface against further extensions,
by making the size of the virtio net header configurable
from userspace. As a result, tun driver will simply
skip the extra 2 bytes on both input and output.

Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c      | 32 ++++++++++++++++++++++++++++----
 include/linux/if_tun.h |  2 ++
 2 files changed, 30 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index e525a6cf5587..6b150c072a41 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -110,6 +110,9 @@ struct tun_struct {
 	struct tap_filter       txflt;
 	struct socket		socket;
 	struct socket_wq	wq;
+
+	int			vnet_hdr_sz;
+
 #ifdef TUN_DEBUG
 	int debug;
 #endif
@@ -563,7 +566,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
 	}
 
 	if (tun->flags & TUN_VNET_HDR) {
-		if ((len -= sizeof(gso)) > count)
+		if ((len -= tun->vnet_hdr_sz) > count)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
@@ -575,7 +578,7 @@ static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
 
 		if (gso.hdr_len > len)
 			return -EINVAL;
-		offset += sizeof(gso);
+		offset += tun->vnet_hdr_sz;
 	}
 
 	if ((tun->flags & TUN_TYPE_MASK) == TUN_TAP_DEV) {
@@ -718,7 +721,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 
 	if (tun->flags & TUN_VNET_HDR) {
 		struct virtio_net_hdr gso = { 0 }; /* no info leak */
-		if ((len -= sizeof(gso)) < 0)
+		if ((len -= tun->vnet_hdr_sz) < 0)
 			return -EINVAL;
 
 		if (skb_is_gso(skb)) {
@@ -749,7 +752,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
 		if (unlikely(memcpy_toiovecend(iv, (void *)&gso, total,
 					       sizeof(gso))))
 			return -EFAULT;
-		total += sizeof(gso);
+		total += tun->vnet_hdr_sz;
 	}
 
 	len = min_t(int, skb->len, len);
@@ -1035,6 +1038,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun->dev = dev;
 		tun->flags = flags;
 		tun->txflt.count = 0;
+		tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
 
 		err = -ENOMEM;
 		sk = sk_alloc(net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
@@ -1177,6 +1181,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	struct sock_fprog fprog;
 	struct ifreq ifr;
 	int sndbuf;
+	int vnet_hdr_sz;
 	int ret;
 
 	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
@@ -1322,6 +1327,25 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 		tun->socket.sk->sk_sndbuf = sndbuf;
 		break;
 
+	case TUNGETVNETHDRSZ:
+		vnet_hdr_sz = tun->vnet_hdr_sz;
+		if (copy_to_user(argp, &vnet_hdr_sz, sizeof(vnet_hdr_sz)))
+			ret = -EFAULT;
+		break;
+
+	case TUNSETVNETHDRSZ:
+		if (copy_from_user(&vnet_hdr_sz, argp, sizeof(vnet_hdr_sz))) {
+			ret = -EFAULT;
+			break;
+		}
+		if (vnet_hdr_sz < (int)sizeof(struct virtio_net_hdr)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		tun->vnet_hdr_sz = vnet_hdr_sz;
+		break;
+
 	case TUNATTACHFILTER:
 		/* Can be set only for TAPs */
 		ret = -EINVAL;
diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h
index 1350a246893a..06b1829731fd 100644
--- a/include/linux/if_tun.h
+++ b/include/linux/if_tun.h
@@ -51,6 +51,8 @@
 #define TUNSETSNDBUF   _IOW('T', 212, int)
 #define TUNATTACHFILTER _IOW('T', 213, struct sock_fprog)
 #define TUNDETACHFILTER _IOW('T', 214, struct sock_fprog)
+#define TUNGETVNETHDRSZ _IOR('T', 215, int)
+#define TUNSETVNETHDRSZ _IOW('T', 216, int)
 
 /* TUNSETIFF ifr flags */
 #define IFF_TUN		0x0001
-- 
cgit v1.2.3


From e7a297b0d7d6049bd4e423ac1e17da31e4c401b8 Mon Sep 17 00:00:00 2001
From: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Date: Fri, 30 Apr 2010 14:44:50 -0700
Subject: genirq: Add CPU mask affinity hint

This patch adds a cpumask affinity hint to the irq_desc structure,
along with a registration function and a read-only proc entry for each
interrupt.

This affinity_hint handle for each interrupt can be used by underlying
drivers that need a better mechanism to control interrupt affinity.
The underlying driver can register a cpumask for the interrupt, which
will allow the driver to provide the CPU mask for the interrupt to
anything that requests it.  The intent is to extend the userspace
daemon, irqbalance, to help hint to it a preferred CPU mask to balance
the interrupt into.

[ tglx: Fixed compile warnings, added WARN_ON, made SMP only ]

Signed-off-by: Peter P Waskiewicz Jr <peter.p.waskiewicz.jr@intel.com>
Cc: davem@davemloft.net
Cc: arjan@linux.jf.intel.com
Cc: bhutchings@solarflare.com
LKML-Reference: <20100430214445.3992.41647.stgit@ppwaskie-hc2.jf.intel.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/interrupt.h |  6 ++++++
 include/linux/irq.h       |  1 +
 kernel/irq/manage.c       | 22 ++++++++++++++++++++++
 kernel/irq/proc.c         | 39 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 68 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index e6d2f4441fda..5137db3317f9 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -233,6 +233,7 @@ extern int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask);
 extern int irq_can_set_affinity(unsigned int irq);
 extern int irq_select_affinity(unsigned int irq);
 
+extern int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m);
 #else /* CONFIG_SMP */
 
 static inline int irq_set_affinity(unsigned int irq, const struct cpumask *m)
@@ -247,6 +248,11 @@ static inline int irq_can_set_affinity(unsigned int irq)
 
 static inline int irq_select_affinity(unsigned int irq)  { return 0; }
 
+static inline int irq_set_affinity_hint(unsigned int irq,
+                                        const struct cpumask *m)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_SMP && CONFIG_GENERIC_HARDIRQS */
 
 #ifdef CONFIG_GENERIC_HARDIRQS
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 707ab122e2e6..c03243ad84b4 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -195,6 +195,7 @@ struct irq_desc {
 	raw_spinlock_t		lock;
 #ifdef CONFIG_SMP
 	cpumask_var_t		affinity;
+	const struct cpumask	*affinity_hint;
 	unsigned int		node;
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	cpumask_var_t		pending_mask;
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 444d5a81a209..3164ba7ce151 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -138,6 +138,22 @@ int irq_set_affinity(unsigned int irq, const struct cpumask *cpumask)
 	return 0;
 }
 
+int irq_set_affinity_hint(unsigned int irq, const struct cpumask *m)
+{
+	struct irq_desc *desc = irq_to_desc(irq);
+	unsigned long flags;
+
+	if (!desc)
+		return -EINVAL;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	desc->affinity_hint = m;
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(irq_set_affinity_hint);
+
 #ifndef CONFIG_AUTO_IRQ_AFFINITY
 /*
  * Generic version of the affinity autoselector.
@@ -906,6 +922,12 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id)
 			desc->chip->disable(irq);
 	}
 
+#ifdef CONFIG_SMP
+	/* make sure affinity_hint is cleaned up */
+	if (WARN_ON_ONCE(desc->affinity_hint))
+		desc->affinity_hint = NULL;
+#endif
+
 	raw_spin_unlock_irqrestore(&desc->lock, flags);
 
 	unregister_handler_proc(irq, action);
diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c
index fe92dc5190dd..4f9427a30e14 100644
--- a/kernel/irq/proc.c
+++ b/kernel/irq/proc.c
@@ -32,6 +32,29 @@ static int irq_affinity_proc_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static int irq_affinity_hint_proc_show(struct seq_file *m, void *v)
+{
+	struct irq_desc *desc = irq_to_desc((long)m->private);
+	unsigned long flags;
+	cpumask_var_t mask;
+
+	if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+		return -ENOMEM;
+
+	raw_spin_lock_irqsave(&desc->lock, flags);
+	if (desc->affinity_hint)
+		cpumask_copy(mask, desc->affinity_hint);
+	else
+		cpumask_setall(mask);
+	raw_spin_unlock_irqrestore(&desc->lock, flags);
+
+	seq_cpumask(m, mask);
+	seq_putc(m, '\n');
+	free_cpumask_var(mask);
+
+	return 0;
+}
+
 #ifndef is_affinity_mask_valid
 #define is_affinity_mask_valid(val) 1
 #endif
@@ -84,6 +107,11 @@ static int irq_affinity_proc_open(struct inode *inode, struct file *file)
 	return single_open(file, irq_affinity_proc_show, PDE(inode)->data);
 }
 
+static int irq_affinity_hint_proc_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, irq_affinity_hint_proc_show, PDE(inode)->data);
+}
+
 static const struct file_operations irq_affinity_proc_fops = {
 	.open		= irq_affinity_proc_open,
 	.read		= seq_read,
@@ -92,6 +120,13 @@ static const struct file_operations irq_affinity_proc_fops = {
 	.write		= irq_affinity_proc_write,
 };
 
+static const struct file_operations irq_affinity_hint_proc_fops = {
+	.open		= irq_affinity_hint_proc_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int default_affinity_show(struct seq_file *m, void *v)
 {
 	seq_cpumask(m, irq_default_affinity);
@@ -252,6 +287,10 @@ void register_irq_proc(unsigned int irq, struct irq_desc *desc)
 	proc_create_data("smp_affinity", 0600, desc->dir,
 			 &irq_affinity_proc_fops, (void *)(long)irq);
 
+	/* create /proc/irq/<irq>/affinity_hint */
+	proc_create_data("affinity_hint", 0400, desc->dir,
+			 &irq_affinity_hint_proc_fops, (void *)(long)irq);
+
 	proc_create_data("node", 0444, desc->dir,
 			 &irq_node_proc_fops, (void *)(long)irq);
 #endif
-- 
cgit v1.2.3


From fb4b698fc78347419aa9ae7114e1375f92107500 Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@citi.umich.edu>
Date: Wed, 28 Apr 2010 17:45:06 -0400
Subject: nfsd: further comment typos

Whoops, missed some more.

"Reviewed-by, I guess": Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu>
---
 include/linux/nfsd/nfsfh.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
index 45bb5a8102c1..80d55bbc5365 100644
--- a/include/linux/nfsd/nfsfh.h
+++ b/include/linux/nfsd/nfsfh.h
@@ -40,12 +40,12 @@ struct nfs_fhbase_old {
  * This is the new flexible, extensible style NFSv2/v3 file handle.
  * by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
  *
- * The file handle is seens as a list of four-byte words.
+ * The file handle starts with a sequence of four-byte words.
  * The first word contains a version number (1) and three descriptor bytes
  * that tell how the remaining 3 variable length fields should be handled.
  * These three bytes are auth_type, fsid_type and fileid_type.
  *
- * All 4byte values are in host-byte-order.
+ * All four-byte values are in host-byte-order.
  *
  * The auth_type field specifies how the filehandle can be authenticated
  * This might allow a file to be confirmed to be in a writable part of a
-- 
cgit v1.2.3


From 4f70ecca9c57731b4acbe5043eb22e4416bd2368 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Mon, 3 May 2010 10:50:14 +0000
Subject: net: rcu fixes

Add hlist_for_each_entry_rcu_bh() and
hlist_for_each_entry_continue_rcu_bh() macros, and use them in
ipv6_get_ifaddr(), if6_get_first() and if6_get_next() to fix lockdeps
warnings.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Reviewed-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/rculist.h | 29 +++++++++++++++++++++++++++++
 net/ipv6/addrconf.c     | 16 ++++++++--------
 2 files changed, 37 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rculist.h b/include/linux/rculist.h
index 004908b104d5..4ec3b38ce9c5 100644
--- a/include/linux/rculist.h
+++ b/include/linux/rculist.h
@@ -428,6 +428,23 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
 		pos = rcu_dereference_raw(pos->next))
 
+/**
+ * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @head:	the head for your list.
+ * @member:	the name of the hlist_node within the struct.
+ *
+ * This list-traversal primitive may safely run concurrently with
+ * the _rcu list-mutation primitives such as hlist_add_head_rcu()
+ * as long as the traversal is guarded by rcu_read_lock_bh().
+ */
+#define hlist_for_each_entry_rcu_bh(tpos, pos, head, member)		 \
+	for (pos = rcu_dereference_bh((head)->first);			 \
+		pos && ({ prefetch(pos->next); 1; }) &&			 \
+		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; }); \
+		pos = rcu_dereference_bh(pos->next))
+
 /**
  * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point
  * @tpos:	the type * to use as a loop cursor.
@@ -440,6 +457,18 @@ static inline void hlist_add_after_rcu(struct hlist_node *prev,
 	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
 	     pos = rcu_dereference(pos->next))
 
+/**
+ * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point
+ * @tpos:	the type * to use as a loop cursor.
+ * @pos:	the &struct hlist_node to use as a loop cursor.
+ * @member:	the name of the hlist_node within the struct.
+ */
+#define hlist_for_each_entry_continue_rcu_bh(tpos, pos, member)		\
+	for (pos = rcu_dereference_bh((pos)->next);			\
+	     pos && ({ prefetch(pos->next); 1; }) &&			\
+	     ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1; });  \
+	     pos = rcu_dereference_bh(pos->next))
+
 
 #endif	/* __KERNEL__ */
 #endif
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 34d2d649e396..3984f52181f4 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1346,7 +1346,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add
 	struct hlist_node *node;
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(ifp, node, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, node, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr)) {
@@ -2959,7 +2959,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq)
 
 	for (state->bucket = 0; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) {
 		struct hlist_node *n;
-		hlist_for_each_entry_rcu(ifa, n, &inet6_addr_lst[state->bucket],
+		hlist_for_each_entry_rcu_bh(ifa, n, &inet6_addr_lst[state->bucket],
 					 addr_lst)
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
@@ -2974,12 +2974,12 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq,
 	struct net *net = seq_file_net(seq);
 	struct hlist_node *n = &ifa->addr_lst;
 
-	hlist_for_each_entry_continue_rcu(ifa, n, addr_lst)
+	hlist_for_each_entry_continue_rcu_bh(ifa, n, addr_lst)
 		if (net_eq(dev_net(ifa->idev->dev), net))
 			return ifa;
 
 	while (++state->bucket < IN6_ADDR_HSIZE) {
-		hlist_for_each_entry(ifa, n,
+		hlist_for_each_entry_rcu_bh(ifa, n,
 				     &inet6_addr_lst[state->bucket], addr_lst) {
 			if (net_eq(dev_net(ifa->idev->dev), net))
 				return ifa;
@@ -3000,7 +3000,7 @@ static struct inet6_ifaddr *if6_get_idx(struct seq_file *seq, loff_t pos)
 }
 
 static void *if6_seq_start(struct seq_file *seq, loff_t *pos)
-	__acquires(rcu)
+	__acquires(rcu_bh)
 {
 	rcu_read_lock_bh();
 	return if6_get_idx(seq, *pos);
@@ -3016,7 +3016,7 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 }
 
 static void if6_seq_stop(struct seq_file *seq, void *v)
-	__releases(rcu)
+	__releases(rcu_bh)
 {
 	rcu_read_unlock_bh();
 }
@@ -3093,7 +3093,7 @@ int ipv6_chk_home_addr(struct net *net, struct in6_addr *addr)
 	unsigned int hash = ipv6_addr_hash(addr);
 
 	rcu_read_lock_bh();
-	hlist_for_each_entry_rcu(ifp, n, &inet6_addr_lst[hash], addr_lst) {
+	hlist_for_each_entry_rcu_bh(ifp, n, &inet6_addr_lst[hash], addr_lst) {
 		if (!net_eq(dev_net(ifp->idev->dev), net))
 			continue;
 		if (ipv6_addr_equal(&ifp->addr, addr) &&
@@ -3127,7 +3127,7 @@ static void addrconf_verify(unsigned long foo)
 
 	for (i = 0; i < IN6_ADDR_HSIZE; i++) {
 restart:
-		hlist_for_each_entry_rcu(ifp, node,
+		hlist_for_each_entry_rcu_bh(ifp, node,
 					 &inet6_addr_lst[i], addr_lst) {
 			unsigned long age;
 
-- 
cgit v1.2.3


From 30ba3ead05763b172acaa65ae1be71af2a878940 Mon Sep 17 00:00:00 2001
From: Sriramakrishnan Govindarajan <srk@ti.com>
Date: Mon, 3 May 2010 23:47:12 -0700
Subject: Input: add keypad driver for keys interfaced to TCA6416

This patch implements a simple Keypad driver that functions
as an I2C client. It handles key press events for keys
connected to TCA6416 I2C based IO expander.

Signed-off-by: Sriramakrishnan <srk@ti.com>
Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
---
 drivers/input/keyboard/Kconfig          |  16 ++
 drivers/input/keyboard/Makefile         |   1 +
 drivers/input/keyboard/tca6416-keypad.c | 349 ++++++++++++++++++++++++++++++++
 include/linux/tca6416_keypad.h          |  34 ++++
 4 files changed, 400 insertions(+)
 create mode 100644 drivers/input/keyboard/tca6416-keypad.c
 create mode 100644 include/linux/tca6416_keypad.h

(limited to 'include/linux')

diff --git a/drivers/input/keyboard/Kconfig b/drivers/input/keyboard/Kconfig
index a8293388d019..3525f533e186 100644
--- a/drivers/input/keyboard/Kconfig
+++ b/drivers/input/keyboard/Kconfig
@@ -179,6 +179,22 @@ config KEYBOARD_GPIO
 	  To compile this driver as a module, choose M here: the
 	  module will be called gpio_keys.
 
+config KEYBOARD_TCA6416
+	tristate "TCA6416 Keypad Support"
+	depends on I2C
+	help
+	  This driver implements basic keypad functionality
+	  for keys connected through TCA6416 IO expander
+
+	  Say Y here if your device has keys connected to
+	  TCA6416 IO expander. Your board-specific setup logic
+	  must also provide pin-mask details (of which TCA6416 pins
+	  are used for keypad).
+
+	  If enabled the complete TCA6416 device will be managed through
+	  this driver.
+
+
 config KEYBOARD_MATRIX
 	tristate "GPIO driven matrix keypad support"
 	depends on GENERIC_GPIO
diff --git a/drivers/input/keyboard/Makefile b/drivers/input/keyboard/Makefile
index 9a74127e4d17..4596d0c6f922 100644
--- a/drivers/input/keyboard/Makefile
+++ b/drivers/input/keyboard/Makefile
@@ -14,6 +14,7 @@ obj-$(CONFIG_KEYBOARD_BFIN)		+= bf54x-keys.o
 obj-$(CONFIG_KEYBOARD_DAVINCI)		+= davinci_keyscan.o
 obj-$(CONFIG_KEYBOARD_EP93XX)		+= ep93xx_keypad.o
 obj-$(CONFIG_KEYBOARD_GPIO)		+= gpio_keys.o
+obj-$(CONFIG_KEYBOARD_TCA6416)		+= tca6416-keypad.o
 obj-$(CONFIG_KEYBOARD_HIL)		+= hil_kbd.o
 obj-$(CONFIG_KEYBOARD_HIL_OLD)		+= hilkbd.o
 obj-$(CONFIG_KEYBOARD_IMX)		+= imx_keypad.o
diff --git a/drivers/input/keyboard/tca6416-keypad.c b/drivers/input/keyboard/tca6416-keypad.c
new file mode 100644
index 000000000000..493c93f25e2a
--- /dev/null
+++ b/drivers/input/keyboard/tca6416-keypad.c
@@ -0,0 +1,349 @@
+/*
+ * Driver for keys on TCA6416 I2C IO expander
+ *
+ * Copyright (C) 2010 Texas Instruments
+ *
+ * Author : Sriramakrishnan.A.G. <srk@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/gpio.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/tca6416_keypad.h>
+
+#define TCA6416_INPUT          0
+#define TCA6416_OUTPUT         1
+#define TCA6416_INVERT         2
+#define TCA6416_DIRECTION      3
+
+static const struct i2c_device_id tca6416_id[] = {
+	{ "tca6416-keys", 16, },
+	{ }
+};
+MODULE_DEVICE_TABLE(i2c, tca6416_id);
+
+struct tca6416_drv_data {
+	struct input_dev *input;
+	struct tca6416_button data[0];
+};
+
+struct tca6416_keypad_chip {
+	uint16_t reg_output;	/* OUTPUT register as read at probe */
+	uint16_t reg_direction;	/* DIRECTION register read back at probe */
+	uint16_t reg_input;	/* last input state, masked by pinmask */
+
+	struct i2c_client *client;
+	struct input_dev *input;
+	struct delayed_work dwork;	/* polling work (use_polling only) */
+	u16 pinmask;		/* bits of the pins used as keys */
+	int irqnum;		/* IRQ number, set when !use_polling */
+	bool use_polling;
+	struct tca6416_button buttons[0];	/* copied from platform data */
+};
+
+static int tca6416_write_reg(struct tca6416_keypad_chip *chip, int reg, u16 val)
+{
+	int error;
+
+	error = i2c_smbus_write_word_data(chip->client, reg << 1, val);
+	if (error < 0) {
+		dev_err(&chip->client->dev,
+			"%s failed, reg: %d, val: %d, error: %d\n",
+			__func__, reg, val, error);
+		return error;
+	}
+
+	return 0;
+}
+
+static int tca6416_read_reg(struct tca6416_keypad_chip *chip, int reg, u16 *val)
+{
+	int retval;
+
+	retval = i2c_smbus_read_word_data(chip->client, reg << 1);
+	if (retval < 0) {
+		dev_err(&chip->client->dev, "%s failed, reg: %d, error: %d\n",
+			__func__, reg, retval);
+		return retval;
+	}
+
+	*val = (u16)retval;
+	return 0;
+}
+
+static void tca6416_keys_scan(struct tca6416_keypad_chip *chip)
+{
+	struct input_dev *input = chip->input;
+	u16 reg_val, val;
+	int error, i, pin_index;
+
+	error = tca6416_read_reg(chip, TCA6416_INPUT, &reg_val);
+	if (error)
+		return;
+
+	reg_val &= chip->pinmask;
+
+	/* Figure out which lines have changed */
+	val = reg_val ^ chip->reg_input;
+	chip->reg_input = reg_val;
+
+	for (i = 0, pin_index = 0; i < 16; i++) {
+		if (val & (1 << i)) {
+			struct tca6416_button *button = &chip->buttons[pin_index];
+			unsigned int type = button->type ?: EV_KEY;
+			int state = ((reg_val & (1 << i)) ? 1 : 0)
+						^ button->active_low;
+
+			input_event(input, type, button->code, !!state);
+			input_sync(input);
+		}
+
+		if (chip->pinmask & (1 << i))
+			pin_index++;
+	}
+}
+
+/*
+ * This is threaded IRQ handler and this can (and will) sleep.
+ */
+static irqreturn_t tca6416_keys_isr(int irq, void *dev_id)
+{
+	struct tca6416_keypad_chip *chip = dev_id;
+
+	tca6416_keys_scan(chip);
+
+	return IRQ_HANDLED;
+}
+
+static void tca6416_keys_work_func(struct work_struct *work)
+{
+	struct tca6416_keypad_chip *chip =
+		container_of(work, struct tca6416_keypad_chip, dwork.work);
+
+	tca6416_keys_scan(chip);
+	schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
+}
+
+static int tca6416_keys_open(struct input_dev *dev)
+{
+	struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
+
+	/* Get initial device state in case it has switches */
+	tca6416_keys_scan(chip);
+
+	if (chip->use_polling)
+		schedule_delayed_work(&chip->dwork, msecs_to_jiffies(100));
+	else
+		enable_irq(chip->irqnum);
+
+	return 0;
+}
+
+static void tca6416_keys_close(struct input_dev *dev)
+{
+	struct tca6416_keypad_chip *chip = input_get_drvdata(dev);
+
+	if (chip->use_polling)
+		cancel_delayed_work_sync(&chip->dwork);
+	else
+		disable_irq(chip->irqnum);
+}
+
+static int __devinit tca6416_setup_registers(struct tca6416_keypad_chip *chip)
+{
+	int error;
+
+	error = tca6416_read_reg(chip, TCA6416_OUTPUT, &chip->reg_output);
+	if (error)
+		return error;
+
+	error = tca6416_read_reg(chip, TCA6416_DIRECTION, &chip->reg_direction);
+	if (error)
+		return error;
+
+	/* ensure that keypad pins are set to input */
+	error = tca6416_write_reg(chip, TCA6416_DIRECTION,
+				  chip->reg_direction | chip->pinmask);
+	if (error)
+		return error;
+
+	error = tca6416_read_reg(chip, TCA6416_DIRECTION, &chip->reg_direction);
+	if (error)
+		return error;
+
+	error = tca6416_read_reg(chip, TCA6416_INPUT, &chip->reg_input);
+	if (error)
+		return error;
+
+	chip->reg_input &= chip->pinmask;
+
+	return 0;
+}
+
+static int __devinit tca6416_keypad_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct tca6416_keys_platform_data *pdata;
+	struct tca6416_keypad_chip *chip;
+	struct input_dev *input;
+	int error, i;
+
+	/* All register I/O is SMBus word transfers; require that support */
+	if (!i2c_check_functionality(client->adapter,
+				     I2C_FUNC_SMBUS_WORD_DATA)) {
+		dev_err(&client->dev, "%s adapter not supported\n",
+			dev_driver_string(&client->adapter->dev));
+		return -ENODEV;
+	}
+
+	pdata = client->dev.platform_data;
+	if (!pdata) {
+		dev_dbg(&client->dev, "no platform data\n");
+		return -EINVAL;
+	}
+
+	chip = kzalloc(sizeof(struct tca6416_keypad_chip) +
+		       pdata->nbuttons * sizeof(struct tca6416_button),
+		       GFP_KERNEL);
+	input = input_allocate_device();
+	if (!chip || !input) {
+		error = -ENOMEM;
+		goto fail1;
+	}
+
+	chip->client = client;
+	chip->input = input;
+	chip->pinmask = pdata->pinmask;
+	chip->use_polling = pdata->use_polling;
+
+	INIT_DELAYED_WORK(&chip->dwork, tca6416_keys_work_func);
+
+	input->phys = "tca6416-keys/input0";
+	input->name = client->name;
+	input->dev.parent = &client->dev;
+
+	input->open = tca6416_keys_open;
+	input->close = tca6416_keys_close;
+
+	input->id.bustype = BUS_HOST;
+	input->id.vendor = 0x0001;
+	input->id.product = 0x0001;
+	input->id.version = 0x0100;
+
+	/* Enable auto repeat feature of Linux input subsystem */
+	if (pdata->rep)
+		__set_bit(EV_REP, input->evbit);
+
+	for (i = 0; i < pdata->nbuttons; i++) {
+		unsigned int type;
+
+		chip->buttons[i] = pdata->buttons[i];
+		type = (pdata->buttons[i].type) ?: EV_KEY;
+		input_set_capability(input, type, pdata->buttons[i].code);
+	}
+
+	input_set_drvdata(input, chip);
+
+	/*
+	 * Initialize cached registers from their original values.
+	 * we can't share this chip with another i2c master.
+	 */
+	error = tca6416_setup_registers(chip);
+	if (error)
+		goto fail1;
+
+	if (!chip->use_polling) {
+		if (pdata->irq_is_gpio)
+			chip->irqnum = gpio_to_irq(client->irq);
+		else
+			chip->irqnum = client->irq;
+
+		error = request_threaded_irq(chip->irqnum, NULL,
+					     tca6416_keys_isr,
+					     IRQF_TRIGGER_FALLING,
+					     "tca6416-keypad", chip);
+		if (error) {
+			dev_dbg(&client->dev,
+				"Unable to claim irq %d; error %d\n",
+				chip->irqnum, error);
+			goto fail1;
+		}
+		disable_irq(chip->irqnum);
+	}
+
+	error = input_register_device(input);
+	if (error) {
+		dev_dbg(&client->dev,
+			"Unable to register input device, error: %d\n", error);
+		goto fail2;
+	}
+
+	i2c_set_clientdata(client, chip);
+
+	return 0;
+
+fail2:
+	if (!chip->use_polling) {
+		free_irq(chip->irqnum, chip);
+		enable_irq(chip->irqnum);
+	}
+fail1:
+	input_free_device(input);
+	kfree(chip);
+	return error;
+}
+
+static int __devexit tca6416_keypad_remove(struct i2c_client *client)
+{
+	struct tca6416_keypad_chip *chip = i2c_get_clientdata(client);
+
+	if (!chip->use_polling) {
+		free_irq(chip->irqnum, chip);
+		enable_irq(chip->irqnum);
+	}
+
+	input_unregister_device(chip->input);
+	kfree(chip);
+
+	i2c_set_clientdata(client, NULL);
+
+	return 0;
+}
+
+
+static struct i2c_driver tca6416_keypad_driver = {
+	.driver = {
+		.name	= "tca6416-keypad",
+	},
+	.probe		= tca6416_keypad_probe,
+	.remove		= __devexit_p(tca6416_keypad_remove),
+	.id_table	= tca6416_id,
+};
+
+static int __init tca6416_keypad_init(void)
+{
+	return i2c_add_driver(&tca6416_keypad_driver);
+}
+
+subsys_initcall(tca6416_keypad_init);
+
+static void __exit tca6416_keypad_exit(void)
+{
+	i2c_del_driver(&tca6416_keypad_driver);
+}
+module_exit(tca6416_keypad_exit);
+
+MODULE_AUTHOR("Sriramakrishnan <srk@ti.com>");
+MODULE_DESCRIPTION("Keypad driver over tca6416 IO expander");
+MODULE_LICENSE("GPL");
diff --git a/include/linux/tca6416_keypad.h b/include/linux/tca6416_keypad.h
new file mode 100644
index 000000000000..7bd266f3525c
--- /dev/null
+++ b/include/linux/tca6416_keypad.h
@@ -0,0 +1,34 @@
+/*
+ * tca6416 keypad platform support
+ *
+ * Copyright (C) 2010 Texas Instruments
+ *
+ * Author: Sriramakrishnan <srk@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _TCA6416_KEYS_H
+#define _TCA6416_KEYS_H
+
+#include <linux/types.h>
+
+struct tca6416_button {
+	/* Configuration parameters */
+	int code;		/* input event code (KEY_*, SW_*) */
+	int active_low;		/* nonzero if pin reads low when active */
+	int type;		/* event type (EV_KEY, EV_SW), 0 = EV_KEY */
+};
+
+struct tca6416_keys_platform_data {
+	struct tca6416_button *buttons;	/* key table, copied at probe */
+	int nbuttons;		/* number of entries in buttons */
+	unsigned int rep:1;	/* enable input subsystem auto repeat */
+	uint16_t pinmask;	/* set bit = TCA6416 pin used as a key */
+	uint16_t invert;	/* NOTE(review): not read by the driver */
+	int irq_is_gpio;	/* nonzero: client->irq is a GPIO number */
+	int use_polling;	/* use polling if interrupt is not connected */
+};
+#endif
-- 
cgit v1.2.3


From 6629dcff19470a894ce294d0adb9cbab94ee1fb9 Mon Sep 17 00:00:00 2001
From: Jean Delvare <khali@linux-fr.org>
Date: Tue, 4 May 2010 11:09:28 +0200
Subject: i2c-core: Use per-adapter userspace device lists

Using a single list for all userspace devices leads to a deadlock
on multiplexed buses in some circumstances (mux chip instantiated
from userspace). This is solved by using a separate list for each
bus segment.

Signed-off-by: Jean Delvare <khali@linux-fr.org>
Acked-by: Michael Lawnick <ml.lawnick@gmx.de>
---
 drivers/i2c/i2c-core.c | 34 ++++++++++++++++++----------------
 include/linux/i2c.h    |  2 ++
 2 files changed, 20 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index 5105126225c3..c2258a51fe0c 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -40,12 +40,11 @@
 #include "i2c-core.h"
 
 
-/* core_lock protects i2c_adapter_idr, userspace_devices, and guarantees
+/* core_lock protects i2c_adapter_idr, and guarantees
    that device detection, deletion of detected devices, and attach_adapter
    and detach_adapter calls are serialized */
 static DEFINE_MUTEX(core_lock);
 static DEFINE_IDR(i2c_adapter_idr);
-static LIST_HEAD(userspace_devices);
 
 static struct device_type i2c_client_type;
 static int i2c_check_addr(struct i2c_adapter *adapter, int addr);
@@ -542,9 +541,9 @@ i2c_sysfs_new_device(struct device *dev, struct device_attribute *attr,
 		return -EEXIST;
 
 	/* Keep track of the added device */
-	mutex_lock(&core_lock);
-	list_add_tail(&client->detected, &userspace_devices);
-	mutex_unlock(&core_lock);
+	i2c_lock_adapter(adap);
+	list_add_tail(&client->detected, &adap->userspace_clients);
+	i2c_unlock_adapter(adap);
 	dev_info(dev, "%s: Instantiated device %s at 0x%02hx\n", "new_device",
 		 info.type, info.addr);
 
@@ -583,9 +582,10 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
 
 	/* Make sure the device was added through sysfs */
 	res = -ENOENT;
-	mutex_lock(&core_lock);
-	list_for_each_entry_safe(client, next, &userspace_devices, detected) {
-		if (client->addr == addr && client->adapter == adap) {
+	i2c_lock_adapter(adap);
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		if (client->addr == addr) {
 			dev_info(dev, "%s: Deleting device %s at 0x%02hx\n",
 				 "delete_device", client->name, client->addr);
 
@@ -595,7 +595,7 @@ i2c_sysfs_delete_device(struct device *dev, struct device_attribute *attr,
 			break;
 		}
 	}
-	mutex_unlock(&core_lock);
+	i2c_unlock_adapter(adap);
 
 	if (res < 0)
 		dev_err(dev, "%s: Can't find device in list\n",
@@ -677,6 +677,7 @@ static int i2c_register_adapter(struct i2c_adapter *adap)
 	}
 
 	rt_mutex_init(&adap->bus_lock);
+	INIT_LIST_HEAD(&adap->userspace_clients);
 
 	/* Set default timeout to 1 second if not already set */
 	if (adap->timeout == 0)
@@ -879,14 +880,15 @@ int i2c_del_adapter(struct i2c_adapter *adap)
 		return res;
 
 	/* Remove devices instantiated from sysfs */
-	list_for_each_entry_safe(client, next, &userspace_devices, detected) {
-		if (client->adapter == adap) {
-			dev_dbg(&adap->dev, "Removing %s at 0x%x\n",
-				client->name, client->addr);
-			list_del(&client->detected);
-			i2c_unregister_device(client);
-		}
+	i2c_lock_adapter(adap);
+	list_for_each_entry_safe(client, next, &adap->userspace_clients,
+				 detected) {
+		dev_dbg(&adap->dev, "Removing %s at 0x%x\n", client->name,
+			client->addr);
+		list_del(&client->detected);
+		i2c_unregister_device(client);
 	}
+	i2c_unlock_adapter(adap);
 
 	/* Detach any active clients. This can't fail, thus we do not
 	   checking the returned value. */
diff --git a/include/linux/i2c.h b/include/linux/i2c.h
index 0a5da639b327..6ed1d59bfb1e 100644
--- a/include/linux/i2c.h
+++ b/include/linux/i2c.h
@@ -355,6 +355,8 @@ struct i2c_adapter {
 	int nr;
 	char name[48];
 	struct completion dev_released;
+
+	struct list_head userspace_clients;
 };
 #define to_i2c_adapter(d) container_of(d, struct i2c_adapter, dev)
 
-- 
cgit v1.2.3


From 4dbf6bc239c169b032777616806ecc648058f6b2 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Tue, 4 May 2010 11:24:01 -0400
Subject: tracing: Convert nop macros to static inlines

The ftrace.h file contains several functions as macros when the
functions are disabled due to config options. This patch converts
most of them to static inlines.

There are two exceptions:

  register_ftrace_function() and unregister_ftrace_function()

This is because their parameter "ops" must not be evaluated since
code using the function is allowed to #ifdef out the creation of
the parameter.

This also fixes an error caused by recent changes:

 kernel/trace/trace_irqsoff.c: In function 'start_irqsoff_tracer':
 kernel/trace/trace_irqsoff.c:571: error: expected expression before 'do'

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 8415a522f430..e0ae83bbd9cc 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -82,9 +82,13 @@ void clear_ftrace_function(void);
 extern void ftrace_stub(unsigned long a0, unsigned long a1);
 
 #else /* !CONFIG_FUNCTION_TRACER */
-# define register_ftrace_function(ops) do { } while (0)
-# define unregister_ftrace_function(ops) do { } while (0)
-# define clear_ftrace_function(ops) do { } while (0)
+/*
+ * (un)register_ftrace_function must be a macro since the ops parameter
+ * must not be evaluated.
+ */
+#define register_ftrace_function(ops) ({ 0; })
+#define unregister_ftrace_function(ops) ({ 0; })
+static inline void clear_ftrace_function(void) { }
 static inline void ftrace_kill(void) { }
 static inline void ftrace_stop(void) { }
 static inline void ftrace_start(void) { }
@@ -237,11 +241,13 @@ extern int skip_trace(unsigned long ip);
 extern void ftrace_disable_daemon(void);
 extern void ftrace_enable_daemon(void);
 #else
-# define skip_trace(ip)				({ 0; })
-# define ftrace_force_update()			({ 0; })
-# define ftrace_set_filter(buf, len, reset)	do { } while (0)
-# define ftrace_disable_daemon()		do { } while (0)
-# define ftrace_enable_daemon()			do { } while (0)
+static inline int skip_trace(unsigned long ip) { return 0; }
+static inline int ftrace_force_update(void) { return 0; }
+static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
+{
+}
+static inline void ftrace_disable_daemon(void) { }
+static inline void ftrace_enable_daemon(void) { }
 static inline void ftrace_release_mod(struct module *mod) {}
 static inline int register_ftrace_command(struct ftrace_func_command *cmd)
 {
@@ -314,16 +320,16 @@ static inline void __ftrace_enabled_restore(int enabled)
   extern void time_hardirqs_on(unsigned long a0, unsigned long a1);
   extern void time_hardirqs_off(unsigned long a0, unsigned long a1);
 #else
-# define time_hardirqs_on(a0, a1)		do { } while (0)
-# define time_hardirqs_off(a0, a1)		do { } while (0)
+  static inline void time_hardirqs_on(unsigned long a0, unsigned long a1) { }
+  static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { }
 #endif
 
 #ifdef CONFIG_PREEMPT_TRACER
   extern void trace_preempt_on(unsigned long a0, unsigned long a1);
   extern void trace_preempt_off(unsigned long a0, unsigned long a1);
 #else
-# define trace_preempt_on(a0, a1)		do { } while (0)
-# define trace_preempt_off(a0, a1)		do { } while (0)
+  static inline void trace_preempt_on(unsigned long a0, unsigned long a1) { }
+  static inline void trace_preempt_off(unsigned long a0, unsigned long a1) { }
 #endif
 
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
-- 
cgit v1.2.3


From 1ce7e4ff24fe338438bc7837e02780f202bf202b Mon Sep 17 00:00:00 2001
From: Li Zefan <lizf@cn.fujitsu.com>
Date: Fri, 23 Apr 2010 10:35:52 +0800
Subject: cgroup: Check task_lock in task_subsys_state()

Expand task_subsys_state()'s rcu_dereference_check() to include the full
locking rule as documented in Documentation/cgroups/cgroups.txt by adding
a check for task->alloc_lock being held.

This fixes an RCU false positive when resuming from suspend. The warning
comes from freezer cgroup in cgroup_freezing_or_frozen().

Signed-off-by: Li Zefan <lizf@cn.fujitsu.com>
Acked-by: Matt Helsley <matthltc@us.ibm.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/cgroup.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index b8ad1ea99586..8f78073d7caa 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -530,6 +530,7 @@ static inline struct cgroup_subsys_state *task_subsys_state(
 {
 	return rcu_dereference_check(task->cgroups->subsys[subsys_id],
 				     rcu_read_lock_held() ||
+				     lockdep_is_held(&task->alloc_lock) ||
 				     cgroup_lock_is_held());
 }
 
-- 
cgit v1.2.3


From 4677d4a53e0d565742277e8913e91c821453e63e Mon Sep 17 00:00:00 2001
From: Borislav Petkov <borislav.petkov@amd.com>
Date: Mon, 3 May 2010 14:57:11 +0200
Subject: arch, hweight: Fix compilation errors

Fix function prototype visibility issues when compiling for non-x86
architectures. Tested with crosstool
(ftp://ftp.kernel.org/pub/tools/crosstool/) with alpha, ia64 and sparc
targets.

Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
LKML-Reference: <20100503130736.GD26107@aftab>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 include/asm-generic/bitops/arch_hweight.h | 8 ++++----
 include/linux/bitops.h                    | 5 +++++
 2 files changed, 9 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/asm-generic/bitops/arch_hweight.h b/include/asm-generic/bitops/arch_hweight.h
index 9a81c1e9436c..6a211f40665c 100644
--- a/include/asm-generic/bitops/arch_hweight.h
+++ b/include/asm-generic/bitops/arch_hweight.h
@@ -3,22 +3,22 @@
 
 #include <asm/types.h>
 
-inline unsigned int __arch_hweight32(unsigned int w)
+static inline unsigned int __arch_hweight32(unsigned int w)
 {
 	return __sw_hweight32(w);
 }
 
-inline unsigned int __arch_hweight16(unsigned int w)
+static inline unsigned int __arch_hweight16(unsigned int w)
 {
 	return __sw_hweight16(w);
 }
 
-inline unsigned int __arch_hweight8(unsigned int w)
+static inline unsigned int __arch_hweight8(unsigned int w)
 {
 	return __sw_hweight8(w);
 }
 
-inline unsigned long __arch_hweight64(__u64 w)
+static inline unsigned long __arch_hweight64(__u64 w)
 {
 	return __sw_hweight64(w);
 }
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index c55d5bc4ee58..26caa608ccd9 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -10,6 +10,11 @@
 #define BITS_TO_LONGS(nr)	DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))
 #endif
 
+extern unsigned int __sw_hweight8(unsigned int w);
+extern unsigned int __sw_hweight16(unsigned int w);
+extern unsigned int __sw_hweight32(unsigned int w);
+extern unsigned long __sw_hweight64(__u64 w);
+
 /*
  * Include this here because some architectures need generic_ffs/fls in
  * scope
-- 
cgit v1.2.3


From 2c2df8418ac7908eec4558407b83f16739006c54 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 01:07:02 -0700
Subject: x86, acpi/irq: Introduce acpi_isa_irq_to_gsi

There are a number of cases where the current code makes the assumption
that isa irqs identity map to the first 16 acpi global system interrupts.
In most instances that assumption is correct as that is the required
behaviour in dual i8259 mode and the default behavior in ioapic mode.

However, there are some systems out there that take advantage of ACPI's
interrupt remapping for the isa irqs to have a completely different
mapping of isa_irq to gsi.

Introduce acpi_isa_irq_to_gsi to perform this mapping explicitly in the
code that needs it.  Initially this will be just the current assumed
identity mapping to ensure its introduction does not cause regressions.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <1269936436-7039-1-git-send-email-ebiederm@xmission.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/ia64/kernel/acpi.c     | 8 ++++++++
 arch/x86/kernel/acpi/boot.c | 8 ++++++++
 include/linux/acpi.h        | 1 +
 3 files changed, 17 insertions(+)

(limited to 'include/linux')

diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c
index 4d1a7e9314cf..c6c90f39f4d9 100644
--- a/arch/ia64/kernel/acpi.c
+++ b/arch/ia64/kernel/acpi.c
@@ -785,6 +785,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 	return 0;
 }
 
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+	if (isa_irq >= 16)
+		return -1;
+	*gsi = isa_irq;
+	return 0;
+}
+
 /*
  *  ACPI based hotplug CPU support
  */
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index cd40aba6aa95..da718d672596 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -458,6 +458,14 @@ int acpi_gsi_to_irq(u32 gsi, unsigned int *irq)
 	return 0;
 }
 
+int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi)
+{
+	if (isa_irq >= 16)
+		return -1;
+	*gsi = isa_irq;
+	return 0;
+}
+
 /*
  * success: return IRQ number (>=0)
  * failure: return < 0
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index b926afe8c03e..7a937dabcc4a 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -116,6 +116,7 @@ extern unsigned long acpi_realmode_flags;
 
 int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity);
 int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
 
 #ifdef CONFIG_X86_IO_APIC
 extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity);
-- 
cgit v1.2.3


From 9a0a91bb56d2915cdb8585717de38376ad20fef9 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 01:07:03 -0700
Subject: x86, acpi/irq: Teach acpi_get_override_irq to take a gsi not an
 isa_irq

In perverse acpi implementations the isa irqs are not identity mapped
to the first 16 gsi.  Furthermore at least the extended interrupt
resource capability may return gsi's and not isa irqs.  So since
what we get from acpi is a gsi, teach acpi_get_override_irq to
operate on a gsi instead of an isa_irq.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
LKML-Reference: <1269936436-7039-2-git-send-email-ebiederm@xmission.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
---
 arch/x86/kernel/apic/io_apic.c | 23 ++++++++++++++---------
 include/linux/acpi.h           |  4 ++--
 2 files changed, 16 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 127b8718abfb..73ec92838d83 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -4082,22 +4082,27 @@ int __init io_apic_get_version(int ioapic)
 	return reg_01.bits.version;
 }
 
-int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity)
 {
-	int i;
+	int ioapic, pin, idx;
 
 	if (skip_ioapic_setup)
 		return -1;
 
-	for (i = 0; i < mp_irq_entries; i++)
-		if (mp_irqs[i].irqtype == mp_INT &&
-		    mp_irqs[i].srcbusirq == bus_irq)
-			break;
-	if (i >= mp_irq_entries)
+	ioapic = mp_find_ioapic(gsi);
+	if (ioapic < 0)
+		return -1;
+
+	pin = mp_find_ioapic_pin(ioapic, gsi);
+	if (pin < 0)
+		return -1;
+
+	idx = find_irq_entry(ioapic, pin, mp_INT);
+	if (idx < 0)
 		return -1;
 
-	*trigger = irq_trigger(i);
-	*polarity = irq_polarity(i);
+	*trigger = irq_trigger(idx);
+	*polarity = irq_polarity(idx);
 	return 0;
 }
 
diff --git a/include/linux/acpi.h b/include/linux/acpi.h
index 7a937dabcc4a..3da73f5f0ae9 100644
--- a/include/linux/acpi.h
+++ b/include/linux/acpi.h
@@ -119,9 +119,9 @@ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
 int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi);
 
 #ifdef CONFIG_X86_IO_APIC
-extern int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity);
+extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity);
 #else
-#define acpi_get_override_irq(bus, trigger, polarity) (-1)
+#define acpi_get_override_irq(gsi, trigger, polarity) (-1)
 #endif
 /*
  * This function undoes the effect of one call to acpi_register_gsi().
-- 
cgit v1.2.3


From ec7d2f2cf3a1b76202986519ec4f8ec75b2de232 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Wed, 5 May 2010 01:07:37 -0700
Subject: net: __alloc_skb() speedup

With following patch I can reach maximum rate of my pktgen+udpsink
simulator :
- 'old' machine : dual quad core E5450  @3.00GHz
- 64 UDP rx flows (only differ by destination port)
- RPS enabled, NIC interrupts serviced on cpu0
- rps dispatched on 7 other cores. (~130.000 IPI per second)
- SLAB allocator (faster than SLUB in this workload)
- tg3 NIC
- 1.080.000 pps without a single drop at NIC level.

Idea is to add two prefetchw() calls in __alloc_skb(), one to prefetch
first sk_buff cache line, the second to prefetch the shinfo part.

Also using one memset() to initialize all skb_shared_info fields instead
of one by one to reduce number of instructions, using long word moves.

All skb_shared_info fields before 'dataref' are cleared in
__alloc_skb().

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h |  7 ++++++-
 net/core/skbuff.c      | 21 +++++----------------
 2 files changed, 11 insertions(+), 17 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 746a652b9f6f..88d55395a27c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -187,7 +187,6 @@ union skb_shared_tx {
  * the end of the header data, ie. at skb->end.
  */
 struct skb_shared_info {
-	atomic_t	dataref;
 	unsigned short	nr_frags;
 	unsigned short	gso_size;
 	/* Warning: this field is not always filled in (UFO)! */
@@ -197,6 +196,12 @@ struct skb_shared_info {
 	union skb_shared_tx tx_flags;
 	struct sk_buff	*frag_list;
 	struct skb_shared_hwtstamps hwtstamps;
+
+	/*
+	 * Warning : all fields before dataref are cleared in __alloc_skb()
+	 */
+	atomic_t	dataref;
+
 	skb_frag_t	frags[MAX_SKB_FRAGS];
 	/* Intermediate layers must ensure that destructor_arg
 	 * remains valid until skb destructor */
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 8b9c109166a7..a9b0e1f77806 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -181,12 +181,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 	skb = kmem_cache_alloc_node(cache, gfp_mask & ~__GFP_DMA, node);
 	if (!skb)
 		goto out;
+	prefetchw(skb);
 
 	size = SKB_DATA_ALIGN(size);
 	data = kmalloc_node_track_caller(size + sizeof(struct skb_shared_info),
 			gfp_mask, node);
 	if (!data)
 		goto nodata;
+	prefetchw(data + size);
 
 	/*
 	 * Only clear those fields we need to clear, not those that we will
@@ -208,15 +210,8 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
 
 	/* make sure we initialize shinfo sequentially */
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags  = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	if (fclone) {
 		struct sk_buff *child = skb + 1;
@@ -505,16 +500,10 @@ int skb_recycle_check(struct sk_buff *skb, int skb_size)
 		return 0;
 
 	skb_release_head_state(skb);
+
 	shinfo = skb_shinfo(skb);
+	memset(shinfo, 0, offsetof(struct skb_shared_info, dataref));
 	atomic_set(&shinfo->dataref, 1);
-	shinfo->nr_frags = 0;
-	shinfo->gso_size = 0;
-	shinfo->gso_segs = 0;
-	shinfo->gso_type = 0;
-	shinfo->ip6_frag_id = 0;
-	shinfo->tx_flags.flags = 0;
-	skb_frag_list_init(skb);
-	memset(&shinfo->hwtstamps, 0, sizeof(shinfo->hwtstamps));
 
 	memset(skb, 0, offsetof(struct sk_buff, tail));
 	skb->data = skb->head + NET_SKB_PAD;
-- 
cgit v1.2.3


From 2e26ca7150a4f2ab3e69471dfc65f131e7dd7a05 Mon Sep 17 00:00:00 2001
From: Steven Rostedt <srostedt@redhat.com>
Date: Wed, 5 May 2010 10:52:31 -0400
Subject: tracing: Fix tracepoint.h DECLARE_TRACE() to allow more than one
 header

When more than one header is included under CREATE_TRACE_POINTS
the DECLARE_TRACE() macro is not defined back to its original meaning
and the second include will fail to initialize the TRACE_EVENT()
and DECLARE_TRACE() correctly.

To fix this the tracepoint.h file moves the define of DECLARE_TRACE()
out of the #ifdef _LINUX_TRACEPOINT_H protection (just like the
define of the TRACE_EVENT()). This way the define_trace.h will undef
the DECLARE_TRACE() at the end and allow new headers to start
from scratch.

This patch also requires fixing include/trace/events/napi.h.

It currently uses DECLARE_TRACE() and should be converted to a TRACE_EVENT()
format. But I'll leave that change to the authors of that file.
But since the napi.h file depends on using the CREATE_TRACE_POINTS
and does not define its own DEFINE_TRACE() it must use the define_trace.h
method instead.

Cc: Neil Horman <nhorman@tuxdriver.com>
Cc: David S. Miller <davem@davemloft.net>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/tracepoint.h   | 114 ++++++++++++++++++++++---------------------
 include/trace/define_trace.h |   5 ++
 include/trace/events/napi.h  |  10 +++-
 3 files changed, 72 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 78b4bd3be496..1d85f9a6a199 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -33,6 +33,65 @@ struct tracepoint {
 					 * Keep in sync with vmlinux.lds.h.
 					 */
 
+/*
+ * Connect a probe to a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_register(const char *name, void *probe);
+
+/*
+ * Disconnect a probe from a tracepoint.
+ * Internal API, should not be used directly.
+ */
+extern int tracepoint_probe_unregister(const char *name, void *probe);
+
+extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
+extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
+extern void tracepoint_probe_update_all(void);
+
+struct tracepoint_iter {
+	struct module *module;
+	struct tracepoint *tracepoint;
+};
+
+extern void tracepoint_iter_start(struct tracepoint_iter *iter);
+extern void tracepoint_iter_next(struct tracepoint_iter *iter);
+extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
+extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
+extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
+	struct tracepoint *begin, struct tracepoint *end);
+
+/*
+ * tracepoint_synchronize_unregister must be called between the last tracepoint
+ * probe unregistration and the end of module exit to make sure there is no
+ * caller executing a probe when it is freed.
+ */
+static inline void tracepoint_synchronize_unregister(void)
+{
+	synchronize_sched();
+}
+
+#define PARAMS(args...) args
+
+#ifdef CONFIG_TRACEPOINTS
+extern void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end);
+#else
+static inline void tracepoint_update_probe_range(struct tracepoint *begin,
+	struct tracepoint *end)
+{ }
+#endif /* CONFIG_TRACEPOINTS */
+
+#endif /* _LINUX_TRACEPOINT_H */
+
+/*
+ * Note: we keep the TRACE_EVENT and DECLARE_TRACE outside the include
+ *  file ifdef protection.
+ *  This is due to the way trace events work. If a file includes two
+ *  trace event headers under one "CREATE_TRACE_POINTS" the first include
+ *  will override the TRACE_EVENT and break the second include.
+ */
+
 #ifndef DECLARE_TRACE
 
 #define TP_PROTO(args...)	args
@@ -96,9 +155,6 @@ struct tracepoint {
 #define EXPORT_TRACEPOINT_SYMBOL(name)					\
 	EXPORT_SYMBOL(__tracepoint_##name)
 
-extern void tracepoint_update_probe_range(struct tracepoint *begin,
-	struct tracepoint *end);
-
 #else /* !CONFIG_TRACEPOINTS */
 #define DECLARE_TRACE(name, proto, args)				\
 	static inline void _do_trace_##name(struct tracepoint *tp, proto) \
@@ -119,61 +175,9 @@ extern void tracepoint_update_probe_range(struct tracepoint *begin,
 #define EXPORT_TRACEPOINT_SYMBOL_GPL(name)
 #define EXPORT_TRACEPOINT_SYMBOL(name)
 
-static inline void tracepoint_update_probe_range(struct tracepoint *begin,
-	struct tracepoint *end)
-{ }
 #endif /* CONFIG_TRACEPOINTS */
 #endif /* DECLARE_TRACE */
 
-/*
- * Connect a probe to a tracepoint.
- * Internal API, should not be used directly.
- */
-extern int tracepoint_probe_register(const char *name, void *probe);
-
-/*
- * Disconnect a probe from a tracepoint.
- * Internal API, should not be used directly.
- */
-extern int tracepoint_probe_unregister(const char *name, void *probe);
-
-extern int tracepoint_probe_register_noupdate(const char *name, void *probe);
-extern int tracepoint_probe_unregister_noupdate(const char *name, void *probe);
-extern void tracepoint_probe_update_all(void);
-
-struct tracepoint_iter {
-	struct module *module;
-	struct tracepoint *tracepoint;
-};
-
-extern void tracepoint_iter_start(struct tracepoint_iter *iter);
-extern void tracepoint_iter_next(struct tracepoint_iter *iter);
-extern void tracepoint_iter_stop(struct tracepoint_iter *iter);
-extern void tracepoint_iter_reset(struct tracepoint_iter *iter);
-extern int tracepoint_get_iter_range(struct tracepoint **tracepoint,
-	struct tracepoint *begin, struct tracepoint *end);
-
-/*
- * tracepoint_synchronize_unregister must be called between the last tracepoint
- * probe unregistration and the end of module exit to make sure there is no
- * caller executing a probe when it is freed.
- */
-static inline void tracepoint_synchronize_unregister(void)
-{
-	synchronize_sched();
-}
-
-#define PARAMS(args...) args
-
-#endif /* _LINUX_TRACEPOINT_H */
-
-/*
- * Note: we keep the TRACE_EVENT outside the include file ifdef protection.
- *  This is due to the way trace events work. If a file includes two
- *  trace event headers under one "CREATE_TRACE_POINTS" the first include
- *  will override the TRACE_EVENT and break the second include.
- */
-
 #ifndef TRACE_EVENT
 /*
  * For use with the TRACE_EVENT macro:
diff --git a/include/trace/define_trace.h b/include/trace/define_trace.h
index 5acfb1eb4df9..1dfab5401511 100644
--- a/include/trace/define_trace.h
+++ b/include/trace/define_trace.h
@@ -65,6 +65,10 @@
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
+/* Make all open coded DECLARE_TRACE nops */
+#undef DECLARE_TRACE
+#define DECLARE_TRACE(name, proto, args)
+
 #ifdef CONFIG_EVENT_TRACING
 #include <trace/ftrace.h>
 #endif
@@ -75,6 +79,7 @@
 #undef DEFINE_EVENT
 #undef DEFINE_EVENT_PRINT
 #undef TRACE_HEADER_MULTI_READ
+#undef DECLARE_TRACE
 
 /* Only undef what we defined in this file */
 #ifdef UNDEF_TRACE_INCLUDE_FILE
diff --git a/include/trace/events/napi.h b/include/trace/events/napi.h
index a8989c4547e7..188deca2f3c7 100644
--- a/include/trace/events/napi.h
+++ b/include/trace/events/napi.h
@@ -1,4 +1,7 @@
-#ifndef _TRACE_NAPI_H_
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM napi
+
+#if !defined(_TRACE_NAPI_H) || defined(TRACE_HEADER_MULTI_READ)
 #define _TRACE_NAPI_H_
 
 #include <linux/netdevice.h>
@@ -8,4 +11,7 @@ DECLARE_TRACE(napi_poll,
 	TP_PROTO(struct napi_struct *napi),
 	TP_ARGS(napi));
 
-#endif
+#endif /* _TRACE_NAPI_H_ */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
cgit v1.2.3


From 0e34e93177fb1f642cab080e0bde664c06c7183a Mon Sep 17 00:00:00 2001
From: WANG Cong <amwang@redhat.com>
Date: Thu, 6 May 2010 00:47:21 -0700
Subject: netpoll: add generic support for bridge and bonding devices

This whole patchset is for adding netpoll support to bridge and bonding
devices. I already tested it for bridge, bonding, bridge over bonding,
and bonding over bridge. It looks fine now.

To make bridge and bonding support netpoll, we need to adjust
some netpoll generic code. This patch does the following things:

1) introduce two new priv_flags for struct net_device:
   IFF_IN_NETPOLL, which indicates that we are processing netpoll;
   IFF_DISABLE_NETPOLL is used to disable netpoll support for a device
   at run-time;

2) introduce one new method for netdev_ops:
   ->ndo_netpoll_cleanup() is used to clean up netpoll when a device is
     removed.

3) introduce netpoll_poll_dev() which takes a struct net_device * parameter;
   export netpoll_send_skb() and netpoll_poll_dev() which will be used later;

4) hide a pointer to struct netpoll in struct netpoll_info, ditto.

5) introduce ->real_dev for struct netpoll.

6) introduce a new status NETDEV_BONDING_DESLAVE, which is used to disable
   netconsole before releasing a slave, to avoid deadlocks.

Cc: David Miller <davem@davemloft.net>
Cc: Neil Horman <nhorman@tuxdriver.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netconsole.c  | 15 +++++++++------
 include/linux/if.h        |  2 ++
 include/linux/netdevice.h |  1 +
 include/linux/netpoll.h   |  5 +++++
 include/linux/notifier.h  |  1 +
 net/core/netpoll.c        | 26 +++++++++++++++++++++-----
 6 files changed, 39 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/netconsole.c b/drivers/net/netconsole.c
index a361dea35574..ca142c47b2e4 100644
--- a/drivers/net/netconsole.c
+++ b/drivers/net/netconsole.c
@@ -665,7 +665,8 @@ static int netconsole_netdev_event(struct notifier_block *this,
 	struct netconsole_target *nt;
 	struct net_device *dev = ptr;
 
-	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER))
+	if (!(event == NETDEV_CHANGENAME || event == NETDEV_UNREGISTER ||
+	      event == NETDEV_BONDING_DESLAVE || event == NETDEV_GOING_DOWN))
 		goto done;
 
 	spin_lock_irqsave(&target_list_lock, flags);
@@ -677,19 +678,21 @@ static int netconsole_netdev_event(struct notifier_block *this,
 				strlcpy(nt->np.dev_name, dev->name, IFNAMSIZ);
 				break;
 			case NETDEV_UNREGISTER:
-				if (!nt->enabled)
-					break;
 				netpoll_cleanup(&nt->np);
+				/* Fall through */
+			case NETDEV_GOING_DOWN:
+			case NETDEV_BONDING_DESLAVE:
 				nt->enabled = 0;
-				printk(KERN_INFO "netconsole: network logging stopped"
-					", interface %s unregistered\n",
-					dev->name);
 				break;
 			}
 		}
 		netconsole_target_put(nt);
 	}
 	spin_unlock_irqrestore(&target_list_lock, flags);
+	if (event == NETDEV_UNREGISTER || event == NETDEV_BONDING_DESLAVE)
+		printk(KERN_INFO "netconsole: network logging stopped, "
+			"interface %s %s\n",  dev->name,
+			event == NETDEV_UNREGISTER ? "unregistered" : "released slaves");
 
 done:
 	return NOTIFY_DONE;
diff --git a/include/linux/if.h b/include/linux/if.h
index 3a9f410a296b..be350e62a905 100644
--- a/include/linux/if.h
+++ b/include/linux/if.h
@@ -71,6 +71,8 @@
 					 * release skb->dst
 					 */
 #define IFF_DONT_BRIDGE 0x800		/* disallow bridging this ether dev */
+#define IFF_IN_NETPOLL	0x1000		/* whether we are processing netpoll */
+#define IFF_DISABLE_NETPOLL	0x2000	/* disable netpoll at run-time */
 
 #define IF_GET_IFACE	0x0001		/* for querying only */
 #define IF_GET_PROTO	0x0002
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 98112fbddefd..69022d47d6f2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -724,6 +724,7 @@ struct net_device_ops {
 						        unsigned short vid);
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	void                    (*ndo_poll_controller)(struct net_device *dev);
+	void			(*ndo_netpoll_cleanup)(struct net_device *dev);
 #endif
 	int			(*ndo_set_vf_mac)(struct net_device *dev,
 						  int queue, u8 *mac);
diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index a765ea898549..017e604d05f8 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -14,6 +14,7 @@
 
 struct netpoll {
 	struct net_device *dev;
+	struct net_device *real_dev;
 	char dev_name[IFNAMSIZ];
 	const char *name;
 	void (*rx_hook)(struct netpoll *, int, char *, int);
@@ -36,8 +37,11 @@ struct netpoll_info {
 	struct sk_buff_head txq;
 
 	struct delayed_work tx_work;
+
+	struct netpoll *netpoll;
 };
 
+void netpoll_poll_dev(struct net_device *dev);
 void netpoll_poll(struct netpoll *np);
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len);
 void netpoll_print_options(struct netpoll *np);
@@ -47,6 +51,7 @@ int netpoll_trap(void);
 void netpoll_set_trap(int trap);
 void netpoll_cleanup(struct netpoll *np);
 int __netpoll_rx(struct sk_buff *skb);
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 9c5d3fad01f3..7c3609622334 100644
--- a/include/linux/notifier.h
+++ b/include/linux/notifier.h
@@ -206,6 +206,7 @@ static inline int notifier_to_errno(int ret)
 #define NETDEV_POST_TYPE_CHANGE	0x000F
 #define NETDEV_POST_INIT	0x0010
 #define NETDEV_UNREGISTER_BATCH 0x0011
+#define NETDEV_BONDING_DESLAVE  0x0012
 
 #define SYS_DOWN	0x0001	/* Notify of system down */
 #define SYS_RESTART	SYS_DOWN
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index a58f59b97597..94825b109551 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -179,9 +179,8 @@ static void service_arp_queue(struct netpoll_info *npi)
 	}
 }
 
-void netpoll_poll(struct netpoll *np)
+void netpoll_poll_dev(struct net_device *dev)
 {
-	struct net_device *dev = np->dev;
 	const struct net_device_ops *ops;
 
 	if (!dev || !netif_running(dev))
@@ -201,6 +200,11 @@ void netpoll_poll(struct netpoll *np)
 	zap_completion_queue();
 }
 
+void netpoll_poll(struct netpoll *np)
+{
+	netpoll_poll_dev(np->dev);
+}
+
 static void refill_skbs(void)
 {
 	struct sk_buff *skb;
@@ -282,7 +286,7 @@ static int netpoll_owner_active(struct net_device *dev)
 	return 0;
 }
 
-static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	int status = NETDEV_TX_BUSY;
 	unsigned long tries;
@@ -308,7 +312,9 @@ static void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		     tries > 0; --tries) {
 			if (__netif_tx_trylock(txq)) {
 				if (!netif_tx_queue_stopped(txq)) {
+					dev->priv_flags |= IFF_IN_NETPOLL;
 					status = ops->ndo_start_xmit(skb, dev);
+					dev->priv_flags &= ~IFF_IN_NETPOLL;
 					if (status == NETDEV_TX_OK)
 						txq_trans_update(txq);
 				}
@@ -756,7 +762,10 @@ int netpoll_setup(struct netpoll *np)
 		atomic_inc(&npinfo->refcnt);
 	}
 
-	if (!ndev->netdev_ops->ndo_poll_controller) {
+	npinfo->netpoll = np;
+
+	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
+	    !ndev->netdev_ops->ndo_poll_controller) {
 		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
 		       np->name, np->dev_name);
 		err = -ENOTSUPP;
@@ -878,6 +887,7 @@ void netpoll_cleanup(struct netpoll *np)
 			}
 
 			if (atomic_dec_and_test(&npinfo->refcnt)) {
+				const struct net_device_ops *ops;
 				skb_queue_purge(&npinfo->arp_tx);
 				skb_queue_purge(&npinfo->txq);
 				cancel_rearming_delayed_work(&npinfo->tx_work);
@@ -885,7 +895,11 @@ void netpoll_cleanup(struct netpoll *np)
 				/* clean after last, unfinished work */
 				__skb_queue_purge(&npinfo->txq);
 				kfree(npinfo);
-				np->dev->npinfo = NULL;
+				ops = np->dev->netdev_ops;
+				if (ops->ndo_netpoll_cleanup)
+					ops->ndo_netpoll_cleanup(np->dev);
+				else
+					np->dev->npinfo = NULL;
 			}
 		}
 
@@ -908,6 +922,7 @@ void netpoll_set_trap(int trap)
 		atomic_dec(&trapped);
 }
 
+EXPORT_SYMBOL(netpoll_send_skb);
 EXPORT_SYMBOL(netpoll_set_trap);
 EXPORT_SYMBOL(netpoll_trap);
 EXPORT_SYMBOL(netpoll_print_options);
@@ -915,4 +930,5 @@ EXPORT_SYMBOL(netpoll_parse_options);
 EXPORT_SYMBOL(netpoll_setup);
 EXPORT_SYMBOL(netpoll_cleanup);
 EXPORT_SYMBOL(netpoll_send_udp);
+EXPORT_SYMBOL(netpoll_poll_dev);
 EXPORT_SYMBOL(netpoll_poll);
-- 
cgit v1.2.3


From ffb273623bc506d854902a415ef241b79232f93a Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 6 May 2010 01:20:10 -0700
Subject: netpoll: Use 'bool' for netpoll_rx() return type.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 017e604d05f8..e9e231215865 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -55,19 +55,19 @@ void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 
 #ifdef CONFIG_NETPOLL
-static inline int netpoll_rx(struct sk_buff *skb)
+static inline bool netpoll_rx(struct sk_buff *skb)
 {
 	struct netpoll_info *npinfo = skb->dev->npinfo;
 	unsigned long flags;
-	int ret = 0;
+	bool ret = false;
 
 	if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags))
-		return 0;
+		return false;
 
 	spin_lock_irqsave(&npinfo->rx_lock, flags);
 	/* check rx_flags again with the lock held */
 	if (npinfo->rx_flags && __netpoll_rx(skb))
-		ret = 1;
+		ret = true;
 	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
 
 	return ret;
-- 
cgit v1.2.3


From ee84b8243b07c33a5c8aed42b4b2da60cb16d1d2 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 6 May 2010 09:28:41 -0700
Subject: rcu: create rcu_my_thread_group_empty() wrapper

Some RCU-lockdep splat repairs need to know whether they are running
in a single-threaded process.  Unfortunately, the thread_group_empty()
primitive is defined in sched.h, and can induce #include hell.  This
commit therefore introduces a rcu_my_thread_group_empty() wrapper that
is defined in rcupdate.c, thus avoiding the need to include sched.h
everywhere.

Signed-off-by: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  2 ++
 kernel/rcupdate.c        | 11 +++++++++++
 2 files changed, 13 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 07db2feb8572..db266bbed23f 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -190,6 +190,8 @@ static inline int rcu_read_lock_sched_held(void)
 
 #ifdef CONFIG_PROVE_RCU
 
+extern int rcu_my_thread_group_empty(void);
+
 /**
  * rcu_dereference_check - rcu_dereference with debug checking
  * @p: The pointer to read, prior to dereferencing
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 03a7ea1579f6..49d808e833b0 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -122,3 +122,14 @@ void wakeme_after_rcu(struct rcu_head  *head)
 	rcu = container_of(head, struct rcu_synchronize, head);
 	complete(&rcu->completion);
 }
+
+#ifdef CONFIG_PROVE_RCU
+/*
+ * wrapper function to avoid #include problems.
+ */
+int rcu_my_thread_group_empty(void)
+{
+	return thread_group_empty(current);
+}
+EXPORT_SYMBOL_GPL(rcu_my_thread_group_empty);
+#endif /* #ifdef CONFIG_PROVE_RCU */
-- 
cgit v1.2.3


From 1142d810298e694754498dbb4983fcb6cb7fd884 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 6 May 2010 18:49:20 +0200
Subject: cpu_stop: implement stop_cpu[s]()

Implement a simplistic per-cpu maximum priority cpu monopolization
mechanism.  A non-sleeping callback can be scheduled to run on one or
multiple cpus with maximum priority monopolizing those cpus.  This is
primarily to replace and unify RT workqueue usage in stop_machine and
scheduler migration_thread which currently is serving multiple
purposes.

Four functions are provided - stop_one_cpu(), stop_one_cpu_nowait(),
stop_cpus() and try_stop_cpus().

This is to allow clean sharing of resources among stop_cpu and all the
migration thread users.  One stopper thread per cpu is created which
is currently named "stopper/CPU".  This will eventually replace the
migration thread and take on its name.

* This facility was originally named cpuhog and lived in separate
  files but Peter Zijlstra nacked the name and thus got renamed to
  cpu_stop and moved into stop_machine.c.

* Better reporting of preemption leak as per Peter's suggestion.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
 include/linux/stop_machine.h |  39 ++++-
 kernel/stop_machine.c        | 372 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 402 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index baba3a23a814..efcbd6c37947 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -1,15 +1,46 @@
 #ifndef _LINUX_STOP_MACHINE
 #define _LINUX_STOP_MACHINE
-/* "Bogolock": stop the entire machine, disable interrupts.  This is a
-   very heavy lock, which is equivalent to grabbing every spinlock
-   (and more).  So the "read" side to such a lock is anything which
-   disables preeempt. */
+
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
+#include <linux/list.h>
 #include <asm/system.h>
 
 #if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
 
+/*
+ * stop_cpu[s]() is simplistic per-cpu maximum priority cpu
+ * monopolization mechanism.  The caller can specify a non-sleeping
+ * function to be executed on a single or multiple cpus preempting all
+ * other processes and monopolizing those cpus until it finishes.
+ *
+ * Resources for this mechanism are preallocated when a cpu is brought
+ * up and requests are guaranteed to be served as long as the target
+ * cpus are online.
+ */
+
+typedef int (*cpu_stop_fn_t)(void *arg);
+
+struct cpu_stop_work {
+	struct list_head	list;		/* cpu_stopper->works */
+	cpu_stop_fn_t		fn;
+	void			*arg;
+	struct cpu_stop_done	*done;
+};
+
+int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
+void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+			 struct cpu_stop_work *work_buf);
+int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
+int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
+
+/*
+ * stop_machine "Bogolock": stop the entire machine, disable
+ * interrupts.  This is a very heavy lock, which is equivalent to
+ * grabbing every spinlock (and more).  So the "read" side to such a
+ * lock is anything which disables preeempt.
+ */
+
 /**
  * stop_machine: freeze the machine on all CPUs and run this function
  * @fn: the function to run
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 9bb9fb1bd79c..7e3f9182aef3 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -1,17 +1,379 @@
-/* Copyright 2008, 2005 Rusty Russell rusty@rustcorp.com.au IBM Corporation.
- * GPL v2 and any later version.
+/*
+ * kernel/stop_machine.c
+ *
+ * Copyright (C) 2008, 2005	IBM Corporation.
+ * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
+ * Copyright (C) 2010		SUSE Linux Products GmbH
+ * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
+ *
+ * This file is released under the GPLv2 and any later version.
  */
+#include <linux/completion.h>
 #include <linux/cpu.h>
-#include <linux/err.h>
+#include <linux/init.h>
 #include <linux/kthread.h>
 #include <linux/module.h>
+#include <linux/percpu.h>
 #include <linux/sched.h>
 #include <linux/stop_machine.h>
-#include <linux/syscalls.h>
 #include <linux/interrupt.h>
+#include <linux/kallsyms.h>
 
 #include <asm/atomic.h>
-#include <asm/uaccess.h>
+
+/*
+ * Structure to determine completion condition and record errors.  May
+ * be shared by works on different cpus.
+ */
+struct cpu_stop_done {
+	atomic_t		nr_todo;	/* nr left to execute */
+	bool			executed;	/* actually executed? */
+	int			ret;		/* collected return value */
+	struct completion	completion;	/* fired if nr_todo reaches 0 */
+};
+
+/* the actual stopper, one per every possible cpu, enabled on online cpus */
+struct cpu_stopper {
+	spinlock_t		lock;
+	struct list_head	works;		/* list of pending works */
+	struct task_struct	*thread;	/* stopper thread */
+	bool			enabled;	/* is this stopper enabled? */
+};
+
+static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
+
+static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
+{
+	memset(done, 0, sizeof(*done));
+	atomic_set(&done->nr_todo, nr_todo);
+	init_completion(&done->completion);
+}
+
+/* signal completion unless @done is NULL */
+static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
+{
+	if (done) {
+		if (executed)
+			done->executed = true;
+		if (atomic_dec_and_test(&done->nr_todo))
+			complete(&done->completion);
+	}
+}
+
+/* queue @work to @stopper.  if offline, @work is completed immediately */
+static void cpu_stop_queue_work(struct cpu_stopper *stopper,
+				struct cpu_stop_work *work)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&stopper->lock, flags);
+
+	if (stopper->enabled) {
+		list_add_tail(&work->list, &stopper->works);
+		wake_up_process(stopper->thread);
+	} else
+		cpu_stop_signal_done(work->done, false);
+
+	spin_unlock_irqrestore(&stopper->lock, flags);
+}
+
+/**
+ * stop_one_cpu - stop a cpu
+ * @cpu: cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
+ * the highest priority preempting any task on the cpu and
+ * monopolizing it.  This function returns after the execution is
+ * complete.
+ *
+ * This function doesn't guarantee @cpu stays online till @fn
+ * completes.  If @cpu goes down in the middle, execution may happen
+ * partially or fully on different cpus.  @fn should either be ready
+ * for that or the caller should ensure that @cpu stays online until
+ * this function completes.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
+ * otherwise, the return value of @fn.
+ */
+int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
+{
+	struct cpu_stop_done done;
+	struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };
+
+	cpu_stop_init_done(&done, 1);
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * stop_one_cpu_nowait - stop a cpu but don't wait for completion
+ * @cpu: cpu to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Similar to stop_one_cpu() but doesn't wait for completion.  The
+ * caller is responsible for ensuring @work_buf is currently unused
+ * and will remain untouched until stopper starts executing @fn.
+ *
+ * CONTEXT:
+ * Don't care.
+ */
+void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
+			struct cpu_stop_work *work_buf)
+{
+	*work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
+	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
+}
+
+/* static data for stop_cpus */
+static DEFINE_MUTEX(stop_cpus_mutex);
+static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
+
+int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	struct cpu_stop_work *work;
+	struct cpu_stop_done done;
+	unsigned int cpu;
+
+	/* initialize works and done */
+	for_each_cpu(cpu, cpumask) {
+		work = &per_cpu(stop_cpus_work, cpu);
+		work->fn = fn;
+		work->arg = arg;
+		work->done = &done;
+	}
+	cpu_stop_init_done(&done, cpumask_weight(cpumask));
+
+	/*
+	 * Disable preemption while queueing to avoid getting
+	 * preempted by a stopper which might wait for other stoppers
+	 * to enter @fn which can lead to deadlock.
+	 */
+	preempt_disable();
+	for_each_cpu(cpu, cpumask)
+		cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
+				    &per_cpu(stop_cpus_work, cpu));
+	preempt_enable();
+
+	wait_for_completion(&done.completion);
+	return done.executed ? done.ret : -ENOENT;
+}
+
+/**
+ * stop_cpus - stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
+ * @fn is run in a process context with the highest priority
+ * preempting any task on the cpu and monopolizing it.  This function
+ * returns after all executions are complete.
+ *
+ * This function doesn't guarantee the cpus in @cpumask stay online
+ * till @fn completes.  If some cpus go down in the middle, execution
+ * on the cpu may happen partially or fully on different cpus.  @fn
+ * should either be ready for that or the caller should ensure that
+ * the cpus stay online until this function completes.
+ *
+ * All stop_cpus() calls are serialized making it safe for @fn to wait
+ * for all cpus to start executing it.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -ENOENT if @fn(@arg) was not executed at all because all cpus in
+ * @cpumask were offline; otherwise, 0 if all executions of @fn
+ * returned 0, any non zero return value if any returned non zero.
+ */
+int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	mutex_lock(&stop_cpus_mutex);
+	ret = __stop_cpus(cpumask, fn, arg);
+	mutex_unlock(&stop_cpus_mutex);
+	return ret;
+}
+
+/**
+ * try_stop_cpus - try to stop multiple cpus
+ * @cpumask: cpus to stop
+ * @fn: function to execute
+ * @arg: argument to @fn
+ *
+ * Identical to stop_cpus() except that it fails with -EAGAIN if
+ * someone else is already using the facility.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * -EAGAIN if someone else is already stopping cpus, -ENOENT if
+ * @fn(@arg) was not executed at all because all cpus in @cpumask were
+ * offline; otherwise, 0 if all executions of @fn returned 0, any non
+ * zero return value if any returned non zero.
+ */
+int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
+{
+	int ret;
+
+	/* static works are used, process one request at a time */
+	if (!mutex_trylock(&stop_cpus_mutex))
+		return -EAGAIN;
+	ret = __stop_cpus(cpumask, fn, arg);
+	mutex_unlock(&stop_cpus_mutex);
+	return ret;
+}
+
+static int cpu_stopper_thread(void *data)
+{
+	struct cpu_stopper *stopper = data;
+	struct cpu_stop_work *work;
+	int ret;
+
+repeat:
+	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
+
+	if (kthread_should_stop()) {
+		__set_current_state(TASK_RUNNING);
+		return 0;
+	}
+
+	work = NULL;
+	spin_lock_irq(&stopper->lock);
+	if (!list_empty(&stopper->works)) {
+		work = list_first_entry(&stopper->works,
+					struct cpu_stop_work, list);
+		list_del_init(&work->list);
+	}
+	spin_unlock_irq(&stopper->lock);
+
+	if (work) {
+		cpu_stop_fn_t fn = work->fn;
+		void *arg = work->arg;
+		struct cpu_stop_done *done = work->done;
+		char ksym_buf[KSYM_NAME_LEN];
+
+		__set_current_state(TASK_RUNNING);
+
+		/* cpu stop callbacks are not allowed to sleep */
+		preempt_disable();
+
+		ret = fn(arg);
+		if (ret)
+			done->ret = ret;
+
+		/* restore preemption and check it's still balanced */
+		preempt_enable();
+		WARN_ONCE(preempt_count(),
+			  "cpu_stop: %s(%p) leaked preempt count\n",
+			  kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
+					  ksym_buf), arg);
+
+		cpu_stop_signal_done(done, true);
+	} else
+		schedule();
+
+	goto repeat;
+}
+
+/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
+static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
+					   unsigned long action, void *hcpu)
+{
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	unsigned int cpu = (unsigned long)hcpu;
+	struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+	struct cpu_stop_work *work;
+	struct task_struct *p;
+
+	switch (action & ~CPU_TASKS_FROZEN) {
+	case CPU_UP_PREPARE:
+		BUG_ON(stopper->thread || stopper->enabled ||
+		       !list_empty(&stopper->works));
+		p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d",
+				   cpu);
+		if (IS_ERR(p))
+			return NOTIFY_BAD;
+		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
+		get_task_struct(p);
+		stopper->thread = p;
+		break;
+
+	case CPU_ONLINE:
+		kthread_bind(stopper->thread, cpu);
+		/* strictly unnecessary, as first user will wake it */
+		wake_up_process(stopper->thread);
+		/* mark enabled */
+		spin_lock_irq(&stopper->lock);
+		stopper->enabled = true;
+		spin_unlock_irq(&stopper->lock);
+		break;
+
+#ifdef CONFIG_HOTPLUG_CPU
+	case CPU_UP_CANCELED:
+	case CPU_DEAD:
+		/* kill the stopper */
+		kthread_stop(stopper->thread);
+		/* drain remaining works */
+		spin_lock_irq(&stopper->lock);
+		list_for_each_entry(work, &stopper->works, list)
+			cpu_stop_signal_done(work->done, false);
+		stopper->enabled = false;
+		spin_unlock_irq(&stopper->lock);
+		/* release the stopper */
+		put_task_struct(stopper->thread);
+		stopper->thread = NULL;
+		break;
+#endif
+	}
+
+	return NOTIFY_OK;
+}
+
+/*
+ * Give it a higher priority so that cpu stopper is available to other
+ * cpu notifiers.  It currently shares the same priority as sched
+ * migration_notifier.
+ */
+static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
+	.notifier_call	= cpu_stop_cpu_callback,
+	.priority	= 10,
+};
+
+static int __init cpu_stop_init(void)
+{
+	void *bcpu = (void *)(long)smp_processor_id();
+	unsigned int cpu;
+	int err;
+
+	for_each_possible_cpu(cpu) {
+		struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
+
+		spin_lock_init(&stopper->lock);
+		INIT_LIST_HEAD(&stopper->works);
+	}
+
+	/* start one for the boot cpu */
+	err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
+				    bcpu);
+	BUG_ON(err == NOTIFY_BAD);
+	cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
+	register_cpu_notifier(&cpu_stop_cpu_notifier);
+
+	return 0;
+}
+early_initcall(cpu_stop_init);
 
 /* This controls the threads on each CPU. */
 enum stopmachine_state {
-- 
cgit v1.2.3


From 3fc1f1e27a5b807791d72e5d992aa33b668a6626 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 6 May 2010 18:49:20 +0200
Subject: stop_machine: reimplement using cpu_stop

Reimplement stop_machine using cpu_stop.  As cpu stoppers are
guaranteed to be available for all online cpus,
stop_machine_create/destroy() are no longer necessary and removed.

With resource management and synchronization handled by cpu_stop, the
new implementation is much simpler.  Asking the cpu_stop to execute
the stop_cpu() state machine on all online cpus with cpu hotplug
disabled is enough.

stop_machine itself doesn't need to manage any global resources
anymore, so all per-instance information is rolled into struct
stop_machine_data and the mutex and all static data variables are
removed.

The previous implementation created and destroyed RT workqueues as
necessary which made stop_machine() calls highly expensive on very
large machines.  According to Dimitri Sivanich, preventing the dynamic
creation/destruction makes booting more than twice as fast on very
large machines.  cpu_stop resources are preallocated for all online
cpus and should have the same effect.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Rusty Russell <rusty@rustcorp.com.au>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
 arch/s390/kernel/time.c      |   1 -
 drivers/xen/manage.c         |  14 +---
 include/linux/stop_machine.h |  20 ------
 kernel/cpu.c                 |   8 ---
 kernel/module.c              |  14 +---
 kernel/stop_machine.c        | 158 +++++++++++--------------------------------
 6 files changed, 42 insertions(+), 173 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index fba6dec156bf..03d96569f187 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -390,7 +390,6 @@ static void __init time_init_wq(void)
 	if (time_sync_wq)
 		return;
 	time_sync_wq = create_singlethread_workqueue("timesync");
-	stop_machine_create();
 }
 
 /*
diff --git a/drivers/xen/manage.c b/drivers/xen/manage.c
index 2ac4440e7b08..8943b8ccee1a 100644
--- a/drivers/xen/manage.c
+++ b/drivers/xen/manage.c
@@ -80,12 +80,6 @@ static void do_suspend(void)
 
 	shutting_down = SHUTDOWN_SUSPEND;
 
-	err = stop_machine_create();
-	if (err) {
-		printk(KERN_ERR "xen suspend: failed to setup stop_machine %d\n", err);
-		goto out;
-	}
-
 #ifdef CONFIG_PREEMPT
 	/* If the kernel is preemptible, we need to freeze all the processes
 	   to prevent them from being in the middle of a pagetable update
@@ -93,7 +87,7 @@ static void do_suspend(void)
 	err = freeze_processes();
 	if (err) {
 		printk(KERN_ERR "xen suspend: freeze failed %d\n", err);
-		goto out_destroy_sm;
+		goto out;
 	}
 #endif
 
@@ -136,12 +130,8 @@ out_resume:
 out_thaw:
 #ifdef CONFIG_PREEMPT
 	thaw_processes();
-
-out_destroy_sm:
-#endif
-	stop_machine_destroy();
-
 out:
+#endif
 	shutting_down = SHUTDOWN_INVALID;
 }
 #endif	/* CONFIG_PM_SLEEP */
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index efcbd6c37947..0e552e72a4c4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -67,23 +67,6 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-/**
- * stop_machine_create: create all stop_machine threads
- *
- * Description: This causes all stop_machine threads to be created before
- * stop_machine actually gets called. This can be used by subsystems that
- * need a non failing stop_machine infrastructure.
- */
-int stop_machine_create(void);
-
-/**
- * stop_machine_destroy: destroy all stop_machine threads
- *
- * Description: This causes all stop_machine threads which were created with
- * stop_machine_create to be destroyed again.
- */
-void stop_machine_destroy(void);
-
 #else
 
 static inline int stop_machine(int (*fn)(void *), void *data,
@@ -96,8 +79,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-static inline int stop_machine_create(void) { return 0; }
-static inline void stop_machine_destroy(void) { }
-
 #endif /* CONFIG_SMP */
 #endif /* _LINUX_STOP_MACHINE */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 914aedcde849..545777574779 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -266,9 +266,6 @@ int __ref cpu_down(unsigned int cpu)
 {
 	int err;
 
-	err = stop_machine_create();
-	if (err)
-		return err;
 	cpu_maps_update_begin();
 
 	if (cpu_hotplug_disabled) {
@@ -280,7 +277,6 @@ int __ref cpu_down(unsigned int cpu)
 
 out:
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return err;
 }
 EXPORT_SYMBOL(cpu_down);
@@ -361,9 +357,6 @@ int disable_nonboot_cpus(void)
 {
 	int cpu, first_cpu, error;
 
-	error = stop_machine_create();
-	if (error)
-		return error;
 	cpu_maps_update_begin();
 	first_cpu = cpumask_first(cpu_online_mask);
 	/*
@@ -394,7 +387,6 @@ int disable_nonboot_cpus(void)
 		printk(KERN_ERR "Non-boot CPUs are not disabled\n");
 	}
 	cpu_maps_update_done();
-	stop_machine_destroy();
 	return error;
 }
 
diff --git a/kernel/module.c b/kernel/module.c
index 1016b75b026a..0838246d8c94 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -723,16 +723,8 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 		return -EFAULT;
 	name[MODULE_NAME_LEN-1] = '\0';
 
-	/* Create stop_machine threads since free_module relies on
-	 * a non-failing stop_machine call. */
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
-
-	if (mutex_lock_interruptible(&module_mutex) != 0) {
-		ret = -EINTR;
-		goto out_stop;
-	}
+	if (mutex_lock_interruptible(&module_mutex) != 0)
+		return -EINTR;
 
 	mod = find_module(name);
 	if (!mod) {
@@ -792,8 +784,6 @@ SYSCALL_DEFINE2(delete_module, const char __user *, name_user,
 
  out:
 	mutex_unlock(&module_mutex);
-out_stop:
-	stop_machine_destroy();
 	return ret;
 }
 
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 7e3f9182aef3..884c7a1afeed 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -388,174 +388,92 @@ enum stopmachine_state {
 	/* Exit */
 	STOPMACHINE_EXIT,
 };
-static enum stopmachine_state state;
 
 struct stop_machine_data {
-	int (*fn)(void *);
-	void *data;
-	int fnret;
+	int			(*fn)(void *);
+	void			*data;
+	/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
+	unsigned int		num_threads;
+	const struct cpumask	*active_cpus;
+
+	enum stopmachine_state	state;
+	atomic_t		thread_ack;
 };
 
-/* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
-static unsigned int num_threads;
-static atomic_t thread_ack;
-static DEFINE_MUTEX(lock);
-/* setup_lock protects refcount, stop_machine_wq and stop_machine_work. */
-static DEFINE_MUTEX(setup_lock);
-/* Users of stop_machine. */
-static int refcount;
-static struct workqueue_struct *stop_machine_wq;
-static struct stop_machine_data active, idle;
-static const struct cpumask *active_cpus;
-static void __percpu *stop_machine_work;
-
-static void set_state(enum stopmachine_state newstate)
+static void set_state(struct stop_machine_data *smdata,
+		      enum stopmachine_state newstate)
 {
 	/* Reset ack counter. */
-	atomic_set(&thread_ack, num_threads);
+	atomic_set(&smdata->thread_ack, smdata->num_threads);
 	smp_wmb();
-	state = newstate;
+	smdata->state = newstate;
 }
 
 /* Last one to ack a state moves to the next state. */
-static void ack_state(void)
+static void ack_state(struct stop_machine_data *smdata)
 {
-	if (atomic_dec_and_test(&thread_ack))
-		set_state(state + 1);
+	if (atomic_dec_and_test(&smdata->thread_ack))
+		set_state(smdata, smdata->state + 1);
 }
 
-/* This is the actual function which stops the CPU. It runs
- * in the context of a dedicated stopmachine workqueue. */
-static void stop_cpu(struct work_struct *unused)
+/* This is the cpu_stop function which stops the CPU. */
+static int stop_machine_cpu_stop(void *data)
 {
+	struct stop_machine_data *smdata = data;
 	enum stopmachine_state curstate = STOPMACHINE_NONE;
-	struct stop_machine_data *smdata = &idle;
-	int cpu = smp_processor_id();
-	int err;
+	int cpu = smp_processor_id(), err = 0;
+	bool is_active;
+
+	if (!smdata->active_cpus)
+		is_active = cpu == cpumask_first(cpu_online_mask);
+	else
+		is_active = cpumask_test_cpu(cpu, smdata->active_cpus);
 
-	if (!active_cpus) {
-		if (cpu == cpumask_first(cpu_online_mask))
-			smdata = &active;
-	} else {
-		if (cpumask_test_cpu(cpu, active_cpus))
-			smdata = &active;
-	}
 	/* Simple state machine */
 	do {
 		/* Chill out and ensure we re-read stopmachine_state. */
 		cpu_relax();
-		if (state != curstate) {
-			curstate = state;
+		if (smdata->state != curstate) {
+			curstate = smdata->state;
 			switch (curstate) {
 			case STOPMACHINE_DISABLE_IRQ:
 				local_irq_disable();
 				hard_irq_disable();
 				break;
 			case STOPMACHINE_RUN:
-				/* On multiple CPUs only a single error code
-				 * is needed to tell that something failed. */
-				err = smdata->fn(smdata->data);
-				if (err)
-					smdata->fnret = err;
+				if (is_active)
+					err = smdata->fn(smdata->data);
 				break;
 			default:
 				break;
 			}
-			ack_state();
+			ack_state(smdata);
 		}
 	} while (curstate != STOPMACHINE_EXIT);
 
 	local_irq_enable();
+	return err;
 }
 
-/* Callback for CPUs which aren't supposed to do anything. */
-static int chill(void *unused)
-{
-	return 0;
-}
-
-int stop_machine_create(void)
-{
-	mutex_lock(&setup_lock);
-	if (refcount)
-		goto done;
-	stop_machine_wq = create_rt_workqueue("kstop");
-	if (!stop_machine_wq)
-		goto err_out;
-	stop_machine_work = alloc_percpu(struct work_struct);
-	if (!stop_machine_work)
-		goto err_out;
-done:
-	refcount++;
-	mutex_unlock(&setup_lock);
-	return 0;
-
-err_out:
-	if (stop_machine_wq)
-		destroy_workqueue(stop_machine_wq);
-	mutex_unlock(&setup_lock);
-	return -ENOMEM;
-}
-EXPORT_SYMBOL_GPL(stop_machine_create);
-
-void stop_machine_destroy(void)
-{
-	mutex_lock(&setup_lock);
-	refcount--;
-	if (refcount)
-		goto done;
-	destroy_workqueue(stop_machine_wq);
-	free_percpu(stop_machine_work);
-done:
-	mutex_unlock(&setup_lock);
-}
-EXPORT_SYMBOL_GPL(stop_machine_destroy);
-
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
-	struct work_struct *sm_work;
-	int i, ret;
-
-	/* Set up initial state. */
-	mutex_lock(&lock);
-	num_threads = num_online_cpus();
-	active_cpus = cpus;
-	active.fn = fn;
-	active.data = data;
-	active.fnret = 0;
-	idle.fn = chill;
-	idle.data = NULL;
-
-	set_state(STOPMACHINE_PREPARE);
-
-	/* Schedule the stop_cpu work on all cpus: hold this CPU so one
-	 * doesn't hit this CPU until we're ready. */
-	get_cpu();
-	for_each_online_cpu(i) {
-		sm_work = per_cpu_ptr(stop_machine_work, i);
-		INIT_WORK(sm_work, stop_cpu);
-		queue_work_on(i, stop_machine_wq, sm_work);
-	}
-	/* This will release the thread on our CPU. */
-	put_cpu();
-	flush_workqueue(stop_machine_wq);
-	ret = active.fnret;
-	mutex_unlock(&lock);
-	return ret;
+	struct stop_machine_data smdata = { .fn = fn, .data = data,
+					    .num_threads = num_online_cpus(),
+					    .active_cpus = cpus };
+
+	/* Set the initial state and stop all online cpus. */
+	set_state(&smdata, STOPMACHINE_PREPARE);
+	return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
 }
 
 int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 {
 	int ret;
 
-	ret = stop_machine_create();
-	if (ret)
-		return ret;
 	/* No CPUs can come up or down during this. */
 	get_online_cpus();
 	ret = __stop_machine(fn, data, cpus);
 	put_online_cpus();
-	stop_machine_destroy();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
-- 
cgit v1.2.3


From 969c79215a35b06e5e3efe69b9412f858df7856c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Thu, 6 May 2010 18:49:21 +0200
Subject: sched: replace migration_thread with cpu_stop

Currently migration_thread is serving three purposes - migration
pusher, context to execute active_load_balance() and forced context
switcher for expedited RCU synchronize_sched.  All three roles are
hardcoded into migration_thread() and determining which job is
scheduled is slightly messy.

This patch kills migration_thread and replaces all three uses with
cpu_stop.  The three different roles of migration_thread() are
split into three separate cpu_stop callbacks -
migration_cpu_stop(), active_load_balance_cpu_stop() and
synchronize_sched_expedited_cpu_stop() - and each use case now simply
asks cpu_stop to execute the callback as necessary.

synchronize_sched_expedited() was implemented with private
preallocated resources and custom multi-cpu queueing and waiting
logic, both of which are provided by cpu_stop.
synchronize_sched_expedited_count is made atomic and all other shared
resources along with the mutex are dropped.

synchronize_sched_expedited() also implemented a check to detect cases
where not all the callbacks got executed on their assigned cpus and
fall back to synchronize_sched().  If called with cpu hotplug blocked,
cpu_stop already guarantees that and the condition cannot happen;
otherwise, stop_machine() would break.  However, this patch preserves
the paranoid check using a cpumask to record on which cpus the stopper
ran so that it can serve as a bisection point if something actually
goes wrong there.

Because the internal execution state is no longer visible,
rcu_expedited_torture_stats() is removed.

This patch also renames cpu_stop threads from "stopper/%d" to
"migration/%d".  The names of these threads ultimately don't matter
and there's no reason to make unnecessary userland visible changes.

With this patch applied, stop_machine() and sched now share the same
resources.  stop_machine() is faster without wasting any resources and
sched migration users are much cleaner.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Josh Triplett <josh@freedesktop.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
---
 Documentation/RCU/torture.txt |  10 --
 include/linux/rcutiny.h       |   2 -
 include/linux/rcutree.h       |   1 -
 kernel/rcutorture.c           |   2 +-
 kernel/sched.c                | 315 ++++++++++++------------------------------
 kernel/sched_fair.c           |  48 +++++--
 kernel/stop_machine.c         |   2 +-
 7 files changed, 127 insertions(+), 253 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/RCU/torture.txt b/Documentation/RCU/torture.txt
index 0e50bc2aa1e2..5d9016795fd8 100644
--- a/Documentation/RCU/torture.txt
+++ b/Documentation/RCU/torture.txt
@@ -182,16 +182,6 @@ Similarly, sched_expedited RCU provides the following:
 	sched_expedited-torture: Reader Pipe:  12660320201 95875 0 0 0 0 0 0 0 0 0
 	sched_expedited-torture: Reader Batch:  12660424885 0 0 0 0 0 0 0 0 0 0
 	sched_expedited-torture: Free-Block Circulation:  1090795 1090795 1090794 1090793 1090792 1090791 1090790 1090789 1090788 1090787 0
-	state: -1 / 0:0 3:0 4:0
-
-As before, the first four lines are similar to those for RCU.
-The last line shows the task-migration state.  The first number is
--1 if synchronize_sched_expedited() is idle, -2 if in the process of
-posting wakeups to the migration kthreads, and N when waiting on CPU N.
-Each of the colon-separated fields following the "/" is a CPU:state pair.
-Valid states are "0" for idle, "1" for waiting for quiescent state,
-"2" for passed through quiescent state, and "3" when a race with a
-CPU-hotplug event forces use of the synchronize_sched() primitive.
 
 
 USAGE
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a5195875480a..0006b2df00e1 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -60,8 +60,6 @@ static inline long rcu_batches_completed_bh(void)
 	return 0;
 }
 
-extern int rcu_expedited_torture_stats(char *page);
-
 static inline void rcu_force_quiescent_state(void)
 {
 }
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 42cc3a04779e..24e467e526b8 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -35,7 +35,6 @@ struct notifier_block;
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
 extern int rcu_needs_cpu(int cpu);
-extern int rcu_expedited_torture_stats(char *page);
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83ed..2b676f3a0f26 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -669,7 +669,7 @@ static struct rcu_torture_ops sched_expedited_ops = {
 	.sync		= synchronize_sched_expedited,
 	.cb_barrier	= NULL,
 	.fqs		= rcu_sched_force_quiescent_state,
-	.stats		= rcu_expedited_torture_stats,
+	.stats		= NULL,
 	.irq_capable	= 1,
 	.name		= "sched_expedited"
 };
diff --git a/kernel/sched.c b/kernel/sched.c
index 4956ed092838..f1d577a0a8ab 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -55,9 +55,9 @@
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
 #include <linux/percpu.h>
-#include <linux/kthread.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
+#include <linux/stop_machine.h>
 #include <linux/sysctl.h>
 #include <linux/syscalls.h>
 #include <linux/times.h>
@@ -539,15 +539,13 @@ struct rq {
 	int post_schedule;
 	int active_balance;
 	int push_cpu;
+	struct cpu_stop_work active_balance_work;
 	/* cpu of this runqueue: */
 	int cpu;
 	int online;
 
 	unsigned long avg_load_per_task;
 
-	struct task_struct *migration_thread;
-	struct list_head migration_queue;
-
 	u64 rt_avg;
 	u64 age_stamp;
 	u64 idle_stamp;
@@ -2037,21 +2035,18 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 	__set_task_cpu(p, new_cpu);
 }
 
-struct migration_req {
-	struct list_head list;
-
+struct migration_arg {
 	struct task_struct *task;
 	int dest_cpu;
-
-	struct completion done;
 };
 
+static int migration_cpu_stop(void *data);
+
 /*
  * The task's runqueue lock must be held.
  * Returns true if you have to wait for migration thread.
  */
-static int
-migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
+static bool migrate_task(struct task_struct *p, int dest_cpu)
 {
 	struct rq *rq = task_rq(p);
 
@@ -2059,15 +2054,7 @@ migrate_task(struct task_struct *p, int dest_cpu, struct migration_req *req)
 	 * If the task is not on a runqueue (and not running), then
 	 * the next wake-up will properly place the task.
 	 */
-	if (!p->se.on_rq && !task_running(rq, p))
-		return 0;
-
-	init_completion(&req->done);
-	req->task = p;
-	req->dest_cpu = dest_cpu;
-	list_add(&req->list, &rq->migration_queue);
-
-	return 1;
+	return p->se.on_rq || task_running(rq, p);
 }
 
 /*
@@ -3110,7 +3097,6 @@ static void update_cpu_load(struct rq *this_rq)
 void sched_exec(void)
 {
 	struct task_struct *p = current;
-	struct migration_req req;
 	unsigned long flags;
 	struct rq *rq;
 	int dest_cpu;
@@ -3124,17 +3110,11 @@ void sched_exec(void)
 	 * select_task_rq() can race against ->cpus_allowed
 	 */
 	if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
-	    likely(cpu_active(dest_cpu)) &&
-	    migrate_task(p, dest_cpu, &req)) {
-		/* Need to wait for migration thread (might exit: take ref). */
-		struct task_struct *mt = rq->migration_thread;
+	    likely(cpu_active(dest_cpu)) && migrate_task(p, dest_cpu)) {
+		struct migration_arg arg = { p, dest_cpu };
 
-		get_task_struct(mt);
 		task_rq_unlock(rq, &flags);
-		wake_up_process(mt);
-		put_task_struct(mt);
-		wait_for_completion(&req.done);
-
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		return;
 	}
 unlock:
@@ -5290,17 +5270,15 @@ static inline void sched_init_granularity(void)
 /*
  * This is how migration works:
  *
- * 1) we queue a struct migration_req structure in the source CPU's
- *    runqueue and wake up that CPU's migration thread.
- * 2) we down() the locked semaphore => thread blocks.
- * 3) migration thread wakes up (implicitly it forces the migrated
- *    thread off the CPU)
- * 4) it gets the migration request and checks whether the migrated
- *    task is still in the wrong runqueue.
- * 5) if it's in the wrong runqueue then the migration thread removes
+ * 1) we invoke migration_cpu_stop() on the target CPU using
+ *    stop_one_cpu().
+ * 2) stopper starts to run (implicitly forcing the migrated thread
+ *    off the CPU)
+ * 3) it checks whether the migrated task is still in the wrong runqueue.
+ * 4) if it's in the wrong runqueue then the migration thread removes
  *    it and puts it into the right queue.
- * 6) migration thread up()s the semaphore.
- * 7) we wake up and the migration is done.
+ * 5) stopper completes and stop_one_cpu() returns and the migration
+ *    is done.
  */
 
 /*
@@ -5314,9 +5292,9 @@ static inline void sched_init_granularity(void)
  */
 int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
 {
-	struct migration_req req;
 	unsigned long flags;
 	struct rq *rq;
+	unsigned int dest_cpu;
 	int ret = 0;
 
 	/*
@@ -5354,15 +5332,12 @@ again:
 	if (cpumask_test_cpu(task_cpu(p), new_mask))
 		goto out;
 
-	if (migrate_task(p, cpumask_any_and(cpu_active_mask, new_mask), &req)) {
+	dest_cpu = cpumask_any_and(cpu_active_mask, new_mask);
+	if (migrate_task(p, dest_cpu)) {
+		struct migration_arg arg = { p, dest_cpu };
 		/* Need help from migration thread: drop lock and wait. */
-		struct task_struct *mt = rq->migration_thread;
-
-		get_task_struct(mt);
 		task_rq_unlock(rq, &flags);
-		wake_up_process(mt);
-		put_task_struct(mt);
-		wait_for_completion(&req.done);
+		stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 		tlb_migrate_finish(p->mm);
 		return 0;
 	}
@@ -5420,70 +5395,22 @@ fail:
 	return ret;
 }
 
-#define RCU_MIGRATION_IDLE	0
-#define RCU_MIGRATION_NEED_QS	1
-#define RCU_MIGRATION_GOT_QS	2
-#define RCU_MIGRATION_MUST_SYNC	3
-
 /*
- * migration_thread - this is a highprio system thread that performs
- * thread migration by bumping thread off CPU then 'pushing' onto
- * another runqueue.
+ * migration_cpu_stop - this will be executed by a highprio stopper thread
+ * and performs thread migration by bumping thread off CPU then
+ * 'pushing' onto another runqueue.
  */
-static int migration_thread(void *data)
+static int migration_cpu_stop(void *data)
 {
-	int badcpu;
-	int cpu = (long)data;
-	struct rq *rq;
-
-	rq = cpu_rq(cpu);
-	BUG_ON(rq->migration_thread != current);
-
-	set_current_state(TASK_INTERRUPTIBLE);
-	while (!kthread_should_stop()) {
-		struct migration_req *req;
-		struct list_head *head;
-
-		raw_spin_lock_irq(&rq->lock);
-
-		if (cpu_is_offline(cpu)) {
-			raw_spin_unlock_irq(&rq->lock);
-			break;
-		}
-
-		if (rq->active_balance) {
-			active_load_balance(rq, cpu);
-			rq->active_balance = 0;
-		}
-
-		head = &rq->migration_queue;
-
-		if (list_empty(head)) {
-			raw_spin_unlock_irq(&rq->lock);
-			schedule();
-			set_current_state(TASK_INTERRUPTIBLE);
-			continue;
-		}
-		req = list_entry(head->next, struct migration_req, list);
-		list_del_init(head->next);
-
-		if (req->task != NULL) {
-			raw_spin_unlock(&rq->lock);
-			__migrate_task(req->task, cpu, req->dest_cpu);
-		} else if (likely(cpu == (badcpu = smp_processor_id()))) {
-			req->dest_cpu = RCU_MIGRATION_GOT_QS;
-			raw_spin_unlock(&rq->lock);
-		} else {
-			req->dest_cpu = RCU_MIGRATION_MUST_SYNC;
-			raw_spin_unlock(&rq->lock);
-			WARN_ONCE(1, "migration_thread() on CPU %d, expected %d\n", badcpu, cpu);
-		}
-		local_irq_enable();
-
-		complete(&req->done);
-	}
-	__set_current_state(TASK_RUNNING);
+	struct migration_arg *arg = data;
 
+	/*
+	 * The original target cpu might have gone down and we might
+	 * be on another cpu but it doesn't matter.
+	 */
+	local_irq_disable();
+	__migrate_task(arg->task, raw_smp_processor_id(), arg->dest_cpu);
+	local_irq_enable();
 	return 0;
 }
 
@@ -5850,35 +5777,20 @@ static void set_rq_offline(struct rq *rq)
 static int __cpuinit
 migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
-	struct task_struct *p;
 	int cpu = (long)hcpu;
 	unsigned long flags;
-	struct rq *rq;
+	struct rq *rq = cpu_rq(cpu);
 
 	switch (action) {
 
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		p = kthread_create(migration_thread, hcpu, "migration/%d", cpu);
-		if (IS_ERR(p))
-			return NOTIFY_BAD;
-		kthread_bind(p, cpu);
-		/* Must be high prio: stop_machine expects to yield to it. */
-		rq = task_rq_lock(p, &flags);
-		__setscheduler(rq, p, SCHED_FIFO, MAX_RT_PRIO-1);
-		task_rq_unlock(rq, &flags);
-		get_task_struct(p);
-		cpu_rq(cpu)->migration_thread = p;
 		rq->calc_load_update = calc_load_update;
 		break;
 
 	case CPU_ONLINE:
 	case CPU_ONLINE_FROZEN:
-		/* Strictly unnecessary, as first user will wake it. */
-		wake_up_process(cpu_rq(cpu)->migration_thread);
-
 		/* Update our root-domain */
-		rq = cpu_rq(cpu);
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -5889,25 +5801,9 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		break;
 
 #ifdef CONFIG_HOTPLUG_CPU
-	case CPU_UP_CANCELED:
-	case CPU_UP_CANCELED_FROZEN:
-		if (!cpu_rq(cpu)->migration_thread)
-			break;
-		/* Unbind it from offline cpu so it can run. Fall thru. */
-		kthread_bind(cpu_rq(cpu)->migration_thread,
-			     cpumask_any(cpu_online_mask));
-		kthread_stop(cpu_rq(cpu)->migration_thread);
-		put_task_struct(cpu_rq(cpu)->migration_thread);
-		cpu_rq(cpu)->migration_thread = NULL;
-		break;
-
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 		migrate_live_tasks(cpu);
-		rq = cpu_rq(cpu);
-		kthread_stop(rq->migration_thread);
-		put_task_struct(rq->migration_thread);
-		rq->migration_thread = NULL;
 		/* Idle task back to normal (off runqueue, low prio) */
 		raw_spin_lock_irq(&rq->lock);
 		deactivate_task(rq, rq->idle, 0);
@@ -5918,29 +5814,11 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
 		migrate_nr_uninterruptible(rq);
 		BUG_ON(rq->nr_running != 0);
 		calc_global_load_remove(rq);
-		/*
-		 * No need to migrate the tasks: it was best-effort if
-		 * they didn't take sched_hotcpu_mutex. Just wake up
-		 * the requestors.
-		 */
-		raw_spin_lock_irq(&rq->lock);
-		while (!list_empty(&rq->migration_queue)) {
-			struct migration_req *req;
-
-			req = list_entry(rq->migration_queue.next,
-					 struct migration_req, list);
-			list_del_init(&req->list);
-			raw_spin_unlock_irq(&rq->lock);
-			complete(&req->done);
-			raw_spin_lock_irq(&rq->lock);
-		}
-		raw_spin_unlock_irq(&rq->lock);
 		break;
 
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
 		/* Update our root-domain */
-		rq = cpu_rq(cpu);
 		raw_spin_lock_irqsave(&rq->lock, flags);
 		if (rq->rd) {
 			BUG_ON(!cpumask_test_cpu(cpu, rq->rd->span));
@@ -7757,10 +7635,8 @@ void __init sched_init(void)
 		rq->push_cpu = 0;
 		rq->cpu = i;
 		rq->online = 0;
-		rq->migration_thread = NULL;
 		rq->idle_stamp = 0;
 		rq->avg_idle = 2*sysctl_sched_migration_cost;
-		INIT_LIST_HEAD(&rq->migration_queue);
 		rq_attach_root(rq, &def_root_domain);
 #endif
 		init_rq_hrtick(rq);
@@ -9054,43 +8930,39 @@ struct cgroup_subsys cpuacct_subsys = {
 
 #ifndef CONFIG_SMP
 
-int rcu_expedited_torture_stats(char *page)
-{
-	return 0;
-}
-EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
-
 void synchronize_sched_expedited(void)
 {
+	/*
+	 * There must be a full memory barrier on each affected CPU
+	 * between the time that try_stop_cpus() is called and the
+	 * time that it returns.
+	 *
+	 * In the current initial implementation of cpu_stop, the
+	 * above condition is already met when the control reaches
+	 * this point and the following smp_mb() is not strictly
+	 * necessary.  Do smp_mb() anyway for documentation and
+	 * robustness against future implementation changes.
+	 */
+	smp_mb();
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
 #else /* #ifndef CONFIG_SMP */
 
-static DEFINE_PER_CPU(struct migration_req, rcu_migration_req);
-static DEFINE_MUTEX(rcu_sched_expedited_mutex);
-
-#define RCU_EXPEDITED_STATE_POST -2
-#define RCU_EXPEDITED_STATE_IDLE -1
+static atomic_t synchronize_sched_expedited_count = ATOMIC_INIT(0);
 
-static int rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
-
-int rcu_expedited_torture_stats(char *page)
+static int synchronize_sched_expedited_cpu_stop(void *data)
 {
-	int cnt = 0;
-	int cpu;
+	static DEFINE_SPINLOCK(done_mask_lock);
+	struct cpumask *done_mask = data;
 
-	cnt += sprintf(&page[cnt], "state: %d /", rcu_expedited_state);
-	for_each_online_cpu(cpu) {
-		 cnt += sprintf(&page[cnt], " %d:%d",
-				cpu, per_cpu(rcu_migration_req, cpu).dest_cpu);
+	if (done_mask) {
+		spin_lock(&done_mask_lock);
+		cpumask_set_cpu(smp_processor_id(), done_mask);
+		spin_unlock(&done_mask_lock);
 	}
-	cnt += sprintf(&page[cnt], "\n");
-	return cnt;
+	return 0;
 }
-EXPORT_SYMBOL_GPL(rcu_expedited_torture_stats);
-
-static long synchronize_sched_expedited_count;
 
 /*
  * Wait for an rcu-sched grace period to elapse, but use "big hammer"
@@ -9104,60 +8976,55 @@ static long synchronize_sched_expedited_count;
  */
 void synchronize_sched_expedited(void)
 {
-	int cpu;
-	unsigned long flags;
-	bool need_full_sync = 0;
-	struct rq *rq;
-	struct migration_req *req;
-	long snap;
-	int trycount = 0;
+	cpumask_var_t done_mask_var;
+	struct cpumask *done_mask = NULL;
+	int snap, trycount = 0;
+
+	/*
+	 * done_mask is used to check that all cpus actually have
+	 * finished running the stopper, which is guaranteed by
+	 * stop_cpus() if it's called with cpu hotplug blocked.  Keep
+	 * the paranoia for now but it's best effort if cpumask is off
+	 * stack.
+	 */
+	if (zalloc_cpumask_var(&done_mask_var, GFP_ATOMIC))
+		done_mask = done_mask_var;
 
 	smp_mb();  /* ensure prior mod happens before capturing snap. */
-	snap = ACCESS_ONCE(synchronize_sched_expedited_count) + 1;
+	snap = atomic_read(&synchronize_sched_expedited_count) + 1;
 	get_online_cpus();
-	while (!mutex_trylock(&rcu_sched_expedited_mutex)) {
+	while (try_stop_cpus(cpu_online_mask,
+			     synchronize_sched_expedited_cpu_stop,
+			     done_mask) == -EAGAIN) {
 		put_online_cpus();
 		if (trycount++ < 10)
 			udelay(trycount * num_online_cpus());
 		else {
 			synchronize_sched();
-			return;
+			goto free_out;
 		}
-		if (ACCESS_ONCE(synchronize_sched_expedited_count) - snap > 0) {
+		if (atomic_read(&synchronize_sched_expedited_count) - snap > 0) {
 			smp_mb(); /* ensure test happens before caller kfree */
-			return;
+			goto free_out;
 		}
 		get_online_cpus();
 	}
-	rcu_expedited_state = RCU_EXPEDITED_STATE_POST;
-	for_each_online_cpu(cpu) {
-		rq = cpu_rq(cpu);
-		req = &per_cpu(rcu_migration_req, cpu);
-		init_completion(&req->done);
-		req->task = NULL;
-		req->dest_cpu = RCU_MIGRATION_NEED_QS;
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		list_add(&req->list, &rq->migration_queue);
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-		wake_up_process(rq->migration_thread);
-	}
-	for_each_online_cpu(cpu) {
-		rcu_expedited_state = cpu;
-		req = &per_cpu(rcu_migration_req, cpu);
-		rq = cpu_rq(cpu);
-		wait_for_completion(&req->done);
-		raw_spin_lock_irqsave(&rq->lock, flags);
-		if (unlikely(req->dest_cpu == RCU_MIGRATION_MUST_SYNC))
-			need_full_sync = 1;
-		req->dest_cpu = RCU_MIGRATION_IDLE;
-		raw_spin_unlock_irqrestore(&rq->lock, flags);
-	}
-	rcu_expedited_state = RCU_EXPEDITED_STATE_IDLE;
-	synchronize_sched_expedited_count++;
-	mutex_unlock(&rcu_sched_expedited_mutex);
+	atomic_inc(&synchronize_sched_expedited_count);
+	if (done_mask)
+		cpumask_xor(done_mask, done_mask, cpu_online_mask);
 	put_online_cpus();
-	if (need_full_sync)
+
+	/* paranoia - this can't happen */
+	if (done_mask && cpumask_weight(done_mask)) {
+		char buf[80];
+
+		cpulist_scnprintf(buf, sizeof(buf), done_mask);
+		WARN_ONCE(1, "synchronize_sched_expedited: cpu online and done masks disagree on %d cpus: %s\n",
+			  cpumask_weight(done_mask), buf);
 		synchronize_sched();
+	}
+free_out:
+	free_cpumask_var(done_mask_var);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d1..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
+static int active_load_balance_cpu_stop(void *data);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2887,8 +2889,9 @@ redo:
 		if (need_active_balance(sd, sd_idle, idle)) {
 			raw_spin_lock_irqsave(&busiest->lock, flags);
 
-			/* don't kick the migration_thread, if the curr
-			 * task on busiest cpu can't be moved to this_cpu
+			/* don't kick the active_load_balance_cpu_stop,
+			 * if the curr task on busiest cpu can't be
+			 * moved to this_cpu
 			 */
 			if (!cpumask_test_cpu(this_cpu,
 					      &busiest->curr->cpus_allowed)) {
@@ -2898,14 +2901,22 @@ redo:
 				goto out_one_pinned;
 			}
 
+			/*
+			 * ->active_balance synchronizes accesses to
+			 * ->active_balance_work.  Once set, it's cleared
+			 * only after active load balance is finished.
+			 */
 			if (!busiest->active_balance) {
 				busiest->active_balance = 1;
 				busiest->push_cpu = this_cpu;
 				active_balance = 1;
 			}
 			raw_spin_unlock_irqrestore(&busiest->lock, flags);
+
 			if (active_balance)
-				wake_up_process(busiest->migration_thread);
+				stop_one_cpu_nowait(cpu_of(busiest),
+					active_load_balance_cpu_stop, busiest,
+					&busiest->active_balance_work);
 
 			/*
 			 * We've kicked active balancing, reset the failure
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 }
 
 /*
- * active_load_balance is run by migration threads. It pushes running tasks
- * off the busiest CPU onto idle CPUs. It requires at least 1 task to be
- * running on each physical CPU where possible, and avoids physical /
- * logical imbalances.
- *
- * Called with busiest_rq locked.
+ * active_load_balance_cpu_stop is run by cpu stopper. It pushes
+ * running tasks off the busiest CPU onto idle CPUs. It requires at
+ * least 1 task to be running on each physical CPU where possible, and
+ * avoids physical / logical imbalances.
  */
-static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
+static int active_load_balance_cpu_stop(void *data)
 {
+	struct rq *busiest_rq = data;
+	int busiest_cpu = cpu_of(busiest_rq);
 	int target_cpu = busiest_rq->push_cpu;
+	struct rq *target_rq = cpu_rq(target_cpu);
 	struct sched_domain *sd;
-	struct rq *target_rq;
+
+	raw_spin_lock_irq(&busiest_rq->lock);
+
+	/* make sure the requested cpu hasn't gone down in the meantime */
+	if (unlikely(busiest_cpu != smp_processor_id() ||
+		     !busiest_rq->active_balance))
+		goto out_unlock;
 
 	/* Is there any task to move? */
 	if (busiest_rq->nr_running <= 1)
-		return;
-
-	target_rq = cpu_rq(target_cpu);
+		goto out_unlock;
 
 	/*
 	 * This condition is "impossible", if it occurs
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 			schedstat_inc(sd, alb_failed);
 	}
 	double_unlock_balance(busiest_rq, target_rq);
+out_unlock:
+	busiest_rq->active_balance = 0;
+	raw_spin_unlock_irq(&busiest_rq->lock);
+	return 0;
 }
 
 #ifdef CONFIG_NO_HZ
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 884c7a1afeed..5b20141a5ec1 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -301,7 +301,7 @@ static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
 	case CPU_UP_PREPARE:
 		BUG_ON(stopper->thread || stopper->enabled ||
 		       !list_empty(&stopper->works));
-		p = kthread_create(cpu_stopper_thread, stopper, "stopper/%d",
+		p = kthread_create(cpu_stopper_thread, stopper, "migration/%d",
 				   cpu);
 		if (IS_ERR(p))
 			return NOTIFY_BAD;
-- 
cgit v1.2.3


From 18e8c134f4e984e6639e62846345192816f06d5c Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Thu, 6 May 2010 21:58:51 -0700
Subject: net: Increase NET_SKB_PAD to 64 bytes

eth_type_trans() & get_rps_cpus() currently need two 64-byte cache
lines in the packet to compute rxhash.

Increasing NET_SKB_PAD from 32 to 64 reduces the need to one cache
line only, and makes RPS faster.

NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 88d55395a27c..c9525bce80f6 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1361,9 +1361,12 @@ static inline int skb_network_offset(const struct sk_buff *skb)
  *
  * Various parts of the networking layer expect at least 32 bytes of
  * headroom, you should not reduce this.
+ * With RPS, we raised NET_SKB_PAD to 64 so that get_rps_cpus() fetches span
+ * a 64 bytes aligned block to fit modern (>= 64 bytes) cache line sizes
+ * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8)
  */
 #ifndef NET_SKB_PAD
-#define NET_SKB_PAD	32
+#define NET_SKB_PAD	64
 #endif
 
 extern int ___pskb_trim(struct sk_buff *skb, unsigned int len);
-- 
cgit v1.2.3


From 4fd38e4595e2f6c9d27732c042a0e16b2753049c Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu, 6 May 2010 17:31:38 +0200
Subject: perf: Fix exit() vs PERF_FORMAT_GROUP

Both Stephane and Corey reported that PERF_FORMAT_GROUP didn't work
as expected if the task the counters were attached to quit before
the read() call.

The cause is that we unconditionally destroy the grouping when we
remove counters from their context. Fix this by only doing this when
we free the counter itself.

Reported-by: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Reported-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1273160566.5605.404.camel@twins>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 1 +
 kernel/perf_event.c        | 5 +++++
 2 files changed, 6 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index c8e375440403..bf8f3c003297 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -522,6 +522,7 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
+	PERF_EVENT_STATE_FREE		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 3d1552d3c12b..f13c3db765f4 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -341,6 +341,9 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
 
+	if (event->state > PERF_EVENT_STATE_FREE)
+		return;
+
 	/*
 	 * If this was a group event with sibling events then
 	 * upgrade the siblings to singleton events by adding them
@@ -1856,6 +1859,8 @@ int perf_event_release_kernel(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 
+	event->state = PERF_EVENT_STATE_FREE;
+
 	WARN_ON_ONCE(ctx->parent_ctx);
 	mutex_lock(&ctx->mutex);
 	perf_event_remove_from_context(event);
-- 
cgit v1.2.3


From ab608344bcbde4f55ec4cd911b686b0ce3eae076 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 8 Apr 2010 23:03:20 +0200
Subject: perf, x86: Improve the PEBS ABI

Rename perf_event_attr::precise to perf_event_attr::precise_ip and
widen it to 2 bits. This new field describes the required precision of
the PERF_SAMPLE_IP field:

  0 - SAMPLE_IP can have arbitrary skid
  1 - SAMPLE_IP must have constant skid
  2 - SAMPLE_IP requested to have 0 skid
  3 - SAMPLE_IP must have 0 skid

And modify the Intel PEBS code accordingly. The PEBS implementation
now supports up to precise_ip == 2, where we perform the IP fixup.

Also s/PERF_RECORD_MISC_EXACT/&_IP/ to clarify its meaning, this bit
should be set for each PERF_SAMPLE_IP field known to match the actual
instruction triggering the event.

This new scheme allows for a PEBS mode that uses the buffer for more
than a single event.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c          | 17 ++++++++++++++++-
 arch/x86/kernel/cpu/perf_event_intel.c    |  4 ++--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 12 ++++++------
 include/linux/perf_event.h                | 23 +++++++++++++++++++----
 tools/perf/builtin-top.c                  |  2 +-
 tools/perf/util/parse-events.c            | 25 ++++++++++++++++---------
 6 files changed, 60 insertions(+), 23 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4a3f1f2b9b91..27fa9eeed024 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -488,6 +488,21 @@ static int x86_setup_perfctr(struct perf_event *event)
 
 static int x86_pmu_hw_config(struct perf_event *event)
 {
+	if (event->attr.precise_ip) {
+		int precise = 0;
+
+		/* Support for constant skid */
+		if (x86_pmu.pebs)
+			precise++;
+
+		/* Support for IP fixup */
+		if (x86_pmu.lbr_nr)
+			precise++;
+
+		if (event->attr.precise_ip > precise)
+			return -EOPNOTSUPP;
+	}
+
 	/*
 	 * Generate PMC IRQs:
 	 * (keep 'enabled' bit clear for now)
@@ -1780,7 +1795,7 @@ unsigned long perf_misc_flags(struct pt_regs *regs)
 	}
 
 	if (regs->flags & PERF_EFLAGS_EXACT)
-		misc |= PERF_RECORD_MISC_EXACT;
+		misc |= PERF_RECORD_MISC_EXACT_IP;
 
 	return misc;
 }
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a4b56ac425cb..fdbc652d3feb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -563,7 +563,7 @@ static void intel_pmu_disable_event(struct perf_event *event)
 
 	x86_pmu_disable_event(event);
 
-	if (unlikely(event->attr.precise))
+	if (unlikely(event->attr.precise_ip))
 		intel_pmu_pebs_disable(event);
 }
 
@@ -615,7 +615,7 @@ static void intel_pmu_enable_event(struct perf_event *event)
 		return;
 	}
 
-	if (unlikely(event->attr.precise))
+	if (unlikely(event->attr.precise_ip))
 		intel_pmu_pebs_enable(event);
 
 	__x86_pmu_enable_event(hwc, ARCH_PERFMON_EVENTSEL_ENABLE);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 35056f715e9e..18018d1311cd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -307,7 +307,7 @@ intel_pebs_constraints(struct perf_event *event)
 {
 	struct event_constraint *c;
 
-	if (!event->attr.precise)
+	if (!event->attr.precise_ip)
 		return NULL;
 
 	if (x86_pmu.pebs_constraints) {
@@ -330,7 +330,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event)
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 	WARN_ON_ONCE(cpuc->enabled);
 
-	if (x86_pmu.intel_cap.pebs_trap)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
 		intel_pmu_lbr_enable(event);
 }
 
@@ -345,7 +345,7 @@ static void intel_pmu_pebs_disable(struct perf_event *event)
 
 	hwc->config |= ARCH_PERFMON_EVENTSEL_INT;
 
-	if (x86_pmu.intel_cap.pebs_trap)
+	if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1)
 		intel_pmu_lbr_disable(event);
 }
 
@@ -485,7 +485,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	regs.bp = pebs->bp;
 	regs.sp = pebs->sp;
 
-	if (intel_pmu_pebs_fixup_ip(regs))
+	if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
 		regs.flags |= PERF_EFLAGS_EXACT;
 	else
 		regs.flags &= ~PERF_EFLAGS_EXACT;
@@ -518,7 +518,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 
 	WARN_ON_ONCE(!event);
 
-	if (!event->attr.precise)
+	if (!event->attr.precise_ip)
 		return;
 
 	n = top - at;
@@ -570,7 +570,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 
 			WARN_ON_ONCE(!event);
 
-			if (!event->attr.precise)
+			if (!event->attr.precise_ip)
 				continue;
 
 			if (__test_and_set_bit(bit, (unsigned long *)&status))
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 6be4a0f9137c..23cd0057a681 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -203,9 +203,19 @@ struct perf_event_attr {
 				enable_on_exec :  1, /* next exec enables     */
 				task           :  1, /* trace fork/exit       */
 				watermark      :  1, /* wakeup_watermark      */
-				precise        :  1, /* OoO invariant counter */
-
-				__reserved_1   : 48;
+				/*
+				 * precise_ip:
+				 *
+				 *  0 - SAMPLE_IP can have arbitrary skid
+				 *  1 - SAMPLE_IP must have constant skid
+				 *  2 - SAMPLE_IP requested to have 0 skid
+				 *  3 - SAMPLE_IP must have 0 skid
+				 *
+				 *  See also PERF_RECORD_MISC_EXACT_IP
+				 */
+				precise_ip     :  2, /* skid constraint       */
+
+				__reserved_1   : 47;
 
 	union {
 		__u32		wakeup_events;	  /* wakeup every n events */
@@ -296,7 +306,12 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
-#define PERF_RECORD_MISC_EXACT			(1 << 14)
+/*
+ * Indicates that the content of PERF_SAMPLE_IP points to
+ * the actual instruction that triggered the event. See also
+ * perf_event_attr::precise_ip.
+ */
+#define PERF_RECORD_MISC_EXACT_IP		(1 << 14)
 /*
  * Reserve the last bit to indicate some extended misc field
  */
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 3de397764cb3..ed9b5b6905fa 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1021,7 +1021,7 @@ static void event__process_sample(const event_t *self,
 		return;
 	}
 
-	if (self->header.misc & PERF_RECORD_MISC_EXACT)
+	if (self->header.misc & PERF_RECORD_MISC_EXACT_IP)
 		exact_samples++;
 
 	if (event__preprocess_sample(self, session, &al, symbol_filter) < 0 ||
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index bc8b7e614207..ae7f5917935c 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -654,10 +654,6 @@ parse_raw_event(const char **strp, struct perf_event_attr *attr)
 		return EVT_FAILED;
 	n = hex2u64(str + 1, &config);
 	if (n > 0) {
-		if (str[n+1] == 'p') {
-			attr->precise = 1;
-			n++;
-		}
 		*strp = str + n + 1;
 		attr->type = PERF_TYPE_RAW;
 		attr->config = config;
@@ -692,19 +688,29 @@ static enum event_result
 parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 {
 	const char *str = *strp;
-	int eu = 1, ek = 1, eh = 1;
+	int exclude = 0;
+	int eu = 0, ek = 0, eh = 0, precise = 0;
 
 	if (*str++ != ':')
 		return 0;
 	while (*str) {
-		if (*str == 'u')
+		if (*str == 'u') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
 			eu = 0;
-		else if (*str == 'k')
+		} else if (*str == 'k') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
 			ek = 0;
-		else if (*str == 'h')
+		} else if (*str == 'h') {
+			if (!exclude)
+				exclude = eu = ek = eh = 1;
 			eh = 0;
-		else
+		} else if (*str == 'p') {
+			precise++;
+		} else
 			break;
+
 		++str;
 	}
 	if (str >= *strp + 2) {
@@ -712,6 +718,7 @@ parse_event_modifier(const char **strp, struct perf_event_attr *attr)
 		attr->exclude_user   = eu;
 		attr->exclude_kernel = ek;
 		attr->exclude_hv     = eh;
+		attr->precise_ip     = precise;
 		return 1;
 	}
 	return 0;
-- 
cgit v1.2.3


From 6bde9b6ce0127e2a56228a2071536d422be31336 Mon Sep 17 00:00:00 2001
From: Lin Ming <ming.m.lin@intel.com>
Date: Fri, 23 Apr 2010 13:56:00 +0800
Subject: perf: Add group scheduling transactional APIs

Add group scheduling transactional APIs to struct pmu.
These APIs will be implemented in arch code, based on Peter's idea as
below.

> the idea behind hw_perf_group_sched_in() is to not perform
> schedulability tests on each event in the group, but to add the group
> as a whole and then perform one test.
>
> Of course, when that test fails, you'll have to roll-back the whole
> group again.
>
> So start_txn (or a better name) would simply toggle a flag in the pmu
> implementation that will make pmu::enable() not perform the
> schedulability test.
>
> Then commit_txn() will perform the schedulability test (so note the
> method has to have a !void return value).
>
> This will allow us to use the regular
> kernel/perf_event.c::group_sched_in() and all the rollback code.
> Currently each hw_perf_group_sched_in() implementation duplicates all
> the rollback code (with various bugs).

->start_txn:
Start group events scheduling transaction, set a flag to make
pmu::enable() not perform the schedulability test, it will be performed
at commit time.

->commit_txn:
Commit group events scheduling transaction, perform the group
schedulability as a whole

->cancel_txn:
Stop group events scheduling transaction, clear the flag so
pmu::enable() will perform the schedulability test.

Reviewed-by: Stephane Eranian <eranian@google.com>
Reviewed-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
Cc: David Miller <davem@davemloft.net>
Cc: Paul Mackerras <paulus@samba.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <1272002160.5707.60.camel@minggr.sh.intel.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 15 ++++++++++++---
 kernel/perf_event.c        | 33 ++++++++++++++++++++-------------
 2 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 23cd0057a681..4924c96d7e2d 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -547,6 +547,8 @@ struct hw_perf_event {
 
 struct perf_event;
 
+#define PERF_EVENT_TXN_STARTED 1
+
 /**
  * struct pmu - generic performance monitoring unit
  */
@@ -557,6 +559,16 @@ struct pmu {
 	void (*stop)			(struct perf_event *event);
 	void (*read)			(struct perf_event *event);
 	void (*unthrottle)		(struct perf_event *event);
+
+	/*
+	 * group events scheduling is treated as a transaction,
+	 * add group events as a whole and perform one schedulability test.
+	 * If test fails, roll back the whole group
+	 */
+
+	void (*start_txn)	(const struct pmu *pmu);
+	void (*cancel_txn)	(const struct pmu *pmu);
+	int  (*commit_txn)	(const struct pmu *pmu);
 };
 
 /**
@@ -823,9 +835,6 @@ extern void perf_disable(void);
 extern void perf_enable(void);
 extern int perf_event_task_disable(void);
 extern int perf_event_task_enable(void);
-extern int hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx);
 extern void perf_event_update_userpage(struct perf_event *event);
 extern int perf_event_release_kernel(struct perf_event *event);
 extern struct perf_event *
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 34659d4085c7..bb06382f98e7 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -83,14 +83,6 @@ extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
 void __weak hw_perf_disable(void)		{ barrier(); }
 void __weak hw_perf_enable(void)		{ barrier(); }
 
-int __weak
-hw_perf_group_sched_in(struct perf_event *group_leader,
-	       struct perf_cpu_context *cpuctx,
-	       struct perf_event_context *ctx)
-{
-	return 0;
-}
-
 void __weak perf_event_print_debug(void)	{ }
 
 static DEFINE_PER_CPU(int, perf_disable_count);
@@ -644,15 +636,20 @@ group_sched_in(struct perf_event *group_event,
 	       struct perf_cpu_context *cpuctx,
 	       struct perf_event_context *ctx)
 {
-	struct perf_event *event, *partial_group;
+	struct perf_event *event, *partial_group = NULL;
+	const struct pmu *pmu = group_event->pmu;
+	bool txn = false;
 	int ret;
 
 	if (group_event->state == PERF_EVENT_STATE_OFF)
 		return 0;
 
-	ret = hw_perf_group_sched_in(group_event, cpuctx, ctx);
-	if (ret)
-		return ret < 0 ? ret : 0;
+	/* Check if group transaction availabe */
+	if (pmu->start_txn)
+		txn = true;
+
+	if (txn)
+		pmu->start_txn(pmu);
 
 	if (event_sched_in(group_event, cpuctx, ctx))
 		return -EAGAIN;
@@ -667,9 +664,19 @@ group_sched_in(struct perf_event *group_event,
 		}
 	}
 
-	return 0;
+	if (txn) {
+		ret = pmu->commit_txn(pmu);
+		if (!ret) {
+			pmu->cancel_txn(pmu);
+
+			return 0;
+		}
+	}
 
 group_error:
+	if (txn)
+		pmu->cancel_txn(pmu);
+
 	/*
 	 * Groups can be scheduled in as one unit only, so undo any
 	 * partial group before returning:
-- 
cgit v1.2.3


From f444de05d20e27cdd960c13fcbcfca3099f03143 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes@sipsolutions.net>
Date: Wed, 5 May 2010 15:25:02 +0200
Subject: cfg80211/mac80211: better channel handling

Currently (all tested with hwsim) you can do stupid
things like setting up an AP on a certain channel,
then adding another virtual interface and making
that associate on another channel -- this will make
the beaconing move to that channel, but obviously
without the necessary update to the IE data.

In order to improve this situation, first make the
configuration APIs (cfg80211 and nl80211) aware of
multi-channel operation -- we'll eventually need
that in the future anyway. There's one userland API
change and one API addition. The API change is that
now SET_WIPHY must be called with virtual interface
index rather than only wiphy index in order to take
effect for that interface -- luckily all current
users (hostapd) do that. For monitor interfaces, the
old setting is preserved, but monitors are always
slaved to other devices anyway so no guarantees.

The userland API addition is the introduction
of a per virtual interface SET_CHANNEL command, that
hostapd should use going forward to make it easier
to understand what's going on (it can automatically
detect a kernel with this command).

Other than mac80211, no existing cfg80211 drivers
are affected by this change because they only allow
a single virtual interface.

mac80211, however, now needs to be aware that the
channel settings are per interface now, and needs
to disallow (for now) real multi-channel operation,
which is another important part of this patch.

One of the immediate benefits is that you can now
start hostapd to operate on a hardware that already
has a connection on another virtual interface, as
long as you specify the same channel.

Note that two things are left unhandled (this is an
improvement -- not a complete fix):

 * different HT/no-HT modes

   currently you could start an HT AP and then
   connect to a non-HT network on the same channel
   which would configure the hardware for no HT;
   that can be fixed fairly easily

 * CSA

   An AP we're connected to on a virtual interface
   might indicate switching channels, and in that
   case we would follow it, regardless of how many
   other interfaces are operating; this requires
   more effort to fix but is pretty rare after all

Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 drivers/net/wireless/libertas/cfg.c |   1 +
 drivers/net/wireless/orinoco/cfg.c  |   1 +
 drivers/net/wireless/rndis_wlan.c   |   4 +-
 include/linux/nl80211.h             |  13 +++
 include/net/cfg80211.h              |  11 ++-
 net/mac80211/Makefile               |   3 +-
 net/mac80211/cfg.c                  |  41 +++++++++
 net/mac80211/chan.c                 |  57 ++++++++++++
 net/mac80211/ieee80211_i.h          |  11 +++
 net/wireless/chan.c                 |  56 ++++--------
 net/wireless/core.h                 |  12 +--
 net/wireless/ibss.c                 |   5 --
 net/wireless/nl80211.c              | 171 ++++++++++++++++++++++++++++--------
 net/wireless/sme.c                  |   5 --
 net/wireless/wext-compat.c          |  15 ++--
 net/wireless/wext-sme.c             |   2 +-
 16 files changed, 300 insertions(+), 108 deletions(-)
 create mode 100644 net/mac80211/chan.c

(limited to 'include/linux')

diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c
index 82ebe1461a77..ea9d0b2ea0d7 100644
--- a/drivers/net/wireless/libertas/cfg.c
+++ b/drivers/net/wireless/libertas/cfg.c
@@ -78,6 +78,7 @@ static const u32 cipher_suites[] = {
 
 
 static int lbs_cfg_set_channel(struct wiphy *wiphy,
+	struct net_device *netdev,
 	struct ieee80211_channel *chan,
 	enum nl80211_channel_type channel_type)
 {
diff --git a/drivers/net/wireless/orinoco/cfg.c b/drivers/net/wireless/orinoco/cfg.c
index 81d228de9e5d..8c4169c227ae 100644
--- a/drivers/net/wireless/orinoco/cfg.c
+++ b/drivers/net/wireless/orinoco/cfg.c
@@ -159,6 +159,7 @@ static int orinoco_scan(struct wiphy *wiphy, struct net_device *dev,
 }
 
 static int orinoco_set_channel(struct wiphy *wiphy,
+			struct net_device *netdev,
 			struct ieee80211_channel *chan,
 			enum nl80211_channel_type channel_type)
 {
diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index aceb95ef7274..99d4f0de77ca 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -534,7 +534,7 @@ static int rndis_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 
 static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev);
 
-static int rndis_set_channel(struct wiphy *wiphy,
+static int rndis_set_channel(struct wiphy *wiphy, struct net_device *dev,
 	struct ieee80211_channel *chan, enum nl80211_channel_type channel_type);
 
 static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev,
@@ -2290,7 +2290,7 @@ static int rndis_leave_ibss(struct wiphy *wiphy, struct net_device *dev)
 	return deauthenticate(usbdev);
 }
 
-static int rndis_set_channel(struct wiphy *wiphy,
+static int rndis_set_channel(struct wiphy *wiphy, struct net_device *netdev,
 	struct ieee80211_channel *chan, enum nl80211_channel_type channel_type)
 {
 	struct rndis_wlan_private *priv = wiphy_priv(wiphy);
diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h
index f8750f9a65b8..b7c77f9712f4 100644
--- a/include/linux/nl80211.h
+++ b/include/linux/nl80211.h
@@ -52,6 +52,8 @@
  *	%NL80211_ATTR_WIPHY_CHANNEL_TYPE, %NL80211_ATTR_WIPHY_RETRY_SHORT,
  *	%NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
  *	and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
+ *	However, for setting the channel, see %NL80211_CMD_SET_CHANNEL
+ *	instead, the support here is for backward compatibility only.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -329,6 +331,15 @@
  * @NL80211_CMD_NOTIFY_CQM: Connection quality monitor notification. This
  *	command is used as an event to indicate the that a trigger level was
  *	reached.
+ * @NL80211_CMD_SET_CHANNEL: Set the channel (using %NL80211_ATTR_WIPHY_FREQ
+ *	and %NL80211_ATTR_WIPHY_CHANNEL_TYPE) the given interface (identifed
+ *	by %NL80211_ATTR_IFINDEX) shall operate on.
+ *	In case multiple channels are supported by the device, the mechanism
+ *	with which it switches channels is implementation-defined.
+ *	When a monitor interface is given, it can only switch channel while
+ *	no other interfaces are operating to avoid disturbing the operation
+ *	of any other interfaces, and other interfaces will again take
+ *	precedence when they are used.
  *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
@@ -428,6 +439,8 @@ enum nl80211_commands {
 	NL80211_CMD_SET_CQM,
 	NL80211_CMD_NOTIFY_CQM,
 
+	NL80211_CMD_SET_CHANNEL,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7d10c0182f53..b44a2e5321a3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -966,7 +966,11 @@ struct cfg80211_pmksa {
  *
  * @set_txq_params: Set TX queue parameters
  *
- * @set_channel: Set channel
+ * @set_channel: Set channel for a given wireless interface. Some devices
+ *	may support multi-channel operation (by channel hopping) so cfg80211
+ *	doesn't verify much. Note, however, that the passed netdev may be
+ *	%NULL as well if the user requested changing the channel for the
+ *	device itself, or for a monitor interface.
  *
  * @scan: Request to do a scan. If returning zero, the scan request is given
  *	the driver, and will be valid until passed to cfg80211_scan_done().
@@ -1095,7 +1099,7 @@ struct cfg80211_ops {
 	int	(*set_txq_params)(struct wiphy *wiphy,
 				  struct ieee80211_txq_params *params);
 
-	int	(*set_channel)(struct wiphy *wiphy,
+	int	(*set_channel)(struct wiphy *wiphy, struct net_device *dev,
 			       struct ieee80211_channel *chan,
 			       enum nl80211_channel_type channel_type);
 
@@ -1461,6 +1465,8 @@ struct cfg80211_cached_keys;
  * @list: (private) Used to collect the interfaces
  * @netdev: (private) Used to reference back to the netdev
  * @current_bss: (private) Used by the internal configuration code
+ * @channel: (private) Used by the internal configuration code to track
+ *	user-set AP, monitor and WDS channels for wireless extensions
  * @bssid: (private) Used by the internal configuration code
  * @ssid: (private) Used by the internal configuration code
  * @ssid_len: (private) Used by the internal configuration code
@@ -1507,6 +1513,7 @@ struct wireless_dev {
 	struct cfg80211_internal_bss *authtry_bsses[MAX_AUTH_BSSES];
 	struct cfg80211_internal_bss *auth_bsses[MAX_AUTH_BSSES];
 	struct cfg80211_internal_bss *current_bss; /* associated / joined */
+	struct ieee80211_channel *channel;
 
 	bool ps;
 	int ps_timeout;
diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile
index 04420291e7ad..84b48ba8a77e 100644
--- a/net/mac80211/Makefile
+++ b/net/mac80211/Makefile
@@ -23,7 +23,8 @@ mac80211-y := \
 	key.o \
 	util.o \
 	wme.o \
-	event.o
+	event.o \
+	chan.o
 
 mac80211-$(CONFIG_MAC80211_LEDS) += led.o
 mac80211-$(CONFIG_MAC80211_DEBUGFS) += \
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b575a5066219..414b7dd7d7fd 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1161,11 +1161,24 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy,
 }
 
 static int ieee80211_set_channel(struct wiphy *wiphy,
+				 struct net_device *netdev,
 				 struct ieee80211_channel *chan,
 				 enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 
+	switch (ieee80211_get_channel_mode(local, NULL)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel == chan &&
+		    local->oper_channel_type == channel_type)
+			return 0;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	local->oper_channel = chan;
 	local->oper_channel_type = channel_type;
 
@@ -1213,6 +1226,20 @@ static int ieee80211_auth(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_assoc(struct wiphy *wiphy, struct net_device *dev,
 			   struct cfg80211_assoc_request *req)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
+	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
+
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (local->oper_channel == req->bss->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_mgd_assoc(IEEE80211_DEV_TO_SUB_IF(dev), req);
 }
 
@@ -1235,8 +1262,22 @@ static int ieee80211_disassoc(struct wiphy *wiphy, struct net_device *dev,
 static int ieee80211_join_ibss(struct wiphy *wiphy, struct net_device *dev,
 			       struct cfg80211_ibss_params *params)
 {
+	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 
+	switch (ieee80211_get_channel_mode(local, sdata)) {
+	case CHAN_MODE_HOPPING:
+		return -EBUSY;
+	case CHAN_MODE_FIXED:
+		if (!params->channel_fixed)
+			return -EBUSY;
+		if (local->oper_channel == params->channel)
+			break;
+		return -EBUSY;
+	case CHAN_MODE_UNDEFINED:
+		break;
+	}
+
 	return ieee80211_ibss_join(sdata, params);
 }
 
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
new file mode 100644
index 000000000000..08f3832661a5
--- /dev/null
+++ b/net/mac80211/chan.c
@@ -0,0 +1,57 @@
+/*
+ * mac80211 - channel management
+ */
+
+#include "ieee80211_i.h"
+
+enum ieee80211_chan_mode
+__ieee80211_get_channel_mode(struct ieee80211_local *local,
+			     struct ieee80211_sub_if_data *ignore)
+{
+	struct ieee80211_sub_if_data *sdata;
+
+	WARN_ON(!mutex_is_locked(&local->iflist_mtx));
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		if (sdata == ignore)
+			continue;
+
+		if (!ieee80211_sdata_running(sdata))
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_MONITOR)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_STATION &&
+		    !sdata->u.mgd.associated)
+			continue;
+
+		if (sdata->vif.type == NL80211_IFTYPE_ADHOC) {
+			if (!sdata->u.ibss.ssid_len)
+				continue;
+			if (!sdata->u.ibss.fixed_channel)
+				return CHAN_MODE_HOPPING;
+		}
+
+		if (sdata->vif.type == NL80211_IFTYPE_AP &&
+		    !sdata->u.ap.beacon)
+			continue;
+
+		return CHAN_MODE_FIXED;
+	}
+
+	return CHAN_MODE_UNDEFINED;
+}
+
+enum ieee80211_chan_mode
+ieee80211_get_channel_mode(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *ignore)
+{
+	enum ieee80211_chan_mode mode;
+
+	mutex_lock(&local->iflist_mtx);
+	mode = __ieee80211_get_channel_mode(local, ignore);
+	mutex_unlock(&local->iflist_mtx);
+
+	return mode;
+}
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index c8077a3647c6..359edff31471 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1229,6 +1229,17 @@ int ieee80211_wk_remain_on_channel(struct ieee80211_sub_if_data *sdata,
 int ieee80211_wk_cancel_remain_on_channel(
 	struct ieee80211_sub_if_data *sdata, u64 cookie);
 
+/* channel management */
+enum ieee80211_chan_mode {
+	CHAN_MODE_UNDEFINED,
+	CHAN_MODE_HOPPING,
+	CHAN_MODE_FIXED,
+};
+
+enum ieee80211_chan_mode
+ieee80211_get_channel_mode(struct ieee80211_local *local,
+			   struct ieee80211_sub_if_data *ignore);
+
 #ifdef CONFIG_MAC80211_NOINLINE
 #define debug_noinline noinline
 #else
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index bf1737fc9a7e..d92d088026bf 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -9,38 +9,6 @@
 #include <net/cfg80211.h>
 #include "core.h"
 
-struct ieee80211_channel *
-rdev_fixed_channel(struct cfg80211_registered_device *rdev,
-		   struct wireless_dev *for_wdev)
-{
-	struct wireless_dev *wdev;
-	struct ieee80211_channel *result = NULL;
-
-	WARN_ON(!mutex_is_locked(&rdev->devlist_mtx));
-
-	list_for_each_entry(wdev, &rdev->netdev_list, list) {
-		if (wdev == for_wdev)
-			continue;
-
-		/*
-		 * Lock manually to tell lockdep about allowed
-		 * nesting here if for_wdev->mtx is held already.
-		 * This is ok as it's all under the rdev devlist
-		 * mutex and as such can only be done once at any
-		 * given time.
-		 */
-		mutex_lock_nested(&wdev->mtx, SINGLE_DEPTH_NESTING);
-		if (wdev->current_bss)
-			result = wdev->current_bss->pub.channel;
-		wdev_unlock(wdev);
-
-		if (result)
-			break;
-	}
-
-	return result;
-}
-
 struct ieee80211_channel *
 rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type)
@@ -75,15 +43,22 @@ rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 	return chan;
 }
 
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
-		  int freq, enum nl80211_channel_type channel_type)
+int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev, int freq,
+		      enum nl80211_channel_type channel_type)
 {
 	struct ieee80211_channel *chan;
 	int result;
 
-	if (rdev_fixed_channel(rdev, for_wdev))
-		return -EBUSY;
+	if (wdev->iftype == NL80211_IFTYPE_MONITOR)
+		wdev = NULL;
+
+	if (wdev) {
+		ASSERT_WDEV_LOCK(wdev);
+
+		if (!netif_running(wdev->netdev))
+			return -ENETDOWN;
+	}
 
 	if (!rdev->ops->set_channel)
 		return -EOPNOTSUPP;
@@ -92,11 +67,14 @@ int rdev_set_freq(struct cfg80211_registered_device *rdev,
 	if (!chan)
 		return -EINVAL;
 
-	result = rdev->ops->set_channel(&rdev->wiphy, chan, channel_type);
+	result = rdev->ops->set_channel(&rdev->wiphy,
+					wdev ? wdev->netdev : NULL,
+					chan, channel_type);
 	if (result)
 		return result;
 
-	rdev->channel = chan;
+	if (wdev)
+		wdev->channel = chan;
 
 	return 0;
 }
diff --git a/net/wireless/core.h b/net/wireless/core.h
index b2234b436ead..ae930acf75e9 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -70,9 +70,6 @@ struct cfg80211_registered_device {
 	struct work_struct conn_work;
 	struct work_struct event_work;
 
-	/* current channel */
-	struct ieee80211_channel *channel;
-
 	/* must be last because of the way we do wiphy_priv(),
 	 * and it should at least be aligned to NETDEV_ALIGN */
 	struct wiphy wiphy __attribute__((__aligned__(NETDEV_ALIGN)));
@@ -387,15 +384,12 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev,
 			  u32 *flags, struct vif_params *params);
 void cfg80211_process_rdev_events(struct cfg80211_registered_device *rdev);
 
-struct ieee80211_channel *
-rdev_fixed_channel(struct cfg80211_registered_device *rdev,
-		   struct wireless_dev *for_wdev);
 struct ieee80211_channel *
 rdev_freq_to_chan(struct cfg80211_registered_device *rdev,
 		  int freq, enum nl80211_channel_type channel_type);
-int rdev_set_freq(struct cfg80211_registered_device *rdev,
-		  struct wireless_dev *for_wdev,
-		  int freq, enum nl80211_channel_type channel_type);
+int cfg80211_set_freq(struct cfg80211_registered_device *rdev,
+		      struct wireless_dev *wdev, int freq,
+		      enum nl80211_channel_type channel_type);
 
 u16 cfg80211_calculate_bitrate(struct rate_info *rate);
 
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index 6ef5a491fb4b..9825317e653a 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -80,15 +80,10 @@ int __cfg80211_join_ibss(struct cfg80211_registered_device *rdev,
 			 struct cfg80211_cached_keys *connkeys)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *chan;
 	int err;
 
 	ASSERT_WDEV_LOCK(wdev);
 
-	chan = rdev_fixed_channel(rdev, wdev);
-	if (chan && chan != params->channel)
-		return -EBUSY;
-
 	if (wdev->ssid_len)
 		return -EALREADY;
 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index c27bef8e0c11..ec1b4a896c6e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -588,6 +588,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 		i++;
 		NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS);
 	}
+	CMD(set_channel, SET_CHANNEL);
 
 #undef CMD
 
@@ -688,10 +689,90 @@ static int parse_txq_params(struct nlattr *tb[],
 	return 0;
 }
 
+static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
+{
+	/*
+	 * You can only set the channel explicitly for AP, mesh
+	 * and WDS type interfaces; all others have their channel
+	 * managed via their respective "establish a connection"
+	 * command (connect, join, ...)
+	 *
+	 * Monitors are special as they are normally slaved to
+	 * whatever else is going on, so they behave as though
+	 * you tried setting the wiphy channel itself.
+	 */
+	return !wdev ||
+		wdev->iftype == NL80211_IFTYPE_AP ||
+		wdev->iftype == NL80211_IFTYPE_WDS ||
+		wdev->iftype == NL80211_IFTYPE_MESH_POINT ||
+		wdev->iftype == NL80211_IFTYPE_MONITOR;
+}
+
+static int __nl80211_set_channel(struct cfg80211_registered_device *rdev,
+				 struct wireless_dev *wdev,
+				 struct genl_info *info)
+{
+	enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
+	u32 freq;
+	int result;
+
+	if (!info->attrs[NL80211_ATTR_WIPHY_FREQ])
+		return -EINVAL;
+
+	if (!nl80211_can_set_dev_channel(wdev))
+		return -EOPNOTSUPP;
+
+	if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
+		channel_type = nla_get_u32(info->attrs[
+				   NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
+		if (channel_type != NL80211_CHAN_NO_HT &&
+		    channel_type != NL80211_CHAN_HT20 &&
+		    channel_type != NL80211_CHAN_HT40PLUS &&
+		    channel_type != NL80211_CHAN_HT40MINUS)
+			return -EINVAL;
+	}
+
+	freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+
+	mutex_lock(&rdev->devlist_mtx);
+	if (wdev) {
+		wdev_lock(wdev);
+		result = cfg80211_set_freq(rdev, wdev, freq, channel_type);
+		wdev_unlock(wdev);
+	} else {
+		result = cfg80211_set_freq(rdev, NULL, freq, channel_type);
+	}
+	mutex_unlock(&rdev->devlist_mtx);
+
+	return result;
+}
+
+static int nl80211_set_channel(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cfg80211_registered_device *rdev;
+	struct net_device *netdev;
+	int result;
+
+	rtnl_lock();
+
+	result = get_rdev_dev_by_info_ifindex(info, &rdev, &netdev);
+	if (result)
+		goto unlock;
+
+	result = __nl80211_set_channel(rdev, netdev->ieee80211_ptr, info);
+
+ unlock:
+	rtnl_unlock();
+
+	return result;
+}
+
 static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
-	int result = 0, rem_txq_params = 0;
+	struct net_device *netdev = NULL;
+	struct wireless_dev *wdev;
+	int result, rem_txq_params = 0;
 	struct nlattr *nl_txq_params;
 	u32 changed;
 	u8 retry_short = 0, retry_long = 0;
@@ -700,16 +781,50 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 
 	rtnl_lock();
 
+	/*
+	 * Try to find the wiphy and netdev. Normally this
+	 * function shouldn't need the netdev, but this is
+	 * done for backward compatibility -- previously
+	 * setting the channel was done per wiphy, but now
+	 * it is per netdev. Previous userland like hostapd
+	 * also passed a netdev to set_wiphy, so that it is
+	 * possible to let that go to the right netdev!
+	 */
 	mutex_lock(&cfg80211_mutex);
 
-	rdev = __cfg80211_rdev_from_info(info);
-	if (IS_ERR(rdev)) {
-		mutex_unlock(&cfg80211_mutex);
-		result = PTR_ERR(rdev);
-		goto unlock;
+	if (info->attrs[NL80211_ATTR_IFINDEX]) {
+		int ifindex = nla_get_u32(info->attrs[NL80211_ATTR_IFINDEX]);
+
+		netdev = dev_get_by_index(genl_info_net(info), ifindex);
+		if (netdev && netdev->ieee80211_ptr) {
+			rdev = wiphy_to_dev(netdev->ieee80211_ptr->wiphy);
+			mutex_lock(&rdev->mtx);
+		} else
+			netdev = NULL;
 	}
 
-	mutex_lock(&rdev->mtx);
+	if (!netdev) {
+		rdev = __cfg80211_rdev_from_info(info);
+		if (IS_ERR(rdev)) {
+			mutex_unlock(&cfg80211_mutex);
+			result = PTR_ERR(rdev);
+			goto unlock;
+		}
+		wdev = NULL;
+		netdev = NULL;
+		result = 0;
+
+		mutex_lock(&rdev->mtx);
+	} else if (netif_running(netdev) &&
+		   nl80211_can_set_dev_channel(netdev->ieee80211_ptr))
+		wdev = netdev->ieee80211_ptr;
+	else
+		wdev = NULL;
+
+	/*
+	 * end workaround code, by now the rdev is available
+	 * and locked, and wdev may or may not be NULL.
+	 */
 
 	if (info->attrs[NL80211_ATTR_WIPHY_NAME])
 		result = cfg80211_dev_rename(
@@ -748,26 +863,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		enum nl80211_channel_type channel_type = NL80211_CHAN_NO_HT;
-		u32 freq;
-
-		result = -EINVAL;
-
-		if (info->attrs[NL80211_ATTR_WIPHY_CHANNEL_TYPE]) {
-			channel_type = nla_get_u32(info->attrs[
-					   NL80211_ATTR_WIPHY_CHANNEL_TYPE]);
-			if (channel_type != NL80211_CHAN_NO_HT &&
-			    channel_type != NL80211_CHAN_HT20 &&
-			    channel_type != NL80211_CHAN_HT40PLUS &&
-			    channel_type != NL80211_CHAN_HT40MINUS)
-				goto bad_res;
-		}
-
-		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]);
-
-		mutex_lock(&rdev->devlist_mtx);
-		result = rdev_set_freq(rdev, NULL, freq, channel_type);
-		mutex_unlock(&rdev->devlist_mtx);
+		result = __nl80211_set_channel(rdev, wdev, info);
 		if (result)
 			goto bad_res;
 	}
@@ -864,6 +960,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info)
 
  bad_res:
 	mutex_unlock(&rdev->mtx);
+	if (netdev)
+		dev_put(netdev);
  unlock:
 	rtnl_unlock();
 	return result;
@@ -3561,9 +3659,8 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 {
 	struct cfg80211_registered_device *rdev;
 	struct net_device *dev;
-	struct wireless_dev *wdev;
 	struct cfg80211_crypto_settings crypto;
-	struct ieee80211_channel *chan, *fixedchan;
+	struct ieee80211_channel *chan;
 	const u8 *bssid, *ssid, *ie = NULL, *prev_bssid = NULL;
 	int err, ssid_len, ie_len = 0;
 	bool use_mfp = false;
@@ -3606,16 +3703,6 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 		goto out;
 	}
 
-	mutex_lock(&rdev->devlist_mtx);
-	wdev = dev->ieee80211_ptr;
-	fixedchan = rdev_fixed_channel(rdev, wdev);
-	if (fixedchan && chan != fixedchan) {
-		err = -EBUSY;
-		mutex_unlock(&rdev->devlist_mtx);
-		goto out;
-	}
-	mutex_unlock(&rdev->devlist_mtx);
-
 	ssid = nla_data(info->attrs[NL80211_ATTR_SSID]);
 	ssid_len = nla_len(info->attrs[NL80211_ATTR_SSID]);
 
@@ -5185,6 +5272,12 @@ static struct genl_ops nl80211_ops[] = {
 		.policy = nl80211_policy,
 		.flags = GENL_ADMIN_PERM,
 	},
+	{
+		.cmd = NL80211_CMD_SET_CHANNEL,
+		.doit = nl80211_set_channel,
+		.policy = nl80211_policy,
+		.flags = GENL_ADMIN_PERM,
+	},
 };
 
 static struct genl_multicast_group nl80211_mlme_mcgrp = {
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index dcd7685242f7..14cf8163912a 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -740,7 +740,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 		       const u8 *prev_bssid)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct ieee80211_channel *chan;
 	struct cfg80211_bss *bss = NULL;
 	int err;
 
@@ -749,10 +748,6 @@ int __cfg80211_connect(struct cfg80211_registered_device *rdev,
 	if (wdev->sme_state != CFG80211_SME_IDLE)
 		return -EALREADY;
 
-	chan = rdev_fixed_channel(rdev, wdev);
-	if (chan && chan != connect->channel)
-		return -EBUSY;
-
 	if (WARN_ON(wdev->connect_keys)) {
 		kfree(wdev->connect_keys);
 		wdev->connect_keys = NULL;
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 9ab51838849e..75848c6cb22a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -781,16 +781,22 @@ int cfg80211_wext_siwfreq(struct net_device *dev,
 		return cfg80211_mgd_wext_siwfreq(dev, info, wextfreq, extra);
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_siwfreq(dev, info, wextfreq, extra);
-	default:
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_WDS:
+	case NL80211_IFTYPE_MESH_POINT:
 		freq = cfg80211_wext_freq(wdev->wiphy, wextfreq);
 		if (freq < 0)
 			return freq;
 		if (freq == 0)
 			return -EINVAL;
+		wdev_lock(wdev);
 		mutex_lock(&rdev->devlist_mtx);
-		err = rdev_set_freq(rdev, NULL, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		mutex_unlock(&rdev->devlist_mtx);
+		wdev_unlock(wdev);
 		return err;
+	default:
+		return -EOPNOTSUPP;
 	}
 }
 EXPORT_SYMBOL_GPL(cfg80211_wext_siwfreq);
@@ -800,7 +806,6 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 			  struct iw_freq *freq, char *extra)
 {
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
-	struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_STATION:
@@ -808,9 +813,9 @@ int cfg80211_wext_giwfreq(struct net_device *dev,
 	case NL80211_IFTYPE_ADHOC:
 		return cfg80211_ibss_wext_giwfreq(dev, info, freq, extra);
 	default:
-		if (!rdev->channel)
+		if (!wdev->channel)
 			return -EINVAL;
-		freq->m = rdev->channel->center_freq;
+		freq->m = wdev->channel->center_freq;
 		freq->e = 6;
 		return 0;
 	}
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 5615a8802536..8e5ab4f4e9c4 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -107,7 +107,7 @@ int cfg80211_mgd_wext_siwfreq(struct net_device *dev,
 
 	/* SSID is not set, we just want to switch channel */
 	if (chan && !wdev->wext.connect.ssid_len) {
-		err = rdev_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
+		err = cfg80211_set_freq(rdev, wdev, freq, NL80211_CHAN_NO_HT);
 		goto out;
 	}
 
-- 
cgit v1.2.3


From 5e688883563ef62bd3725922f1a795a8253df499 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Sat, 8 May 2010 11:24:56 +0200
Subject: ALSA: sound/usb: fix UAC1 regression

Commit 23caaf19b ("ALSA: usb-mixer: Add support for Audio Class v2.0")
broke support for Class1 devices due to two faulty changes. This patch
fixes it.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Reported-and-Tested-by: The Source <thesourcehim@gmail.com>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h | 2 +-
 sound/usb/mixer.c         | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 905a87caf3fb..57f20551939d 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -244,7 +244,7 @@ struct uac_selector_unit_descriptor {
 static inline __u8 uac_selector_unit_iSelector(struct uac_selector_unit_descriptor *desc)
 {
 	__u8 *raw = (__u8 *) desc;
-	return raw[desc->bLength - 1];
+	return raw[9 + desc->bLength - 1];
 }
 
 /* 4.3.2.5 Feature Unit Descriptor */
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index 1deef623c081..e350f053440a 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -656,7 +656,7 @@ static int check_input_term(struct mixer_build *state, int id, struct usb_audio_
 		case UAC_FEATURE_UNIT: {
 			/* the header is the same for v1 and v2 */
 			struct uac_feature_unit_descriptor *d = p1;
-			id = d->bUnitID;
+			id = d->bSourceID;
 			break; /* continue to parse */
 		}
 		case UAC_MIXER_UNIT: {
-- 
cgit v1.2.3


From bbf1bb3eee86f2eef2baa14e600be454d09109ee Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Sat, 8 May 2010 16:20:53 +0200
Subject: cpu_stop: add dummy implementation for UP

When !CONFIG_SMP, cpu_stop functions weren't defined at all which
could lead to build failures if UP code uses cpu_stop facility.  Add
dummy cpu_stop implementation for UP.  The waiting variants execute
the work function directly with preempt disabled and
stop_one_cpu_nowait() schedules a workqueue work.

Makefile and ifdefs around stop_machine implementation are updated to
accommodate CONFIG_SMP && !CONFIG_STOP_MACHINE case.

Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/stop_machine.h | 69 ++++++++++++++++++++++++++++++++++++++++----
 kernel/Makefile              |  2 +-
 kernel/stop_machine.c        |  4 +++
 3 files changed, 68 insertions(+), 7 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 0e552e72a4c4..6b524a0d02e4 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -6,8 +6,6 @@
 #include <linux/list.h>
 #include <asm/system.h>
 
-#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
-
 /*
  * stop_cpu[s]() is simplistic per-cpu maximum priority cpu
  * monopolization mechanism.  The caller can specify a non-sleeping
@@ -18,9 +16,10 @@
  * up and requests are guaranteed to be served as long as the target
  * cpus are online.
  */
-
 typedef int (*cpu_stop_fn_t)(void *arg);
 
+#ifdef CONFIG_SMP
+
 struct cpu_stop_work {
 	struct list_head	list;		/* cpu_stopper->works */
 	cpu_stop_fn_t		fn;
@@ -34,12 +33,70 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg);
 
+#else	/* CONFIG_SMP */
+
+#include <linux/workqueue.h>
+
+struct cpu_stop_work {
+	struct work_struct	work;
+	cpu_stop_fn_t		fn;
+	void			*arg;
+};
+
+static inline int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
+{
+	int ret = -ENOENT;
+	preempt_disable();
+	if (cpu == smp_processor_id())
+		ret = fn(arg);
+	preempt_enable();
+	return ret;
+}
+
+static void stop_one_cpu_nowait_workfn(struct work_struct *work)
+{
+	struct cpu_stop_work *stwork =
+		container_of(work, struct cpu_stop_work, work);
+	preempt_disable();
+	stwork->fn(stwork->arg);
+	preempt_enable();
+}
+
+static inline void stop_one_cpu_nowait(unsigned int cpu,
+				       cpu_stop_fn_t fn, void *arg,
+				       struct cpu_stop_work *work_buf)
+{
+	if (cpu == smp_processor_id()) {
+		INIT_WORK(&work_buf->work, stop_one_cpu_nowait_workfn);
+		work_buf->fn = fn;
+		work_buf->arg = arg;
+		schedule_work(&work_buf->work);
+	}
+}
+
+static inline int stop_cpus(const struct cpumask *cpumask,
+			    cpu_stop_fn_t fn, void *arg)
+{
+	if (cpumask_test_cpu(raw_smp_processor_id(), cpumask))
+		return stop_one_cpu(raw_smp_processor_id(), fn, arg);
+	return -ENOENT;
+}
+
+static inline int try_stop_cpus(const struct cpumask *cpumask,
+				cpu_stop_fn_t fn, void *arg)
+{
+	return stop_cpus(cpumask, fn, arg);
+}
+
+#endif	/* CONFIG_SMP */
+
 /*
  * stop_machine "Bogolock": stop the entire machine, disable
  * interrupts.  This is a very heavy lock, which is equivalent to
  * grabbing every spinlock (and more).  So the "read" side to such a
  * lock is anything which disables preeempt.
  */
+#if defined(CONFIG_STOP_MACHINE) && defined(CONFIG_SMP)
 
 /**
  * stop_machine: freeze the machine on all CPUs and run this function
@@ -67,7 +124,7 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
  */
 int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus);
 
-#else
+#else	 /* CONFIG_STOP_MACHINE && CONFIG_SMP */
 
 static inline int stop_machine(int (*fn)(void *), void *data,
 			       const struct cpumask *cpus)
@@ -79,5 +136,5 @@ static inline int stop_machine(int (*fn)(void *), void *data,
 	return ret;
 }
 
-#endif /* CONFIG_SMP */
-#endif /* _LINUX_STOP_MACHINE */
+#endif	/* CONFIG_STOP_MACHINE && CONFIG_SMP */
+#endif	/* _LINUX_STOP_MACHINE */
diff --git a/kernel/Makefile b/kernel/Makefile
index a987aa1676b5..149e18ef1ab1 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -68,7 +68,7 @@ obj-$(CONFIG_USER_NS) += user_namespace.o
 obj-$(CONFIG_PID_NS) += pid_namespace.o
 obj-$(CONFIG_IKCONFIG) += configs.o
 obj-$(CONFIG_RESOURCE_COUNTERS) += res_counter.o
-obj-$(CONFIG_STOP_MACHINE) += stop_machine.o
+obj-$(CONFIG_SMP) += stop_machine.o
 obj-$(CONFIG_KPROBES_SANITY_TEST) += test_kprobes.o
 obj-$(CONFIG_AUDIT) += audit.o auditfilter.o audit_watch.o
 obj-$(CONFIG_AUDITSYSCALL) += auditsc.o
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index 5b20141a5ec1..ef51d1fcf5e6 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -375,6 +375,8 @@ static int __init cpu_stop_init(void)
 }
 early_initcall(cpu_stop_init);
 
+#ifdef CONFIG_STOP_MACHINE
+
 /* This controls the threads on each CPU. */
 enum stopmachine_state {
 	/* Dummy starting state for thread. */
@@ -477,3 +479,5 @@ int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
 	return ret;
 }
 EXPORT_SYMBOL_GPL(stop_machine);
+
+#endif	/* CONFIG_STOP_MACHINE */
-- 
cgit v1.2.3


From e0e37c200f1357db0dd986edb359c41c57d24f6e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 9 May 2010 08:24:39 -0700
Subject: sched: Eliminate the ts->idle_lastupdate field

Now that the only user of ts->idle_lastupdate is
update_ts_time_stats(), the entire field can be eliminated.

In update_ts_time_stats(), idle_lastupdate is first set to
"now", and a few lines later, the only user is an if() statement
that assigns a variable either to "now" or to
ts->idle_lastupdate, which has the value of "now" at that point.

Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: davej@redhat.com
LKML-Reference: <20100509082439.2fab0b4f@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tick.h     | 1 -
 kernel/time/tick-sched.c | 5 +----
 2 files changed, 1 insertion(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index d2ae79e21be3..0343eed40619 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -60,7 +60,6 @@ struct tick_sched {
 	ktime_t				idle_waketime;
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
-	ktime_t				idle_lastupdate;
 	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index e86e1c6674d1..50953f4c42b2 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -158,16 +158,13 @@ update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time)
 {
 	ktime_t delta;
 
-	ts->idle_lastupdate = now;
 	if (ts->idle_active) {
 		delta = ktime_sub(now, ts->idle_entrytime);
 		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
 		ts->idle_entrytime = now;
 	}
 
-	if (ts->idle_active && last_update_time)
-		*last_update_time = ktime_to_us(ts->idle_lastupdate);
-	else
+	if (last_update_time)
 		*last_update_time = ktime_to_us(now);
 
 }
-- 
cgit v1.2.3


From 0224cf4c5ee0d7faec83956b8e21f7d89e3df3bd Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sun, 9 May 2010 08:25:23 -0700
Subject: sched: Introduce get_cpu_iowait_time_us()

For the ondemand cpufreq governor, it is desired that the iowait
time is microaccounted in a similar way as idle time is.

This patch introduces the infrastructure to account and expose
this information via the get_cpu_iowait_time_us() function.

[akpm@linux-foundation.org: fix CONFIG_NO_HZ=n build]
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: davej@redhat.com
LKML-Reference: <20100509082523.284feab6@infradead.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/tick.h     |  4 ++++
 kernel/time/tick-sched.c | 28 ++++++++++++++++++++++++++++
 kernel/time/timer_list.c |  1 +
 3 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/tick.h b/include/linux/tick.h
index 0343eed40619..b232ccc0ee29 100644
--- a/include/linux/tick.h
+++ b/include/linux/tick.h
@@ -42,6 +42,7 @@ enum tick_nohz_mode {
  * @idle_waketime:	Time when the idle was interrupted
  * @idle_exittime:	Time when the idle state was left
  * @idle_sleeptime:	Sum of the time slept in idle with sched tick stopped
+ * @iowait_sleeptime:	Sum of the time slept in idle with sched tick stopped, with IO outstanding
  * @sleep_length:	Duration of the current idle sleep
  * @do_timer_lst:	CPU was the last one doing do_timer before going idle
  */
@@ -60,6 +61,7 @@ struct tick_sched {
 	ktime_t				idle_waketime;
 	ktime_t				idle_exittime;
 	ktime_t				idle_sleeptime;
+	ktime_t				iowait_sleeptime;
 	ktime_t				sleep_length;
 	unsigned long			last_jiffies;
 	unsigned long			next_jiffies;
@@ -123,6 +125,7 @@ extern void tick_nohz_stop_sched_tick(int inidle);
 extern void tick_nohz_restart_sched_tick(void);
 extern ktime_t tick_nohz_get_sleep_length(void);
 extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time);
+extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time);
 # else
 static inline void tick_nohz_stop_sched_tick(int inidle) { }
 static inline void tick_nohz_restart_sched_tick(void) { }
@@ -133,6 +136,7 @@ static inline ktime_t tick_nohz_get_sleep_length(void)
 	return len;
 }
 static inline u64 get_cpu_idle_time_us(int cpu, u64 *unused) { return -1; }
+static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; }
 # endif /* !NO_HZ */
 
 #endif
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 50953f4c42b2..1d7b9bc1c034 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -161,6 +161,8 @@ update_ts_time_stats(struct tick_sched *ts, ktime_t now, u64 *last_update_time)
 	if (ts->idle_active) {
 		delta = ktime_sub(now, ts->idle_entrytime);
 		ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+		if (nr_iowait_cpu() > 0)
+			ts->iowait_sleeptime = ktime_add(ts->iowait_sleeptime, delta);
 		ts->idle_entrytime = now;
 	}
 
@@ -220,6 +222,32 @@ u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time)
 }
 EXPORT_SYMBOL_GPL(get_cpu_idle_time_us);
 
+/*
+ * get_cpu_iowait_time_us - get the total iowait time of a cpu
+ * @cpu: CPU number to query
+ * @last_update_time: variable to store update time in
+ *
+ * Return the cumulative iowait time (since boot) for a given
+ * CPU, in microseconds.
+ *
+ * This time is measured via accounting rather than sampling,
+ * and is as accurate as ktime_get() is.
+ *
+ * This function returns -1 if NOHZ is not enabled.
+ */
+u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time)
+{
+	struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+
+	if (!tick_nohz_enabled)
+		return -1;
+
+	update_ts_time_stats(ts, ktime_get(), last_update_time);
+
+	return ktime_to_us(ts->iowait_sleeptime);
+}
+EXPORT_SYMBOL_GPL(get_cpu_iowait_time_us);
+
 /**
  * tick_nohz_stop_sched_tick - stop the idle tick from the idle task
  *
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 1a4a7dd78777..ab8f5e33fa92 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -176,6 +176,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
 		P_ns(idle_waketime);
 		P_ns(idle_exittime);
 		P_ns(idle_sleeptime);
+		P_ns(iowait_sleeptime);
 		P(last_jiffies);
 		P(next_jiffies);
 		P_ns(idle_expires);
-- 
cgit v1.2.3


From 37e11f3397fab21604bff506cb31ffbf70fb255a Mon Sep 17 00:00:00 2001
From: Qinghuang Feng <qhfeng.kernel@gmail.com>
Date: Sun, 25 Apr 2010 20:17:25 +0800
Subject: nilfs2: update comment for struct nilfs_dat_entry

The kernel-doc comment for struct nilfs_dat_entry names its fields with a
dt_ prefix, but the actual members use de_; fix the comment to match.

Signed-off-by: Qinghuang Feng <qhfeng.kernel@gmail.com>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 640702e97457..478ee34e9d65 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -437,10 +437,10 @@ struct nilfs_palloc_group_desc {
 
 /**
  * struct nilfs_dat_entry - disk address translation entry
- * @dt_blocknr: block number
- * @dt_start: start checkpoint number
- * @dt_end: end checkpoint number
- * @dt_rsv: reserved for future use
+ * @de_blocknr: block number
+ * @de_start: start checkpoint number
+ * @de_end: end checkpoint number
+ * @de_rsv: reserved for future use
  */
 struct nilfs_dat_entry {
 	__le64 de_blocknr;
-- 
cgit v1.2.3


From 0d9cc2332df24d3e81060c782b2ecb87c28443f9 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Mon, 26 Apr 2010 01:17:48 +0900
Subject: nilfs2: fix style problems in nilfs2_fs.h

This kills the following checkpatch warnings:

WARNING: please, no space before tabs
+^I__le32^Is_first_ino; ^I^I/* First non-reserved inode */$

WARNING: please, no space before tabs
+^I__le16  s_inode_size; ^I^I/* Size of an inode */$

WARNING: please, no space before tabs
+^Ichar^Is_volume_name[16]; ^I/* volume name */$

WARNING: please, no space before tabs
+^Ichar^Is_last_mounted[64]; ^I/* directory where last mounted */$

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 478ee34e9d65..f960e1d264e8 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -199,16 +199,16 @@ struct nilfs_super_block {
 	__le32	s_creator_os;		/* OS */
 	__le16	s_def_resuid;		/* Default uid for reserved blocks */
 	__le16	s_def_resgid;		/* Default gid for reserved blocks */
-	__le32	s_first_ino; 		/* First non-reserved inode */
+	__le32	s_first_ino;		/* First non-reserved inode */
 
-	__le16  s_inode_size; 		/* Size of an inode */
+	__le16  s_inode_size;		/* Size of an inode */
 	__le16  s_dat_entry_size;       /* Size of a dat entry */
 	__le16  s_checkpoint_size;      /* Size of a checkpoint */
 	__le16	s_segment_usage_size;	/* Size of a segment usage */
 
 	__u8	s_uuid[16];		/* 128-bit uuid for volume */
-	char	s_volume_name[16]; 	/* volume name */
-	char	s_last_mounted[64]; 	/* directory where last mounted */
+	char	s_volume_name[16];	/* volume name */
+	char	s_last_mounted[64];	/* directory where last mounted */
 
 	__le32  s_c_interval;           /* Commit interval of segment */
 	__le32  s_c_block_max;          /* Threshold of data amount for
-- 
cgit v1.2.3


From 50614bcf29d0cec6df5b84c0d8331e8b8c7d72a7 Mon Sep 17 00:00:00 2001
From: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
Date: Sat, 10 Apr 2010 17:59:15 +0900
Subject: nilfs2: insert checkpoint number in segment summary header

This adds a field to record the latest checkpoint number in the
nilfs_segment_summary structure.  This will help to recover the latest
checkpoint number from logs on disk.  This field is intended for
crucial cases in which super blocks have lost pointer to the latest
log.

Even though this will change the disk format, both backward and
forward compatibility is preserved by a size field prepared in the
segment summary header.

Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 fs/nilfs2/recovery.c      | 2 ++
 fs/nilfs2/segbuf.c        | 4 +++-
 fs/nilfs2/segbuf.h        | 4 +++-
 fs/nilfs2/segment.c       | 3 ++-
 include/linux/nilfs2_fs.h | 2 ++
 5 files changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index ba43146f3c30..bae2a516b4ee 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -105,6 +105,8 @@ static void store_segsum_info(struct nilfs_segsum_info *ssi,
 
 	ssi->nsumblk = DIV_ROUND_UP(ssi->sumbytes, blocksize);
 	ssi->nfileblk = ssi->nblocks - ssi->nsumblk - !!NILFS_SEG_HAS_SR(ssi);
+
+	/* need to verify ->ss_bytes field if read ->ss_cno */
 }
 
 /**
diff --git a/fs/nilfs2/segbuf.c b/fs/nilfs2/segbuf.c
index 9f83bc02593c..2e6a2723b8fa 100644
--- a/fs/nilfs2/segbuf.c
+++ b/fs/nilfs2/segbuf.c
@@ -134,7 +134,7 @@ int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *segbuf,
 }
 
 int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
-		       time_t ctime)
+		       time_t ctime, __u64 cno)
 {
 	int err;
 
@@ -147,6 +147,7 @@ int nilfs_segbuf_reset(struct nilfs_segment_buffer *segbuf, unsigned flags,
 	segbuf->sb_sum.sumbytes = sizeof(struct nilfs_segment_summary);
 	segbuf->sb_sum.nfinfo = segbuf->sb_sum.nfileblk = 0;
 	segbuf->sb_sum.ctime = ctime;
+	segbuf->sb_sum.cno = cno;
 	return 0;
 }
 
@@ -172,6 +173,7 @@ void nilfs_segbuf_fill_in_segsum(struct nilfs_segment_buffer *segbuf)
 	raw_sum->ss_nfinfo   = cpu_to_le32(segbuf->sb_sum.nfinfo);
 	raw_sum->ss_sumbytes = cpu_to_le32(segbuf->sb_sum.sumbytes);
 	raw_sum->ss_pad      = 0;
+	raw_sum->ss_cno      = cpu_to_le64(segbuf->sb_sum.cno);
 }
 
 /*
diff --git a/fs/nilfs2/segbuf.h b/fs/nilfs2/segbuf.h
index e21497f61b0c..fdf1c3b6d673 100644
--- a/fs/nilfs2/segbuf.h
+++ b/fs/nilfs2/segbuf.h
@@ -37,6 +37,7 @@
  * @sumbytes: Byte count of segment summary
  * @nfileblk: Total number of file blocks
  * @seg_seq: Segment sequence number
+ * @cno: Checkpoint number
  * @ctime: Creation time
  * @next: Block number of the next full segment
  */
@@ -48,6 +49,7 @@ struct nilfs_segsum_info {
 	unsigned long		sumbytes;
 	unsigned long		nfileblk;
 	u64			seg_seq;
+	__u64			cno;
 	time_t			ctime;
 	sector_t		next;
 };
@@ -135,7 +137,7 @@ void nilfs_segbuf_map_cont(struct nilfs_segment_buffer *segbuf,
 			   struct nilfs_segment_buffer *prev);
 void nilfs_segbuf_set_next_segnum(struct nilfs_segment_buffer *, __u64,
 				  struct the_nilfs *);
-int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t);
+int nilfs_segbuf_reset(struct nilfs_segment_buffer *, unsigned, time_t, __u64);
 int nilfs_segbuf_extend_segsum(struct nilfs_segment_buffer *);
 int nilfs_segbuf_extend_payload(struct nilfs_segment_buffer *,
 				struct buffer_head **);
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index a17bfa193e3f..9f50fde0cd06 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -366,7 +366,8 @@ static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
 
 	if (nilfs_doing_gc())
 		flags = NILFS_SS_GC;
-	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime);
+	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime,
+				 sci->sc_sbi->s_nilfs->ns_cno);
 	if (unlikely(err))
 		return err;
 
diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index f960e1d264e8..6505c00f1fc1 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -377,6 +377,7 @@ union nilfs_binfo {
  * @ss_nfinfo: number of finfo structures
  * @ss_sumbytes: total size of segment summary in bytes
  * @ss_pad: padding
+ * @ss_cno: checkpoint number
  */
 struct nilfs_segment_summary {
 	__le32 ss_datasum;
@@ -391,6 +392,7 @@ struct nilfs_segment_summary {
 	__le32 ss_nfinfo;
 	__le32 ss_sumbytes;
 	__le32 ss_pad;
+	__le64 ss_cno;
 	/* array of finfo structures */
 };
 
-- 
cgit v1.2.3


From 400ade845cb9930552e791bbd658a0953f68499d Mon Sep 17 00:00:00 2001
From: Jiro SEKIBA <jir@unicus.jp>
Date: Sun, 2 May 2010 23:29:04 +0900
Subject: nilfs2: enlarge s_volume_name member in nilfs_super_block

Currently s_volume_name has only 16 bytes, which is too small for a modern
filesystem.

s_last_mounted resides just after s_volume_name and has 64 bytes.

s_last_mounted historically came from ext2, but is not used in nilfs2 at all.
Deleting the s_last_mounted member and merging its space into s_volume_name
enlarges s_volume_name to up to 80 bytes for the volume label.

When userland tools built against the old header read a new disk, they will
just ignore the additional bytes stored in the former s_last_mounted area.
Although the old disk format has only a 16-byte label, this causes no problem
when tools built against the new header read an old disk.

Signed-off-by: Jiro SEKIBA <jir@unicus.jp>
Signed-off-by: Ryusuke Konishi <konishi.ryusuke@lab.ntt.co.jp>
---
 include/linux/nilfs2_fs.h | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h
index 6505c00f1fc1..8c2c6116e788 100644
--- a/include/linux/nilfs2_fs.h
+++ b/include/linux/nilfs2_fs.h
@@ -207,8 +207,7 @@ struct nilfs_super_block {
 	__le16	s_segment_usage_size;	/* Size of a segment usage */
 
 	__u8	s_uuid[16];		/* 128-bit uuid for volume */
-	char	s_volume_name[16];	/* volume name */
-	char	s_last_mounted[64];	/* directory where last mounted */
+	char	s_volume_name[80];	/* volume name */
 
 	__le32  s_c_interval;           /* Commit interval of segment */
 	__le32  s_c_block_max;          /* Threshold of data amount for
-- 
cgit v1.2.3


From d7e81c269db899b800e0963dc4aceece1f82a680 Mon Sep 17 00:00:00 2001
From: John Stultz <johnstul@us.ibm.com>
Date: Fri, 7 May 2010 18:07:38 -0700
Subject: clocksource: Add clocksource_register_hz/khz interface

How to pick good mult/shift pairs has always been difficult to
describe to folks writing clocksource drivers, since it requires
careful tradeoffs in adjustment accuracy vs overflow limits.

Now, with the clocks_calc_mult_shift function, it's much
easier. However, not many clocksources have converted to using that
function, and there is still the issue of the max interval length
assumption being made by each clocksource driver independently.

So this patch simplifies the registration process by having
clocksources be registered with a hz/khz value and the registration
function taking care of setting mult/shift.

This should take most of the confusion out of writing a clocksource
driver.

Additionally it also keeps the shift size tradeoff (more accuracy vs
longer possible nohz times) centralized so the timekeeping core can
keep track of the assumptions being made.

[ tglx: Coding style and comments fixed ]

Signed-off-by: John Stultz <johnstul@us.ibm.com>
LKML-Reference: <1273280858-30143-1-git-send-email-johnstul@us.ibm.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 include/linux/clocksource.h | 19 +++++++++++++++++-
 kernel/time/clocksource.c   | 48 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
index 4bca8b60cdf7..5ea3c60c160c 100644
--- a/include/linux/clocksource.h
+++ b/include/linux/clocksource.h
@@ -273,7 +273,6 @@ static inline s64 clocksource_cyc2ns(cycle_t cycles, u32 mult, u32 shift)
 }
 
 
-/* used to install a new clocksource */
 extern int clocksource_register(struct clocksource*);
 extern void clocksource_unregister(struct clocksource*);
 extern void clocksource_touch_watchdog(void);
@@ -287,6 +286,24 @@ extern void clocksource_mark_unstable(struct clocksource *cs);
 extern void
 clocks_calc_mult_shift(u32 *mult, u32 *shift, u32 from, u32 to, u32 minsec);
 
+/*
+ * Don't call __clocksource_register_scale directly, use
+ * clocksource_register_hz/khz
+ */
+extern int
+__clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq);
+
+static inline int clocksource_register_hz(struct clocksource *cs, u32 hz)
+{
+	return __clocksource_register_scale(cs, 1, hz);
+}
+
+static inline int clocksource_register_khz(struct clocksource *cs, u32 khz)
+{
+	return __clocksource_register_scale(cs, 1000, khz);
+}
+
+
 static inline void
 clocksource_calc_mult_shift(struct clocksource *cs, u32 freq, u32 minsec)
 {
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 1f5dde637457..f08e99c1d561 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -625,6 +625,54 @@ static void clocksource_enqueue(struct clocksource *cs)
 	list_add(&cs->list, entry);
 }
 
+
+/*
+ * Maximum time we expect to go between ticks. This includes idle
+ * tickless time. It provides the trade off between selecting a
+ * mult/shift pair that is very precise but can only handle a short
+ * period of time, vs. a mult/shift pair that can handle long periods
+ * of time but isn't as precise.
+ *
+ * This is a subsystem constant, and actual hardware limitations
+ * may override it (ie: clocksources that wrap every 3 seconds).
+ */
+#define MAX_UPDATE_LENGTH 5 /* Seconds */
+
+/**
+ * __clocksource_register_scale - Used to install new clocksources
+ * @cs:		clocksource to be registered
+ * @scale:	Scale factor multiplied against freq to get clocksource hz
+ * @freq:	clocksource frequency (cycles per second) divided by scale
+ *
+ * Returns -EBUSY if registration fails, zero otherwise.
+ *
+ * This *SHOULD NOT* be called directly! Please use the
+ * clocksource_register_hz() or clocksource_register_khz helper functions.
+ */
+int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
+{
+
+	/*
+	 * Ideally we want to use  some of the limits used in
+	 * clocksource_max_deferment, to provide a more informed
+	 * MAX_UPDATE_LENGTH. But for now this just gets the
+	 * register interface working properly.
+	 */
+	clocks_calc_mult_shift(&cs->mult, &cs->shift, freq,
+				      NSEC_PER_SEC/scale,
+				      MAX_UPDATE_LENGTH*scale);
+	cs->max_idle_ns = clocksource_max_deferment(cs);
+
+	mutex_lock(&clocksource_mutex);
+	clocksource_enqueue(cs);
+	clocksource_select();
+	clocksource_enqueue_watchdog(cs);
+	mutex_unlock(&clocksource_mutex);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(__clocksource_register_scale);
+
+
 /**
  * clocksource_register - Used to install new clocksources
  * @t:		clocksource to be registered
-- 
cgit v1.2.3


From 0a382a74b677360096857bcb5288c340fca671ed Mon Sep 17 00:00:00 2001
From: Andrea Gelmini <andrea.gelmini@gelma.net>
Date: Sat, 27 Feb 2010 17:51:37 +0100
Subject: mtd: mtdram.h: checkpatch cleanup

include/linux/mtd/mtdram.h:6: ERROR: code indent should use tabs where possible

Signed-off-by: Andrea Gelmini <andrea.gelmini@gelma.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/mtdram.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/mtdram.h b/include/linux/mtd/mtdram.h
index 04fdc07b7353..68891313875d 100644
--- a/include/linux/mtd/mtdram.h
+++ b/include/linux/mtd/mtdram.h
@@ -3,6 +3,6 @@
 
 #include <linux/mtd/mtd.h>
 int mtdram_init_device(struct mtd_info *mtd, void *mapped_address,
-                       unsigned long size, char *name);
+			unsigned long size, char *name);
 
 #endif /* __MTD_MTDRAM_H__ */
-- 
cgit v1.2.3


From 67026418f534045525a7c39f506006cd7fbd197f Mon Sep 17 00:00:00 2001
From: Ferenc Wagner <wferi@niif.hu>
Date: Tue, 23 Mar 2010 18:09:09 +0100
Subject: mtd/nand/sh_flctl: Replace the dangerous mtd_to_flctl macro

The original macro worked only when applied to variables named 'mtd'.
While this could have been fixed by simply renaming the macro argument,
a more type-safe replacement is preferred.

Signed-off-by: Ferenc Wagner <wferi@niif.hu>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/sh_flctl.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index ab77609ec337..178b5c26c995 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -93,7 +93,10 @@
 #define INIT_FL4ECCRESULT_VAL	0x03FF03FF
 #define LOOP_TIMEOUT_MAX	0x00010000
 
-#define mtd_to_flctl(mtd)	container_of(mtd, struct sh_flctl, mtd)
+static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
+{
+	return container_of(mtdinfo, struct sh_flctl, mtd);
+}
 
 struct sh_flctl {
 	struct mtd_info		mtd;
-- 
cgit v1.2.3


From c4e773764cead9358fd4b036d1b883fff3968513 Mon Sep 17 00:00:00 2001
From: Stefani Seibold <stefani@seibold.net>
Date: Sun, 18 Apr 2010 22:46:44 +0200
Subject: mtd: fix a huge latency problem in the MTD CFI and LPDDR flash
 drivers.

The use of a memcpy() during a spinlock operation will cause very long
thread context switch delays if the flash chip bandwidth is low and the
data to be copied large, because a spinlock will disable preemption.

For example: under ubifs, which sometimes requests 128 KiB (the flash erase
size), a flash with 6.5 MB/s bandwidth will cause a preemption delay of
20 milliseconds. High priority threads will not be served during this
time, regardless of whether these threads access the flash or not. This
behavior breaks real-time operation.

The patch changes all uses of spin_lock operations on xxxx->mutex
into mutex operations, which is exactly what the name says and means.

I have checked the code of the drivers and the lock is not used from atomic
paths such as interrupts or timers. The mtdoops facility is also not used
by these drivers. So it is safe to replace the spin_lock with a mutex.

There is no performance regression since the mutex is normally not
acquired.

Changelog:
 06.03.2010 First release
 26.03.2010 Fix mutex[1] issue and tested it for compile failure

Signed-off-by: Stefani Seibold <stefani@seibold.net>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_cmdset_0001.c | 131 +++++++++++++++++-----------------
 drivers/mtd/chips/cfi_cmdset_0002.c | 122 ++++++++++++++++----------------
 drivers/mtd/chips/cfi_cmdset_0020.c | 136 ++++++++++++++++++------------------
 drivers/mtd/chips/fwh_lock.h        |   6 +-
 drivers/mtd/chips/gen_probe.c       |   3 +-
 drivers/mtd/lpddr/lpddr_cmds.c      |  79 +++++++++++----------
 include/linux/mtd/flashchip.h       |   4 +-
 7 files changed, 239 insertions(+), 242 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_cmdset_0001.c b/drivers/mtd/chips/cfi_cmdset_0001.c
index 92530433c11c..62f3ea9de848 100644
--- a/drivers/mtd/chips/cfi_cmdset_0001.c
+++ b/drivers/mtd/chips/cfi_cmdset_0001.c
@@ -725,8 +725,7 @@ static int cfi_intelext_partition_fixup(struct mtd_info *mtd,
 				/* those should be reset too since
 				   they create memory references. */
 				init_waitqueue_head(&chip->wq);
-				spin_lock_init(&chip->_spinlock);
-				chip->mutex = &chip->_spinlock;
+				mutex_init(&chip->mutex);
 				chip++;
 			}
 		}
@@ -772,9 +771,9 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
 			if (chip->priv && map_word_andequal(map, status, status_PWS, status_PWS))
 				break;
 
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			cfi_udelay(1);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			/* Someone else might have been playing with it. */
 			return -EAGAIN;
 		}
@@ -821,9 +820,9 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
 				return -EIO;
 			}
 
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			cfi_udelay(1);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			/* Nobody will touch it while it's in state FL_ERASE_SUSPENDING.
 			   So we can just loop here. */
 		}
@@ -850,10 +849,10 @@ static int chip_ready (struct map_info *map, struct flchip *chip, unsigned long
 	sleep:
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		return -EAGAIN;
 	}
 }
@@ -899,20 +898,20 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 			 * it'll happily send us to sleep.  In any case, when
 			 * get_chip returns success we're clear to go ahead.
 			 */
-			ret = spin_trylock(contender->mutex);
+			ret = mutex_trylock(&contender->mutex);
 			spin_unlock(&shared->lock);
 			if (!ret)
 				goto retry;
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			ret = chip_ready(map, contender, contender->start, mode);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 
 			if (ret == -EAGAIN) {
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				goto retry;
 			}
 			if (ret) {
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				return ret;
 			}
 			spin_lock(&shared->lock);
@@ -921,10 +920,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 			 * in FL_SYNCING state. Put contender and retry. */
 			if (chip->state == FL_SYNCING) {
 				put_chip(map, contender, contender->start);
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				goto retry;
 			}
-			spin_unlock(contender->mutex);
+			mutex_unlock(&contender->mutex);
 		}
 
 		/* Check if we already have suspended erase
@@ -934,10 +933,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 			spin_unlock(&shared->lock);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			goto retry;
 		}
 
@@ -967,12 +966,12 @@ static void put_chip(struct map_info *map, struct flchip *chip, unsigned long ad
 			if (shared->writing && shared->writing != chip) {
 				/* give back ownership to who we loaned it from */
 				struct flchip *loaner = shared->writing;
-				spin_lock(loaner->mutex);
+				mutex_lock(&loaner->mutex);
 				spin_unlock(&shared->lock);
-				spin_unlock(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				put_chip(map, loaner, loaner->start);
-				spin_lock(chip->mutex);
-				spin_unlock(loaner->mutex);
+				mutex_lock(&chip->mutex);
+				mutex_unlock(&loaner->mutex);
 				wake_up(&chip->wq);
 				return;
 			}
@@ -1142,7 +1141,7 @@ static int __xipram xip_wait_for_operation(
 			(void) map_read(map, adr);
 			xip_iprefetch();
 			local_irq_enable();
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			xip_iprefetch();
 			cond_resched();
 
@@ -1152,15 +1151,15 @@ static int __xipram xip_wait_for_operation(
 			 * a suspended erase state.  If so let's wait
 			 * until it's done.
 			 */
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			while (chip->state != newstate) {
 				DECLARE_WAITQUEUE(wait, current);
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				add_wait_queue(&chip->wq, &wait);
-				spin_unlock(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				schedule();
 				remove_wait_queue(&chip->wq, &wait);
-				spin_lock(chip->mutex);
+				mutex_lock(&chip->mutex);
 			}
 			/* Disallow XIP again */
 			local_irq_disable();
@@ -1216,10 +1215,10 @@ static int inval_cache_and_wait_for_operation(
 	int chip_state = chip->state;
 	unsigned int timeo, sleep_time, reset_timeo;
 
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	if (inval_len)
 		INVALIDATE_CACHED_RANGE(map, inval_adr, inval_len);
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	timeo = chip_op_time_max;
 	if (!timeo)
@@ -1239,7 +1238,7 @@ static int inval_cache_and_wait_for_operation(
 		}
 
 		/* OK Still waiting. Drop the lock, wait a while and retry. */
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		if (sleep_time >= 1000000/HZ) {
 			/*
 			 * Half of the normal delay still remaining
@@ -1254,17 +1253,17 @@ static int inval_cache_and_wait_for_operation(
 			cond_resched();
 			timeo--;
 		}
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		while (chip->state != chip_state) {
 			/* Someone's suspended the operation: sleep */
 			DECLARE_WAITQUEUE(wait, current);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 		}
 		if (chip->erase_suspended && chip_state == FL_ERASING)  {
 			/* Erase suspend occured while sleep: reset timeout */
@@ -1300,7 +1299,7 @@ static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t a
 	/* Ensure cmd read/writes are aligned. */
 	cmd_addr = adr & ~(map_bankwidth(map)-1);
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	ret = get_chip(map, chip, cmd_addr, FL_POINT);
 
@@ -1311,7 +1310,7 @@ static int do_point_onechip (struct map_info *map, struct flchip *chip, loff_t a
 		chip->state = FL_POINT;
 		chip->ref_point_counter++;
 	}
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
 	return ret;
 }
@@ -1396,7 +1395,7 @@ static void cfi_intelext_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 		else
 			thislen = len;
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		if (chip->state == FL_POINT) {
 			chip->ref_point_counter--;
 			if(chip->ref_point_counter == 0)
@@ -1405,7 +1404,7 @@ static void cfi_intelext_unpoint(struct mtd_info *mtd, loff_t from, size_t len)
 			printk(KERN_ERR "%s: Warning: unpoint called on non pointed region\n", map->name); /* Should this give an error? */
 
 		put_chip(map, chip, chip->start);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 
 		len -= thislen;
 		ofs = 0;
@@ -1424,10 +1423,10 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 	/* Ensure cmd read/writes are aligned. */
 	cmd_addr = adr & ~(map_bankwidth(map)-1);
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, cmd_addr, FL_READY);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1441,7 +1440,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 
 	put_chip(map, chip, cmd_addr);
 
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 
@@ -1504,10 +1503,10 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 		return -EINVAL;
 	}
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, mode);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1553,7 +1552,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 	xip_enable(map, chip, adr);
  out:	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1662,10 +1661,10 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 	/* Let's determine this according to the interleave only once */
 	write_cmd = (cfi->cfiq->P_ID != 0x0200) ? CMD(0xe8) : CMD(0xe9);
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, cmd_adr, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1796,7 +1795,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	xip_enable(map, chip, cmd_adr);
  out:	put_chip(map, chip, cmd_adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1875,10 +1874,10 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 	adr += chip->start;
 
  retry:
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_ERASING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1934,7 +1933,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 		} else if (chipstatus & 0x20 && retries--) {
 			printk(KERN_DEBUG "block erase failed at 0x%08lx: status 0x%lx. Retrying...\n", adr, chipstatus);
 			put_chip(map, chip, adr);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			goto retry;
 		} else {
 			printk(KERN_ERR "%s: block erase failed at 0x%08lx (status 0x%lx)\n", map->name, adr, chipstatus);
@@ -1946,7 +1945,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	xip_enable(map, chip, adr);
  out:	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1979,7 +1978,7 @@ static void cfi_intelext_sync (struct mtd_info *mtd)
 	for (i=0; !ret && i<cfi->numchips; i++) {
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		ret = get_chip(map, chip, chip->start, FL_SYNCING);
 
 		if (!ret) {
@@ -1990,7 +1989,7 @@ static void cfi_intelext_sync (struct mtd_info *mtd)
 			 * with the chip now anyway.
 			 */
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	/* Unlock the chips again */
@@ -1998,14 +1997,14 @@ static void cfi_intelext_sync (struct mtd_info *mtd)
 	for (i--; i >=0; i--) {
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		if (chip->state == FL_SYNCING) {
 			chip->state = chip->oldstate;
 			chip->oldstate = FL_READY;
 			wake_up(&chip->wq);
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 }
 
@@ -2051,10 +2050,10 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
 
 	adr += chip->start;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_LOCKING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -2088,7 +2087,7 @@ static int __xipram do_xxlock_oneblock(struct map_info *map, struct flchip *chip
 
 	xip_enable(map, chip, adr);
 out:	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -2153,10 +2152,10 @@ do_otp_read(struct map_info *map, struct flchip *chip, u_long offset,
 	struct cfi_private *cfi = map->fldrv_priv;
 	int ret;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, chip->start, FL_JEDEC_QUERY);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -2175,7 +2174,7 @@ do_otp_read(struct map_info *map, struct flchip *chip, u_long offset,
 	INVALIDATE_CACHED_RANGE(map, chip->start + offset, size);
 
 	put_chip(map, chip, chip->start);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 
@@ -2450,7 +2449,7 @@ static int cfi_intelext_suspend(struct mtd_info *mtd)
 	for (i=0; !ret && i<cfi->numchips; i++) {
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		switch (chip->state) {
 		case FL_READY:
@@ -2482,7 +2481,7 @@ static int cfi_intelext_suspend(struct mtd_info *mtd)
 		case FL_PM_SUSPENDED:
 			break;
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	/* Unlock the chips again */
@@ -2491,7 +2490,7 @@ static int cfi_intelext_suspend(struct mtd_info *mtd)
 		for (i--; i >=0; i--) {
 			chip = &cfi->chips[i];
 
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 
 			if (chip->state == FL_PM_SUSPENDED) {
 				/* No need to force it into a known state here,
@@ -2501,7 +2500,7 @@ static int cfi_intelext_suspend(struct mtd_info *mtd)
 				chip->oldstate = FL_READY;
 				wake_up(&chip->wq);
 			}
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 		}
 	}
 
@@ -2542,7 +2541,7 @@ static void cfi_intelext_resume(struct mtd_info *mtd)
 
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		/* Go to known state. Chip may have been power cycled */
 		if (chip->state == FL_PM_SUSPENDED) {
@@ -2551,7 +2550,7 @@ static void cfi_intelext_resume(struct mtd_info *mtd)
 			wake_up(&chip->wq);
 		}
 
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	if ((mtd->flags & MTD_POWERUP_LOCK)
@@ -2571,14 +2570,14 @@ static int cfi_intelext_reset(struct mtd_info *mtd)
 		/* force the completion of any ongoing operation
 		   and switch to array mode so any bootloader in
 		   flash is accessible for soft reboot. */
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		ret = get_chip(map, chip, chip->start, FL_SHUTDOWN);
 		if (!ret) {
 			map_write(map, CMD(0xff), chip->start);
 			chip->state = FL_SHUTDOWN;
 			put_chip(map, chip, chip->start);
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	return 0;
diff --git a/drivers/mtd/chips/cfi_cmdset_0002.c b/drivers/mtd/chips/cfi_cmdset_0002.c
index ea2a7f66ddf9..c93e47d21ce0 100644
--- a/drivers/mtd/chips/cfi_cmdset_0002.c
+++ b/drivers/mtd/chips/cfi_cmdset_0002.c
@@ -565,9 +565,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 				printk(KERN_ERR "Waiting for chip to be ready timed out.\n");
 				return -EIO;
 			}
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			cfi_udelay(1);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			/* Someone else might have been playing with it. */
 			goto retry;
 		}
@@ -611,9 +611,9 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 				return -EIO;
 			}
 
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			cfi_udelay(1);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			/* Nobody will touch it while it's in state FL_ERASE_SUSPENDING.
 			   So we can just loop here. */
 		}
@@ -637,10 +637,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, unsigned long adr
 	sleep:
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		goto resettime;
 	}
 }
@@ -772,7 +772,7 @@ static void __xipram xip_udelay(struct map_info *map, struct flchip *chip,
 			(void) map_read(map, adr);
 			xip_iprefetch();
 			local_irq_enable();
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			xip_iprefetch();
 			cond_resched();
 
@@ -782,15 +782,15 @@ static void __xipram xip_udelay(struct map_info *map, struct flchip *chip,
 			 * a suspended erase state.  If so let's wait
 			 * until it's done.
 			 */
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			while (chip->state != FL_XIP_WHILE_ERASING) {
 				DECLARE_WAITQUEUE(wait, current);
 				set_current_state(TASK_UNINTERRUPTIBLE);
 				add_wait_queue(&chip->wq, &wait);
-				spin_unlock(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				schedule();
 				remove_wait_queue(&chip->wq, &wait);
-				spin_lock(chip->mutex);
+				mutex_lock(&chip->mutex);
 			}
 			/* Disallow XIP again */
 			local_irq_disable();
@@ -852,17 +852,17 @@ static void __xipram xip_udelay(struct map_info *map, struct flchip *chip,
 
 #define UDELAY(map, chip, adr, usec)  \
 do {  \
-	spin_unlock(chip->mutex);  \
+	mutex_unlock(&chip->mutex);  \
 	cfi_udelay(usec);  \
-	spin_lock(chip->mutex);  \
+	mutex_lock(&chip->mutex);  \
 } while (0)
 
 #define INVALIDATE_CACHE_UDELAY(map, chip, adr, len, usec)  \
 do {  \
-	spin_unlock(chip->mutex);  \
+	mutex_unlock(&chip->mutex);  \
 	INVALIDATE_CACHED_RANGE(map, adr, len);  \
 	cfi_udelay(usec);  \
-	spin_lock(chip->mutex);  \
+	mutex_lock(&chip->mutex);  \
 } while (0)
 
 #endif
@@ -878,10 +878,10 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 	/* Ensure cmd read/writes are aligned. */
 	cmd_addr = adr & ~(map_bankwidth(map)-1);
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, cmd_addr, FL_READY);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -894,7 +894,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 
 	put_chip(map, chip, cmd_addr);
 
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 
@@ -948,7 +948,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi
 	struct cfi_private *cfi = map->fldrv_priv;
 
  retry:
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	if (chip->state != FL_READY){
 #if 0
@@ -957,7 +957,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
 
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
@@ -986,7 +986,7 @@ static inline int do_read_secsi_onechip(struct map_info *map, struct flchip *chi
 	cfi_send_gen_cmd(0x00, cfi->addr_unlock1, chip->start, map, cfi, cfi->device_type, NULL);
 
 	wake_up(&chip->wq);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
 	return 0;
 }
@@ -1055,10 +1055,10 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 	adr += chip->start;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1101,11 +1101,11 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
 
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
 			timeo = jiffies + (HZ / 2); /* FIXME */
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 
@@ -1137,7 +1137,7 @@ static int __xipram do_write_oneword(struct map_info *map, struct flchip *chip,
  op_done:
 	chip->state = FL_READY;
 	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
 	return ret;
 }
@@ -1169,7 +1169,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 		map_word tmp_buf;
 
  retry:
-		spin_lock(cfi->chips[chipnum].mutex);
+		mutex_lock(&cfi->chips[chipnum].mutex);
 
 		if (cfi->chips[chipnum].state != FL_READY) {
 #if 0
@@ -1178,7 +1178,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&cfi->chips[chipnum].wq, &wait);
 
-			spin_unlock(cfi->chips[chipnum].mutex);
+			mutex_unlock(&cfi->chips[chipnum].mutex);
 
 			schedule();
 			remove_wait_queue(&cfi->chips[chipnum].wq, &wait);
@@ -1192,7 +1192,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 		/* Load 'tmp_buf' with old contents of flash */
 		tmp_buf = map_read(map, bus_ofs+chipstart);
 
-		spin_unlock(cfi->chips[chipnum].mutex);
+		mutex_unlock(&cfi->chips[chipnum].mutex);
 
 		/* Number of bytes to copy from buffer */
 		n = min_t(int, len, map_bankwidth(map)-i);
@@ -1247,7 +1247,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 		map_word tmp_buf;
 
  retry1:
-		spin_lock(cfi->chips[chipnum].mutex);
+		mutex_lock(&cfi->chips[chipnum].mutex);
 
 		if (cfi->chips[chipnum].state != FL_READY) {
 #if 0
@@ -1256,7 +1256,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&cfi->chips[chipnum].wq, &wait);
 
-			spin_unlock(cfi->chips[chipnum].mutex);
+			mutex_unlock(&cfi->chips[chipnum].mutex);
 
 			schedule();
 			remove_wait_queue(&cfi->chips[chipnum].wq, &wait);
@@ -1269,7 +1269,7 @@ static int cfi_amdstd_write_words(struct mtd_info *mtd, loff_t to, size_t len,
 
 		tmp_buf = map_read(map, ofs + chipstart);
 
-		spin_unlock(cfi->chips[chipnum].mutex);
+		mutex_unlock(&cfi->chips[chipnum].mutex);
 
 		tmp_buf = map_word_load_partial(map, tmp_buf, buf, 0, len);
 
@@ -1304,10 +1304,10 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 	adr += chip->start;
 	cmd_adr = adr;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1362,11 +1362,11 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
 
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
 			timeo = jiffies + (HZ / 2); /* FIXME */
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 
@@ -1394,7 +1394,7 @@ static int __xipram do_write_buffer(struct map_info *map, struct flchip *chip,
  op_done:
 	chip->state = FL_READY;
 	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
 	return ret;
 }
@@ -1494,10 +1494,10 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 
 	adr = cfi->addr_unlock1;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1530,10 +1530,10 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 			/* Someone's suspended the erase. Sleep */
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 		if (chip->erase_suspended) {
@@ -1567,7 +1567,7 @@ static int __xipram do_erase_chip(struct map_info *map, struct flchip *chip)
 	chip->state = FL_READY;
 	xip_enable(map, chip, adr);
 	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
 	return ret;
 }
@@ -1582,10 +1582,10 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	adr += chip->start;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_ERASING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -1618,10 +1618,10 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 			/* Someone's suspended the erase. Sleep */
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 		if (chip->erase_suspended) {
@@ -1657,7 +1657,7 @@ static int __xipram do_erase_oneblock(struct map_info *map, struct flchip *chip,
 
 	chip->state = FL_READY;
 	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1709,7 +1709,7 @@ static int do_atmel_lock(struct map_info *map, struct flchip *chip,
 	struct cfi_private *cfi = map->fldrv_priv;
 	int ret;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr + chip->start, FL_LOCKING);
 	if (ret)
 		goto out_unlock;
@@ -1735,7 +1735,7 @@ static int do_atmel_lock(struct map_info *map, struct flchip *chip,
 	ret = 0;
 
 out_unlock:
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1745,7 +1745,7 @@ static int do_atmel_unlock(struct map_info *map, struct flchip *chip,
 	struct cfi_private *cfi = map->fldrv_priv;
 	int ret;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr + chip->start, FL_UNLOCKING);
 	if (ret)
 		goto out_unlock;
@@ -1763,7 +1763,7 @@ static int do_atmel_unlock(struct map_info *map, struct flchip *chip,
 	ret = 0;
 
 out_unlock:
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -1791,7 +1791,7 @@ static void cfi_amdstd_sync (struct mtd_info *mtd)
 		chip = &cfi->chips[i];
 
 	retry:
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		switch(chip->state) {
 		case FL_READY:
@@ -1805,7 +1805,7 @@ static void cfi_amdstd_sync (struct mtd_info *mtd)
 			 * with the chip now anyway.
 			 */
 		case FL_SYNCING:
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			break;
 
 		default:
@@ -1813,7 +1813,7 @@ static void cfi_amdstd_sync (struct mtd_info *mtd)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
 
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 
 			schedule();
 
@@ -1828,13 +1828,13 @@ static void cfi_amdstd_sync (struct mtd_info *mtd)
 	for (i--; i >=0; i--) {
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		if (chip->state == FL_SYNCING) {
 			chip->state = chip->oldstate;
 			wake_up(&chip->wq);
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 }
 
@@ -1850,7 +1850,7 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd)
 	for (i=0; !ret && i<cfi->numchips; i++) {
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		switch(chip->state) {
 		case FL_READY:
@@ -1870,7 +1870,7 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd)
 			ret = -EAGAIN;
 			break;
 		}
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	/* Unlock the chips again */
@@ -1879,13 +1879,13 @@ static int cfi_amdstd_suspend(struct mtd_info *mtd)
 		for (i--; i >=0; i--) {
 			chip = &cfi->chips[i];
 
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 
 			if (chip->state == FL_PM_SUSPENDED) {
 				chip->state = chip->oldstate;
 				wake_up(&chip->wq);
 			}
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 		}
 	}
 
@@ -1904,7 +1904,7 @@ static void cfi_amdstd_resume(struct mtd_info *mtd)
 
 		chip = &cfi->chips[i];
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		if (chip->state == FL_PM_SUSPENDED) {
 			chip->state = FL_READY;
@@ -1914,7 +1914,7 @@ static void cfi_amdstd_resume(struct mtd_info *mtd)
 		else
 			printk(KERN_ERR "Argh. Chip not in PM_SUSPENDED state upon resume()\n");
 
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 }
 
diff --git a/drivers/mtd/chips/cfi_cmdset_0020.c b/drivers/mtd/chips/cfi_cmdset_0020.c
index 0667a671525d..e54e8c169d76 100644
--- a/drivers/mtd/chips/cfi_cmdset_0020.c
+++ b/drivers/mtd/chips/cfi_cmdset_0020.c
@@ -265,7 +265,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 
 	timeo = jiffies + HZ;
  retry:
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* Check that the chip's ready to talk to us.
 	 * If it's in FL_ERASING state, suspend it and make it talk now.
@@ -296,15 +296,15 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 				/* make sure we're in 'read status' mode */
 				map_write(map, CMD(0x70), cmd_addr);
 				chip->state = FL_ERASING;
-				spin_unlock_bh(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				printk(KERN_ERR "Chip not ready after erase "
 				       "suspended: status = 0x%lx\n", status.x[0]);
 				return -EIO;
 			}
 
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			cfi_udelay(1);
-			spin_lock_bh(chip->mutex);
+			mutex_lock(&chip->mutex);
 		}
 
 		suspended = 1;
@@ -335,13 +335,13 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 
 		/* Urgh. Chip not yet ready to talk to us. */
 		if (time_after(jiffies, timeo)) {
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "waiting for chip to be ready timed out in read. WSM status = %lx\n", status.x[0]);
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		goto retry;
 
@@ -351,7 +351,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 		   someone changes the status */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
 		timeo = jiffies + HZ;
@@ -376,7 +376,7 @@ static inline int do_read_onechip(struct map_info *map, struct flchip *chip, lof
 	}
 
 	wake_up(&chip->wq);
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 
@@ -445,7 +445,7 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 #ifdef DEBUG_CFI_FEATURES
        printk("%s: chip->state[%d]\n", __func__, chip->state);
 #endif
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* Check that the chip's ready to talk to us.
 	 * Later, we can actually think about interrupting it
@@ -470,14 +470,14 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 			break;
 		/* Urgh. Chip not yet ready to talk to us. */
 		if (time_after(jiffies, timeo)) {
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
                         printk(KERN_ERR "waiting for chip to be ready timed out in buffer write Xstatus = %lx, status = %lx\n",
                                status.x[0], map_read(map, cmd_adr).x[0]);
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		goto retry;
 
@@ -486,7 +486,7 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 		   someone changes the status */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
 		timeo = jiffies + HZ;
@@ -503,16 +503,16 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 		if (map_word_andequal(map, status, status_OK, status_OK))
 			break;
 
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		if (++z > 100) {
 			/* Argh. Not ready for write to buffer */
 			DISABLE_VPP(map);
                         map_write(map, CMD(0x70), cmd_adr);
 			chip->state = FL_STATUS;
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "Chip not ready for buffer write. Xstatus = %lx\n", status.x[0]);
 			return -EIO;
 		}
@@ -532,9 +532,9 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 	map_write(map, CMD(0xd0), cmd_adr);
 	chip->state = FL_WRITING;
 
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	cfi_udelay(chip->buffer_write_time);
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	timeo = jiffies + (HZ/2);
 	z = 0;
@@ -543,11 +543,11 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 			/* Someone's suspended the write. Sleep */
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
 			timeo = jiffies + (HZ / 2); /* FIXME */
-			spin_lock_bh(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 
@@ -563,16 +563,16 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
                         map_write(map, CMD(0x70), adr);
 			chip->state = FL_STATUS;
 			DISABLE_VPP(map);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "waiting for chip to be ready timed out in bufwrite\n");
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		z++;
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 	}
 	if (!z) {
 		chip->buffer_write_time--;
@@ -596,11 +596,11 @@ static inline int do_write_buffer(struct map_info *map, struct flchip *chip,
 		/* put back into read status register mode */
 		map_write(map, CMD(0x70), adr);
 		wake_up(&chip->wq);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return map_word_bitsset(map, status, CMD(0x02)) ? -EROFS : -EIO;
 	}
 	wake_up(&chip->wq);
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 
         return 0;
 }
@@ -749,7 +749,7 @@ static inline int do_erase_oneblock(struct map_info *map, struct flchip *chip, u
 
 	timeo = jiffies + HZ;
 retry:
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* Check that the chip's ready to talk to us. */
 	switch (chip->state) {
@@ -766,13 +766,13 @@ retry:
 
 		/* Urgh. Chip not yet ready to talk to us. */
 		if (time_after(jiffies, timeo)) {
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "waiting for chip to be ready timed out in erase\n");
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		goto retry;
 
@@ -781,7 +781,7 @@ retry:
 		   someone changes the status */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
 		timeo = jiffies + HZ;
@@ -797,9 +797,9 @@ retry:
 	map_write(map, CMD(0xD0), adr);
 	chip->state = FL_ERASING;
 
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	msleep(1000);
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* FIXME. Use a timer to check this, and return immediately. */
 	/* Once the state machine's known to be working I'll do that */
@@ -810,11 +810,11 @@ retry:
 			/* Someone's suspended the erase. Sleep */
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
 			timeo = jiffies + (HZ*20); /* FIXME */
-			spin_lock_bh(chip->mutex);
+			mutex_lock(&chip->mutex);
 			continue;
 		}
 
@@ -828,14 +828,14 @@ retry:
 			chip->state = FL_STATUS;
 			printk(KERN_ERR "waiting for erase to complete timed out. Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]);
 			DISABLE_VPP(map);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 	}
 
 	DISABLE_VPP(map);
@@ -878,7 +878,7 @@ retry:
 				printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x. Retrying...\n", adr, chipstatus);
 				timeo = jiffies + HZ;
 				chip->state = FL_STATUS;
-				spin_unlock_bh(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				goto retry;
 			}
 			printk(KERN_DEBUG "Chip erase failed at 0x%08lx: status 0x%x\n", adr, chipstatus);
@@ -887,7 +887,7 @@ retry:
 	}
 
 	wake_up(&chip->wq);
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -995,7 +995,7 @@ static void cfi_staa_sync (struct mtd_info *mtd)
 		chip = &cfi->chips[i];
 
 	retry:
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		switch(chip->state) {
 		case FL_READY:
@@ -1009,7 +1009,7 @@ static void cfi_staa_sync (struct mtd_info *mtd)
 			 * with the chip now anyway.
 			 */
 		case FL_SYNCING:
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			break;
 
 		default:
@@ -1017,7 +1017,7 @@ static void cfi_staa_sync (struct mtd_info *mtd)
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
 
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 		        remove_wait_queue(&chip->wq, &wait);
 
@@ -1030,13 +1030,13 @@ static void cfi_staa_sync (struct mtd_info *mtd)
 	for (i--; i >=0; i--) {
 		chip = &cfi->chips[i];
 
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		if (chip->state == FL_SYNCING) {
 			chip->state = chip->oldstate;
 			wake_up(&chip->wq);
 		}
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 }
 
@@ -1054,7 +1054,7 @@ static inline int do_lock_oneblock(struct map_info *map, struct flchip *chip, un
 
 	timeo = jiffies + HZ;
 retry:
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* Check that the chip's ready to talk to us. */
 	switch (chip->state) {
@@ -1071,13 +1071,13 @@ retry:
 
 		/* Urgh. Chip not yet ready to talk to us. */
 		if (time_after(jiffies, timeo)) {
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "waiting for chip to be ready timed out in lock\n");
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		goto retry;
 
@@ -1086,7 +1086,7 @@ retry:
 		   someone changes the status */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
 		timeo = jiffies + HZ;
@@ -1098,9 +1098,9 @@ retry:
 	map_write(map, CMD(0x01), adr);
 	chip->state = FL_LOCKING;
 
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	msleep(1000);
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* FIXME. Use a timer to check this, and return immediately. */
 	/* Once the state machine's known to be working I'll do that */
@@ -1118,21 +1118,21 @@ retry:
 			chip->state = FL_STATUS;
 			printk(KERN_ERR "waiting for lock to complete timed out. Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]);
 			DISABLE_VPP(map);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 	}
 
 	/* Done and happy. */
 	chip->state = FL_STATUS;
 	DISABLE_VPP(map);
 	wake_up(&chip->wq);
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 static int cfi_staa_lock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
@@ -1203,7 +1203,7 @@ static inline int do_unlock_oneblock(struct map_info *map, struct flchip *chip,
 
 	timeo = jiffies + HZ;
 retry:
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* Check that the chip's ready to talk to us. */
 	switch (chip->state) {
@@ -1220,13 +1220,13 @@ retry:
 
 		/* Urgh. Chip not yet ready to talk to us. */
 		if (time_after(jiffies, timeo)) {
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			printk(KERN_ERR "waiting for chip to be ready timed out in unlock\n");
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the lock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
 		goto retry;
 
@@ -1235,7 +1235,7 @@ retry:
 		   someone changes the status */
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
 		timeo = jiffies + HZ;
@@ -1247,9 +1247,9 @@ retry:
 	map_write(map, CMD(0xD0), adr);
 	chip->state = FL_UNLOCKING;
 
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	msleep(1000);
-	spin_lock_bh(chip->mutex);
+	mutex_lock(&chip->mutex);
 
 	/* FIXME. Use a timer to check this, and return immediately. */
 	/* Once the state machine's known to be working I'll do that */
@@ -1267,21 +1267,21 @@ retry:
 			chip->state = FL_STATUS;
 			printk(KERN_ERR "waiting for unlock to complete timed out. Xstatus = %lx, status = %lx.\n", status.x[0], map_read(map, adr).x[0]);
 			DISABLE_VPP(map);
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			return -EIO;
 		}
 
 		/* Latency issues. Drop the unlock, wait a while and retry */
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		cfi_udelay(1);
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 	}
 
 	/* Done and happy. */
 	chip->state = FL_STATUS;
 	DISABLE_VPP(map);
 	wake_up(&chip->wq);
-	spin_unlock_bh(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 static int cfi_staa_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len)
@@ -1334,7 +1334,7 @@ static int cfi_staa_suspend(struct mtd_info *mtd)
 	for (i=0; !ret && i<cfi->numchips; i++) {
 		chip = &cfi->chips[i];
 
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		switch(chip->state) {
 		case FL_READY:
@@ -1354,7 +1354,7 @@ static int cfi_staa_suspend(struct mtd_info *mtd)
 			ret = -EAGAIN;
 			break;
 		}
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 
 	/* Unlock the chips again */
@@ -1363,7 +1363,7 @@ static int cfi_staa_suspend(struct mtd_info *mtd)
 		for (i--; i >=0; i--) {
 			chip = &cfi->chips[i];
 
-			spin_lock_bh(chip->mutex);
+			mutex_lock(&chip->mutex);
 
 			if (chip->state == FL_PM_SUSPENDED) {
 				/* No need to force it into a known state here,
@@ -1372,7 +1372,7 @@ static int cfi_staa_suspend(struct mtd_info *mtd)
 				chip->state = chip->oldstate;
 				wake_up(&chip->wq);
 			}
-			spin_unlock_bh(chip->mutex);
+			mutex_unlock(&chip->mutex);
 		}
 	}
 
@@ -1390,7 +1390,7 @@ static void cfi_staa_resume(struct mtd_info *mtd)
 
 		chip = &cfi->chips[i];
 
-		spin_lock_bh(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		/* Go to known state. Chip may have been power cycled */
 		if (chip->state == FL_PM_SUSPENDED) {
@@ -1399,7 +1399,7 @@ static void cfi_staa_resume(struct mtd_info *mtd)
 			wake_up(&chip->wq);
 		}
 
-		spin_unlock_bh(chip->mutex);
+		mutex_unlock(&chip->mutex);
 	}
 }
 
diff --git a/drivers/mtd/chips/fwh_lock.h b/drivers/mtd/chips/fwh_lock.h
index 57e0e4e921f9..d18064977192 100644
--- a/drivers/mtd/chips/fwh_lock.h
+++ b/drivers/mtd/chips/fwh_lock.h
@@ -58,10 +58,10 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip,
 	 * to flash memory - that means that we don't have to check status
 	 * and timeout.
 	 */
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, adr, FL_LOCKING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -72,7 +72,7 @@ static int fwh_xxlock_oneblock(struct map_info *map, struct flchip *chip,
 	/* Done and happy. */
 	chip->state = chip->oldstate;
 	put_chip(map, chip, adr);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return 0;
 }
 
diff --git a/drivers/mtd/chips/gen_probe.c b/drivers/mtd/chips/gen_probe.c
index e2dc96441e05..fcc1bc02c8a2 100644
--- a/drivers/mtd/chips/gen_probe.c
+++ b/drivers/mtd/chips/gen_probe.c
@@ -155,8 +155,7 @@ static struct cfi_private *genprobe_ident_chips(struct map_info *map, struct chi
 			pchip->start = (i << cfi.chipshift);
 			pchip->state = FL_READY;
 			init_waitqueue_head(&pchip->wq);
-			spin_lock_init(&pchip->_spinlock);
-			pchip->mutex = &pchip->_spinlock;
+			mutex_init(&pchip->mutex);
 		}
 	}
 
diff --git a/drivers/mtd/lpddr/lpddr_cmds.c b/drivers/mtd/lpddr/lpddr_cmds.c
index e22ca49583e7..eb6f437ca9ec 100644
--- a/drivers/mtd/lpddr/lpddr_cmds.c
+++ b/drivers/mtd/lpddr/lpddr_cmds.c
@@ -106,8 +106,7 @@ struct mtd_info *lpddr_cmdset(struct map_info *map)
 			/* those should be reset too since
 			   they create memory references. */
 			init_waitqueue_head(&chip->wq);
-			spin_lock_init(&chip->_spinlock);
-			chip->mutex = &chip->_spinlock;
+			mutex_init(&chip->mutex);
 			chip++;
 		}
 	}
@@ -143,7 +142,7 @@ static int wait_for_ready(struct map_info *map, struct flchip *chip,
 		}
 
 		/* OK Still waiting. Drop the lock, wait a while and retry. */
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		if (sleep_time >= 1000000/HZ) {
 			/*
 			 * Half of the normal delay still remaining
@@ -158,17 +157,17 @@ static int wait_for_ready(struct map_info *map, struct flchip *chip,
 			cond_resched();
 			timeo--;
 		}
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 
 		while (chip->state != chip_state) {
 			/* Someone's suspended the operation: sleep */
 			DECLARE_WAITQUEUE(wait, current);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 		}
 		if (chip->erase_suspended || chip->write_suspended)  {
 			/* Suspend has occured while sleep: reset timeout */
@@ -229,20 +228,20 @@ static int get_chip(struct map_info *map, struct flchip *chip, int mode)
 			 * it'll happily send us to sleep.  In any case, when
 			 * get_chip returns success we're clear to go ahead.
 			 */
-			ret = spin_trylock(contender->mutex);
+			ret = mutex_trylock(&contender->mutex);
 			spin_unlock(&shared->lock);
 			if (!ret)
 				goto retry;
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			ret = chip_ready(map, contender, mode);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 
 			if (ret == -EAGAIN) {
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				goto retry;
 			}
 			if (ret) {
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				return ret;
 			}
 			spin_lock(&shared->lock);
@@ -251,10 +250,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, int mode)
 			 * state. Put contender and retry. */
 			if (chip->state == FL_SYNCING) {
 				put_chip(map, contender);
-				spin_unlock(contender->mutex);
+				mutex_unlock(&contender->mutex);
 				goto retry;
 			}
-			spin_unlock(contender->mutex);
+			mutex_unlock(&contender->mutex);
 		}
 
 		/* Check if we have suspended erase on this chip.
@@ -264,10 +263,10 @@ static int get_chip(struct map_info *map, struct flchip *chip, int mode)
 			spin_unlock(&shared->lock);
 			set_current_state(TASK_UNINTERRUPTIBLE);
 			add_wait_queue(&chip->wq, &wait);
-			spin_unlock(chip->mutex);
+			mutex_unlock(&chip->mutex);
 			schedule();
 			remove_wait_queue(&chip->wq, &wait);
-			spin_lock(chip->mutex);
+			mutex_lock(&chip->mutex);
 			goto retry;
 		}
 
@@ -336,10 +335,10 @@ static int chip_ready(struct map_info *map, struct flchip *chip, int mode)
 sleep:
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		add_wait_queue(&chip->wq, &wait);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		schedule();
 		remove_wait_queue(&chip->wq, &wait);
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		return -EAGAIN;
 	}
 }
@@ -355,12 +354,12 @@ static void put_chip(struct map_info *map, struct flchip *chip)
 			if (shared->writing && shared->writing != chip) {
 				/* give back the ownership */
 				struct flchip *loaner = shared->writing;
-				spin_lock(loaner->mutex);
+				mutex_lock(&loaner->mutex);
 				spin_unlock(&shared->lock);
-				spin_unlock(chip->mutex);
+				mutex_unlock(&chip->mutex);
 				put_chip(map, loaner);
-				spin_lock(chip->mutex);
-				spin_unlock(loaner->mutex);
+				mutex_lock(&chip->mutex);
+				mutex_unlock(&loaner->mutex);
 				wake_up(&chip->wq);
 				return;
 			}
@@ -413,10 +412,10 @@ int do_write_buffer(struct map_info *map, struct flchip *chip,
 
 	wbufsize = 1 << lpddr->qinfo->BufSizeShift;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 	/* Figure out the number of words to write */
@@ -477,7 +476,7 @@ int do_write_buffer(struct map_info *map, struct flchip *chip,
 	}
 
  out:	put_chip(map, chip);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -489,10 +488,10 @@ int do_erase_oneblock(struct mtd_info *mtd, loff_t adr)
 	struct flchip *chip = &lpddr->chips[chipnum];
 	int ret;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, FL_ERASING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 	send_pfow_command(map, LPDDR_BLOCK_ERASE, adr, 0, NULL);
@@ -504,7 +503,7 @@ int do_erase_oneblock(struct mtd_info *mtd, loff_t adr)
 		goto out;
 	}
  out:	put_chip(map, chip);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -517,10 +516,10 @@ static int lpddr_read(struct mtd_info *mtd, loff_t adr, size_t len,
 	struct flchip *chip = &lpddr->chips[chipnum];
 	int ret = 0;
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, FL_READY);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -528,7 +527,7 @@ static int lpddr_read(struct mtd_info *mtd, loff_t adr, size_t len,
 	*retlen = len;
 
 	put_chip(map, chip);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -568,9 +567,9 @@ static int lpddr_point(struct mtd_info *mtd, loff_t adr, size_t len,
 		else
 			thislen = len;
 		/* get the chip */
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		ret = get_chip(map, chip, FL_POINT);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		if (ret)
 			break;
 
@@ -610,7 +609,7 @@ static void lpddr_unpoint (struct mtd_info *mtd, loff_t adr, size_t len)
 		else
 			thislen = len;
 
-		spin_lock(chip->mutex);
+		mutex_lock(&chip->mutex);
 		if (chip->state == FL_POINT) {
 			chip->ref_point_counter--;
 			if (chip->ref_point_counter == 0)
@@ -620,7 +619,7 @@ static void lpddr_unpoint (struct mtd_info *mtd, loff_t adr, size_t len)
 					"pointed region\n", map->name);
 
 		put_chip(map, chip);
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 
 		len -= thislen;
 		ofs = 0;
@@ -726,10 +725,10 @@ int do_xxlock(struct mtd_info *mtd, loff_t adr, uint32_t len, int thunk)
 	int chipnum = adr >> lpddr->chipshift;
 	struct flchip *chip = &lpddr->chips[chipnum];
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, FL_LOCKING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -749,7 +748,7 @@ int do_xxlock(struct mtd_info *mtd, loff_t adr, uint32_t len, int thunk)
 		goto out;
 	}
 out:	put_chip(map, chip);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
@@ -770,10 +769,10 @@ int word_program(struct map_info *map, loff_t adr, uint32_t curval)
 	int chipnum = adr >> lpddr->chipshift;
 	struct flchip *chip = &lpddr->chips[chipnum];
 
-	spin_lock(chip->mutex);
+	mutex_lock(&chip->mutex);
 	ret = get_chip(map, chip, FL_WRITING);
 	if (ret) {
-		spin_unlock(chip->mutex);
+		mutex_unlock(&chip->mutex);
 		return ret;
 	}
 
@@ -787,7 +786,7 @@ int word_program(struct map_info *map, loff_t adr, uint32_t curval)
 	}
 
 out:	put_chip(map, chip);
-	spin_unlock(chip->mutex);
+	mutex_unlock(&chip->mutex);
 	return ret;
 }
 
diff --git a/include/linux/mtd/flashchip.h b/include/linux/mtd/flashchip.h
index d0bf422ae374..f43e9b49b751 100644
--- a/include/linux/mtd/flashchip.h
+++ b/include/linux/mtd/flashchip.h
@@ -15,6 +15,7 @@
  * has asm/spinlock.h, or 2.4, which has linux/spinlock.h
  */
 #include <linux/sched.h>
+#include <linux/mutex.h>
 
 typedef enum {
 	FL_READY,
@@ -74,8 +75,7 @@ struct flchip {
 	unsigned int erase_suspended:1;
 	unsigned long in_progress_block_addr;
 
-	spinlock_t *mutex;
-	spinlock_t _spinlock; /* We do it like this because sometimes they'll be shared. */
+	struct mutex mutex;
 	wait_queue_head_t wq; /* Wait on here when we're waiting for the chip
 			     to be ready */
 	int word_write_time;
-- 
cgit v1.2.3


From 2b3fc35f6919344e3cf722dde8308f47235c0b70 Mon Sep 17 00:00:00 2001
From: Lai Jiangshan <laijs@cn.fujitsu.com>
Date: Tue, 20 Apr 2010 16:23:07 +0800
Subject: rcu: optionally leave lockdep enabled after RCU lockdep splat

There is no need to disable lockdep after an RCU lockdep splat,
so remove the debug_locks_off() call from lockdep_rcu_dereference().
To avoid repeated lockdep splats, use a static variable in the inlined
rcu_dereference_check() and rcu_dereference_protected() macros so that
a given instance splats only once, but so that multiple instances can
be detected per boot.

This is controlled by a new config variable CONFIG_PROVE_RCU_REPEATEDLY,
which is disabled by default.  This provides the normal lockdep behavior
by default, but permits people who want to find multiple RCU-lockdep
splats per boot to easily do so.

Requested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Tested-by: Eric Paris <eparis@redhat.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 15 +++++++++++----
 kernel/lockdep.c         |  3 +++
 lib/Kconfig.debug        | 12 ++++++++++++
 3 files changed, 26 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index db266bbed23f..4dca2752cfde 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -192,6 +192,15 @@ static inline int rcu_read_lock_sched_held(void)
 
 extern int rcu_my_thread_group_empty(void);
 
+#define __do_rcu_dereference_check(c)					\
+	do {								\
+		static bool __warned;					\
+		if (debug_lockdep_rcu_enabled() && !__warned && !(c)) {	\
+			__warned = true;				\
+			lockdep_rcu_dereference(__FILE__, __LINE__);	\
+		}							\
+	} while (0)
+
 /**
  * rcu_dereference_check - rcu_dereference with debug checking
  * @p: The pointer to read, prior to dereferencing
@@ -221,8 +230,7 @@ extern int rcu_my_thread_group_empty(void);
  */
 #define rcu_dereference_check(p, c) \
 	({ \
-		if (debug_lockdep_rcu_enabled() && !(c)) \
-			lockdep_rcu_dereference(__FILE__, __LINE__); \
+		__do_rcu_dereference_check(c); \
 		rcu_dereference_raw(p); \
 	})
 
@@ -239,8 +247,7 @@ extern int rcu_my_thread_group_empty(void);
  */
 #define rcu_dereference_protected(p, c) \
 	({ \
-		if (debug_lockdep_rcu_enabled() && !(c)) \
-			lockdep_rcu_dereference(__FILE__, __LINE__); \
+		__do_rcu_dereference_check(c); \
 		(p); \
 	})
 
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index 2594e1ce41cb..3a756ba8d5d8 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3801,8 +3801,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
 {
 	struct task_struct *curr = current;
 
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
 	if (!debug_locks_off())
 		return;
+#endif /* #ifndef CONFIG_PROVE_RCU_REPEATEDLY */
+	/* Note: the following can be executed concurrently, so be careful. */
 	printk("\n===================================================\n");
 	printk(  "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
 	printk(  "---------------------------------------------------\n");
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 935248bdbc47..94090b4bb7d2 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -512,6 +512,18 @@ config PROVE_RCU
 
 	 Say N if you are unsure.
 
+config PROVE_RCU_REPEATEDLY
+	bool "RCU debugging: don't disable PROVE_RCU on first splat"
+	depends on PROVE_RCU
+	default n
+	help
+	 By itself, PROVE_RCU will disable checking upon issuing the
+	 first warning (or "splat").  This feature prevents such
+	 disabling, allowing multiple RCU-lockdep warnings to be printed
+	 on a single reboot.
+
+	 Say N if you are unsure.
+
 config LOCKDEP
 	bool
 	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-- 
cgit v1.2.3


From d20200b591f59847ab6a5c23507084a7d29e23c5 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 30 Mar 2010 10:52:21 -0700
Subject: rcu: Fix bogus CONFIG_PROVE_LOCKING in comments to reflect reality.

It is CONFIG_DEBUG_LOCK_ALLOC rather than CONFIG_PROVE_LOCKING, so fix it.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 15 ++++++++-------
 include/linux/srcu.h     |  4 ++--
 2 files changed, 10 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 4dca2752cfde..a150af0e5cd5 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -106,8 +106,8 @@ extern int debug_lockdep_rcu_enabled(void);
 /**
  * rcu_read_lock_held - might we be in RCU read-side critical section?
  *
- * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
- * an RCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU
+ * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
  * this assumes we are in an RCU read-side critical section unless it can
  * prove otherwise.
  *
@@ -129,11 +129,12 @@ extern int rcu_read_lock_bh_held(void);
 /**
  * rcu_read_lock_sched_held - might we be in RCU-sched read-side critical section?
  *
- * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in an
- * RCU-sched read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
- * this assumes we are in an RCU-sched read-side critical section unless it
- * can prove otherwise.  Note that disabling of preemption (including
- * disabling irqs) counts as an RCU-sched read-side critical section.
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an
+ * RCU-sched read-side critical section.  In absence of
+ * CONFIG_DEBUG_LOCK_ALLOC, this assumes we are in an RCU-sched read-side
+ * critical section unless it can prove otherwise.  Note that disabling
+ * of preemption (including disabling irqs) counts as an RCU-sched
+ * read-side critical section.
  *
  * Check rcu_scheduler_active to prevent false positives during boot.
  */
diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 4d5ecb222af9..9c01f1022428 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -84,8 +84,8 @@ long srcu_batches_completed(struct srcu_struct *sp);
 /**
  * srcu_read_lock_held - might we be in SRCU read-side critical section?
  *
- * If CONFIG_PROVE_LOCKING is selected and enabled, returns nonzero iff in
- * an SRCU read-side critical section.  In absence of CONFIG_PROVE_LOCKING,
+ * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU
+ * read-side critical section.  In absence of CONFIG_DEBUG_LOCK_ALLOC,
  * this assumes we are in an SRCU read-side critical section unless it can
  * prove otherwise.
  */
-- 
cgit v1.2.3


From 32c141a0a1dfa29e0a07d78bec0c0919fc4b9f88 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 30 Mar 2010 10:59:28 -0700
Subject: rcu: fix now-bogus rcu_scheduler_active comments.

The rcu_scheduler_active check has been wrapped into the new
debug_lockdep_rcu_enabled() function, so update the comments to
reflect this new reality.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index a150af0e5cd5..02537a72aaa4 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -111,7 +111,8 @@ extern int debug_lockdep_rcu_enabled(void);
  * this assumes we are in an RCU read-side critical section unless it can
  * prove otherwise.
  *
- * Check rcu_scheduler_active to prevent false positives during boot.
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
  */
 static inline int rcu_read_lock_held(void)
 {
@@ -136,7 +137,8 @@ extern int rcu_read_lock_bh_held(void);
  * of preemption (including disabling irqs) counts as an RCU-sched
  * read-side critical section.
  *
- * Check rcu_scheduler_active to prevent false positives during boot.
+ * Check debug_lockdep_rcu_enabled() to prevent false positives during boot
+ * and while lockdep is disabled.
  */
 #ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
-- 
cgit v1.2.3


From da848c47bc6e873a54a445ea1960423a495b6b32 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Tue, 30 Mar 2010 15:46:01 -0700
Subject: rcu: shrink rcutiny by making synchronize_rcu_bh() be inline

Because synchronize_rcu_bh() is identical to synchronize_sched(),
make the former a static inline invoking the latter, saving the
overhead of an EXPORT_SYMBOL_GPL() and the duplicate code.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  2 --
 include/linux/rcutiny.h  | 12 +++++++++++-
 include/linux/rcutree.h  |  2 ++
 kernel/rcutiny.c         |  9 ++-------
 4 files changed, 15 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 02537a72aaa4..d8fb2abcf303 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -56,8 +56,6 @@ struct rcu_head {
 };
 
 /* Exported common interfaces */
-extern void synchronize_rcu_bh(void);
-extern void synchronize_sched(void);
 extern void rcu_barrier(void);
 extern void rcu_barrier_bh(void);
 extern void rcu_barrier_sched(void);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index a5195875480a..bbeb55b7709b 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -74,7 +74,17 @@ static inline void rcu_sched_force_quiescent_state(void)
 {
 }
 
-#define synchronize_rcu synchronize_sched
+extern void synchronize_sched(void);
+
+static inline void synchronize_rcu(void)
+{
+	synchronize_sched();
+}
+
+static inline void synchronize_rcu_bh(void)
+{
+	synchronize_sched();
+}
 
 static inline void synchronize_rcu_expedited(void)
 {
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 42cc3a04779e..7484fe66a3aa 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -86,6 +86,8 @@ static inline void __rcu_read_unlock_bh(void)
 
 extern void call_rcu_sched(struct rcu_head *head,
 			   void (*func)(struct rcu_head *rcu));
+extern void synchronize_rcu_bh(void);
+extern void synchronize_sched(void);
 extern void synchronize_rcu_expedited(void);
 
 static inline void synchronize_rcu_bh_expedited(void)
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index 9f6d9ff2572c..272c6d21a75f 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -187,7 +187,8 @@ static void rcu_process_callbacks(struct softirq_action *unused)
  *
  * Cool, huh?  (Due to Josh Triplett.)
  *
- * But we want to make this a static inline later.
+ * But we want to make this a static inline later.  The cond_resched()
+ * currently makes this problematic.
  */
 void synchronize_sched(void)
 {
@@ -195,12 +196,6 @@ void synchronize_sched(void)
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
-void synchronize_rcu_bh(void)
-{
-	synchronize_sched();
-}
-EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
-
 /*
  * Helper function for call_rcu() and call_rcu_bh().
  */
-- 
cgit v1.2.3


From 25502a6c13745f4650cc59322bd198194f55e796 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Thu, 1 Apr 2010 17:37:01 -0700
Subject: rcu: refactor RCU's context-switch handling

The addition of preemptible RCU to treercu resulted in a bit of
confusion and inefficiency surrounding the handling of context switches
for RCU-sched and for RCU-preempt.  For RCU-sched, a context switch
is a quiescent state, pure and simple, just like it always has been.
For RCU-preempt, a context switch is in no way a quiescent state, but
special handling is required when a task blocks in an RCU read-side
critical section.

However, the callout from the scheduler and the outer loop in ksoftirqd
still calls something named rcu_sched_qs(), whose name is no longer
accurate.  Furthermore, when rcu_check_callbacks() notes an RCU-sched
quiescent state, it ends up unnecessarily (though harmlessly, aside
from the performance hit) enqueuing the current task if it happens to
be running in an RCU-preempt read-side critical section.  This not only
increases the maximum latency of scheduler_tick(), it also needlessly
increases the overhead of the next outermost rcu_read_unlock() invocation.

This patch addresses this situation by separating the notion of RCU's
context-switch handling from that of RCU-sched's quiescent states.
The context-switch handling is covered by rcu_note_context_switch() in
general and by rcu_preempt_note_context_switch() for preemptible RCU.
This permits rcu_sched_qs() to handle quiescent states and only quiescent
states.  It also reduces the maximum latency of scheduler_tick(), though
probably by much less than a microsecond.  Finally, it means that tasks
within preemptible-RCU read-side critical sections avoid incurring the
overhead of queuing unless there really is a context switch.

Suggested-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Acked-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <peterz@infradead.org>
---
 include/linux/rcutiny.h |  4 ++++
 include/linux/rcutree.h |  1 +
 kernel/rcutree.c        | 17 ++++++++++++-----
 kernel/rcutree_plugin.h | 11 +++++++----
 kernel/sched.c          |  2 +-
 kernel/softirq.c        |  2 +-
 6 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index bbeb55b7709b..ff22b97fb979 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -29,6 +29,10 @@
 
 void rcu_sched_qs(int cpu);
 void rcu_bh_qs(int cpu);
+static inline void rcu_note_context_switch(int cpu)
+{
+	rcu_sched_qs(cpu);
+}
 
 #define __rcu_read_lock()	preempt_disable()
 #define __rcu_read_unlock()	preempt_enable()
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 7484fe66a3aa..b9f74606f320 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -34,6 +34,7 @@ struct notifier_block;
 
 extern void rcu_sched_qs(int cpu);
 extern void rcu_bh_qs(int cpu);
+extern void rcu_note_context_switch(int cpu);
 extern int rcu_needs_cpu(int cpu);
 extern int rcu_expedited_torture_stats(char *page);
 
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 86bb9499aae6..e33631354b69 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -97,25 +97,32 @@ static int rcu_gp_in_progress(struct rcu_state *rsp)
  */
 void rcu_sched_qs(int cpu)
 {
-	struct rcu_data *rdp;
+	struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu);
 
-	rdp = &per_cpu(rcu_sched_data, cpu);
 	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
-	rcu_preempt_note_context_switch(cpu);
 }
 
 void rcu_bh_qs(int cpu)
 {
-	struct rcu_data *rdp;
+	struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu);
 
-	rdp = &per_cpu(rcu_bh_data, cpu);
 	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
 }
 
+/*
+ * Note a context switch.  This is a quiescent state for RCU-sched,
+ * and requires special handling for preemptible RCU.
+ */
+void rcu_note_context_switch(int cpu)
+{
+	rcu_sched_qs(cpu);
+	rcu_preempt_note_context_switch(cpu);
+}
+
 #ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = 1,
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 687c4e90722e..f9bc83a047da 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -75,13 +75,19 @@ EXPORT_SYMBOL_GPL(rcu_force_quiescent_state);
  * that this just means that the task currently running on the CPU is
  * not in a quiescent state.  There might be any number of tasks blocked
  * while in an RCU read-side critical section.
+ *
+ * Unlike the other rcu_*_qs() functions, callers to this function
+ * must disable irqs in order to protect the assignment to
+ * ->rcu_read_unlock_special.
  */
 static void rcu_preempt_qs(int cpu)
 {
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
+
 	rdp->passed_quiesc_completed = rdp->gpnum - 1;
 	barrier();
 	rdp->passed_quiesc = 1;
+	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
 
 /*
@@ -144,9 +150,8 @@ static void rcu_preempt_note_context_switch(int cpu)
 	 * grace period, then the fact that the task has been enqueued
 	 * means that we continue to block the current grace period.
 	 */
-	rcu_preempt_qs(cpu);
 	local_irq_save(flags);
-	t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
+	rcu_preempt_qs(cpu);
 	local_irq_restore(flags);
 }
 
@@ -236,7 +241,6 @@ static void rcu_read_unlock_special(struct task_struct *t)
 	 */
 	special = t->rcu_read_unlock_special;
 	if (special & RCU_READ_UNLOCK_NEED_QS) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 		rcu_preempt_qs(smp_processor_id());
 	}
 
@@ -473,7 +477,6 @@ static void rcu_preempt_check_callbacks(int cpu)
 	struct task_struct *t = current;
 
 	if (t->rcu_read_lock_nesting == 0) {
-		t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 		rcu_preempt_qs(cpu);
 		return;
 	}
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f70ffe..d8a213ccdc3b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3706,7 +3706,7 @@ need_resched:
 	preempt_disable();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
-	rcu_sched_qs(cpu);
+	rcu_note_context_switch(cpu);
 	prev = rq->curr;
 	switch_count = &prev->nivcsw;
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 7c1a67ef0274..0db913a5c60f 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -716,7 +716,7 @@ static int run_ksoftirqd(void * __bind_cpu)
 			preempt_enable_no_resched();
 			cond_resched();
 			preempt_disable();
-			rcu_sched_qs((long)__bind_cpu);
+			rcu_note_context_switch((long)__bind_cpu);
 		}
 		preempt_enable();
 		set_current_state(TASK_INTERRUPTIBLE);
-- 
cgit v1.2.3


From bbad937983147c017c25406860287cb94da9af7c Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Fri, 2 Apr 2010 16:17:17 -0700
Subject: rcu: slim down rcutiny by removing rcu_scheduler_active and friends

TINY_RCU does not need rcu_scheduler_active unless CONFIG_DEBUG_LOCK_ALLOC.
So conditionally compile rcu_scheduler_active in order to slim down
rcutiny a bit more.  Also gets rid of an EXPORT_SYMBOL_GPL, which is
responsible for most of the slimming.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h |  4 +---
 include/linux/rcutiny.h  | 13 +++++++++++++
 include/linux/rcutree.h  |  3 +++
 kernel/rcupdate.c        | 19 -------------------
 kernel/rcutiny.c         |  7 +++++++
 kernel/rcutiny_plugin.h  | 39 +++++++++++++++++++++++++++++++++++++++
 kernel/rcutree.c         | 19 +++++++++++++++++++
 7 files changed, 82 insertions(+), 22 deletions(-)
 create mode 100644 kernel/rcutiny_plugin.h

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index d8fb2abcf303..23be3a702516 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -64,8 +64,6 @@ extern int sched_expedited_torture_stats(char *page);
 
 /* Internal to kernel */
 extern void rcu_init(void);
-extern int rcu_scheduler_active;
-extern void rcu_scheduler_starting(void);
 
 #if defined(CONFIG_TREE_RCU) || defined(CONFIG_TREE_PREEMPT_RCU)
 #include <linux/rcutree.h>
@@ -178,7 +176,7 @@ static inline int rcu_read_lock_bh_held(void)
 #ifdef CONFIG_PREEMPT
 static inline int rcu_read_lock_sched_held(void)
 {
-	return !rcu_scheduler_active || preempt_count() != 0 || irqs_disabled();
+	return preempt_count() != 0 || irqs_disabled();
 }
 #else /* #ifdef CONFIG_PREEMPT */
 static inline int rcu_read_lock_sched_held(void)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index ff22b97fb979..14e5a76b2c06 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -128,4 +128,17 @@ static inline int rcu_preempt_depth(void)
 	return 0;
 }
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+extern int rcu_scheduler_active __read_mostly;
+extern void rcu_scheduler_starting(void);
+
+#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
+static inline void rcu_scheduler_starting(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #endif /* __LINUX_RCUTINY_H */
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index b9f74606f320..48282055e83d 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -123,4 +123,7 @@ static inline int rcu_blocking_is_gp(void)
 	return num_online_cpus() == 1;
 }
 
+extern void rcu_scheduler_starting(void);
+extern int rcu_scheduler_active __read_mostly;
+
 #endif /* __LINUX_RCUTREE_H */
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 49d808e833b0..72a8dc9567f5 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -44,7 +44,6 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/module.h>
-#include <linux/kernel_stat.h>
 #include <linux/hardirq.h>
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
@@ -64,9 +63,6 @@ struct lockdep_map rcu_sched_lock_map =
 EXPORT_SYMBOL_GPL(rcu_sched_lock_map);
 #endif
 
-int rcu_scheduler_active __read_mostly;
-EXPORT_SYMBOL_GPL(rcu_scheduler_active);
-
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 int debug_lockdep_rcu_enabled(void)
@@ -96,21 +92,6 @@ EXPORT_SYMBOL_GPL(rcu_read_lock_bh_held);
 
 #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
 
-/*
- * This function is invoked towards the end of the scheduler's initialization
- * process.  Before this is called, the idle task might contain
- * RCU read-side critical sections (during which time, this idle
- * task is booting the system).  After this function is called, the
- * idle tasks are prohibited from containing RCU read-side critical
- * sections.
- */
-void rcu_scheduler_starting(void)
-{
-	WARN_ON(num_online_cpus() != 1);
-	WARN_ON(nr_context_switches() > 0);
-	rcu_scheduler_active = 1;
-}
-
 /*
  * Awaken the corresponding synchronize_rcu() instance now that a
  * grace period has elapsed.
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index d9f8a623c9fa..b1804ff83d5e 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -54,6 +54,11 @@ static struct rcu_ctrlblk rcu_bh_ctrlblk = {
 	.curtail	= &rcu_bh_ctrlblk.rcucblist,
 };
 
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
+
 #ifdef CONFIG_NO_HZ
 
 static long rcu_dynticks_nesting = 1;
@@ -276,3 +281,5 @@ void __init rcu_init(void)
 {
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 }
+
+#include "rcutiny_plugin.h"
diff --git a/kernel/rcutiny_plugin.h b/kernel/rcutiny_plugin.h
new file mode 100644
index 000000000000..d223a92bc742
--- /dev/null
+++ b/kernel/rcutiny_plugin.h
@@ -0,0 +1,39 @@
+/*
+ * Read-Copy Update mechanism for mutual exclusion (tiny version)
+ * Internal non-public definitions that provide either classic
+ * or preemptable semantics.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * Copyright IBM Corporation, 2009
+ *
+ * Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
+ */
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
+#include <linux/kernel_stat.h>
+
+/*
+ * During boot, we forgive RCU lockdep issues.  After this function is
+ * invoked, we start taking RCU lockdep issues seriously.
+ */
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
+
+#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index e33631354b69..3623f8e10220 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -46,6 +46,7 @@
 #include <linux/cpu.h>
 #include <linux/mutex.h>
 #include <linux/time.h>
+#include <linux/kernel_stat.h>
 
 #include "rcutree.h"
 
@@ -80,6 +81,9 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
 struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
 DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
 
+int rcu_scheduler_active __read_mostly;
+EXPORT_SYMBOL_GPL(rcu_scheduler_active);
+
 /*
  * Return true if an RCU grace period is in progress.  The ACCESS_ONCE()s
  * permit this function to be invoked without holding the root rcu_node
@@ -1783,6 +1787,21 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
+/*
+ * This function is invoked towards the end of the scheduler's initialization
+ * process.  Before this is called, the idle task might contain
+ * RCU read-side critical sections (during which time, this idle
+ * task is booting the system).  After this function is called, the
+ * idle tasks are prohibited from containing RCU read-side critical
+ * sections.  This function also enables RCU lockdep checking.
+ */
+void rcu_scheduler_starting(void)
+{
+	WARN_ON(num_online_cpus() != 1);
+	WARN_ON(nr_context_switches() > 0);
+	rcu_scheduler_active = 1;
+}
+
 /*
  * Compute the per-level fanout, either using the exact fanout specified
  * or balancing the tree, depending on CONFIG_RCU_FANOUT_EXACT.
-- 
cgit v1.2.3


From d14aada8e20bdf81ffd43f433b123972cf575b32 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 19 Apr 2010 22:24:22 -0700
Subject: rcu: make SRCU usable in modules

Add a #include for mutex.h to allow SRCU to be more easily used in
kernel modules.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/srcu.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/srcu.h b/include/linux/srcu.h
index 9c01f1022428..4d5d2f546dbf 100644
--- a/include/linux/srcu.h
+++ b/include/linux/srcu.h
@@ -27,6 +27,8 @@
 #ifndef _LINUX_SRCU_H
 #define _LINUX_SRCU_H
 
+#include <linux/mutex.h>
+
 struct srcu_struct_array {
 	int c[2];
 };
-- 
cgit v1.2.3


From 228c54ef7a028d5a4b6606eb0c8035874d9b6788 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Mon, 15 Mar 2010 21:44:41 +0100
Subject: PM: pm_wakeup - switch to using bool

Also change a couple of stubs implemented as macros in the !CONFIG_PM
case into static inline functions to provide proper typechecking of
arguments regardless of config.

Signed-off-by: Dmitry Torokhov <dtor@mail.ru>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 include/linux/pm_wakeup.h | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h
index 0aae7776185e..22d64c18056c 100644
--- a/include/linux/pm_wakeup.h
+++ b/include/linux/pm_wakeup.h
@@ -25,32 +25,34 @@
 # error "please don't include this file directly"
 #endif
 
+#include <linux/types.h>
+
 #ifdef CONFIG_PM
 
 /* changes to device_may_wakeup take effect on the next pm state change.
  * by default, devices should wakeup if they can.
  */
-static inline void device_init_wakeup(struct device *dev, int val)
+static inline void device_init_wakeup(struct device *dev, bool val)
 {
-	dev->power.can_wakeup = dev->power.should_wakeup = !!val;
+	dev->power.can_wakeup = dev->power.should_wakeup = val;
 }
 
-static inline void device_set_wakeup_capable(struct device *dev, int val)
+static inline void device_set_wakeup_capable(struct device *dev, bool capable)
 {
-	dev->power.can_wakeup = !!val;
+	dev->power.can_wakeup = capable;
 }
 
-static inline int device_can_wakeup(struct device *dev)
+static inline bool device_can_wakeup(struct device *dev)
 {
 	return dev->power.can_wakeup;
 }
 
-static inline void device_set_wakeup_enable(struct device *dev, int val)
+static inline void device_set_wakeup_enable(struct device *dev, bool enable)
 {
-	dev->power.should_wakeup = !!val;
+	dev->power.should_wakeup = enable;
 }
 
-static inline int device_may_wakeup(struct device *dev)
+static inline bool device_may_wakeup(struct device *dev)
 {
 	return dev->power.can_wakeup && dev->power.should_wakeup;
 }
@@ -58,20 +60,28 @@ static inline int device_may_wakeup(struct device *dev)
 #else /* !CONFIG_PM */
 
 /* For some reason the next two routines work even without CONFIG_PM */
-static inline void device_init_wakeup(struct device *dev, int val)
+static inline void device_init_wakeup(struct device *dev, bool val)
 {
-	dev->power.can_wakeup = !!val;
+	dev->power.can_wakeup = val;
 }
 
-static inline void device_set_wakeup_capable(struct device *dev, int val) { }
+static inline void device_set_wakeup_capable(struct device *dev, bool capable)
+{
+}
 
-static inline int device_can_wakeup(struct device *dev)
+static inline bool device_can_wakeup(struct device *dev)
 {
 	return dev->power.can_wakeup;
 }
 
-#define device_set_wakeup_enable(dev, val)	do {} while (0)
-#define device_may_wakeup(dev)			0
+static inline void device_set_wakeup_enable(struct device *dev, bool enable)
+{
+}
+
+static inline bool device_may_wakeup(struct device *dev)
+{
+	return false;
+}
 
 #endif /* !CONFIG_PM */
 
-- 
cgit v1.2.3


From 6a727b43be8b005609e893a80af980808012cfdb Mon Sep 17 00:00:00 2001
From: Jiri Slaby <jslaby@suse.cz>
Date: Sat, 1 May 2010 23:51:22 +0200
Subject: FS / libfs: Implement simple_write_to_buffer

It will be used in suspend code and serves as an easy wrapper around
copy_from_user. Similar to simple_read_from_buffer, it takes care
of transfers with proper lengths depending on available and count
parameters and advances ppos appropriately.

Signed-off-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 fs/libfs.c         | 35 +++++++++++++++++++++++++++++++++++
 include/linux/fs.h |  2 ++
 2 files changed, 37 insertions(+)

(limited to 'include/linux')

diff --git a/fs/libfs.c b/fs/libfs.c
index ea9a6cc9b35c..232bea425b09 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -546,6 +546,40 @@ ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
 	return count;
 }
 
+/**
+ * simple_write_to_buffer - copy data from user space to the buffer
+ * @to: the buffer to write to
+ * @available: the size of the buffer
+ * @ppos: the current position in the buffer
+ * @from: the user space buffer to read from
+ * @count: the maximum number of bytes to read
+ *
+ * The simple_write_to_buffer() function reads up to @count bytes from the user
+ * space address starting at @from into the buffer @to at offset @ppos.
+ *
+ * On success, the number of bytes written is returned and the offset @ppos is
+ * advanced by this number, or a negative value is returned on error.
+ **/
+ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+		const void __user *from, size_t count)
+{
+	loff_t pos = *ppos;
+	size_t res;
+
+	if (pos < 0)
+		return -EINVAL;
+	if (pos >= available || !count)
+		return 0;
+	if (count > available - pos)
+		count = available - pos;
+	res = copy_from_user(to + pos, from, count);
+	if (res == count)
+		return -EFAULT;
+	count -= res;
+	*ppos = pos + count;
+	return count;
+}
+
 /**
  * memory_read_from_buffer - copy data from the buffer
  * @to: the kernel space buffer to read to
@@ -864,6 +898,7 @@ EXPORT_SYMBOL(simple_statfs);
 EXPORT_SYMBOL(simple_sync_file);
 EXPORT_SYMBOL(simple_unlink);
 EXPORT_SYMBOL(simple_read_from_buffer);
+EXPORT_SYMBOL(simple_write_to_buffer);
 EXPORT_SYMBOL(memory_read_from_buffer);
 EXPORT_SYMBOL(simple_transaction_set);
 EXPORT_SYMBOL(simple_transaction_get);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 44f35aea2f1f..948bd2bfb1de 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2362,6 +2362,8 @@ extern void simple_release_fs(struct vfsmount **mount, int *count);
 
 extern ssize_t simple_read_from_buffer(void __user *to, size_t count,
 			loff_t *ppos, const void *from, size_t available);
+extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
+		const void __user *from, size_t count);
 
 extern int simple_fsync(struct file *, struct dentry *, int);
 
-- 
cgit v1.2.3


From ed77134bfccf5e75b6cbadab268e559dbe6a4ebb Mon Sep 17 00:00:00 2001
From: Mark Gross <mgross@linux.intel.com>
Date: Thu, 6 May 2010 01:59:26 +0200
Subject: PM QOS update

This patch changes the string-based list management to a handle-based
implementation to help with the hot path use of pm-qos.  It also renames
much of the API to use "request" as opposed to "requirement", which was
used in the initial implementation.  I did this because "request" more
accurately represents what it actually does.

Also, I added a string based ABI for users wanting to use a string
interface.  So if the user writes 0xDDDDDDDD formatted hex it will be
accepted by the interface.  (someone asked me for it and I don't think
it hurts anything.)

This patch also updates some documentation based on input I got from Randy.

Signed-off-by: markgross <mgross@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
---
 Documentation/power/pm_qos_interface.txt |  48 +++----
 drivers/acpi/processor_idle.c            |   2 +-
 drivers/cpuidle/governors/ladder.c       |   2 +-
 drivers/cpuidle/governors/menu.c         |   2 +-
 drivers/net/e1000e/netdev.c              |  22 ++--
 drivers/net/igbvf/netdev.c               |   6 +-
 drivers/net/wireless/ipw2x00/ipw2100.c   |  11 +-
 include/linux/netdevice.h                |   4 +
 include/linux/pm_qos_params.h            |  14 +-
 include/sound/pcm.h                      |   3 +-
 kernel/pm_qos_params.c                   | 214 +++++++++++++++----------------
 net/mac80211/mlme.c                      |   2 +-
 sound/core/pcm.c                         |   3 -
 sound/core/pcm_native.c                  |  14 +-
 14 files changed, 176 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/power/pm_qos_interface.txt b/Documentation/power/pm_qos_interface.txt
index c40866e8b957..bfed898a03fc 100644
--- a/Documentation/power/pm_qos_interface.txt
+++ b/Documentation/power/pm_qos_interface.txt
@@ -18,44 +18,46 @@ and pm_qos_params.h.  This is done because having the available parameters
 being runtime configurable or changeable from a driver was seen as too easy to
 abuse.
 
-For each parameter a list of performance requirements is maintained along with
+For each parameter a list of performance requests is maintained along with
 an aggregated target value.  The aggregated target value is updated with
-changes to the requirement list or elements of the list.  Typically the
-aggregated target value is simply the max or min of the requirement values held
+changes to the request list or elements of the list.  Typically the
+aggregated target value is simply the max or min of the request values held
 in the parameter list elements.
 
 From kernel mode the use of this interface is simple:
-pm_qos_add_requirement(param_id, name, target_value):
-Will insert a named element in the list for that identified PM_QOS parameter
-with the target value.  Upon change to this list the new target is recomputed
-and any registered notifiers are called only if the target value is now
-different.
 
-pm_qos_update_requirement(param_id, name, new_target_value):
-Will search the list identified by the param_id for the named list element and
-then update its target value, calling the notification tree if the aggregated
-target is changed.  with that name is already registered.
+handle = pm_qos_add_request(param_class, target_value):
+Will insert an element into the list for that identified PM_QOS class with the
+target value.  Upon change to this list the new target is recomputed and any
+registered notifiers are called only if the target value is now different.
+Clients of pm_qos need to save the returned handle.
 
-pm_qos_remove_requirement(param_id, name):
-Will search the identified list for the named element and remove it, after
-removal it will update the aggregate target and call the notification tree if
-the target was changed as a result of removing the named requirement.
+void pm_qos_update_request(handle, new_target_value):
+Will update the list element pointed to by the handle with the new target value
+and recompute the new aggregated target, calling the notification tree if the
+target is changed.
+
+void pm_qos_remove_request(handle):
+Will remove the element.  After removal it will update the aggregate target and
+call the notification tree if the target was changed as a result of removing
+the request.
 
 
 From user mode:
-Only processes can register a pm_qos requirement.  To provide for automatic
-cleanup for process the interface requires the process to register its
-parameter requirements in the following way:
+Only processes can register a pm_qos request.  To provide for automatic
+cleanup of a process, the interface requires the process to register its
+parameter requests in the following way:
 
 To register the default pm_qos target for the specific parameter, the process
 must open one of /dev/[cpu_dma_latency, network_latency, network_throughput]
 
 As long as the device node is held open that process has a registered
-requirement on the parameter.  The name of the requirement is "process_<PID>"
-derived from the current->pid from within the open system call.
+request on the parameter.
 
-To change the requested target value the process needs to write a s32 value to
-the open device node.  This translates to a pm_qos_update_requirement call.
+To change the requested target value the process needs to write an s32 value to
+the open device node.  Alternatively the user mode program could write a hex
+string for the value using 10 char long format e.g. "0x12345678".  This
+translates to a pm_qos_update_request call.
 
 To remove the user mode request for a target value simply close the device
 node.
diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c
index 5939e7f7d8e9..c3817e1f32c7 100644
--- a/drivers/acpi/processor_idle.c
+++ b/drivers/acpi/processor_idle.c
@@ -698,7 +698,7 @@ static int acpi_processor_power_seq_show(struct seq_file *seq, void *offset)
 		   "max_cstate:              C%d\n"
 		   "maximum allowed latency: %d usec\n",
 		   pr->power.state ? pr->power.state - pr->power.states : 0,
-		   max_cstate, pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY));
+		   max_cstate, pm_qos_request(PM_QOS_CPU_DMA_LATENCY));
 
 	seq_puts(seq, "states:\n");
 
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 1c1ceb4f218f..12c98900dcf8 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -67,7 +67,7 @@ static int ladder_select_state(struct cpuidle_device *dev)
 	struct ladder_device *ldev = &__get_cpu_var(ladder_devices);
 	struct ladder_device_state *last_state;
 	int last_residency, last_idx = ldev->last_state_idx;
-	int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
+	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 
 	/* Special case when user has set very strict latency requirement */
 	if (unlikely(latency_req == 0)) {
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index f8e57c6303f2..b81ad9c731ae 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -182,7 +182,7 @@ static u64 div_round64(u64 dividend, u32 divisor)
 static int menu_select(struct cpuidle_device *dev)
 {
 	struct menu_device *data = &__get_cpu_var(menu_devices);
-	int latency_req = pm_qos_requirement(PM_QOS_CPU_DMA_LATENCY);
+	int latency_req = pm_qos_request(PM_QOS_CPU_DMA_LATENCY);
 	int i;
 	int multiplier;
 
diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
index dbf81788bb40..d5d55c6a373f 100644
--- a/drivers/net/e1000e/netdev.c
+++ b/drivers/net/e1000e/netdev.c
@@ -2524,12 +2524,12 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
 			 * excessive C-state transition latencies result in
 			 * dropped transactions.
 			 */
-			pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
-						  adapter->netdev->name, 55);
+			pm_qos_update_request(
+				adapter->netdev->pm_qos_req, 55);
 		} else {
-			pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
-						  adapter->netdev->name,
-						  PM_QOS_DEFAULT_VALUE);
+			pm_qos_update_request(
+				adapter->netdev->pm_qos_req,
+				PM_QOS_DEFAULT_VALUE);
 		}
 	}
 
@@ -2824,8 +2824,8 @@ int e1000e_up(struct e1000_adapter *adapter)
 
 	/* DMA latency requirement to workaround early-receive/jumbo issue */
 	if (adapter->flags & FLAG_HAS_ERT)
-		pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY,
-		                       adapter->netdev->name,
+		adapter->netdev->pm_qos_req =
+			pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
 				       PM_QOS_DEFAULT_VALUE);
 
 	/* hardware has been reset, we need to reload some things */
@@ -2887,9 +2887,11 @@ void e1000e_down(struct e1000_adapter *adapter)
 	e1000_clean_tx_ring(adapter);
 	e1000_clean_rx_ring(adapter);
 
-	if (adapter->flags & FLAG_HAS_ERT)
-		pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY,
-		                          adapter->netdev->name);
+	if (adapter->flags & FLAG_HAS_ERT) {
+		pm_qos_remove_request(
+			      adapter->netdev->pm_qos_req);
+		adapter->netdev->pm_qos_req = NULL;
+	}
 
 	/*
 	 * TODO: for power management, we could drop the link and
diff --git a/drivers/net/igbvf/netdev.c b/drivers/net/igbvf/netdev.c
index 1b1edad1eb5e..f16e981812a9 100644
--- a/drivers/net/igbvf/netdev.c
+++ b/drivers/net/igbvf/netdev.c
@@ -48,6 +48,7 @@
 #define DRV_VERSION "1.0.0-k0"
 char igbvf_driver_name[] = "igbvf";
 const char igbvf_driver_version[] = DRV_VERSION;
+struct pm_qos_request_list *igbvf_driver_pm_qos_req;
 static const char igbvf_driver_string[] =
 				"Intel(R) Virtual Function Network Driver";
 static const char igbvf_copyright[] = "Copyright (c) 2009 Intel Corporation.";
@@ -2899,7 +2900,7 @@ static int __init igbvf_init_module(void)
 	printk(KERN_INFO "%s\n", igbvf_copyright);
 
 	ret = pci_register_driver(&igbvf_driver);
-	pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, igbvf_driver_name,
+	igbvf_driver_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
 	                       PM_QOS_DEFAULT_VALUE);
 
 	return ret;
@@ -2915,7 +2916,8 @@ module_init(igbvf_init_module);
 static void __exit igbvf_exit_module(void)
 {
 	pci_unregister_driver(&igbvf_driver);
-	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, igbvf_driver_name);
+	pm_qos_remove_request(igbvf_driver_pm_qos_req);
+	igbvf_driver_pm_qos_req = NULL;
 }
 module_exit(igbvf_exit_module);
 
diff --git a/drivers/net/wireless/ipw2x00/ipw2100.c b/drivers/net/wireless/ipw2x00/ipw2100.c
index 9b72c45a7748..2b05fe5e994c 100644
--- a/drivers/net/wireless/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/ipw2x00/ipw2100.c
@@ -174,6 +174,8 @@ that only one external action is invoked at a time.
 #define DRV_DESCRIPTION	"Intel(R) PRO/Wireless 2100 Network Driver"
 #define DRV_COPYRIGHT	"Copyright(c) 2003-2006 Intel Corporation"
 
+struct pm_qos_request_list *ipw2100_pm_qos_req;
+
 /* Debugging stuff */
 #ifdef CONFIG_IPW2100_DEBUG
 #define IPW2100_RX_DEBUG	/* Reception debugging */
@@ -1739,7 +1741,7 @@ static int ipw2100_up(struct ipw2100_priv *priv, int deferred)
 	/* the ipw2100 hardware really doesn't want power management delays
 	 * longer than 175usec
 	 */
-	pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100", 175);
+	pm_qos_update_request(ipw2100_pm_qos_req, 175);
 
 	/* If the interrupt is enabled, turn it off... */
 	spin_lock_irqsave(&priv->low_lock, flags);
@@ -1887,8 +1889,7 @@ static void ipw2100_down(struct ipw2100_priv *priv)
 	ipw2100_disable_interrupts(priv);
 	spin_unlock_irqrestore(&priv->low_lock, flags);
 
-	pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
-			PM_QOS_DEFAULT_VALUE);
+	pm_qos_update_request(ipw2100_pm_qos_req, PM_QOS_DEFAULT_VALUE);
 
 	/* We have to signal any supplicant if we are disassociating */
 	if (associated)
@@ -6669,7 +6670,7 @@ static int __init ipw2100_init(void)
 	if (ret)
 		goto out;
 
-	pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100",
+	ipw2100_pm_qos_req = pm_qos_add_request(PM_QOS_CPU_DMA_LATENCY,
 			PM_QOS_DEFAULT_VALUE);
 #ifdef CONFIG_IPW2100_DEBUG
 	ipw2100_debug_level = debug;
@@ -6692,7 +6693,7 @@ static void __exit ipw2100_exit(void)
 			   &driver_attr_debug_level);
 #endif
 	pci_unregister_driver(&ipw2100_pci_driver);
-	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, "ipw2100");
+	pm_qos_remove_request(ipw2100_pm_qos_req);
 }
 
 module_init(ipw2100_init);
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index fa8b47637997..3857517f1ca5 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -31,6 +31,7 @@
 #include <linux/if_link.h>
 
 #ifdef __KERNEL__
+#include <linux/pm_qos_params.h>
 #include <linux/timer.h>
 #include <linux/delay.h>
 #include <linux/mm.h>
@@ -711,6 +712,9 @@ struct net_device {
 	 * the interface.
 	 */
 	char			name[IFNAMSIZ];
+
+	struct pm_qos_request_list *pm_qos_req;
+
 	/* device name hash chain */
 	struct hlist_node	name_hlist;
 	/* snmp alias */
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index d74f75ed1e47..8ba440e5eb7f 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h
@@ -14,12 +14,14 @@
 #define PM_QOS_NUM_CLASSES 4
 #define PM_QOS_DEFAULT_VALUE -1
 
-int pm_qos_add_requirement(int qos, char *name, s32 value);
-int pm_qos_update_requirement(int qos, char *name, s32 new_value);
-void pm_qos_remove_requirement(int qos, char *name);
+struct pm_qos_request_list;
 
-int pm_qos_requirement(int qos);
+struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value);
+void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
+		s32 new_value);
+void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req);
 
-int pm_qos_add_notifier(int qos, struct notifier_block *notifier);
-int pm_qos_remove_notifier(int qos, struct notifier_block *notifier);
+int pm_qos_request(int pm_qos_class);
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier);
+int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier);
 
diff --git a/include/sound/pcm.h b/include/sound/pcm.h
index 8b611a561985..dd76cdede64d 100644
--- a/include/sound/pcm.h
+++ b/include/sound/pcm.h
@@ -29,6 +29,7 @@
 #include <linux/poll.h>
 #include <linux/mm.h>
 #include <linux/bitops.h>
+#include <linux/pm_qos_params.h>
 
 #define snd_pcm_substream_chip(substream) ((substream)->private_data)
 #define snd_pcm_chip(pcm) ((pcm)->private_data)
@@ -365,7 +366,7 @@ struct snd_pcm_substream {
 	int number;
 	char name[32];			/* substream name */
 	int stream;			/* stream (direction) */
-	char latency_id[20];		/* latency identifier */
+	struct pm_qos_request_list *latency_pm_qos_req; /* pm_qos request */
 	size_t buffer_bytes_max;	/* limit ring buffer size */
 	struct snd_dma_buffer dma_buffer;
 	unsigned int dma_buf_id;
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index 3db49b9ca374..a1aea040eb57 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -2,7 +2,7 @@
  * This module exposes the interface to kernel space for specifying
  * QoS dependencies.  It provides infrastructure for registration of:
  *
- * Dependents on a QoS value : register requirements
+ * Dependents on a QoS value : register requests
  * Watchers of QoS value : get notified when target QoS value changes
  *
  * This QoS design is best effort based.  Dependents register their QoS needs.
@@ -14,19 +14,21 @@
  * timeout: usec <-- currently not used.
  * throughput: kbs (kilo byte / sec)
  *
- * There are lists of pm_qos_objects each one wrapping requirements, notifiers
+ * There are lists of pm_qos_objects each one wrapping requests, notifiers
  *
- * User mode requirements on a QOS parameter register themselves to the
+ * User mode requests on a QOS parameter register themselves to the
  * subsystem by opening the device node /dev/... and writing there request to
  * the node.  As long as the process holds a file handle open to the node the
  * client continues to be accounted for.  Upon file release the usermode
- * requirement is removed and a new qos target is computed.  This way when the
- * requirement that the application has is cleaned up when closes the file
+ * request is removed and a new qos target is computed.  This way when the
+ * request that the application has is cleaned up when closes the file
  * pointer or exits the pm_qos_object will get an opportunity to clean up.
  *
  * Mark Gross <mgross@linux.intel.com>
  */
 
+/*#define DEBUG*/
+
 #include <linux/pm_qos_params.h>
 #include <linux/sched.h>
 #include <linux/spinlock.h>
@@ -42,25 +44,25 @@
 #include <linux/uaccess.h>
 
 /*
- * locking rule: all changes to requirements or notifiers lists
+ * locking rule: all changes to requests or notifiers lists
  * or pm_qos_object list and pm_qos_objects need to happen with pm_qos_lock
  * held, taken with _irqsave.  One lock to rule them all
  */
-struct requirement_list {
+struct pm_qos_request_list {
 	struct list_head list;
 	union {
 		s32 value;
 		s32 usec;
 		s32 kbps;
 	};
-	char *name;
+	int pm_qos_class;
 };
 
 static s32 max_compare(s32 v1, s32 v2);
 static s32 min_compare(s32 v1, s32 v2);
 
 struct pm_qos_object {
-	struct requirement_list requirements;
+	struct pm_qos_request_list requests;
 	struct blocking_notifier_head *notifiers;
 	struct miscdevice pm_qos_power_miscdev;
 	char *name;
@@ -72,7 +74,7 @@ struct pm_qos_object {
 static struct pm_qos_object null_pm_qos;
 static BLOCKING_NOTIFIER_HEAD(cpu_dma_lat_notifier);
 static struct pm_qos_object cpu_dma_pm_qos = {
-	.requirements = {LIST_HEAD_INIT(cpu_dma_pm_qos.requirements.list)},
+	.requests = {LIST_HEAD_INIT(cpu_dma_pm_qos.requests.list)},
 	.notifiers = &cpu_dma_lat_notifier,
 	.name = "cpu_dma_latency",
 	.default_value = 2000 * USEC_PER_SEC,
@@ -82,7 +84,7 @@ static struct pm_qos_object cpu_dma_pm_qos = {
 
 static BLOCKING_NOTIFIER_HEAD(network_lat_notifier);
 static struct pm_qos_object network_lat_pm_qos = {
-	.requirements = {LIST_HEAD_INIT(network_lat_pm_qos.requirements.list)},
+	.requests = {LIST_HEAD_INIT(network_lat_pm_qos.requests.list)},
 	.notifiers = &network_lat_notifier,
 	.name = "network_latency",
 	.default_value = 2000 * USEC_PER_SEC,
@@ -93,8 +95,7 @@ static struct pm_qos_object network_lat_pm_qos = {
 
 static BLOCKING_NOTIFIER_HEAD(network_throughput_notifier);
 static struct pm_qos_object network_throughput_pm_qos = {
-	.requirements =
-		{LIST_HEAD_INIT(network_throughput_pm_qos.requirements.list)},
+	.requests = {LIST_HEAD_INIT(network_throughput_pm_qos.requests.list)},
 	.notifiers = &network_throughput_notifier,
 	.name = "network_throughput",
 	.default_value = 0,
@@ -135,31 +136,34 @@ static s32 min_compare(s32 v1, s32 v2)
 }
 
 
-static void update_target(int target)
+static void update_target(int pm_qos_class)
 {
 	s32 extreme_value;
-	struct requirement_list *node;
+	struct pm_qos_request_list *node;
 	unsigned long flags;
 	int call_notifier = 0;
 
 	spin_lock_irqsave(&pm_qos_lock, flags);
-	extreme_value = pm_qos_array[target]->default_value;
+	extreme_value = pm_qos_array[pm_qos_class]->default_value;
 	list_for_each_entry(node,
-			&pm_qos_array[target]->requirements.list, list) {
-		extreme_value = pm_qos_array[target]->comparitor(
+			&pm_qos_array[pm_qos_class]->requests.list, list) {
+		extreme_value = pm_qos_array[pm_qos_class]->comparitor(
 				extreme_value, node->value);
 	}
-	if (atomic_read(&pm_qos_array[target]->target_value) != extreme_value) {
+	if (atomic_read(&pm_qos_array[pm_qos_class]->target_value) !=
+			extreme_value) {
 		call_notifier = 1;
-		atomic_set(&pm_qos_array[target]->target_value, extreme_value);
-		pr_debug(KERN_ERR "new target for qos %d is %d\n", target,
-			atomic_read(&pm_qos_array[target]->target_value));
+		atomic_set(&pm_qos_array[pm_qos_class]->target_value,
+				extreme_value);
+		pr_debug(KERN_ERR "new target for qos %d is %d\n", pm_qos_class,
+			atomic_read(&pm_qos_array[pm_qos_class]->target_value));
 	}
 	spin_unlock_irqrestore(&pm_qos_lock, flags);
 
 	if (call_notifier)
-		blocking_notifier_call_chain(pm_qos_array[target]->notifiers,
-			(unsigned long) extreme_value, NULL);
+		blocking_notifier_call_chain(
+				pm_qos_array[pm_qos_class]->notifiers,
+					(unsigned long) extreme_value, NULL);
 }
 
 static int register_pm_qos_misc(struct pm_qos_object *qos)
@@ -185,125 +189,110 @@ static int find_pm_qos_object_by_minor(int minor)
 }
 
 /**
- * pm_qos_requirement - returns current system wide qos expectation
+ * pm_qos_request - returns current system wide qos expectation
  * @pm_qos_class: identification of which qos value is requested
  *
  * This function returns the current target value in an atomic manner.
  */
-int pm_qos_requirement(int pm_qos_class)
+int pm_qos_request(int pm_qos_class)
 {
 	return atomic_read(&pm_qos_array[pm_qos_class]->target_value);
 }
-EXPORT_SYMBOL_GPL(pm_qos_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_request);
 
 /**
- * pm_qos_add_requirement - inserts new qos request into the list
+ * pm_qos_add_request - inserts new qos request into the list
  * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
  * @value: defines the qos request
  *
  * This function inserts a new entry in the pm_qos_class list of requested qos
  * performance characteristics.  It recomputes the aggregate QoS expectations
- * for the pm_qos_class of parameters.
+ * for the pm_qos_class of parameters, and returns the pm_qos_request list
+ * element as a handle for use in updating and removal.  Caller needs to save
+ * this handle for later use.
  */
-int pm_qos_add_requirement(int pm_qos_class, char *name, s32 value)
+struct pm_qos_request_list *pm_qos_add_request(int pm_qos_class, s32 value)
 {
-	struct requirement_list *dep;
+	struct pm_qos_request_list *dep;
 	unsigned long flags;
 
-	dep = kzalloc(sizeof(struct requirement_list), GFP_KERNEL);
+	dep = kzalloc(sizeof(struct pm_qos_request_list), GFP_KERNEL);
 	if (dep) {
 		if (value == PM_QOS_DEFAULT_VALUE)
 			dep->value = pm_qos_array[pm_qos_class]->default_value;
 		else
 			dep->value = value;
-		dep->name = kstrdup(name, GFP_KERNEL);
-		if (!dep->name)
-			goto cleanup;
+		dep->pm_qos_class = pm_qos_class;
 
 		spin_lock_irqsave(&pm_qos_lock, flags);
 		list_add(&dep->list,
-			&pm_qos_array[pm_qos_class]->requirements.list);
+			&pm_qos_array[pm_qos_class]->requests.list);
 		spin_unlock_irqrestore(&pm_qos_lock, flags);
 		update_target(pm_qos_class);
-
-		return 0;
 	}
 
-cleanup:
-	kfree(dep);
-	return -ENOMEM;
+	return dep;
 }
-EXPORT_SYMBOL_GPL(pm_qos_add_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_add_request);
 
 /**
- * pm_qos_update_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_update_request - modifies an existing qos request
+ * @pm_qos_req : handle to list element holding a pm_qos request to use
  * @value: defines the qos request
  *
- * Updates an existing qos requirement for the pm_qos_class of parameters along
+ * Updates an existing qos request for the pm_qos_class of parameters along
  * with updating the target pm_qos_class value.
  *
- * If the named request isn't in the list then no change is made.
+ * Attempts are made to make this code callable on hot code paths.
  */
-int pm_qos_update_requirement(int pm_qos_class, char *name, s32 new_value)
+void pm_qos_update_request(struct pm_qos_request_list *pm_qos_req,
+		s32 new_value)
 {
 	unsigned long flags;
-	struct requirement_list *node;
 	int pending_update = 0;
+	s32 temp;
 
 	spin_lock_irqsave(&pm_qos_lock, flags);
-	list_for_each_entry(node,
-		&pm_qos_array[pm_qos_class]->requirements.list, list) {
-		if (strcmp(node->name, name) == 0) {
-			if (new_value == PM_QOS_DEFAULT_VALUE)
-				node->value =
-				pm_qos_array[pm_qos_class]->default_value;
-			else
-				node->value = new_value;
-			pending_update = 1;
-			break;
-		}
+	if (new_value == PM_QOS_DEFAULT_VALUE)
+		temp = pm_qos_array[pm_qos_req->pm_qos_class]->default_value;
+	else
+		temp = new_value;
+
+	if (temp != pm_qos_req->value) {
+		pending_update = 1;
+		pm_qos_req->value = temp;
 	}
 	spin_unlock_irqrestore(&pm_qos_lock, flags);
 	if (pending_update)
-		update_target(pm_qos_class);
-
-	return 0;
+		update_target(pm_qos_req->pm_qos_class);
 }
-EXPORT_SYMBOL_GPL(pm_qos_update_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_update_request);
 
 /**
- * pm_qos_remove_requirement - modifies an existing qos request
- * @pm_qos_class: identifies which list of qos request to us
- * @name: identifies the request
+ * pm_qos_remove_request - removes an existing qos request
+ * @pm_qos_req: handle to request list element
  *
- * Will remove named qos request from pm_qos_class list of parameters and
- * recompute the current target value for the pm_qos_class.
+ * Will remove pm qos request from the list of requests and
+ * recompute the current target value for the pm_qos_class.  Call this
+ * on slow code paths.
  */
-void pm_qos_remove_requirement(int pm_qos_class, char *name)
+void pm_qos_remove_request(struct pm_qos_request_list *pm_qos_req)
 {
 	unsigned long flags;
-	struct requirement_list *node;
-	int pending_update = 0;
+	int qos_class;
+
+	if (pm_qos_req == NULL)
+		return;
+		/* silent return to keep pcm code cleaner */
 
+	qos_class = pm_qos_req->pm_qos_class;
 	spin_lock_irqsave(&pm_qos_lock, flags);
-	list_for_each_entry(node,
-		&pm_qos_array[pm_qos_class]->requirements.list, list) {
-		if (strcmp(node->name, name) == 0) {
-			kfree(node->name);
-			list_del(&node->list);
-			kfree(node);
-			pending_update = 1;
-			break;
-		}
-	}
+	list_del(&pm_qos_req->list);
+	kfree(pm_qos_req);
 	spin_unlock_irqrestore(&pm_qos_lock, flags);
-	if (pending_update)
-		update_target(pm_qos_class);
+	update_target(qos_class);
 }
-EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
+EXPORT_SYMBOL_GPL(pm_qos_remove_request);
 
 /**
  * pm_qos_add_notifier - sets notification entry for changes to target value
@@ -313,7 +302,7 @@ EXPORT_SYMBOL_GPL(pm_qos_remove_requirement);
  * will register the notifier into a notification chain that gets called
  * upon changes to the pm_qos_class target value.
  */
- int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
+int pm_qos_add_notifier(int pm_qos_class, struct notifier_block *notifier)
 {
 	int retval;
 
@@ -343,21 +332,16 @@ int pm_qos_remove_notifier(int pm_qos_class, struct notifier_block *notifier)
 }
 EXPORT_SYMBOL_GPL(pm_qos_remove_notifier);
 
-#define PID_NAME_LEN 32
-
 static int pm_qos_power_open(struct inode *inode, struct file *filp)
 {
-	int ret;
 	long pm_qos_class;
-	char name[PID_NAME_LEN];
 
 	pm_qos_class = find_pm_qos_object_by_minor(iminor(inode));
 	if (pm_qos_class >= 0) {
-		filp->private_data = (void *)pm_qos_class;
-		snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
-		ret = pm_qos_add_requirement(pm_qos_class, name,
-					PM_QOS_DEFAULT_VALUE);
-		if (ret >= 0)
+		filp->private_data = (void *) pm_qos_add_request(pm_qos_class,
+				PM_QOS_DEFAULT_VALUE);
+
+		if (filp->private_data)
 			return 0;
 	}
 	return -EPERM;
@@ -365,32 +349,40 @@ static int pm_qos_power_open(struct inode *inode, struct file *filp)
 
 static int pm_qos_power_release(struct inode *inode, struct file *filp)
 {
-	int pm_qos_class;
-	char name[PID_NAME_LEN];
+	struct pm_qos_request_list *req;
 
-	pm_qos_class = (long)filp->private_data;
-	snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
-	pm_qos_remove_requirement(pm_qos_class, name);
+	req = (struct pm_qos_request_list *)filp->private_data;
+	pm_qos_remove_request(req);
 
 	return 0;
 }
 
+
 static ssize_t pm_qos_power_write(struct file *filp, const char __user *buf,
 		size_t count, loff_t *f_pos)
 {
 	s32 value;
-	int pm_qos_class;
-	char name[PID_NAME_LEN];
-
-	pm_qos_class = (long)filp->private_data;
-	if (count != sizeof(s32))
+	int x;
+	char ascii_value[11];
+	struct pm_qos_request_list *pm_qos_req;
+
+	if (count == sizeof(s32)) {
+		if (copy_from_user(&value, buf, sizeof(s32)))
+			return -EFAULT;
+	} else if (count == 11) { /* len('0x12345678/0') */
+		if (copy_from_user(ascii_value, buf, 11))
+			return -EFAULT;
+		x = sscanf(ascii_value, "%x", &value);
+		if (x != 1)
+			return -EINVAL;
+		pr_debug(KERN_ERR "%s, %d, 0x%x\n", ascii_value, x, value);
+	} else
 		return -EINVAL;
-	if (copy_from_user(&value, buf, sizeof(s32)))
-		return -EFAULT;
-	snprintf(name, PID_NAME_LEN, "process_%d", current->pid);
-	pm_qos_update_requirement(pm_qos_class, name, value);
 
-	return  sizeof(s32);
+	pm_qos_req = (struct pm_qos_request_list *)filp->private_data;
+	pm_qos_update_request(pm_qos_req, value);
+
+	return count;
 }
 
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 4aefa6dc3091..29de1965ff74 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -495,7 +495,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency)
 		s32 beaconint_us;
 
 		if (latency < 0)
-			latency = pm_qos_requirement(PM_QOS_NETWORK_LATENCY);
+			latency = pm_qos_request(PM_QOS_NETWORK_LATENCY);
 
 		beaconint_us = ieee80211_tu_to_usec(
 					found->vif.bss_conf.beacon_int);
diff --git a/sound/core/pcm.c b/sound/core/pcm.c
index 0d428d0896db..cbe815dfbdc8 100644
--- a/sound/core/pcm.c
+++ b/sound/core/pcm.c
@@ -648,9 +648,6 @@ int snd_pcm_new_stream(struct snd_pcm *pcm, int stream, int substream_count)
 		substream->number = idx;
 		substream->stream = stream;
 		sprintf(substream->name, "subdevice #%i", idx);
-		snprintf(substream->latency_id, sizeof(substream->latency_id),
-			 "ALSA-PCM%d-%d%c%d", pcm->card->number, pcm->device,
-			 (stream ? 'c' : 'p'), idx);
 		substream->buffer_bytes_max = UINT_MAX;
 		if (prev == NULL)
 			pstr->substream = substream;
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 872887624030..605c86df71c5 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -481,11 +481,13 @@ static int snd_pcm_hw_params(struct snd_pcm_substream *substream,
 	snd_pcm_timer_resolution_change(substream);
 	runtime->status->state = SNDRV_PCM_STATE_SETUP;
 
-	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY,
-				substream->latency_id);
+	if (substream->latency_pm_qos_req) {
+		pm_qos_remove_request(substream->latency_pm_qos_req);
+		substream->latency_pm_qos_req = NULL;
+	}
 	if ((usecs = period_to_usecs(runtime)) >= 0)
-		pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY,
-					substream->latency_id, usecs);
+		substream->latency_pm_qos_req = pm_qos_add_request(
+					PM_QOS_CPU_DMA_LATENCY, usecs);
 	return 0;
  _error:
 	/* hardware might be unuseable from this time,
@@ -540,8 +542,8 @@ static int snd_pcm_hw_free(struct snd_pcm_substream *substream)
 	if (substream->ops->hw_free)
 		result = substream->ops->hw_free(substream);
 	runtime->status->state = SNDRV_PCM_STATE_OPEN;
-	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY,
-		substream->latency_id);
+	pm_qos_remove_request(substream->latency_pm_qos_req);
+	substream->latency_pm_qos_req = NULL;
 	return result;
 }
 
-- 
cgit v1.2.3


From 2f60ba706bd9af84c4eab704243b262e69556f2e Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Mon, 10 May 2010 23:09:30 +0200
Subject: i2c: Fix bus-level power management callbacks

There are three issues with the i2c bus type's power management
callbacks at the moment.  First, they don't include any hibernate
callbacks, although they should at least include the .restore()
callback (there's no guarantee that the driver will be present in
memory before loading the image kernel and we must restore the
pre-hibernation state of the device).  Second, the "legacy"
callbacks are not going to be invoked by the PM core since the bus
type's pm object is not NULL.  Finally, the system sleep PM
(i.e. suspend/resume) callbacks don't check if the device has been
already suspended at run time, in which case they should skip
suspending it.  Also, it looks like the i2c bus type can use the
generic subsystem-level runtime PM callbacks.

For these reasons, rework the system sleep PM callbacks provided by
the i2c bus type to handle hibernation correctly and to invoke the
"legacy" callbacks for drivers that provide them.  In addition to
that make the i2c bus type use the generic subsystem-level runtime
PM callbacks.

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Acked-by: Mark Brown <broonie@opensource.wolfsonmicro.com>
Acked-by: Jean Delvare <khali@linux-fr.org>
---
 drivers/i2c/i2c-core.c     | 166 ++++++++++++++++++++++++++-------------------
 include/linux/pm_runtime.h |   7 ++
 2 files changed, 104 insertions(+), 69 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/i2c/i2c-core.c b/drivers/i2c/i2c-core.c
index c2258a51fe0c..7c469a62c3c1 100644
--- a/drivers/i2c/i2c-core.c
+++ b/drivers/i2c/i2c-core.c
@@ -159,107 +159,131 @@ static void i2c_device_shutdown(struct device *dev)
 		driver->shutdown(client);
 }
 
-#ifdef CONFIG_SUSPEND
-static int i2c_device_pm_suspend(struct device *dev)
+#ifdef CONFIG_PM_SLEEP
+static int i2c_legacy_suspend(struct device *dev, pm_message_t mesg)
 {
-	const struct dev_pm_ops *pm;
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct i2c_driver *driver;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return 0;
-	pm = dev->driver->pm;
-	if (!pm || !pm->suspend)
+	driver = to_i2c_driver(dev->driver);
+	if (!driver->suspend)
 		return 0;
-	return pm->suspend(dev);
+	return driver->suspend(client, mesg);
 }
 
-static int i2c_device_pm_resume(struct device *dev)
+static int i2c_legacy_resume(struct device *dev)
 {
-	const struct dev_pm_ops *pm;
+	struct i2c_client *client = i2c_verify_client(dev);
+	struct i2c_driver *driver;
 
-	if (!dev->driver)
+	if (!client || !dev->driver)
 		return 0;
-	pm = dev->driver->pm;
-	if (!pm || !pm->resume)
+	driver = to_i2c_driver(dev->driver);
+	if (!driver->resume)
 		return 0;
-	return pm->resume(dev);
+	return driver->resume(client);
 }
-#else
-#define i2c_device_pm_suspend	NULL
-#define i2c_device_pm_resume	NULL
-#endif
 
-#ifdef CONFIG_PM_RUNTIME
-static int i2c_device_runtime_suspend(struct device *dev)
+static int i2c_device_pm_suspend(struct device *dev)
 {
-	const struct dev_pm_ops *pm;
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
-	if (!dev->driver)
-		return 0;
-	pm = dev->driver->pm;
-	if (!pm || !pm->runtime_suspend)
+	if (pm_runtime_suspended(dev))
 		return 0;
-	return pm->runtime_suspend(dev);
-}
 
-static int i2c_device_runtime_resume(struct device *dev)
-{
-	const struct dev_pm_ops *pm;
+	if (pm)
+		return pm->suspend ? pm->suspend(dev) : 0;
 
-	if (!dev->driver)
-		return 0;
-	pm = dev->driver->pm;
-	if (!pm || !pm->runtime_resume)
-		return 0;
-	return pm->runtime_resume(dev);
+	return i2c_legacy_suspend(dev, PMSG_SUSPEND);
 }
 
-static int i2c_device_runtime_idle(struct device *dev)
+static int i2c_device_pm_resume(struct device *dev)
 {
-	const struct dev_pm_ops *pm = NULL;
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 	int ret;
 
-	if (dev->driver)
-		pm = dev->driver->pm;
-	if (pm && pm->runtime_idle) {
-		ret = pm->runtime_idle(dev);
-		if (ret)
-			return ret;
+	if (pm)
+		ret = pm->resume ? pm->resume(dev) : 0;
+	else
+		ret = i2c_legacy_resume(dev);
+
+	if (!ret) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
 	}
 
-	return pm_runtime_suspend(dev);
+	return ret;
 }
-#else
-#define i2c_device_runtime_suspend	NULL
-#define i2c_device_runtime_resume	NULL
-#define i2c_device_runtime_idle		NULL
-#endif
 
-static int i2c_device_suspend(struct device *dev, pm_message_t mesg)
+static int i2c_device_pm_freeze(struct device *dev)
 {
-	struct i2c_client *client = i2c_verify_client(dev);
-	struct i2c_driver *driver;
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
-	if (!client || !dev->driver)
+	if (pm_runtime_suspended(dev))
 		return 0;
-	driver = to_i2c_driver(dev->driver);
-	if (!driver->suspend)
-		return 0;
-	return driver->suspend(client, mesg);
+
+	if (pm)
+		return pm->freeze ? pm->freeze(dev) : 0;
+
+	return i2c_legacy_suspend(dev, PMSG_FREEZE);
 }
 
-static int i2c_device_resume(struct device *dev)
+static int i2c_device_pm_thaw(struct device *dev)
 {
-	struct i2c_client *client = i2c_verify_client(dev);
-	struct i2c_driver *driver;
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
 
-	if (!client || !dev->driver)
+	if (pm_runtime_suspended(dev))
 		return 0;
-	driver = to_i2c_driver(dev->driver);
-	if (!driver->resume)
+
+	if (pm)
+		return pm->thaw ? pm->thaw(dev) : 0;
+
+	return i2c_legacy_resume(dev);
+}
+
+static int i2c_device_pm_poweroff(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+
+	if (pm_runtime_suspended(dev))
 		return 0;
-	return driver->resume(client);
+
+	if (pm)
+		return pm->poweroff ? pm->poweroff(dev) : 0;
+
+	return i2c_legacy_suspend(dev, PMSG_HIBERNATE);
 }
 
+static int i2c_device_pm_restore(struct device *dev)
+{
+	const struct dev_pm_ops *pm = dev->driver ? dev->driver->pm : NULL;
+	int ret;
+
+	if (pm)
+		ret = pm->restore ? pm->restore(dev) : 0;
+	else
+		ret = i2c_legacy_resume(dev);
+
+	if (!ret) {
+		pm_runtime_disable(dev);
+		pm_runtime_set_active(dev);
+		pm_runtime_enable(dev);
+	}
+
+	return ret;
+}
+#else /* !CONFIG_PM_SLEEP */
+#define i2c_device_pm_suspend	NULL
+#define i2c_device_pm_resume	NULL
+#define i2c_device_pm_freeze	NULL
+#define i2c_device_pm_thaw	NULL
+#define i2c_device_pm_poweroff	NULL
+#define i2c_device_pm_restore	NULL
+#endif /* !CONFIG_PM_SLEEP */
+
 static void i2c_client_dev_release(struct device *dev)
 {
 	kfree(to_i2c_client(dev));
@@ -301,9 +325,15 @@ static const struct attribute_group *i2c_dev_attr_groups[] = {
 static const struct dev_pm_ops i2c_device_pm_ops = {
 	.suspend = i2c_device_pm_suspend,
 	.resume = i2c_device_pm_resume,
-	.runtime_suspend = i2c_device_runtime_suspend,
-	.runtime_resume = i2c_device_runtime_resume,
-	.runtime_idle = i2c_device_runtime_idle,
+	.freeze = i2c_device_pm_freeze,
+	.thaw = i2c_device_pm_thaw,
+	.poweroff = i2c_device_pm_poweroff,
+	.restore = i2c_device_pm_restore,
+	SET_RUNTIME_PM_OPS(
+		pm_generic_runtime_suspend,
+		pm_generic_runtime_resume,
+		pm_generic_runtime_idle
+	)
 };
 
 struct bus_type i2c_bus_type = {
@@ -312,8 +342,6 @@ struct bus_type i2c_bus_type = {
 	.probe		= i2c_device_probe,
 	.remove		= i2c_device_remove,
 	.shutdown	= i2c_device_shutdown,
-	.suspend	= i2c_device_suspend,
-	.resume		= i2c_device_resume,
 	.pm		= &i2c_device_pm_ops,
 };
 EXPORT_SYMBOL_GPL(i2c_bus_type);
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index b776db737244..6e81888c6222 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -30,6 +30,9 @@ extern void pm_runtime_enable(struct device *dev);
 extern void __pm_runtime_disable(struct device *dev, bool check_resume);
 extern void pm_runtime_allow(struct device *dev);
 extern void pm_runtime_forbid(struct device *dev);
+extern int pm_generic_runtime_idle(struct device *dev);
+extern int pm_generic_runtime_suspend(struct device *dev);
+extern int pm_generic_runtime_resume(struct device *dev);
 
 static inline bool pm_children_suspended(struct device *dev)
 {
@@ -96,6 +99,10 @@ static inline bool device_run_wake(struct device *dev) { return false; }
 static inline void device_set_run_wake(struct device *dev, bool enable) {}
 static inline bool pm_runtime_suspended(struct device *dev) { return false; }
 
+static inline int pm_generic_runtime_idle(struct device *dev) { return 0; }
+static inline int pm_generic_runtime_suspend(struct device *dev) { return 0; }
+static inline int pm_generic_runtime_resume(struct device *dev) { return 0; }
+
 #endif /* !CONFIG_PM_RUNTIME */
 
 static inline int pm_runtime_get(struct device *dev)
-- 
cgit v1.2.3


From a5d8e467f83f6672104f276223a88e3b50cbd375 Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 17 Apr 2010 08:48:38 -0400
Subject: Debugobjects transition check

Implement a basic state machine checker in the debugobjects.

This state machine checker detects races and inconsistencies within the "active"
life of a debugobject. The checker only keeps track of the current state; all
the state machine logic is kept at the object instance level.

The checker works by adding a supplementary "unsigned int astate" field to the
debug_obj structure. It keeps track of the current "active state" of the object.

The only constraints that are imposed on the states by the debugobjects system
are that:

- activation of an object sets the current active state to 0,
- deactivation of an object expects the current active state to be 0.

For the rest of the states, the state mapping is determined by the specific
object instance. Therefore, the logic keeping track of the state machine is
within the specialized instance, without any need to know about it at the
debugobject level.

The current object active state is changed by calling:

debug_object_active_state(addr, descr, expect, next)

where "expect" is the expected state and "next" is the next state to move to if
the expected state is found. A warning is generated if the expected state is
not found.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: David S. Miller <davem@davemloft.net>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: akpm@linux-foundation.org
CC: mingo@elte.hu
CC: laijs@cn.fujitsu.com
CC: dipankar@in.ibm.com
CC: josh@joshtriplett.org
CC: dvhltc@us.ibm.com
CC: niv@us.ibm.com
CC: peterz@infradead.org
CC: rostedt@goodmis.org
CC: Valdis.Kletnieks@vt.edu
CC: dhowells@redhat.com
CC: eric.dumazet@gmail.com
CC: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/debugobjects.h | 11 +++++++++
 lib/debugobjects.c           | 59 +++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 67 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index 8c243aaa86a7..597692f1fc8d 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -20,12 +20,14 @@ struct debug_obj_descr;
  * struct debug_obj - representaion of an tracked object
  * @node:	hlist node to link the object into the tracker list
  * @state:	tracked object state
+ * @astate:	current active state
  * @object:	pointer to the real object
  * @descr:	pointer to an object type specific debug description structure
  */
 struct debug_obj {
 	struct hlist_node	node;
 	enum debug_obj_state	state;
+	unsigned int		astate;
 	void			*object;
 	struct debug_obj_descr	*descr;
 };
@@ -60,6 +62,15 @@ extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr);
 extern void debug_object_destroy   (void *addr, struct debug_obj_descr *descr);
 extern void debug_object_free      (void *addr, struct debug_obj_descr *descr);
 
+/*
+ * Active state:
+ * - Set at 0 upon initialization.
+ * - Must return to 0 before deactivation.
+ */
+extern void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+			  unsigned int expect, unsigned int next);
+
 extern void debug_objects_early_init(void);
 extern void debug_objects_mem_init(void);
 #else
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index b862b30369ff..076464fd2072 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -141,6 +141,7 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
 		obj->object = addr;
 		obj->descr  = descr;
 		obj->state  = ODEBUG_STATE_NONE;
+		obj->astate = 0;
 		hlist_del(&obj->node);
 
 		hlist_add_head(&obj->node, &b->list);
@@ -252,8 +253,10 @@ static void debug_print_object(struct debug_obj *obj, char *msg)
 
 	if (limit < 5 && obj->descr != descr_test) {
 		limit++;
-		WARN(1, KERN_ERR "ODEBUG: %s %s object type: %s\n", msg,
-		       obj_states[obj->state], obj->descr->name);
+		WARN(1, KERN_ERR "ODEBUG: %s %s (active state %u) "
+				 "object type: %s\n",
+			msg, obj_states[obj->state], obj->astate,
+			obj->descr->name);
 	}
 	debug_objects_warnings++;
 }
@@ -447,7 +450,10 @@ void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
 		case ODEBUG_STATE_INIT:
 		case ODEBUG_STATE_INACTIVE:
 		case ODEBUG_STATE_ACTIVE:
-			obj->state = ODEBUG_STATE_INACTIVE;
+			if (!obj->astate)
+				obj->state = ODEBUG_STATE_INACTIVE;
+			else
+				debug_print_object(obj, "deactivate");
 			break;
 
 		case ODEBUG_STATE_DESTROYED:
@@ -553,6 +559,53 @@ out_unlock:
 	raw_spin_unlock_irqrestore(&db->lock, flags);
 }
 
+/**
+ * debug_object_active_state - debug checks object usage state machine
+ * @addr:	address of the object
+ * @descr:	pointer to an object specific debug description structure
+ * @expect:	expected state
+ * @next:	state to move to if expected state is found
+ */
+void
+debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+			  unsigned int expect, unsigned int next)
+{
+	struct debug_bucket *db;
+	struct debug_obj *obj;
+	unsigned long flags;
+
+	if (!debug_objects_enabled)
+		return;
+
+	db = get_bucket((unsigned long) addr);
+
+	raw_spin_lock_irqsave(&db->lock, flags);
+
+	obj = lookup_object(addr, db);
+	if (obj) {
+		switch (obj->state) {
+		case ODEBUG_STATE_ACTIVE:
+			if (obj->astate == expect)
+				obj->astate = next;
+			else
+				debug_print_object(obj, "active_state");
+			break;
+
+		default:
+			debug_print_object(obj, "active_state");
+			break;
+		}
+	} else {
+		struct debug_obj o = { .object = addr,
+				       .state = ODEBUG_STATE_NOTAVAILABLE,
+				       .descr = descr };
+
+		debug_print_object(&o, "active_state");
+	}
+
+	raw_spin_unlock_irqrestore(&db->lock, flags);
+}
+
 #ifdef CONFIG_DEBUG_OBJECTS_FREE
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
-- 
cgit v1.2.3


From 4376030a54860dedab9d848dfa7cc700a6025c0b Mon Sep 17 00:00:00 2001
From: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Date: Sat, 17 Apr 2010 08:48:39 -0400
Subject: rcu head introduce rcu head init on stack

PEM:
o     Would it be possible to make this bisectable as follows?

      a.      Insert a new patch after current patch 4/6 that
              defines destroy_rcu_head_on_stack(),
              init_rcu_head_on_stack(), and init_rcu_head() with
              their !CONFIG_DEBUG_OBJECTS_RCU_HEAD definitions.

This patch performs this transition.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
CC: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
CC: David S. Miller <davem@davemloft.net>
CC: akpm@linux-foundation.org
CC: mingo@elte.hu
CC: laijs@cn.fujitsu.com
CC: dipankar@in.ibm.com
CC: josh@joshtriplett.org
CC: dvhltc@us.ibm.com
CC: niv@us.ibm.com
CC: tglx@linutronix.de
CC: peterz@infradead.org
CC: rostedt@goodmis.org
CC: Valdis.Kletnieks@vt.edu
CC: dhowells@redhat.com
CC: eric.dumazet@gmail.com
CC: Alexey Dobriyan <adobriyan@gmail.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/rcupdate.h | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 23be3a702516..b653b4aaa8a6 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -79,6 +79,14 @@ extern void rcu_init(void);
        (ptr)->next = NULL; (ptr)->func = NULL; \
 } while (0)
 
+static inline void init_rcu_head_on_stack(struct rcu_head *head)
+{
+}
+
+static inline void destroy_rcu_head_on_stack(struct rcu_head *head)
+{
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 
 extern struct lockdep_map rcu_lock_map;
-- 
cgit v1.2.3


From e3174cfd2a1e28fff774681f00a0eef3d31da970 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 11 May 2010 08:31:49 +0200
Subject: Revert "perf: Fix exit() vs PERF_FORMAT_GROUP"

This reverts commit 4fd38e4595e2f6c9d27732c042a0e16b2753049c.

It causes various crashes and hangs when events are activated.

The cause is not fully understood yet but we need to revert it
because the effects are severe.

Reported-by: Stephane Eranian <eranian@google.com>
Reported-by: Lin Ming <ming.m.lin@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 include/linux/perf_event.h | 1 -
 kernel/perf_event.c        | 5 -----
 2 files changed, 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 4924c96d7e2d..3fd5c82e0e18 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -575,7 +575,6 @@ struct pmu {
  * enum perf_event_active_state - the states of a event
  */
 enum perf_event_active_state {
-	PERF_EVENT_STATE_FREE		= -3,
 	PERF_EVENT_STATE_ERROR		= -2,
 	PERF_EVENT_STATE_OFF		= -1,
 	PERF_EVENT_STATE_INACTIVE	=  0,
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 180151ff8376..a9047463fd83 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -334,9 +334,6 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 	if (event->state > PERF_EVENT_STATE_OFF)
 		event->state = PERF_EVENT_STATE_OFF;
 
-	if (event->state > PERF_EVENT_STATE_FREE)
-		return;
-
 	/*
 	 * If this was a group event with sibling events then
 	 * upgrade the siblings to singleton events by adding them
@@ -1871,8 +1868,6 @@ int perf_event_release_kernel(struct perf_event *event)
 {
 	struct perf_event_context *ctx = event->ctx;
 
-	event->state = PERF_EVENT_STATE_FREE;
-
 	WARN_ON_ONCE(ctx->parent_ctx);
 	/*
 	 * There are two ways this annotation is useful:
-- 
cgit v1.2.3


From 758ef749f3b1a4c5e6ba5b5bca5eae968a2c0c5f Mon Sep 17 00:00:00 2001
From: Bill Pemberton <wfp5p@virginia.edu>
Date: Fri, 30 Apr 2010 09:34:38 -0400
Subject: rtc-v3020: make bitfield unsigned

Fix sparse warning:

include/linux/rtc-v3020.h:18:23: error: dubious one-bit signed bitfield

Signed-off-by: Bill Pemberton <wfp5p@virginia.edu>
CC: p_gortmaker@yahoo.com
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 include/linux/rtc-v3020.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/rtc-v3020.h b/include/linux/rtc-v3020.h
index 8ba646e610d9..e55d82cebf80 100644
--- a/include/linux/rtc-v3020.h
+++ b/include/linux/rtc-v3020.h
@@ -15,7 +15,7 @@
 struct v3020_platform_data {
 	int leftshift; /* (1<<(leftshift)) & readl() */
 
-	int use_gpio:1;
+	unsigned int use_gpio:1;
 	unsigned int gpio_cs;
 	unsigned int gpio_wr;
 	unsigned int gpio_rd;
-- 
cgit v1.2.3


From b5aa30b19121de49021fba57aa1f6e4c787fcf67 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:50 +0200
Subject: ipv6: ip6mr: remove net pointer from struct mfc6_cache

Now that cache entries in unres_queue don't need to be distinguished by their
network namespace pointer anymore, we can remove it from struct mfc6_cache
and pass the namespace as a function argument to the functions that need it.

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h | 15 ------------
 net/ipv6/ip6mr.c        | 63 ++++++++++++++++++++++++-------------------------
 2 files changed, 31 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 2caa1a8e525d..04e2e54d0404 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -183,9 +183,6 @@ struct mif_device {
 
 struct mfc6_cache {
 	struct mfc6_cache *next;		/* Next entry on cache line 	*/
-#ifdef CONFIG_NET_NS
-	struct net *mfc6_net;
-#endif
 	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
 	struct in6_addr mf6c_origin;			/* Source of packet 		*/
 	mifi_t mf6c_parent;			/* Source interface		*/
@@ -208,18 +205,6 @@ struct mfc6_cache {
 	} mfc_un;
 };
 
-static inline
-struct net *mfc6_net(const struct mfc6_cache *mfc)
-{
-	return read_pnet(&mfc->mfc6_net);
-}
-
-static inline
-void mfc6_net_set(struct mfc6_cache *mfc, struct net *net)
-{
-	write_pnet(&mfc->mfc6_net, hold_net(net));
-}
-
 #define MFC_STATIC		1
 #define MFC_NOTIFY		2
 
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 7236030e403e..b3783a436bbd 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -76,10 +76,12 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm);
+static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb,
+			     struct mfc6_cache *c, struct rtmsg *rtm);
 static void mroute_clean_tables(struct net *net);
 
 
@@ -523,7 +525,6 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
 
 static inline void ip6mr_cache_free(struct mfc6_cache *c)
 {
-	release_net(mfc6_net(c));
 	kmem_cache_free(mrt_cachep, c);
 }
 
@@ -531,10 +532,9 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
-	struct net *net = mfc6_net(c);
 
 	atomic_dec(&net->ipv6.cache_resolve_queue_len);
 
@@ -575,7 +575,7 @@ static void ipmr_do_expire_process(struct net *net)
 		}
 
 		*cp = c->next;
-		ip6mr_destroy_unres(c);
+		ip6mr_destroy_unres(net, c);
 	}
 
 	if (net->ipv6.mfc6_unres_queue != NULL)
@@ -599,10 +599,10 @@ static void ipmr_expire_process(unsigned long arg)
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct mfc6_cache *cache, unsigned char *ttls)
+static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
+				    unsigned char *ttls)
 {
 	int vifi;
-	struct net *net = mfc6_net(cache);
 
 	cache->mfc_un.res.minvif = MAXMIFS;
 	cache->mfc_un.res.maxvif = 0;
@@ -717,24 +717,22 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net,
 /*
  *	Allocate a multicast cache entry
  */
-static struct mfc6_cache *ip6mr_cache_alloc(struct net *net)
+static struct mfc6_cache *ip6mr_cache_alloc(void)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);
 	if (c == NULL)
 		return NULL;
 	c->mfc_un.res.minvif = MAXMIFS;
-	mfc6_net_set(c, net);
 	return c;
 }
 
-static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
+static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
 {
 	struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);
 	if (c == NULL)
 		return NULL;
 	skb_queue_head_init(&c->mfc_un.unres.unresolved);
 	c->mfc_un.unres.expires = jiffies + 10 * HZ;
-	mfc6_net_set(c, net);
 	return c;
 }
 
@@ -742,7 +740,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(struct net *net)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
+static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
+				struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
 
@@ -755,7 +754,7 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (ip6mr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -763,9 +762,9 @@ static void ip6mr_cache_resolve(struct mfc6_cache *uc, struct mfc6_cache *c)
 				skb_trim(skb, nlh->nlmsg_len);
 				((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE;
 			}
-			err = rtnl_unicast(skb, mfc6_net(uc), NETLINK_CB(skb).pid);
+			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip6_mr_forward(skb, c);
+			ip6_mr_forward(net, skb, c);
 	}
 }
 
@@ -889,7 +888,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		 */
 
 		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
-		    (c = ip6mr_cache_alloc_unres(net)) == NULL) {
+		    (c = ip6mr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
 			kfree_skb(skb);
@@ -1133,7 +1132,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (c != NULL) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
-		ip6mr_update_thresholds(c, ttls);
+		ip6mr_update_thresholds(net, c, ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -1143,14 +1142,14 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (!ipv6_addr_is_multicast(&mfc->mf6cc_mcastgrp.sin6_addr))
 		return -EINVAL;
 
-	c = ip6mr_cache_alloc(net);
+	c = ip6mr_cache_alloc();
 	if (c == NULL)
 		return -ENOMEM;
 
 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
 	c->mf6c_parent = mfc->mf6cc_parent;
-	ip6mr_update_thresholds(c, ttls);
+	ip6mr_update_thresholds(net, c, ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
@@ -1178,7 +1177,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (uc) {
-		ip6mr_cache_resolve(uc, c);
+		ip6mr_cache_resolve(net, uc, c);
 		ip6mr_cache_free(uc);
 	}
 	return 0;
@@ -1229,7 +1228,7 @@ static void mroute_clean_tables(struct net *net)
 		cp = &net->ipv6.mfc6_unres_queue;
 		while ((c = *cp) != NULL) {
 			*cp = c->next;
-			ip6mr_destroy_unres(c);
+			ip6mr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
@@ -1497,10 +1496,10 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
  *	Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct sk_buff *skb, struct mfc6_cache *c, int vifi)
+static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct net *net = mfc6_net(c);
 	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
@@ -1581,11 +1580,11 @@ static int ip6mr_find_vif(struct net_device *dev)
 	return ct;
 }
 
-static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
+static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
+			  struct mfc6_cache *cache)
 {
 	int psend = -1;
 	int vif, ct;
-	struct net *net = mfc6_net(cache);
 
 	vif = cache->mf6c_parent;
 	cache->mfc_un.res.pkt++;
@@ -1627,13 +1626,13 @@ static int ip6_mr_forward(struct sk_buff *skb, struct mfc6_cache *cache)
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(skb2, cache, psend);
+					ip6mr_forward2(net, skb2, cache, psend);
 			}
 			psend = ct;
 		}
 	}
 	if (psend != -1) {
-		ip6mr_forward2(skb, cache, psend);
+		ip6mr_forward2(net, skb, cache, psend);
 		return 0;
 	}
 
@@ -1674,7 +1673,7 @@ int ip6_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip6_mr_forward(skb, cache);
+	ip6_mr_forward(net, skb, cache);
 
 	read_unlock(&mrt_lock);
 
@@ -1683,11 +1682,11 @@ int ip6_mr_input(struct sk_buff *skb)
 
 
 static int
-ip6mr_fill_mroute(struct sk_buff *skb, struct mfc6_cache *c, struct rtmsg *rtm)
+ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
+		  struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
-	struct net *net = mfc6_net(c);
 	u8 *b = skb_tail_pointer(skb);
 	struct rtattr *mp_head;
 
@@ -1781,7 +1780,7 @@ int ip6mr_get_route(struct net *net,
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
 
-	err = ip6mr_fill_mroute(skb, cache, rtm);
+	err = ip6mr_fill_mroute(net, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3


From f30a77842129b5656360cc1f5db48a3fcfb64528 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:51 +0200
Subject: ipv6: ip6mr: convert struct mfc_cache to struct list_head

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h  |   2 +-
 include/net/netns/ipv6.h |   4 +-
 net/ipv6/ip6mr.c         | 127 +++++++++++++++++++++++------------------------
 3 files changed, 65 insertions(+), 68 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 04e2e54d0404..94a0cb521ca9 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -182,7 +182,7 @@ struct mif_device {
 #define VIFF_STATIC 0x8000
 
 struct mfc6_cache {
-	struct mfc6_cache *next;		/* Next entry on cache line 	*/
+	struct list_head list;
 	struct in6_addr mf6c_mcastgrp;			/* Group the entry belongs to 	*/
 	struct in6_addr mf6c_origin;			/* Source of packet 		*/
 	mifi_t mf6c_parent;			/* Source interface		*/
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 43d842ab004e..9cb3b5f2cd75 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -61,8 +61,8 @@ struct netns_ipv6 {
 #ifdef CONFIG_IPV6_MROUTE
 	struct sock		*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
-	struct mfc6_cache	*mfc6_unres_queue;
-	struct mfc6_cache	**mfc6_cache_array;
+	struct list_head	mfc6_unres_queue;
+	struct list_head	*mfc6_cache_array;
 	struct mif_device	*vif6_table;
 	int			maxvif;
 	atomic_t		cache_resolve_queue_len;
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index b3783a436bbd..08e09042ad1c 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -89,7 +89,7 @@ static void mroute_clean_tables(struct net *net);
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
-	struct mfc6_cache **cache;
+	struct list_head *cache;
 	int ct;
 };
 
@@ -99,18 +99,18 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 {
 	struct mfc6_cache *mfc;
 
-	it->cache = net->ipv6.mfc6_cache_array;
 	read_lock(&mrt_lock);
-	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++)
-		for (mfc = net->ipv6.mfc6_cache_array[it->ct];
-		     mfc; mfc = mfc->next)
+	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
+		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
+	}
 	read_unlock(&mrt_lock);
 
-	it->cache = &net->ipv6.mfc6_unres_queue;
 	spin_lock_bh(&mfc_unres_lock);
-	for (mfc = net->ipv6.mfc6_unres_queue; mfc; mfc = mfc->next)
+	it->cache = &net->ipv6.mfc6_unres_queue;
+	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
 	spin_unlock_bh(&mfc_unres_lock);
@@ -119,9 +119,6 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 	return NULL;
 }
 
-
-
-
 /*
  *	The /proc interfaces to multicast routing /proc/ip6_mr_cache /proc/ip6_mr_vif
  */
@@ -238,18 +235,19 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (v == SEQ_START_TOKEN)
 		return ipmr_mfc_seq_idx(net, seq->private, 0);
 
-	if (mfc->next)
-		return mfc->next;
+	if (mfc->list.next != it->cache)
+		return list_entry(mfc->list.next, struct mfc6_cache, list);
 
 	if (it->cache == &net->ipv6.mfc6_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != net->ipv6.mfc6_cache_array);
+	BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]);
 
 	while (++it->ct < MFC6_LINES) {
-		mfc = net->ipv6.mfc6_cache_array[it->ct];
-		if (mfc)
-			return mfc;
+		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		if (list_empty(it->cache))
+			continue;
+		return list_first_entry(it->cache, struct mfc6_cache, list);
 	}
 
 	/* exhausted cache_array, show unresolved */
@@ -258,9 +256,8 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
-	mfc = net->ipv6.mfc6_unres_queue;
-	if (mfc)
-		return mfc;
+	if (!list_empty(it->cache))
+		return list_first_entry(it->cache, struct mfc6_cache, list);
 
  end_of_list:
 	spin_unlock_bh(&mfc_unres_lock);
@@ -560,25 +557,22 @@ static void ipmr_do_expire_process(struct net *net)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
-	struct mfc6_cache *c, **cp;
-
-	cp = &net->ipv6.mfc6_unres_queue;
+	struct mfc6_cache *c, *next;
 
-	while ((c = *cp) != NULL) {
+	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			/* not yet... */
 			unsigned long interval = c->mfc_un.unres.expires - now;
 			if (interval < expires)
 				expires = interval;
-			cp = &c->next;
 			continue;
 		}
 
-		*cp = c->next;
+		list_del(&c->list);
 		ip6mr_destroy_unres(net, c);
 	}
 
-	if (net->ipv6.mfc6_unres_queue != NULL)
+	if (!list_empty(&net->ipv6.mfc6_unres_queue))
 		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
 }
 
@@ -591,7 +585,7 @@ static void ipmr_expire_process(unsigned long arg)
 		return;
 	}
 
-	if (net->ipv6.mfc6_unres_queue != NULL)
+	if (!list_empty(&net->ipv6.mfc6_unres_queue))
 		ipmr_do_expire_process(net);
 
 	spin_unlock(&mfc_unres_lock);
@@ -706,12 +700,12 @@ static struct mfc6_cache *ip6mr_cache_find(struct net *net,
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	for (c = net->ipv6.mfc6_cache_array[line]; c; c = c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
-			break;
+			return c;
 	}
-	return c;
+	return NULL;
 }
 
 /*
@@ -872,17 +866,20 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 static int
 ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 {
+	bool found = false;
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	for (c = net->ipv6.mfc6_unres_queue; c; c = c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
-		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr))
+		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c == NULL) {
+	if (!found) {
 		/*
 		 *	Create a new entry if allowable
 		 */
@@ -918,8 +915,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		}
 
 		atomic_inc(&net->ipv6.cache_resolve_queue_len);
-		c->next = net->ipv6.mfc6_unres_queue;
-		net->ipv6.mfc6_unres_queue = c;
+		list_add(&c->list, &net->ipv6.mfc6_unres_queue);
 
 		ipmr_do_expire_process(net);
 	}
@@ -946,16 +942,15 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
 {
 	int line;
-	struct mfc6_cache *c, **cp;
+	struct mfc6_cache *c, *next;
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &net->ipv6.mfc6_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ip6mr_cache_free(c);
@@ -997,7 +992,9 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
+	unsigned int i;
 	int err = 0;
+
 	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
 				       GFP_KERNEL);
 	if (!net->ipv6.vif6_table) {
@@ -1007,13 +1004,18 @@ static int __net_init ip6mr_net_init(struct net *net)
 
 	/* Forwarding cache */
 	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
-					     sizeof(struct mfc6_cache *),
+					     sizeof(struct list_head),
 					     GFP_KERNEL);
 	if (!net->ipv6.mfc6_cache_array) {
 		err = -ENOMEM;
 		goto fail_mfc6_cache;
 	}
 
+	for (i = 0; i < MFC6_LINES; i++)
+		INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]);
+
+	INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue);
+
 	setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
 		    (unsigned long)net);
 
@@ -1105,8 +1107,9 @@ void ip6_mr_cleanup(void)
 
 static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 {
+	bool found = false;
 	int line;
-	struct mfc6_cache *uc, *c, **cp;
+	struct mfc6_cache *uc, *c;
 	unsigned char ttls[MAXMIFS];
 	int i;
 
@@ -1122,14 +1125,15 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	for (cp = &net->ipv6.mfc6_cache_array[line];
-	     (c = *cp) != NULL; cp = &c->next) {
+	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
-		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr))
+		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
+			found = true;
 			break;
+		}
 	}
 
-	if (c != NULL) {
+	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
 		ip6mr_update_thresholds(net, c, ttls);
@@ -1154,29 +1158,29 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	c->next = net->ipv6.mfc6_cache_array[line];
-	net->ipv6.mfc6_cache_array[line] = c;
+	list_add(&c->list, &net->ipv6.mfc6_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
 	 *	Check to see if we resolved a queued list. If so we
 	 *	need to send on the frames and tidy up.
 	 */
+	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	for (cp = &net->ipv6.mfc6_unres_queue; (uc = *cp) != NULL;
-	     cp = &uc->next) {
+	list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
-			*cp = uc->next;
+			list_del(&uc->list);
 			atomic_dec(&net->ipv6.cache_resolve_queue_len);
+			found = true;
 			break;
 		}
 	}
-	if (net->ipv6.mfc6_unres_queue == NULL)
+	if (list_empty(&net->ipv6.mfc6_unres_queue))
 		del_timer(&net->ipv6.ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
-	if (uc) {
+	if (found) {
 		ip6mr_cache_resolve(net, uc, c);
 		ip6mr_cache_free(uc);
 	}
@@ -1191,6 +1195,7 @@ static void mroute_clean_tables(struct net *net)
 {
 	int i;
 	LIST_HEAD(list);
+	struct mfc6_cache *c, *next;
 
 	/*
 	 *	Shut down all active vif entries
@@ -1205,16 +1210,11 @@ static void mroute_clean_tables(struct net *net)
 	 *	Wipe the cache
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
-		struct mfc6_cache *c, **cp;
-
-		cp = &net->ipv6.mfc6_cache_array[i];
-		while ((c = *cp) != NULL) {
-			if (c->mfc_flags & MFC_STATIC) {
-				cp = &c->next;
+		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) {
+			if (c->mfc_flags & MFC_STATIC)
 				continue;
-			}
 			write_lock_bh(&mrt_lock);
-			*cp = c->next;
+			list_del(&c->list);
 			write_unlock_bh(&mrt_lock);
 
 			ip6mr_cache_free(c);
@@ -1222,12 +1222,9 @@ static void mroute_clean_tables(struct net *net)
 	}
 
 	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
-		struct mfc6_cache *c, **cp;
-
 		spin_lock_bh(&mfc_unres_lock);
-		cp = &net->ipv6.mfc6_unres_queue;
-		while ((c = *cp) != NULL) {
-			*cp = c->next;
+		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+			list_del(&c->list);
 			ip6mr_destroy_unres(net, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
-- 
cgit v1.2.3


From 6bd521433942d85e80f7a731a88cc91a327f38e0 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:53 +0200
Subject: ipv6: ip6mr: move mroute data into separate structure

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/mroute6.h  |   5 +-
 include/net/netns/ipv6.h |  13 +-
 net/ipv6/ip6mr.c         | 390 ++++++++++++++++++++++++++---------------------
 3 files changed, 216 insertions(+), 192 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 94a0cb521ca9..0370dd4f2389 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -229,10 +229,7 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
 			   struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
-static inline struct sock *mroute6_socket(struct net *net)
-{
-	return net->ipv6.mroute6_sk;
-}
+extern struct sock *mroute6_socket(struct net *net);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
 static inline struct sock *mroute6_socket(struct net *net) { return NULL; }
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 9cb3b5f2cd75..4e2780e6d8bc 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -59,18 +59,7 @@ struct netns_ipv6 {
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
-	struct sock		*mroute6_sk;
-	struct timer_list	ipmr_expire_timer;
-	struct list_head	mfc6_unres_queue;
-	struct list_head	*mfc6_cache_array;
-	struct mif_device	*vif6_table;
-	int			maxvif;
-	atomic_t		cache_resolve_queue_len;
-	int			mroute_do_assert;
-	int			mroute_do_pim;
-#ifdef CONFIG_IPV6_PIMSM_V2
-	int			mroute_reg_vif_num;
-#endif
+	struct mr6_table	*mrt6;
 #endif
 };
 #endif
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 08e09042ad1c..9419fceeed41 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -51,6 +51,24 @@
 #include <linux/netfilter_ipv6.h>
 #include <net/ip6_checksum.h>
 
+struct mr6_table {
+#ifdef CONFIG_NET_NS
+	struct net		*net;
+#endif
+	struct sock		*mroute6_sk;
+	struct timer_list	ipmr_expire_timer;
+	struct list_head	mfc6_unres_queue;
+	struct list_head	mfc6_cache_array[MFC6_LINES];
+	struct mif_device	vif6_table[MAXMIFS];
+	int			maxvif;
+	atomic_t		cache_resolve_queue_len;
+	int			mroute_do_assert;
+	int			mroute_do_pim;
+#ifdef CONFIG_IPV6_PIMSM_V2
+	int			mroute_reg_vif_num;
+#endif
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -61,7 +79,7 @@ static DEFINE_RWLOCK(mrt_lock);
  *	Multicast router control variables
  */
 
-#define MIF_EXISTS(_net, _idx) ((_net)->ipv6.vif6_table[_idx].dev != NULL)
+#define MIF_EXISTS(_mrt, _idx) ((_mrt)->vif6_table[_idx].dev != NULL)
 
 /* Special spinlock for queue of unresolved entries */
 static DEFINE_SPINLOCK(mfc_unres_lock);
@@ -76,13 +94,13 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
-static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *cache);
-static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt,
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *cache);
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 			      mifi_t mifi, int assert);
-static int ip6mr_fill_mroute(struct net *net, struct sk_buff *skb,
+static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 			     struct mfc6_cache *c, struct rtmsg *rtm);
-static void mroute_clean_tables(struct net *net);
+static void mroute_clean_tables(struct mr6_table *mrt);
 
 
 #ifdef CONFIG_PROC_FS
@@ -97,11 +115,12 @@ struct ipmr_mfc_iter {
 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mfc6_cache *mfc;
 
 	read_lock(&mrt_lock);
 	for (it->ct = 0; it->ct < MFC6_LINES; it->ct++) {
-		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		it->cache = &mrt->mfc6_cache_array[it->ct];
 		list_for_each_entry(mfc, it->cache, list)
 			if (pos-- == 0)
 				return mfc;
@@ -109,7 +128,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 	read_unlock(&mrt_lock);
 
 	spin_lock_bh(&mfc_unres_lock);
-	it->cache = &net->ipv6.mfc6_unres_queue;
+	it->cache = &mrt->mfc6_unres_queue;
 	list_for_each_entry(mfc, it->cache, list)
 		if (pos-- == 0)
 			return mfc;
@@ -132,11 +151,13 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	for (iter->ct = 0; iter->ct < net->ipv6.maxvif; ++iter->ct) {
-		if (!MIF_EXISTS(net, iter->ct))
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
+	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
+		if (!MIF_EXISTS(mrt, iter->ct))
 			continue;
 		if (pos-- == 0)
-			return &net->ipv6.vif6_table[iter->ct];
+			return &mrt->vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -155,15 +176,16 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
 		return ip6mr_vif_seq_idx(net, iter, 0);
 
-	while (++iter->ct < net->ipv6.maxvif) {
-		if (!MIF_EXISTS(net, iter->ct))
+	while (++iter->ct < mrt->maxvif) {
+		if (!MIF_EXISTS(mrt, iter->ct))
 			continue;
-		return &net->ipv6.vif6_table[iter->ct];
+		return &mrt->vif6_table[iter->ct];
 	}
 	return NULL;
 }
@@ -177,6 +199,7 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -187,7 +210,7 @@ static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 
 		seq_printf(seq,
 			   "%2td %-10s %8ld %7ld  %8ld %7ld %05X\n",
-			   vif - net->ipv6.vif6_table,
+			   vif - mrt->vif6_table,
 			   name, vif->bytes_in, vif->pkt_in,
 			   vif->bytes_out, vif->pkt_out,
 			   vif->flags);
@@ -229,6 +252,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc6_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	++*pos;
 
@@ -238,13 +262,13 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (mfc->list.next != it->cache)
 		return list_entry(mfc->list.next, struct mfc6_cache, list);
 
-	if (it->cache == &net->ipv6.mfc6_unres_queue)
+	if (it->cache == &mrt->mfc6_unres_queue)
 		goto end_of_list;
 
-	BUG_ON(it->cache != &net->ipv6.mfc6_cache_array[it->ct]);
+	BUG_ON(it->cache != &mrt->mfc6_cache_array[it->ct]);
 
 	while (++it->ct < MFC6_LINES) {
-		it->cache = &net->ipv6.mfc6_cache_array[it->ct];
+		it->cache = &mrt->mfc6_cache_array[it->ct];
 		if (list_empty(it->cache))
 			continue;
 		return list_first_entry(it->cache, struct mfc6_cache, list);
@@ -252,7 +276,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 	/* exhausted cache_array, show unresolved */
 	read_unlock(&mrt_lock);
-	it->cache = &net->ipv6.mfc6_unres_queue;
+	it->cache = &mrt->mfc6_unres_queue;
 	it->ct = 0;
 
 	spin_lock_bh(&mfc_unres_lock);
@@ -270,10 +294,11 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
-	if (it->cache == &net->ipv6.mfc6_unres_queue)
+	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
-	else if (it->cache == net->ipv6.mfc6_cache_array)
+	else if (it->cache == mrt->mfc6_cache_array)
 		read_unlock(&mrt_lock);
 }
 
@@ -281,6 +306,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -295,14 +321,14 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
 			   mfc->mf6c_parent);
 
-		if (it->cache != &net->ipv6.mfc6_unres_queue) {
+		if (it->cache != &mrt->mfc6_unres_queue) {
 			seq_printf(seq, " %8lu %8lu %8lu",
 				   mfc->mfc_un.res.pkt,
 				   mfc->mfc_un.res.bytes,
 				   mfc->mfc_un.res.wrong_if);
 			for (n = mfc->mfc_un.res.minvif;
 			     n < mfc->mfc_un.res.maxvif; n++) {
-				if (MIF_EXISTS(net, n) &&
+				if (MIF_EXISTS(mrt, n) &&
 				    mfc->mfc_un.res.ttls[n] < 255)
 					seq_printf(seq,
 						   " %2d:%-3d",
@@ -349,7 +375,8 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
 	struct net *net = dev_net(skb->dev);
-	int reg_vif_num = net->ipv6.mroute_reg_vif_num;
+	struct mr6_table *mrt = net->ipv6.mrt6;
+	int reg_vif_num = mrt->mroute_reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -374,7 +401,7 @@ static int pim6_rcv(struct sk_buff *skb)
 
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
-		reg_dev = net->ipv6.vif6_table[reg_vif_num].dev;
+		reg_dev = mrt->vif6_table[reg_vif_num].dev;
 	if (reg_dev)
 		dev_hold(reg_dev);
 	read_unlock(&mrt_lock);
@@ -411,12 +438,12 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
 	dev->stats.tx_packets++;
-	ip6mr_cache_report(net, skb, net->ipv6.mroute_reg_vif_num,
-			   MRT6MSG_WHOLEPKT);
+	ip6mr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, MRT6MSG_WHOLEPKT);
 	read_unlock(&mrt_lock);
 	kfree_skb(skb);
 	return NETDEV_TX_OK;
@@ -472,15 +499,16 @@ failure:
  *	Delete a VIF entry
  */
 
-static int mif6_delete(struct net *net, int vifi, struct list_head *head)
+static int mif6_delete(struct mr6_table *mrt, int vifi, struct list_head *head)
 {
 	struct mif_device *v;
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
-	if (vifi < 0 || vifi >= net->ipv6.maxvif)
+
+	if (vifi < 0 || vifi >= mrt->maxvif)
 		return -EADDRNOTAVAIL;
 
-	v = &net->ipv6.vif6_table[vifi];
+	v = &mrt->vif6_table[vifi];
 
 	write_lock_bh(&mrt_lock);
 	dev = v->dev;
@@ -492,17 +520,17 @@ static int mif6_delete(struct net *net, int vifi, struct list_head *head)
 	}
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	if (vifi == net->ipv6.mroute_reg_vif_num)
-		net->ipv6.mroute_reg_vif_num = -1;
+	if (vifi == mrt->mroute_reg_vif_num)
+		mrt->mroute_reg_vif_num = -1;
 #endif
 
-	if (vifi + 1 == net->ipv6.maxvif) {
+	if (vifi + 1 == mrt->maxvif) {
 		int tmp;
 		for (tmp = vifi - 1; tmp >= 0; tmp--) {
-			if (MIF_EXISTS(net, tmp))
+			if (MIF_EXISTS(mrt, tmp))
 				break;
 		}
-		net->ipv6.maxvif = tmp + 1;
+		mrt->maxvif = tmp + 1;
 	}
 
 	write_unlock_bh(&mrt_lock);
@@ -529,11 +557,12 @@ static inline void ip6mr_cache_free(struct mfc6_cache *c)
    and reporting error to netlink readers.
  */
 
-static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
+static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c)
 {
+	struct net *net = read_pnet(&mrt->net);
 	struct sk_buff *skb;
 
-	atomic_dec(&net->ipv6.cache_resolve_queue_len);
+	atomic_dec(&mrt->cache_resolve_queue_len);
 
 	while((skb = skb_dequeue(&c->mfc_un.unres.unresolved)) != NULL) {
 		if (ipv6_hdr(skb)->version == 0) {
@@ -553,13 +582,13 @@ static void ip6mr_destroy_unres(struct net *net, struct mfc6_cache *c)
 
 /* Timer process for all the unresolved queue. */
 
-static void ipmr_do_expire_process(struct net *net)
+static void ipmr_do_expire_process(struct mr6_table *mrt)
 {
 	unsigned long now = jiffies;
 	unsigned long expires = 10 * HZ;
 	struct mfc6_cache *c, *next;
 
-	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 		if (time_after(c->mfc_un.unres.expires, now)) {
 			/* not yet... */
 			unsigned long interval = c->mfc_un.unres.expires - now;
@@ -569,31 +598,31 @@ static void ipmr_do_expire_process(struct net *net)
 		}
 
 		list_del(&c->list);
-		ip6mr_destroy_unres(net, c);
+		ip6mr_destroy_unres(mrt, c);
 	}
 
-	if (!list_empty(&net->ipv6.mfc6_unres_queue))
-		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + expires);
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);
 }
 
 static void ipmr_expire_process(unsigned long arg)
 {
-	struct net *net = (struct net *)arg;
+	struct mr6_table *mrt = (struct mr6_table *)arg;
 
 	if (!spin_trylock(&mfc_unres_lock)) {
-		mod_timer(&net->ipv6.ipmr_expire_timer, jiffies + 1);
+		mod_timer(&mrt->ipmr_expire_timer, jiffies + 1);
 		return;
 	}
 
-	if (!list_empty(&net->ipv6.mfc6_unres_queue))
-		ipmr_do_expire_process(net);
+	if (!list_empty(&mrt->mfc6_unres_queue))
+		ipmr_do_expire_process(mrt);
 
 	spin_unlock(&mfc_unres_lock);
 }
 
 /* Fill oifs list. It is called under write locked mrt_lock. */
 
-static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
+static void ip6mr_update_thresholds(struct mr6_table *mrt, struct mfc6_cache *cache,
 				    unsigned char *ttls)
 {
 	int vifi;
@@ -602,8 +631,8 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
 	cache->mfc_un.res.maxvif = 0;
 	memset(cache->mfc_un.res.ttls, 255, MAXMIFS);
 
-	for (vifi = 0; vifi < net->ipv6.maxvif; vifi++) {
-		if (MIF_EXISTS(net, vifi) &&
+	for (vifi = 0; vifi < mrt->maxvif; vifi++) {
+		if (MIF_EXISTS(mrt, vifi) &&
 		    ttls[vifi] && ttls[vifi] < 255) {
 			cache->mfc_un.res.ttls[vifi] = ttls[vifi];
 			if (cache->mfc_un.res.minvif > vifi)
@@ -614,16 +643,17 @@ static void ip6mr_update_thresholds(struct net *net, struct mfc6_cache *cache,
 	}
 }
 
-static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
+static int mif6_add(struct net *net, struct mr6_table *mrt,
+		    struct mif6ctl *vifc, int mrtsock)
 {
 	int vifi = vifc->mif6c_mifi;
-	struct mif_device *v = &net->ipv6.vif6_table[vifi];
+	struct mif_device *v = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct inet6_dev *in6_dev;
 	int err;
 
 	/* Is vif busy ? */
-	if (MIF_EXISTS(net, vifi))
+	if (MIF_EXISTS(mrt, vifi))
 		return -EADDRINUSE;
 
 	switch (vifc->mif6c_flags) {
@@ -633,7 +663,7 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 		 * Special Purpose VIF in PIM
 		 * All the packets will be sent to the daemon
 		 */
-		if (net->ipv6.mroute_reg_vif_num >= 0)
+		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
 		dev = ip6mr_reg_vif(net);
 		if (!dev)
@@ -685,22 +715,22 @@ static int mif6_add(struct net *net, struct mif6ctl *vifc, int mrtsock)
 	v->dev = dev;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	if (v->flags & MIFF_REGISTER)
-		net->ipv6.mroute_reg_vif_num = vifi;
+		mrt->mroute_reg_vif_num = vifi;
 #endif
-	if (vifi + 1 > net->ipv6.maxvif)
-		net->ipv6.maxvif = vifi + 1;
+	if (vifi + 1 > mrt->maxvif)
+		mrt->maxvif = vifi + 1;
 	write_unlock_bh(&mrt_lock);
 	return 0;
 }
 
-static struct mfc6_cache *ip6mr_cache_find(struct net *net,
+static struct mfc6_cache *ip6mr_cache_find(struct mr6_table *mrt,
 					   struct in6_addr *origin,
 					   struct in6_addr *mcastgrp)
 {
 	int line = MFC6_HASH(mcastgrp, origin);
 	struct mfc6_cache *c;
 
-	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, origin) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, mcastgrp))
 			return c;
@@ -734,8 +764,8 @@ static struct mfc6_cache *ip6mr_cache_alloc_unres(void)
  *	A cache entry has gone into a resolved state from queued
  */
 
-static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
-				struct mfc6_cache *c)
+static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt,
+				struct mfc6_cache *uc, struct mfc6_cache *c)
 {
 	struct sk_buff *skb;
 
@@ -748,7 +778,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
 			int err;
 			struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct ipv6hdr));
 
-			if (ip6mr_fill_mroute(net, skb, c, NLMSG_DATA(nlh)) > 0) {
+			if (ip6mr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
 				nlh->nlmsg_len = skb_tail_pointer(skb) - (u8 *)nlh;
 			} else {
 				nlh->nlmsg_type = NLMSG_ERROR;
@@ -758,7 +788,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
 			}
 			err = rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
 		} else
-			ip6_mr_forward(net, skb, c);
+			ip6_mr_forward(net, mrt, skb, c);
 	}
 }
 
@@ -769,8 +799,8 @@ static void ip6mr_cache_resolve(struct net *net, struct mfc6_cache *uc,
  *	Called under mrt_lock.
  */
 
-static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
-			      int assert)
+static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
+			      mifi_t mifi, int assert)
 {
 	struct sk_buff *skb;
 	struct mrt6msg *msg;
@@ -806,7 +836,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 		msg = (struct mrt6msg *)skb_transport_header(skb);
 		msg->im6_mbz = 0;
 		msg->im6_msgtype = MRT6MSG_WHOLEPKT;
-		msg->im6_mif = net->ipv6.mroute_reg_vif_num;
+		msg->im6_mif = mrt->mroute_reg_vif_num;
 		msg->im6_pad = 0;
 		ipv6_addr_copy(&msg->im6_src, &ipv6_hdr(pkt)->saddr);
 		ipv6_addr_copy(&msg->im6_dst, &ipv6_hdr(pkt)->daddr);
@@ -841,7 +871,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 	skb->ip_summed = CHECKSUM_UNNECESSARY;
 	}
 
-	if (net->ipv6.mroute6_sk == NULL) {
+	if (mrt->mroute6_sk == NULL) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -849,7 +879,7 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
 	/*
 	 *	Deliver to user space multicast routing algorithms
 	 */
-	ret = sock_queue_rcv_skb(net->ipv6.mroute6_sk, skb);
+	ret = sock_queue_rcv_skb(mrt->mroute6_sk, skb);
 	if (ret < 0) {
 		if (net_ratelimit())
 			printk(KERN_WARNING "mroute6: pending queue full, dropping entries.\n");
@@ -864,14 +894,14 @@ static int ip6mr_cache_report(struct net *net, struct sk_buff *pkt, mifi_t mifi,
  */
 
 static int
-ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
+ip6mr_cache_unresolved(struct mr6_table *mrt, mifi_t mifi, struct sk_buff *skb)
 {
 	bool found = false;
 	int err;
 	struct mfc6_cache *c;
 
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(c, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry(c, &mrt->mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&c->mf6c_mcastgrp, &ipv6_hdr(skb)->daddr) &&
 		    ipv6_addr_equal(&c->mf6c_origin, &ipv6_hdr(skb)->saddr)) {
 			found = true;
@@ -884,7 +914,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		 *	Create a new entry if allowable
 		 */
 
-		if (atomic_read(&net->ipv6.cache_resolve_queue_len) >= 10 ||
+		if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
 		    (c = ip6mr_cache_alloc_unres()) == NULL) {
 			spin_unlock_bh(&mfc_unres_lock);
 
@@ -902,7 +932,7 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 		/*
 		 *	Reflect first query at pim6sd
 		 */
-		err = ip6mr_cache_report(net, skb, mifi, MRT6MSG_NOCACHE);
+		err = ip6mr_cache_report(mrt, skb, mifi, MRT6MSG_NOCACHE);
 		if (err < 0) {
 			/* If the report failed throw the cache entry
 			   out - Brad Parker
@@ -914,10 +944,10 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
 			return err;
 		}
 
-		atomic_inc(&net->ipv6.cache_resolve_queue_len);
-		list_add(&c->list, &net->ipv6.mfc6_unres_queue);
+		atomic_inc(&mrt->cache_resolve_queue_len);
+		list_add(&c->list, &mrt->mfc6_unres_queue);
 
-		ipmr_do_expire_process(net);
+		ipmr_do_expire_process(mrt);
 	}
 
 	/*
@@ -939,14 +969,14 @@ ip6mr_cache_unresolved(struct net *net, mifi_t mifi, struct sk_buff *skb)
  *	MFC6 cache manipulation by user space
  */
 
-static int ip6mr_mfc_delete(struct net *net, struct mf6cctl *mfc)
+static int ip6mr_mfc_delete(struct mr6_table *mrt, struct mf6cctl *mfc)
 {
 	int line;
 	struct mfc6_cache *c, *next;
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			write_lock_bh(&mrt_lock);
@@ -965,6 +995,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mif_device *v;
 	int ct;
 	LIST_HEAD(list);
@@ -972,10 +1003,10 @@ static int ip6mr_device_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &net->ipv6.vif6_table[0];
-	for (ct = 0; ct < net->ipv6.maxvif; ct++, v++) {
+	v = &mrt->vif6_table[0];
+	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
 		if (v->dev == dev)
-			mif6_delete(net, ct, &list);
+			mif6_delete(mrt, ct, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -992,35 +1023,28 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
+	struct mr6_table *mrt;
 	unsigned int i;
 	int err = 0;
 
-	net->ipv6.vif6_table = kcalloc(MAXMIFS, sizeof(struct mif_device),
-				       GFP_KERNEL);
-	if (!net->ipv6.vif6_table) {
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL) {
 		err = -ENOMEM;
 		goto fail;
 	}
 
-	/* Forwarding cache */
-	net->ipv6.mfc6_cache_array = kcalloc(MFC6_LINES,
-					     sizeof(struct list_head),
-					     GFP_KERNEL);
-	if (!net->ipv6.mfc6_cache_array) {
-		err = -ENOMEM;
-		goto fail_mfc6_cache;
-	}
+	write_pnet(&mrt->net, net);
 
 	for (i = 0; i < MFC6_LINES; i++)
-		INIT_LIST_HEAD(&net->ipv6.mfc6_cache_array[i]);
+		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
 
-	INIT_LIST_HEAD(&net->ipv6.mfc6_unres_queue);
+	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
 
-	setup_timer(&net->ipv6.ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)net);
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
 
 #ifdef CONFIG_IPV6_PIMSM_V2
-	net->ipv6.mroute_reg_vif_num = -1;
+	mrt->mroute_reg_vif_num = -1;
 #endif
 
 #ifdef CONFIG_PROC_FS
@@ -1030,30 +1054,31 @@ static int __net_init ip6mr_net_init(struct net *net)
 	if (!proc_net_fops_create(net, "ip6_mr_cache", 0, &ip6mr_mfc_fops))
 		goto proc_cache_fail;
 #endif
+
+	net->ipv6.mrt6 = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip6_mr_vif");
 proc_vif_fail:
-	kfree(net->ipv6.mfc6_cache_array);
+	kfree(mrt);
 #endif
-fail_mfc6_cache:
-	kfree(net->ipv6.vif6_table);
 fail:
 	return err;
 }
 
 static void __net_exit ip6mr_net_exit(struct net *net)
 {
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
 #ifdef CONFIG_PROC_FS
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
-	del_timer(&net->ipv6.ipmr_expire_timer);
-	mroute_clean_tables(net);
-	kfree(net->ipv6.mfc6_cache_array);
-	kfree(net->ipv6.vif6_table);
+	del_timer(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
@@ -1105,7 +1130,8 @@ void ip6_mr_cleanup(void)
 	kmem_cache_destroy(mrt_cachep);
 }
 
-static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
+static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt,
+			 struct mf6cctl *mfc, int mrtsock)
 {
 	bool found = false;
 	int line;
@@ -1125,7 +1151,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 
 	line = MFC6_HASH(&mfc->mf6cc_mcastgrp.sin6_addr, &mfc->mf6cc_origin.sin6_addr);
 
-	list_for_each_entry(c, &net->ipv6.mfc6_cache_array[line], list) {
+	list_for_each_entry(c, &mrt->mfc6_cache_array[line], list) {
 		if (ipv6_addr_equal(&c->mf6c_origin, &mfc->mf6cc_origin.sin6_addr) &&
 		    ipv6_addr_equal(&c->mf6c_mcastgrp, &mfc->mf6cc_mcastgrp.sin6_addr)) {
 			found = true;
@@ -1136,7 +1162,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	if (found) {
 		write_lock_bh(&mrt_lock);
 		c->mf6c_parent = mfc->mf6cc_parent;
-		ip6mr_update_thresholds(net, c, ttls);
+		ip6mr_update_thresholds(mrt, c, ttls);
 		if (!mrtsock)
 			c->mfc_flags |= MFC_STATIC;
 		write_unlock_bh(&mrt_lock);
@@ -1153,12 +1179,12 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	c->mf6c_origin = mfc->mf6cc_origin.sin6_addr;
 	c->mf6c_mcastgrp = mfc->mf6cc_mcastgrp.sin6_addr;
 	c->mf6c_parent = mfc->mf6cc_parent;
-	ip6mr_update_thresholds(net, c, ttls);
+	ip6mr_update_thresholds(mrt, c, ttls);
 	if (!mrtsock)
 		c->mfc_flags |= MFC_STATIC;
 
 	write_lock_bh(&mrt_lock);
-	list_add(&c->list, &net->ipv6.mfc6_cache_array[line]);
+	list_add(&c->list, &mrt->mfc6_cache_array[line]);
 	write_unlock_bh(&mrt_lock);
 
 	/*
@@ -1167,21 +1193,21 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
 	 */
 	found = false;
 	spin_lock_bh(&mfc_unres_lock);
-	list_for_each_entry(uc, &net->ipv6.mfc6_unres_queue, list) {
+	list_for_each_entry(uc, &mrt->mfc6_unres_queue, list) {
 		if (ipv6_addr_equal(&uc->mf6c_origin, &c->mf6c_origin) &&
 		    ipv6_addr_equal(&uc->mf6c_mcastgrp, &c->mf6c_mcastgrp)) {
 			list_del(&uc->list);
-			atomic_dec(&net->ipv6.cache_resolve_queue_len);
+			atomic_dec(&mrt->cache_resolve_queue_len);
 			found = true;
 			break;
 		}
 	}
-	if (list_empty(&net->ipv6.mfc6_unres_queue))
-		del_timer(&net->ipv6.ipmr_expire_timer);
+	if (list_empty(&mrt->mfc6_unres_queue))
+		del_timer(&mrt->ipmr_expire_timer);
 	spin_unlock_bh(&mfc_unres_lock);
 
 	if (found) {
-		ip6mr_cache_resolve(net, uc, c);
+		ip6mr_cache_resolve(net, mrt, uc, c);
 		ip6mr_cache_free(uc);
 	}
 	return 0;
@@ -1191,7 +1217,7 @@ static int ip6mr_mfc_add(struct net *net, struct mf6cctl *mfc, int mrtsock)
  *	Close the multicast socket, and clear the vif tables etc
  */
 
-static void mroute_clean_tables(struct net *net)
+static void mroute_clean_tables(struct mr6_table *mrt)
 {
 	int i;
 	LIST_HEAD(list);
@@ -1200,9 +1226,9 @@ static void mroute_clean_tables(struct net *net)
 	/*
 	 *	Shut down all active vif entries
 	 */
-	for (i = 0; i < net->ipv6.maxvif; i++) {
-		if (!(net->ipv6.vif6_table[i].flags & VIFF_STATIC))
-			mif6_delete(net, i, &list);
+	for (i = 0; i < mrt->maxvif; i++) {
+		if (!(mrt->vif6_table[i].flags & VIFF_STATIC))
+			mif6_delete(mrt, i, &list);
 	}
 	unregister_netdevice_many(&list);
 
@@ -1210,7 +1236,7 @@ static void mroute_clean_tables(struct net *net)
 	 *	Wipe the cache
 	 */
 	for (i = 0; i < MFC6_LINES; i++) {
-		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_cache_array[i], list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc6_cache_array[i], list) {
 			if (c->mfc_flags & MFC_STATIC)
 				continue;
 			write_lock_bh(&mrt_lock);
@@ -1221,25 +1247,25 @@ static void mroute_clean_tables(struct net *net)
 		}
 	}
 
-	if (atomic_read(&net->ipv6.cache_resolve_queue_len) != 0) {
+	if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
 		spin_lock_bh(&mfc_unres_lock);
-		list_for_each_entry_safe(c, next, &net->ipv6.mfc6_unres_queue, list) {
+		list_for_each_entry_safe(c, next, &mrt->mfc6_unres_queue, list) {
 			list_del(&c->list);
-			ip6mr_destroy_unres(net, c);
+			ip6mr_destroy_unres(mrt, c);
 		}
 		spin_unlock_bh(&mfc_unres_lock);
 	}
 }
 
-static int ip6mr_sk_init(struct sock *sk)
+static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 {
 	int err = 0;
 	struct net *net = sock_net(sk);
 
 	rtnl_lock();
 	write_lock_bh(&mrt_lock);
-	if (likely(net->ipv6.mroute6_sk == NULL)) {
-		net->ipv6.mroute6_sk = sk;
+	if (likely(mrt->mroute6_sk == NULL)) {
+		mrt->mroute6_sk = sk;
 		net->ipv6.devconf_all->mc_forwarding++;
 	}
 	else
@@ -1255,15 +1281,16 @@ int ip6mr_sk_done(struct sock *sk)
 {
 	int err = 0;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	rtnl_lock();
-	if (sk == net->ipv6.mroute6_sk) {
+	if (sk == mrt->mroute6_sk) {
 		write_lock_bh(&mrt_lock);
-		net->ipv6.mroute6_sk = NULL;
+		mrt->mroute6_sk = NULL;
 		net->ipv6.devconf_all->mc_forwarding--;
 		write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(net);
+		mroute_clean_tables(mrt);
 	} else
 		err = -EACCES;
 	rtnl_unlock();
@@ -1271,6 +1298,13 @@ int ip6mr_sk_done(struct sock *sk)
 	return err;
 }
 
+struct sock *mroute6_socket(struct net *net)
+{
+	struct mr6_table *mrt = net->ipv6.mrt6;
+
+	return mrt->mroute6_sk;
+}
+
 /*
  *	Socket options and virtual interface manipulation. The whole
  *	virtual interface system is a complete heap, but unfortunately
@@ -1285,9 +1319,10 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct mf6cctl mfc;
 	mifi_t mifi;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (optname != MRT6_INIT) {
-		if (sk != net->ipv6.mroute6_sk && !capable(CAP_NET_ADMIN))
+		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
 			return -EACCES;
 	}
 
@@ -1299,7 +1334,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (optlen < sizeof(int))
 			return -EINVAL;
 
-		return ip6mr_sk_init(sk);
+		return ip6mr_sk_init(mrt, sk);
 
 	case MRT6_DONE:
 		return ip6mr_sk_done(sk);
@@ -1312,7 +1347,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (vif.mif6c_mifi >= MAXMIFS)
 			return -ENFILE;
 		rtnl_lock();
-		ret = mif6_add(net, &vif, sk == net->ipv6.mroute6_sk);
+		ret = mif6_add(net, mrt, &vif, sk == mrt->mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1322,7 +1357,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		if (copy_from_user(&mifi, optval, sizeof(mifi_t)))
 			return -EFAULT;
 		rtnl_lock();
-		ret = mif6_delete(net, mifi, NULL);
+		ret = mif6_delete(mrt, mifi, NULL);
 		rtnl_unlock();
 		return ret;
 
@@ -1338,10 +1373,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 			return -EFAULT;
 		rtnl_lock();
 		if (optname == MRT6_DEL_MFC)
-			ret = ip6mr_mfc_delete(net, &mfc);
+			ret = ip6mr_mfc_delete(mrt, &mfc);
 		else
-			ret = ip6mr_mfc_add(net, &mfc,
-					    sk == net->ipv6.mroute6_sk);
+			ret = ip6mr_mfc_add(net, mrt, &mfc, sk == mrt->mroute6_sk);
 		rtnl_unlock();
 		return ret;
 
@@ -1353,7 +1387,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		int v;
 		if (get_user(v, (int __user *)optval))
 			return -EFAULT;
-		net->ipv6.mroute_do_assert = !!v;
+		mrt->mroute_do_assert = !!v;
 		return 0;
 	}
 
@@ -1366,9 +1400,9 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		v = !!v;
 		rtnl_lock();
 		ret = 0;
-		if (v != net->ipv6.mroute_do_pim) {
-			net->ipv6.mroute_do_pim = v;
-			net->ipv6.mroute_do_assert = v;
+		if (v != mrt->mroute_do_pim) {
+			mrt->mroute_do_pim = v;
+			mrt->mroute_do_assert = v;
 		}
 		rtnl_unlock();
 		return ret;
@@ -1394,6 +1428,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	switch (optname) {
 	case MRT6_VERSION:
@@ -1401,11 +1436,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 		break;
 #ifdef CONFIG_IPV6_PIMSM_V2
 	case MRT6_PIM:
-		val = net->ipv6.mroute_do_pim;
+		val = mrt->mroute_do_pim;
 		break;
 #endif
 	case MRT6_ASSERT:
-		val = net->ipv6.mroute_do_assert;
+		val = mrt->mroute_do_assert;
 		break;
 	default:
 		return -ENOPROTOOPT;
@@ -1436,16 +1471,17 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct mif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	switch (cmd) {
 	case SIOCGETMIFCNT_IN6:
 		if (copy_from_user(&vr, arg, sizeof(vr)))
 			return -EFAULT;
-		if (vr.mifi >= net->ipv6.maxvif)
+		if (vr.mifi >= mrt->maxvif)
 			return -EINVAL;
 		read_lock(&mrt_lock);
-		vif = &net->ipv6.vif6_table[vr.mifi];
-		if (MIF_EXISTS(net, vr.mifi)) {
+		vif = &mrt->vif6_table[vr.mifi];
+		if (MIF_EXISTS(mrt, vr.mifi)) {
 			vr.icount = vif->pkt_in;
 			vr.ocount = vif->pkt_out;
 			vr.ibytes = vif->bytes_in;
@@ -1463,7 +1499,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 			return -EFAULT;
 
 		read_lock(&mrt_lock);
-		c = ip6mr_cache_find(net, &sr.src.sin6_addr, &sr.grp.sin6_addr);
+		c = ip6mr_cache_find(mrt, &sr.src.sin6_addr, &sr.grp.sin6_addr);
 		if (c) {
 			sr.pktcnt = c->mfc_un.res.pkt;
 			sr.bytecnt = c->mfc_un.res.bytes;
@@ -1493,11 +1529,11 @@ static inline int ip6mr_forward2_finish(struct sk_buff *skb)
  *	Processing handlers for ip6mr_forward
  */
 
-static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *c, int vifi)
+static int ip6mr_forward2(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *c, int vifi)
 {
 	struct ipv6hdr *ipv6h;
-	struct mif_device *vif = &net->ipv6.vif6_table[vifi];
+	struct mif_device *vif = &mrt->vif6_table[vifi];
 	struct net_device *dev;
 	struct dst_entry *dst;
 	struct flowi fl;
@@ -1511,7 +1547,7 @@ static int ip6mr_forward2(struct net *net, struct sk_buff *skb,
 		vif->bytes_out += skb->len;
 		vif->dev->stats.tx_bytes += skb->len;
 		vif->dev->stats.tx_packets++;
-		ip6mr_cache_report(net, skb, vifi, MRT6MSG_WHOLEPKT);
+		ip6mr_cache_report(mrt, skb, vifi, MRT6MSG_WHOLEPKT);
 		goto out_free;
 	}
 #endif
@@ -1566,19 +1602,19 @@ out_free:
 	return 0;
 }
 
-static int ip6mr_find_vif(struct net_device *dev)
+static int ip6mr_find_vif(struct mr6_table *mrt, struct net_device *dev)
 {
-	struct net *net = dev_net(dev);
 	int ct;
-	for (ct = net->ipv6.maxvif - 1; ct >= 0; ct--) {
-		if (net->ipv6.vif6_table[ct].dev == dev)
+
+	for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
+		if (mrt->vif6_table[ct].dev == dev)
 			break;
 	}
 	return ct;
 }
 
-static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
-			  struct mfc6_cache *cache)
+static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
+			  struct sk_buff *skb, struct mfc6_cache *cache)
 {
 	int psend = -1;
 	int vif, ct;
@@ -1590,30 +1626,30 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
 	/*
 	 * Wrong interface: drop packet and (maybe) send PIM assert.
 	 */
-	if (net->ipv6.vif6_table[vif].dev != skb->dev) {
+	if (mrt->vif6_table[vif].dev != skb->dev) {
 		int true_vifi;
 
 		cache->mfc_un.res.wrong_if++;
-		true_vifi = ip6mr_find_vif(skb->dev);
+		true_vifi = ip6mr_find_vif(mrt, skb->dev);
 
-		if (true_vifi >= 0 && net->ipv6.mroute_do_assert &&
+		if (true_vifi >= 0 && mrt->mroute_do_assert &&
 		    /* pimsm uses asserts, when switching from RPT to SPT,
 		       so that we cannot check that packet arrived on an oif.
 		       It is bad, but otherwise we would need to move pretty
 		       large chunk of pimd to kernel. Ough... --ANK
 		     */
-		    (net->ipv6.mroute_do_pim ||
+		    (mrt->mroute_do_pim ||
 		     cache->mfc_un.res.ttls[true_vifi] < 255) &&
 		    time_after(jiffies,
 			       cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
 			cache->mfc_un.res.last_assert = jiffies;
-			ip6mr_cache_report(net, skb, true_vifi, MRT6MSG_WRONGMIF);
+			ip6mr_cache_report(mrt, skb, true_vifi, MRT6MSG_WRONGMIF);
 		}
 		goto dont_forward;
 	}
 
-	net->ipv6.vif6_table[vif].pkt_in++;
-	net->ipv6.vif6_table[vif].bytes_in += skb->len;
+	mrt->vif6_table[vif].pkt_in++;
+	mrt->vif6_table[vif].bytes_in += skb->len;
 
 	/*
 	 *	Forward the frame
@@ -1623,13 +1659,13 @@ static int ip6_mr_forward(struct net *net, struct sk_buff *skb,
 			if (psend != -1) {
 				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 				if (skb2)
-					ip6mr_forward2(net, skb2, cache, psend);
+					ip6mr_forward2(net, mrt, skb2, cache, psend);
 			}
 			psend = ct;
 		}
 	}
 	if (psend != -1) {
-		ip6mr_forward2(net, skb, cache, psend);
+		ip6mr_forward2(net, mrt, skb, cache, psend);
 		return 0;
 	}
 
@@ -1647,9 +1683,10 @@ int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
 	struct net *net = dev_net(skb->dev);
+	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(net,
+	cache = ip6mr_cache_find(mrt,
 				 &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr);
 
 	/*
@@ -1658,9 +1695,9 @@ int ip6_mr_input(struct sk_buff *skb)
 	if (cache == NULL) {
 		int vif;
 
-		vif = ip6mr_find_vif(skb->dev);
+		vif = ip6mr_find_vif(mrt, skb->dev);
 		if (vif >= 0) {
-			int err = ip6mr_cache_unresolved(net, vif, skb);
+			int err = ip6mr_cache_unresolved(mrt, vif, skb);
 			read_unlock(&mrt_lock);
 
 			return err;
@@ -1670,7 +1707,7 @@ int ip6_mr_input(struct sk_buff *skb)
 		return -ENODEV;
 	}
 
-	ip6_mr_forward(net, skb, cache);
+	ip6_mr_forward(net, mrt, skb, cache);
 
 	read_unlock(&mrt_lock);
 
@@ -1679,8 +1716,8 @@ int ip6_mr_input(struct sk_buff *skb)
 
 
 static int
-ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
-		  struct rtmsg *rtm)
+ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
+		  struct mfc6_cache *c, struct rtmsg *rtm)
 {
 	int ct;
 	struct rtnexthop *nhp;
@@ -1691,19 +1728,19 @@ ip6mr_fill_mroute(struct net *net, struct sk_buff *skb, struct mfc6_cache *c,
 	if (c->mf6c_parent > MAXMIFS)
 		return -ENOENT;
 
-	if (MIF_EXISTS(net, c->mf6c_parent))
-		RTA_PUT(skb, RTA_IIF, 4, &net->ipv6.vif6_table[c->mf6c_parent].dev->ifindex);
+	if (MIF_EXISTS(mrt, c->mf6c_parent))
+		RTA_PUT(skb, RTA_IIF, 4, &mrt->vif6_table[c->mf6c_parent].dev->ifindex);
 
 	mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));
 
 	for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
-		if (MIF_EXISTS(net, ct) && c->mfc_un.res.ttls[ct] < 255) {
+		if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
 			if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
 				goto rtattr_failure;
 			nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
 			nhp->rtnh_flags = 0;
 			nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
-			nhp->rtnh_ifindex = net->ipv6.vif6_table[ct].dev->ifindex;
+			nhp->rtnh_ifindex = mrt->vif6_table[ct].dev->ifindex;
 			nhp->rtnh_len = sizeof(*nhp);
 		}
 	}
@@ -1721,11 +1758,12 @@ int ip6mr_get_route(struct net *net,
 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
+	struct mr6_table *mrt = net->ipv6.mrt6;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
 	read_lock(&mrt_lock);
-	cache = ip6mr_cache_find(net, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
+	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
 	if (!cache) {
 		struct sk_buff *skb2;
@@ -1739,7 +1777,7 @@ int ip6mr_get_route(struct net *net,
 		}
 
 		dev = skb->dev;
-		if (dev == NULL || (vif = ip6mr_find_vif(dev)) < 0) {
+		if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) {
 			read_unlock(&mrt_lock);
 			return -ENODEV;
 		}
@@ -1768,7 +1806,7 @@ int ip6mr_get_route(struct net *net,
 		ipv6_addr_copy(&iph->saddr, &rt->rt6i_src.addr);
 		ipv6_addr_copy(&iph->daddr, &rt->rt6i_dst.addr);
 
-		err = ip6mr_cache_unresolved(net, vif, skb2);
+		err = ip6mr_cache_unresolved(mrt, vif, skb2);
 		read_unlock(&mrt_lock);
 
 		return err;
@@ -1777,7 +1815,7 @@ int ip6mr_get_route(struct net *net,
 	if (!nowait && (rtm->rtm_flags&RTM_F_NOTIFY))
 		cache->mfc_flags |= MFC_NOTIFY;
 
-	err = ip6mr_fill_mroute(net, skb, cache, rtm);
+	err = ip6mr_fill_mroute(mrt, skb, cache, rtm);
 	read_unlock(&mrt_lock);
 	return err;
 }
-- 
cgit v1.2.3


From d1db275dd3f6e4182c4c4b4a1ac6287925d60569 Mon Sep 17 00:00:00 2001
From: Patrick McHardy <kaber@trash.net>
Date: Tue, 11 May 2010 14:40:55 +0200
Subject: ipv6: ip6mr: support multiple tables

This patch adds support for multiple independent multicast routing instances,
named "tables".

Userspace multicast routing daemons can bind to a specific table instance by
issuing a setsockopt call using a new option MRT6_TABLE. The table number is
stored in the raw socket data and affects all following ip6mr setsockopt(),
getsockopt() and ioctl() calls. By default, a single table (RT6_TABLE_DFLT)
is created with a default routing rule pointing to it. Newly created pim6reg
devices have the table number appended ("pim6regX"), with the exception of
devices created in the default table, which are named just "pim6reg" for
compatibility reasons.

Packets are directed to a specific table instance using routing rules,
similar to how regular routing rules work. Currently iif, oif and mark
are supported as keys, source and destination addresses could be supported
additionally.

Example usage:

- bind pimd/xorp/... to a specific table:

uint32_t table = 123;
setsockopt(fd, SOL_IPV6, MRT6_TABLE, &table, sizeof(table));

- create routing rules directing packets to the new table:

# ip -6 mrule add iif eth0 lookup 123
# ip -6 mrule add oif eth0 lookup 123

Signed-off-by: Patrick McHardy <kaber@trash.net>
---
 include/linux/ipv6.h      |   1 +
 include/linux/mroute6.h   |  15 +-
 include/linux/rtnetlink.h |   3 +-
 include/net/netns/ipv6.h  |   5 +
 net/ipv6/Kconfig          |  14 ++
 net/ipv6/ip6_output.c     |   2 +-
 net/ipv6/ip6mr.c          | 428 +++++++++++++++++++++++++++++++++++++++-------
 7 files changed, 396 insertions(+), 72 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 0e269038bb38..99e1ab7e3eec 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -383,6 +383,7 @@ struct raw6_sock {
 	__u32			checksum;	/* perform checksum */
 	__u32			offset;		/* checksum offset  */
 	struct icmp6_filter	filter;
+	__u32			ip6mr_table;
 	/* ipv6_pinfo has to be the last member of raw6_sock, see inet6_sk_generic */
 	struct ipv6_pinfo	inet6;
 };
diff --git a/include/linux/mroute6.h b/include/linux/mroute6.h
index 0370dd4f2389..6091ab77f388 100644
--- a/include/linux/mroute6.h
+++ b/include/linux/mroute6.h
@@ -24,7 +24,8 @@
 #define MRT6_DEL_MFC	(MRT6_BASE+5)	/* Delete a multicast forwarding entry	*/
 #define MRT6_VERSION	(MRT6_BASE+6)	/* Get the kernel multicast version	*/
 #define MRT6_ASSERT	(MRT6_BASE+7)	/* Activate PIM assert mode		*/
-#define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code	*/
+#define MRT6_PIM	(MRT6_BASE+8)	/* enable PIM code			*/
+#define MRT6_TABLE	(MRT6_BASE+9)	/* Specify mroute table ID		*/
 
 #define SIOCGETMIFCNT_IN6	SIOCPROTOPRIVATE	/* IP protocol privates */
 #define SIOCGETSGCNT_IN6	(SIOCPROTOPRIVATE+1)
@@ -229,11 +230,17 @@ extern int ip6mr_get_route(struct net *net, struct sk_buff *skb,
 			   struct rtmsg *rtm, int nowait);
 
 #ifdef CONFIG_IPV6_MROUTE
-extern struct sock *mroute6_socket(struct net *net);
+extern struct sock *mroute6_socket(struct net *net, struct sk_buff *skb);
 extern int ip6mr_sk_done(struct sock *sk);
 #else
-static inline struct sock *mroute6_socket(struct net *net) { return NULL; }
-static inline int ip6mr_sk_done(struct sock *sk) { return 0; }
+static inline struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
+{
+	return NULL;
+}
+static inline int ip6mr_sk_done(struct sock *sk)
+{
+	return 0;
+}
 #endif
 #endif
 
diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
index 5a42c36cb6aa..fbc8cb0d48c3 100644
--- a/include/linux/rtnetlink.h
+++ b/include/linux/rtnetlink.h
@@ -11,7 +11,8 @@
  * families, values above 128 may be used arbitrarily.
  */
 #define RTNL_FAMILY_IPMR		128
-#define RTNL_FAMILY_MAX			128
+#define RTNL_FAMILY_IP6MR		129
+#define RTNL_FAMILY_MAX			129
 
 /****
  *		Routing/neighbour discovery messages.
diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h
index 4e2780e6d8bc..81abfcb2eb4e 100644
--- a/include/net/netns/ipv6.h
+++ b/include/net/netns/ipv6.h
@@ -59,7 +59,12 @@ struct netns_ipv6 {
 	struct sock             *tcp_sk;
 	struct sock             *igmp_sk;
 #ifdef CONFIG_IPV6_MROUTE
+#ifndef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
 	struct mr6_table	*mrt6;
+#else
+	struct list_head	mr6_tables;
+	struct fib_rules_ops	*mr6_rules_ops;
+#endif
 #endif
 };
 #endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index a578096152ab..36d7437ac054 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -229,6 +229,20 @@ config IPV6_MROUTE
 	  Experimental support for IPv6 multicast forwarding.
 	  If unsure, say N.
 
+config IPV6_MROUTE_MULTIPLE_TABLES
+	bool "IPv6: multicast policy routing"
+	depends on IPV6_MROUTE
+	select FIB_RULES
+	help
+	  Normally, a multicast router runs a userspace daemon and decides
+	  what to do with a multicast packet based on the source and
+	  destination addresses. If you say Y here, the multicast router
+	  will also be able to take interfaces and packet marks into
+	  account and run multiple instances of userspace daemons
+	  simultaneously, each one handling a single table.
+
+	  If unsure, say N.
+
 config IPV6_PIMSM_V2
 	bool "IPv6: PIM-SM version 2 support (EXPERIMENTAL)"
 	depends on IPV6_MROUTE
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 5173acaeb501..cd963f64e27c 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -108,7 +108,7 @@ static int ip6_finish_output2(struct sk_buff *skb)
 		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
 
 		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
-		    ((mroute6_socket(dev_net(dev)) &&
+		    ((mroute6_socket(dev_net(dev), skb) &&
 		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
 		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 					 &ipv6_hdr(skb)->saddr))) {
diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c
index 9419fceeed41..c2920a1a6db3 100644
--- a/net/ipv6/ip6mr.c
+++ b/net/ipv6/ip6mr.c
@@ -42,6 +42,7 @@
 #include <linux/if_arp.h>
 #include <net/checksum.h>
 #include <net/netlink.h>
+#include <net/fib_rules.h>
 
 #include <net/ipv6.h>
 #include <net/ip6_route.h>
@@ -52,9 +53,11 @@
 #include <net/ip6_checksum.h>
 
 struct mr6_table {
+	struct list_head	list;
 #ifdef CONFIG_NET_NS
 	struct net		*net;
 #endif
+	u32			id;
 	struct sock		*mroute6_sk;
 	struct timer_list	ipmr_expire_timer;
 	struct list_head	mfc6_unres_queue;
@@ -69,6 +72,14 @@ struct mr6_table {
 #endif
 };
 
+struct ip6mr_rule {
+	struct fib_rule		common;
+};
+
+struct ip6mr_result {
+	struct mr6_table	*mrt;
+};
+
 /* Big lock, protecting vif table, mrt cache and mroute socket state.
    Note that the changes are semaphored via rtnl_lock.
  */
@@ -94,6 +105,9 @@ static DEFINE_SPINLOCK(mfc_unres_lock);
 
 static struct kmem_cache *mrt_cachep __read_mostly;
 
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id);
+static void ip6mr_free_table(struct mr6_table *mrt);
+
 static int ip6_mr_forward(struct net *net, struct mr6_table *mrt,
 			  struct sk_buff *skb, struct mfc6_cache *cache);
 static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
@@ -101,12 +115,220 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt,
 static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb,
 			     struct mfc6_cache *c, struct rtmsg *rtm);
 static void mroute_clean_tables(struct mr6_table *mrt);
+static void ipmr_expire_process(unsigned long arg);
+
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+#define ip6mr_for_each_table(mrt, net) \
+	list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
 
+	ip6mr_for_each_table(mrt, net) {
+		if (mrt->id == id)
+			return mrt;
+	}
+	return NULL;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+			    struct mr6_table **mrt)
+{
+	struct ip6mr_result res;
+	struct fib_lookup_arg arg = { .result = &res, };
+	int err;
+
+	err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flp, 0, &arg);
+	if (err < 0)
+		return err;
+	*mrt = res.mrt;
+	return 0;
+}
+
+static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp,
+			     int flags, struct fib_lookup_arg *arg)
+{
+	struct ip6mr_result *res = arg->result;
+	struct mr6_table *mrt;
+
+	switch (rule->action) {
+	case FR_ACT_TO_TBL:
+		break;
+	case FR_ACT_UNREACHABLE:
+		return -ENETUNREACH;
+	case FR_ACT_PROHIBIT:
+		return -EACCES;
+	case FR_ACT_BLACKHOLE:
+	default:
+		return -EINVAL;
+	}
+
+	mrt = ip6mr_get_table(rule->fr_net, rule->table);
+	if (mrt == NULL)
+		return -EAGAIN;
+	res->mrt = mrt;
+	return 0;
+}
+
+static int ip6mr_rule_match(struct fib_rule *rule, struct flowi *flp, int flags)
+{
+	return 1;
+}
+
+static const struct nla_policy ip6mr_rule_policy[FRA_MAX + 1] = {
+	FRA_GENERIC_POLICY,
+};
+
+static int ip6mr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
+				struct fib_rule_hdr *frh, struct nlattr **tb)
+{
+	return 0;
+}
+
+static int ip6mr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
+			      struct nlattr **tb)
+{
+	return 1;
+}
+
+static int ip6mr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
+			   struct fib_rule_hdr *frh)
+{
+	frh->dst_len = 0;
+	frh->src_len = 0;
+	frh->tos     = 0;
+	return 0;
+}
+
+static const struct fib_rules_ops __net_initdata ip6mr_rules_ops_template = {
+	.family		= RTNL_FAMILY_IP6MR,
+	.rule_size	= sizeof(struct ip6mr_rule),
+	.addr_size	= sizeof(struct in6_addr),
+	.action		= ip6mr_rule_action,
+	.match		= ip6mr_rule_match,
+	.configure	= ip6mr_rule_configure,
+	.compare	= ip6mr_rule_compare,
+	.default_pref	= fib_default_rule_pref,
+	.fill		= ip6mr_rule_fill,
+	.nlgroup	= RTNLGRP_IPV6_RULE,
+	.policy		= ip6mr_rule_policy,
+	.owner		= THIS_MODULE,
+};
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	struct fib_rules_ops *ops;
+	struct mr6_table *mrt;
+	int err;
+
+	ops = fib_rules_register(&ip6mr_rules_ops_template, net);
+	if (IS_ERR(ops))
+		return PTR_ERR(ops);
+
+	INIT_LIST_HEAD(&net->ipv6.mr6_tables);
+
+	mrt = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL) {
+		err = -ENOMEM;
+		goto err1;
+	}
+
+	err = fib_default_rule_add(ops, 0x7fff, RT6_TABLE_DFLT, 0);
+	if (err < 0)
+		goto err2;
+
+	net->ipv6.mr6_rules_ops = ops;
+	return 0;
+
+err2:
+	kfree(mrt);
+err1:
+	fib_rules_unregister(ops);
+	return err;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	struct mr6_table *mrt, *next;
+
+	list_for_each_entry_safe(mrt, next, &net->ipv6.mr6_tables, list)
+		ip6mr_free_table(mrt);
+	fib_rules_unregister(net->ipv6.mr6_rules_ops);
+}
+#else
+#define ip6mr_for_each_table(mrt, net) \
+	for (mrt = net->ipv6.mrt6; mrt; mrt = NULL)
+
+static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)
+{
+	return net->ipv6.mrt6;
+}
+
+static int ip6mr_fib_lookup(struct net *net, struct flowi *flp,
+			    struct mr6_table **mrt)
+{
+	*mrt = net->ipv6.mrt6;
+	return 0;
+}
+
+static int __net_init ip6mr_rules_init(struct net *net)
+{
+	net->ipv6.mrt6 = ip6mr_new_table(net, RT6_TABLE_DFLT);
+	return net->ipv6.mrt6 ? 0 : -ENOMEM;
+}
+
+static void __net_exit ip6mr_rules_exit(struct net *net)
+{
+	ip6mr_free_table(net->ipv6.mrt6);
+}
+#endif
+
+static struct mr6_table *ip6mr_new_table(struct net *net, u32 id)
+{
+	struct mr6_table *mrt;
+	unsigned int i;
+
+	mrt = ip6mr_get_table(net, id);
+	if (mrt != NULL)
+		return mrt;
+
+	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
+	if (mrt == NULL)
+		return NULL;
+	mrt->id = id;
+	write_pnet(&mrt->net, net);
+
+	/* Forwarding cache */
+	for (i = 0; i < MFC6_LINES; i++)
+		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
+
+	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
+
+	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
+		    (unsigned long)mrt);
+
+#ifdef CONFIG_IPV6_PIMSM_V2
+	mrt->mroute_reg_vif_num = -1;
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	list_add_tail_rcu(&mrt->list, &net->ipv6.mr6_tables);
+#endif
+	return mrt;
+}
+
+static void ip6mr_free_table(struct mr6_table *mrt)
+{
+	del_timer(&mrt->ipmr_expire_timer);
+	mroute_clean_tables(mrt);
+	kfree(mrt);
+}
 
 #ifdef CONFIG_PROC_FS
 
 struct ipmr_mfc_iter {
 	struct seq_net_private p;
+	struct mr6_table *mrt;
 	struct list_head *cache;
 	int ct;
 };
@@ -115,7 +337,7 @@ struct ipmr_mfc_iter {
 static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 					   struct ipmr_mfc_iter *it, loff_t pos)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 	struct mfc6_cache *mfc;
 
 	read_lock(&mrt_lock);
@@ -144,6 +366,7 @@ static struct mfc6_cache *ipmr_mfc_seq_idx(struct net *net,
 
 struct ipmr_vif_iter {
 	struct seq_net_private p;
+	struct mr6_table *mrt;
 	int ct;
 };
 
@@ -151,7 +374,7 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 					    struct ipmr_vif_iter *iter,
 					    loff_t pos)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = iter->mrt;
 
 	for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
 		if (!MIF_EXISTS(mrt, iter->ct))
@@ -165,7 +388,15 @@ static struct mif_device *ip6mr_vif_seq_idx(struct net *net,
 static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos)
 	__acquires(mrt_lock)
 {
+	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
+
+	iter->mrt = mrt;
 
 	read_lock(&mrt_lock);
 	return *pos ? ip6mr_vif_seq_idx(net, seq->private, *pos - 1)
@@ -176,7 +407,7 @@ static void *ip6mr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
 	struct ipmr_vif_iter *iter = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = iter->mrt;
 
 	++*pos;
 	if (v == SEQ_START_TOKEN)
@@ -198,8 +429,8 @@ static void ip6mr_vif_seq_stop(struct seq_file *seq, void *v)
 
 static int ip6mr_vif_seq_show(struct seq_file *seq, void *v)
 {
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct ipmr_vif_iter *iter = seq->private;
+	struct mr6_table *mrt = iter->mrt;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -241,8 +472,15 @@ static const struct file_operations ip6mr_vif_fops = {
 
 static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
 {
+	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return ERR_PTR(-ENOENT);
 
+	it->mrt = mrt;
 	return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
 		: SEQ_START_TOKEN;
 }
@@ -252,7 +490,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	struct mfc6_cache *mfc = v;
 	struct ipmr_mfc_iter *it = seq->private;
 	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 
 	++*pos;
 
@@ -293,8 +531,7 @@ static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 {
 	struct ipmr_mfc_iter *it = seq->private;
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt = it->mrt;
 
 	if (it->cache == &mrt->mfc6_unres_queue)
 		spin_unlock_bh(&mfc_unres_lock);
@@ -305,8 +542,6 @@ static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
 static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 {
 	int n;
-	struct net *net = seq_file_net(seq);
-	struct mr6_table *mrt = net->ipv6.mrt6;
 
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -316,6 +551,7 @@ static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
 	} else {
 		const struct mfc6_cache *mfc = v;
 		const struct ipmr_mfc_iter *it = seq->private;
+		struct mr6_table *mrt = it->mrt;
 
 		seq_printf(seq, "%pI6 %pI6 %-3hd",
 			   &mfc->mf6c_mcastgrp, &mfc->mf6c_origin,
@@ -375,8 +611,12 @@ static int pim6_rcv(struct sk_buff *skb)
 	struct ipv6hdr   *encap;
 	struct net_device  *reg_dev = NULL;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
-	int reg_vif_num = mrt->mroute_reg_vif_num;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+	int reg_vif_num;
 
 	if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(*encap)))
 		goto drop;
@@ -399,6 +639,10 @@ static int pim6_rcv(struct sk_buff *skb)
 	    ntohs(encap->payload_len) + sizeof(*pim) > skb->len)
 		goto drop;
 
+	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+		goto drop;
+	reg_vif_num = mrt->mroute_reg_vif_num;
+
 	read_lock(&mrt_lock);
 	if (reg_vif_num >= 0)
 		reg_dev = mrt->vif6_table[reg_vif_num].dev;
@@ -438,7 +682,17 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,
 				      struct net_device *dev)
 {
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.oif		= dev->ifindex,
+		.iif		= skb->skb_iif,
+		.mark		= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	dev->stats.tx_bytes += skb->len;
@@ -463,11 +717,17 @@ static void reg_vif_setup(struct net_device *dev)
 	dev->features		|= NETIF_F_NETNS_LOCAL;
 }
 
-static struct net_device *ip6mr_reg_vif(struct net *net)
+static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt)
 {
 	struct net_device *dev;
+	char name[IFNAMSIZ];
+
+	if (mrt->id == RT6_TABLE_DFLT)
+		sprintf(name, "pim6reg");
+	else
+		sprintf(name, "pim6reg%u", mrt->id);
 
-	dev = alloc_netdev(0, "pim6reg", reg_vif_setup);
+	dev = alloc_netdev(0, name, reg_vif_setup);
 	if (dev == NULL)
 		return NULL;
 
@@ -665,7 +925,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt,
 		 */
 		if (mrt->mroute_reg_vif_num >= 0)
 			return -EADDRINUSE;
-		dev = ip6mr_reg_vif(net);
+		dev = ip6mr_reg_vif(net, mrt);
 		if (!dev)
 			return -ENOBUFS;
 		err = dev_set_allmulti(dev, 1);
@@ -995,7 +1255,7 @@ static int ip6mr_device_event(struct notifier_block *this,
 {
 	struct net_device *dev = ptr;
 	struct net *net = dev_net(dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 	struct mif_device *v;
 	int ct;
 	LIST_HEAD(list);
@@ -1003,10 +1263,12 @@ static int ip6mr_device_event(struct notifier_block *this,
 	if (event != NETDEV_UNREGISTER)
 		return NOTIFY_DONE;
 
-	v = &mrt->vif6_table[0];
-	for (ct = 0; ct < mrt->maxvif; ct++, v++) {
-		if (v->dev == dev)
-			mif6_delete(mrt, ct, &list);
+	ip6mr_for_each_table(mrt, net) {
+		v = &mrt->vif6_table[0];
+		for (ct = 0; ct < mrt->maxvif; ct++, v++) {
+			if (v->dev == dev)
+				mif6_delete(mrt, ct, &list);
+		}
 	}
 	unregister_netdevice_many(&list);
 
@@ -1023,29 +1285,11 @@ static struct notifier_block ip6_mr_notifier = {
 
 static int __net_init ip6mr_net_init(struct net *net)
 {
-	struct mr6_table *mrt;
-	unsigned int i;
-	int err = 0;
+	int err;
 
-	mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);
-	if (mrt == NULL) {
-		err = -ENOMEM;
+	err = ip6mr_rules_init(net);
+	if (err < 0)
 		goto fail;
-	}
-
-	write_pnet(&mrt->net, net);
-
-	for (i = 0; i < MFC6_LINES; i++)
-		INIT_LIST_HEAD(&mrt->mfc6_cache_array[i]);
-
-	INIT_LIST_HEAD(&mrt->mfc6_unres_queue);
-
-	setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
-		    (unsigned long)mrt);
-
-#ifdef CONFIG_IPV6_PIMSM_V2
-	mrt->mroute_reg_vif_num = -1;
-#endif
 
 #ifdef CONFIG_PROC_FS
 	err = -ENOMEM;
@@ -1055,14 +1299,13 @@ static int __net_init ip6mr_net_init(struct net *net)
 		goto proc_cache_fail;
 #endif
 
-	net->ipv6.mrt6 = mrt;
 	return 0;
 
 #ifdef CONFIG_PROC_FS
 proc_cache_fail:
 	proc_net_remove(net, "ip6_mr_vif");
 proc_vif_fail:
-	kfree(mrt);
+	ip6mr_rules_exit(net);
 #endif
 fail:
 	return err;
@@ -1070,15 +1313,11 @@ fail:
 
 static void __net_exit ip6mr_net_exit(struct net *net)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
-
 #ifdef CONFIG_PROC_FS
 	proc_net_remove(net, "ip6_mr_cache");
 	proc_net_remove(net, "ip6_mr_vif");
 #endif
-	del_timer(&mrt->ipmr_expire_timer);
-	mroute_clean_tables(mrt);
-	kfree(mrt);
+	ip6mr_rules_exit(net);
 }
 
 static struct pernet_operations ip6mr_net_ops = {
@@ -1279,28 +1518,39 @@ static int ip6mr_sk_init(struct mr6_table *mrt, struct sock *sk)
 
 int ip6mr_sk_done(struct sock *sk)
 {
-	int err = 0;
+	int err = -EACCES;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 
 	rtnl_lock();
-	if (sk == mrt->mroute6_sk) {
-		write_lock_bh(&mrt_lock);
-		mrt->mroute6_sk = NULL;
-		net->ipv6.devconf_all->mc_forwarding--;
-		write_unlock_bh(&mrt_lock);
+	ip6mr_for_each_table(mrt, net) {
+		if (sk == mrt->mroute6_sk) {
+			write_lock_bh(&mrt_lock);
+			mrt->mroute6_sk = NULL;
+			net->ipv6.devconf_all->mc_forwarding--;
+			write_unlock_bh(&mrt_lock);
 
-		mroute_clean_tables(mrt);
-	} else
-		err = -EACCES;
+			mroute_clean_tables(mrt);
+			err = 0;
+			break;
+		}
+	}
 	rtnl_unlock();
 
 	return err;
 }
 
-struct sock *mroute6_socket(struct net *net)
+struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)
 {
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->skb_iif,
+		.oif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+
+	if (ip6mr_fib_lookup(net, &fl, &mrt) < 0)
+		return NULL;
 
 	return mrt->mroute6_sk;
 }
@@ -1319,7 +1569,11 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 	struct mf6cctl mfc;
 	mifi_t mifi;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	if (optname != MRT6_INIT) {
 		if (sk != mrt->mroute6_sk && !capable(CAP_NET_ADMIN))
@@ -1408,6 +1662,27 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns
 		return ret;
 	}
 
+#endif
+#ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES
+	case MRT6_TABLE:
+	{
+		u32 v;
+
+		if (optlen != sizeof(u32))
+			return -EINVAL;
+		if (get_user(v, (u32 __user *)optval))
+			return -EFAULT;
+		if (sk == mrt->mroute6_sk)
+			return -EBUSY;
+
+		rtnl_lock();
+		ret = 0;
+		if (!ip6mr_new_table(net, v))
+			ret = -ENOMEM;
+		raw6_sk(sk)->ip6mr_table = v;
+		rtnl_unlock();
+		return ret;
+	}
 #endif
 	/*
 	 *	Spurious command, or MRT6_VERSION which you cannot
@@ -1428,7 +1703,11 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval,
 	int olr;
 	int val;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (optname) {
 	case MRT6_VERSION:
@@ -1471,7 +1750,11 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg)
 	struct mif_device *vif;
 	struct mfc6_cache *c;
 	struct net *net = sock_net(sk);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+
+	mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
 
 	switch (cmd) {
 	case SIOCGETMIFCNT_IN6:
@@ -1683,7 +1966,16 @@ int ip6_mr_input(struct sk_buff *skb)
 {
 	struct mfc6_cache *cache;
 	struct net *net = dev_net(skb->dev);
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
+	struct flowi fl = {
+		.iif	= skb->dev->ifindex,
+		.mark	= skb->mark,
+	};
+	int err;
+
+	err = ip6mr_fib_lookup(net, &fl, &mrt);
+	if (err < 0)
+		return err;
 
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt,
@@ -1758,10 +2050,14 @@ int ip6mr_get_route(struct net *net,
 		    struct sk_buff *skb, struct rtmsg *rtm, int nowait)
 {
 	int err;
-	struct mr6_table *mrt = net->ipv6.mrt6;
+	struct mr6_table *mrt;
 	struct mfc6_cache *cache;
 	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
 
+	mrt = ip6mr_get_table(net, RT6_TABLE_DFLT);
+	if (mrt == NULL)
+		return -ENOENT;
+
 	read_lock(&mrt_lock);
 	cache = ip6mr_cache_find(mrt, &rt->rt6i_src.addr, &rt->rt6i_dst.addr);
 
-- 
cgit v1.2.3


From a93d2f1744206827ccf416e2cdc5018aa503314e Mon Sep 17 00:00:00 2001
From: Changli Gao <xiaosuo@gmail.com>
Date: Fri, 7 May 2010 14:33:26 +0800
Subject: sched, wait: Use wrapper functions

epoll should not touch flags in wait_queue_t. This patch introduces a new
function, __add_wait_queue_exclusive(), for users that use a wait queue as a
LIFO queue.

__add_wait_queue_tail_exclusive() is also introduced, replacing
add_wait_queue_exclusive_locked(). remove_wait_queue_locked() is removed, as
it is a duplicate of __remove_wait_queue(), is disliked by users, and has
fewer users.

Signed-off-by: Changli Gao <xiaosuo@gmail.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Paul Menage <menage@google.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Davide Libenzi <davidel@xmailserver.org>
Cc: <containers@lists.linux-foundation.org>
LKML-Reference: <1273214006-2979-1-git-send-email-xiaosuo@gmail.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 fs/eventpoll.c       |  3 +--
 include/linux/wait.h | 35 +++++++++++++++--------------------
 kernel/cgroup.c      |  2 +-
 kernel/sched.c       |  3 +--
 4 files changed, 18 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index bd056a5b4efc..3817149919cb 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1140,8 +1140,7 @@ retry:
 		 * ep_poll_callback() when events will become available.
 		 */
 		init_waitqueue_entry(&wait, current);
-		wait.flags |= WQ_FLAG_EXCLUSIVE;
-		__add_wait_queue(&ep->wq, &wait);
+		__add_wait_queue_exclusive(&ep->wq, &wait);
 
 		for (;;) {
 			/*
diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b77d5e..76d96d035ea0 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -127,12 +127,26 @@ static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new)
 /*
  * Used for wake-one threads:
  */
+static inline void __add_wait_queue_exclusive(wait_queue_head_t *q,
+					      wait_queue_t *wait)
+{
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue(q, wait);
+}
+
 static inline void __add_wait_queue_tail(wait_queue_head_t *head,
-						wait_queue_t *new)
+					 wait_queue_t *new)
 {
 	list_add_tail(&new->task_list, &head->task_list);
 }
 
+static inline void __add_wait_queue_tail_exclusive(wait_queue_head_t *q,
+					      wait_queue_t *wait)
+{
+	wait->flags |= WQ_FLAG_EXCLUSIVE;
+	__add_wait_queue_tail(q, wait);
+}
+
 static inline void __remove_wait_queue(wait_queue_head_t *head,
 							wait_queue_t *old)
 {
@@ -403,25 +417,6 @@ do {									\
 	__ret;								\
 })
 
-/*
- * Must be called with the spinlock in the wait_queue_head_t held.
- */
-static inline void add_wait_queue_exclusive_locked(wait_queue_head_t *q,
-						   wait_queue_t * wait)
-{
-	wait->flags |= WQ_FLAG_EXCLUSIVE;
-	__add_wait_queue_tail(q,  wait);
-}
-
-/*
- * Must be called with the spinlock in the wait_queue_head_t held.
- */
-static inline void remove_wait_queue_locked(wait_queue_head_t *q,
-					    wait_queue_t * wait)
-{
-	__remove_wait_queue(q,  wait);
-}
-
 /*
  * These are the old interfaces to sleep waiting for an event.
  * They are racy.  DO NOT use them, use the wait_event* interfaces above.
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e2769e13980c..4a07d057a265 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -3010,7 +3010,7 @@ static int cgroup_event_wake(wait_queue_t *wait, unsigned mode,
 	unsigned long flags = (unsigned long)key;
 
 	if (flags & POLLHUP) {
-		remove_wait_queue_locked(event->wqh, &event->wait);
+		__remove_wait_queue(event->wqh, &event->wait);
 		spin_lock(&cgrp->event_list_lock);
 		list_del(&event->list);
 		spin_unlock(&cgrp->event_list_lock);
diff --git a/kernel/sched.c b/kernel/sched.c
index 39aa9c7e22c0..b531d7934083 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3983,8 +3983,7 @@ do_wait_for_common(struct completion *x, long timeout, int state)
 	if (!x->done) {
 		DECLARE_WAITQUEUE(wait, current);
 
-		wait.flags |= WQ_FLAG_EXCLUSIVE;
-		__add_wait_queue_tail(&x->wait, &wait);
+		__add_wait_queue_tail_exclusive(&x->wait, &wait);
 		do {
 			if (signal_pending_state(state, current)) {
 				timeout = -ERESTARTSYS;
-- 
cgit v1.2.3


From de74c16996287250f0d947663127f80c6beebd3c Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 5 Jul 2009 18:26:37 +0200
Subject: netfilter: xtables: combine struct xt_match_param and xt_target_param

Besides match/target, the two structures carried almost the same data.
They can be combined, as extensions are evaluated serially, and as a
result the callers end up a little smaller.

  text  data  bss  filename
-15318   740  104  net/ipv4/netfilter/ip_tables.o
+15286   740  104  net/ipv4/netfilter/ip_tables.o
-15333   540  152  net/ipv6/netfilter/ip6_tables.o
+15269   540  152  net/ipv6/netfilter/ip6_tables.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h | 42 +++++++++++++++-----------------------
 net/bridge/netfilter/ebtables.c    | 30 +++++++++++++--------------
 net/ipv4/netfilter/arp_tables.c    | 16 +++++++--------
 net/ipv4/netfilter/ip_tables.c     | 32 ++++++++++++++---------------
 net/ipv6/netfilter/ip6_tables.c    | 27 ++++++++++++------------
 5 files changed, 68 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index eeb4884c30be..5efa3757d08c 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -182,13 +182,17 @@ struct xt_counters_info {
 
 #include <linux/netdevice.h>
 
+#define xt_match_param xt_action_param
+#define xt_target_param xt_action_param
 /**
- * struct xt_match_param - parameters for match extensions' match functions
+ * struct xt_action_param - parameters for matches/targets
  *
+ * @match:	the match extension
+ * @target:	the target extension
+ * @matchinfo:	per-match data
+ * @targinfo:	per-target data
  * @in:		input netdevice
  * @out:	output netdevice
- * @match:	struct xt_match through which this function was invoked
- * @matchinfo:	per-match data
  * @fragoff:	packet is a fragment, this is the data offset
  * @thoff:	position of transport header relative to skb->data
  * @hook:	hook number given packet came from
@@ -197,10 +201,15 @@ struct xt_counters_info {
  * @hotdrop:	drop packet if we had inspection problems
  * Network namespace obtainable using dev_net(in/out)
  */
-struct xt_match_param {
+struct xt_action_param {
+	union {
+		const struct xt_match *match;
+		const struct xt_target *target;
+	};
+	union {
+		const void *matchinfo, *targinfo;
+	};
 	const struct net_device *in, *out;
-	const struct xt_match *match;
-	const void *matchinfo;
 	int fragoff;
 	unsigned int thoff;
 	unsigned int hooknum;
@@ -242,23 +251,6 @@ struct xt_mtdtor_param {
 	u_int8_t family;
 };
 
-/**
- * struct xt_target_param - parameters for target extensions' target functions
- *
- * @hooknum:	hook through which this target was invoked
- * @target:	struct xt_target through which this function was invoked
- * @targinfo:	per-target data
- *
- * Other fields see above.
- */
-struct xt_target_param {
-	const struct net_device *in, *out;
-	const struct xt_target *target;
-	const void *targinfo;
-	unsigned int hooknum;
-	u_int8_t family;
-};
-
 /**
  * struct xt_tgchk_param - parameters for target extensions'
  * checkentry functions
@@ -298,7 +290,7 @@ struct xt_match {
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
 	bool (*match)(const struct sk_buff *skb,
-		      const struct xt_match_param *);
+		      const struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type. */
 	int (*checkentry)(const struct xt_mtchk_param *);
@@ -335,7 +327,7 @@ struct xt_target {
 	   must now handle non-linear skbs, using skb_copy_bits and
 	   skb_ip_make_writable. */
 	unsigned int (*target)(struct sk_buff *skb,
-			       const struct xt_target_param *);
+			       const struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type:
            hook_mask is a bitmask of hooks from which it can be
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 1d8c2c0a7470..290d43541d46 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -86,7 +86,7 @@ static struct xt_target ebt_standard_target = {
 
 static inline int
 ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
-	       struct xt_target_param *par)
+	       struct xt_action_param *par)
 {
 	par->target   = w->u.watcher;
 	par->targinfo = w->data;
@@ -95,8 +95,9 @@ ebt_do_watcher(const struct ebt_entry_watcher *w, struct sk_buff *skb,
 	return 0;
 }
 
-static inline int ebt_do_match (struct ebt_entry_match *m,
-   const struct sk_buff *skb, struct xt_match_param *par)
+static inline int
+ebt_do_match(struct ebt_entry_match *m, const struct sk_buff *skb,
+	     struct xt_action_param *par)
 {
 	par->match     = m->u.match;
 	par->matchinfo = m->data;
@@ -186,14 +187,13 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	const char *base;
 	const struct ebt_table_info *private;
 	bool hotdrop = false;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
-	mtpar.family  = tgpar.family = NFPROTO_BRIDGE;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.hotdrop = &hotdrop;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.family  = NFPROTO_BRIDGE;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.hotdrop = &hotdrop;
+	acpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
 	private = table->private;
@@ -214,7 +214,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (ebt_basic_match(point, eth_hdr(skb), in, out))
 			goto letscontinue;
 
-		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &mtpar) != 0)
+		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
 			goto letscontinue;
 		if (hotdrop) {
 			read_unlock_bh(&table->lock);
@@ -227,7 +227,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 
 		/* these should only watch: not modify, nor tell us
 		   what to do with the packet */
-		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &tgpar);
+		EBT_WATCHER_ITERATE(point, ebt_do_watcher, skb, &acpar);
 
 		t = (struct ebt_entry_target *)
 		   (((char *)point) + point->target_offset);
@@ -235,9 +235,9 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 		if (!t->u.target->target)
 			verdict = ((struct ebt_standard_target *)t)->verdict;
 		else {
-			tgpar.target   = t->u.target;
-			tgpar.targinfo = t->data;
-			verdict = t->u.target->target(skb, &tgpar);
+			acpar.target   = t->u.target;
+			acpar.targinfo = t->data;
+			verdict = t->u.target->target(skb, &acpar);
 		}
 		if (verdict == EBT_ACCEPT) {
 			read_unlock_bh(&table->lock);
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 07a699059390..73d924b88f89 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -265,7 +265,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	const char *indev, *outdev;
 	void *table_base;
 	const struct xt_table_info *private;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
 		return NF_DROP;
@@ -280,10 +280,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
-	tgpar.in      = in;
-	tgpar.out     = out;
-	tgpar.hooknum = hook;
-	tgpar.family  = NFPROTO_ARP;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.hooknum = hook;
+	acpar.family  = NFPROTO_ARP;
 
 	arp = arp_hdr(skb);
 	do {
@@ -333,9 +333,9 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 		/* Targets which reenter must return
 		 * abs. verdicts
 		 */
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
+		verdict = t->u.kernel.target->target(skb, &acpar);
 
 		/* Target might have changed stuff. */
 		arp = arp_hdr(skb);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 265cedf88660..e1a53c2da032 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -316,8 +316,7 @@ ipt_do_table(struct sk_buff *skb,
 	struct ipt_entry *e, **jumpstack;
 	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	/* Initialization */
 	ip = ip_hdr(skb);
@@ -329,13 +328,13 @@ ipt_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
-	mtpar.thoff   = ip_hdrlen(skb);
-	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.family  = tgpar.family = NFPROTO_IPV4;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+	acpar.thoff   = ip_hdrlen(skb);
+	acpar.hotdrop = &hotdrop;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.family  = NFPROTO_IPV4;
+	acpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 	xt_info_rdlock_bh();
@@ -358,16 +357,16 @@ ipt_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip_packet_match(ip, indev, outdev,
-		    &e->ip, mtpar.fragoff)) {
+		    &e->ip, acpar.fragoff)) {
  no_match:
 			e = ipt_next_entry(e);
 			continue;
 		}
 
 		xt_ematch_foreach(ematch, e) {
-			mtpar.match     = ematch->u.kernel.match;
-			mtpar.matchinfo = ematch->data;
-			if (!mtpar.match->match(skb, &mtpar))
+			acpar.match     = ematch->u.kernel.match;
+			acpar.matchinfo = ematch->data;
+			if (!acpar.match->match(skb, &acpar))
 				goto no_match;
 		}
 
@@ -422,11 +421,10 @@ ipt_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
 
-
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		verdict = t->u.kernel.target->target(skb, &acpar);
 		/* Target might have changed stuff. */
 		ip = ip_hdr(skb);
 		if (verdict == IPT_CONTINUE)
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index f8ac4a0b5899..076308c1acd7 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -345,8 +345,7 @@ ip6t_do_table(struct sk_buff *skb,
 	struct ip6t_entry *e, **jumpstack;
 	unsigned int *stackptr, origptr, cpu;
 	const struct xt_table_info *private;
-	struct xt_match_param mtpar;
-	struct xt_target_param tgpar;
+	struct xt_action_param acpar;
 
 	/* Initialization */
 	indev = in ? in->name : nulldevname;
@@ -357,11 +356,11 @@ ip6t_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	mtpar.hotdrop = &hotdrop;
-	mtpar.in      = tgpar.in  = in;
-	mtpar.out     = tgpar.out = out;
-	mtpar.family  = tgpar.family = NFPROTO_IPV6;
-	mtpar.hooknum = tgpar.hooknum = hook;
+	acpar.hotdrop = &hotdrop;
+	acpar.in      = in;
+	acpar.out     = out;
+	acpar.family  = NFPROTO_IPV6;
+	acpar.hooknum = hook;
 
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
 
@@ -381,16 +380,16 @@ ip6t_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
-		    &mtpar.thoff, &mtpar.fragoff, &hotdrop)) {
+		    &acpar.thoff, &acpar.fragoff, &hotdrop)) {
  no_match:
 			e = ip6t_next_entry(e);
 			continue;
 		}
 
 		xt_ematch_foreach(ematch, e) {
-			mtpar.match     = ematch->u.kernel.match;
-			mtpar.matchinfo = ematch->data;
-			if (!mtpar.match->match(skb, &mtpar))
+			acpar.match     = ematch->u.kernel.match;
+			acpar.matchinfo = ematch->data;
+			if (!acpar.match->match(skb, &acpar))
 				goto no_match;
 		}
 
@@ -439,10 +438,10 @@ ip6t_do_table(struct sk_buff *skb,
 			continue;
 		}
 
-		tgpar.target   = t->u.kernel.target;
-		tgpar.targinfo = t->data;
+		acpar.target   = t->u.kernel.target;
+		acpar.targinfo = t->data;
 
-		verdict = t->u.kernel.target->target(skb, &tgpar);
+		verdict = t->u.kernel.target->target(skb, &acpar);
 		if (verdict == IP6T_CONTINUE)
 			e = ip6t_next_entry(e);
 		else
-- 
cgit v1.2.3


From 4b560b447df83368df44bd3712c0c39b1d79ba04 Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Sun, 5 Jul 2009 19:43:26 +0200
Subject: netfilter: xtables: substitute temporary defines by final name

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 --
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_arpreply.c  | 2 +-
 net/bridge/netfilter/ebt_dnat.c      | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_log.c       | 2 +-
 net/bridge/netfilter/ebt_mark.c      | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_nflog.c     | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_redirect.c  | 2 +-
 net/bridge/netfilter/ebt_snat.c      | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_ulog.c      | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/arp_tables.c      | 2 +-
 net/ipv4/netfilter/arpt_mangle.c     | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 4 ++--
 net/ipv4/netfilter/ipt_CLUSTERIP.c   | 2 +-
 net/ipv4/netfilter/ipt_ECN.c         | 2 +-
 net/ipv4/netfilter/ipt_LOG.c         | 2 +-
 net/ipv4/netfilter/ipt_MASQUERADE.c  | 2 +-
 net/ipv4/netfilter/ipt_NETMAP.c      | 2 +-
 net/ipv4/netfilter/ipt_REDIRECT.c    | 2 +-
 net/ipv4/netfilter/ipt_REJECT.c      | 2 +-
 net/ipv4/netfilter/ipt_ULOG.c        | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 4 ++--
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 3 ++-
 net/ipv4/netfilter/nf_nat_rule.c     | 4 ++--
 net/ipv6/netfilter/ip6_tables.c      | 4 ++--
 net/ipv6/netfilter/ip6t_LOG.c        | 2 +-
 net/ipv6/netfilter/ip6t_REJECT.c     | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 3 ++-
 net/ipv6/netfilter/ip6t_eui64.c      | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 3 ++-
 net/ipv6/netfilter/ip6t_rt.c         | 3 ++-
 net/netfilter/xt_CLASSIFY.c          | 2 +-
 net/netfilter/xt_CONNSECMARK.c       | 2 +-
 net/netfilter/xt_CT.c                | 2 +-
 net/netfilter/xt_DSCP.c              | 8 ++++----
 net/netfilter/xt_HL.c                | 4 ++--
 net/netfilter/xt_LED.c               | 2 +-
 net/netfilter/xt_NFLOG.c             | 2 +-
 net/netfilter/xt_NFQUEUE.c           | 4 ++--
 net/netfilter/xt_NOTRACK.c           | 2 +-
 net/netfilter/xt_RATEEST.c           | 2 +-
 net/netfilter/xt_SECMARK.c           | 2 +-
 net/netfilter/xt_TCPMSS.c            | 4 ++--
 net/netfilter/xt_TCPOPTSTRIP.c       | 4 ++--
 net/netfilter/xt_TEE.c               | 4 ++--
 net/netfilter/xt_TPROXY.c            | 2 +-
 net/netfilter/xt_TRACE.c             | 2 +-
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_comment.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 4 ++--
 net/netfilter/xt_conntrack.c         | 6 +++---
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 7 ++++---
 net/netfilter/xt_esp.c               | 3 ++-
 net/netfilter/xt_hashlimit.c         | 2 +-
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_hl.c                | 6 ++++--
 net/netfilter/xt_iprange.c           | 4 ++--
 net/netfilter/xt_length.c            | 4 ++--
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_mac.c               | 3 ++-
 net/netfilter/xt_mark.c              | 4 ++--
 net/netfilter/xt_multiport.c         | 2 +-
 net/netfilter/xt_osf.c               | 4 ++--
 net/netfilter/xt_owner.c             | 2 +-
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_pkttype.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_realm.c             | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_socket.c            | 6 +++---
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpmss.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 6 ++++--
 net/netfilter/xt_time.c              | 2 +-
 net/netfilter/xt_u32.c               | 3 ++-
 net/sched/act_ipt.c                  | 2 +-
 97 files changed, 133 insertions(+), 123 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 5efa3757d08c..6d1f9a638056 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -182,8 +182,6 @@ struct xt_counters_info {
 
 #include <linux/netdevice.h>
 
-#define xt_match_param xt_action_param
-#define xt_target_param xt_action_param
 /**
  * struct xt_action_param - parameters for matches/targets
  *
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index f7de8dbc3422..4b0df00c82ec 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index 20068e03fa81..c04f9461f734 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 952150cd5e7d..6203f4dea2e2 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c
index 4581adb27583..070cf134a22f 100644
--- a/net/bridge/netfilter/ebt_arpreply.c
+++ b/net/bridge/netfilter/ebt_arpreply.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arpreply.h>
 
 static unsigned int
-ebt_arpreply_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_arpreply_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_arpreply_info *info = par->targinfo;
 	const __be32 *siptr, *diptr;
diff --git a/net/bridge/netfilter/ebt_dnat.c b/net/bridge/netfilter/ebt_dnat.c
index 59d5b7c8a557..c59f7bfae6e2 100644
--- a/net/bridge/netfilter/ebt_dnat.c
+++ b/net/bridge/netfilter/ebt_dnat.c
@@ -15,7 +15,7 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_dnat_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_dnat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index a1c76c7e5219..a0cde7442b55 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,7 +25,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index 33f8413f05ad..c451dc2ff822 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -28,7 +28,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 4b0e2e53fa57..760923f08067 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c
index c46024156539..6e5a8bb9b940 100644
--- a/net/bridge/netfilter/ebt_log.c
+++ b/net/bridge/netfilter/ebt_log.c
@@ -171,7 +171,7 @@ out:
 }
 
 static unsigned int
-ebt_log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_log_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/bridge/netfilter/ebt_mark.c b/net/bridge/netfilter/ebt_mark.c
index 126e536ff8f4..66697cbd0a8b 100644
--- a/net/bridge/netfilter/ebt_mark.c
+++ b/net/bridge/netfilter/ebt_mark.c
@@ -19,7 +19,7 @@
 #include <linux/netfilter_bridge/ebt_mark_t.h>
 
 static unsigned int
-ebt_mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_mark_t_info *info = par->targinfo;
 	int action = info->target & -16;
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index e4366c0a1a43..de901634fa31 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_nflog.c b/net/bridge/netfilter/ebt_nflog.c
index 22e2ad5f23e8..5be68bbcc341 100644
--- a/net/bridge/netfilter/ebt_nflog.c
+++ b/net/bridge/netfilter/ebt_nflog.c
@@ -20,7 +20,7 @@
 #include <net/netfilter/nf_log.h>
 
 static unsigned int
-ebt_nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index f34bcc3197bd..7ba67c4b677a 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
 static bool
-ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_redirect.c b/net/bridge/netfilter/ebt_redirect.c
index a6044a6f2383..9e19166ba453 100644
--- a/net/bridge/netfilter/ebt_redirect.c
+++ b/net/bridge/netfilter/ebt_redirect.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_redirect.h>
 
 static unsigned int
-ebt_redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_redirect_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_snat.c b/net/bridge/netfilter/ebt_snat.c
index 79caca34ae2b..f8f0bd1a1d51 100644
--- a/net/bridge/netfilter/ebt_snat.c
+++ b/net/bridge/netfilter/ebt_snat.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter_bridge/ebt_nat.h>
 
 static unsigned int
-ebt_snat_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_snat_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_nat_info *info = par->targinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 02f28fdda393..3cd6070a1137 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 }
 
 static bool
-ebt_stp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c
index 852f37c27659..ae3c7cef1484 100644
--- a/net/bridge/netfilter/ebt_ulog.c
+++ b/net/bridge/netfilter/ebt_ulog.c
@@ -243,7 +243,7 @@ static void ebt_log_packet(u_int8_t pf, unsigned int hooknum,
 }
 
 static unsigned int
-ebt_ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ebt_ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	ebt_ulog_packet(par->hooknum, skb, par->in, par->out,
 	                par->targinfo, NULL);
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index bf8ae5c7a0c5..e4ab62533c74 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -36,7 +36,7 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_vlan_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ebt_vlan_info *info = par->matchinfo;
 	const struct vlan_hdr *fp;
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 73d924b88f89..9e7d089f168a 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -224,7 +224,7 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
 }
 
 static unsigned int
-arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
+arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		printk("arp_tables: error: '%s'\n",
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index 4b51a027f307..e1be7dd1171b 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
 MODULE_DESCRIPTION("arptables arp payload mangle target");
 
 static unsigned int
-target(struct sk_buff *skb, const struct xt_target_param *par)
+target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct arpt_mangle *mangle = par->targinfo;
 	const struct arphdr *arp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index e1a53c2da032..3ab1b81e799b 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -165,7 +165,7 @@ ip_checkentry(const struct ipt_ip *ip)
 }
 
 static unsigned int
-ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		pr_info("error: `%s'\n", (const char *)par->targinfo);
@@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
+icmp_match(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct icmphdr *ic;
 	struct icmphdr _icmph;
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 8815d458de46..f91c94b9a790 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -282,7 +282,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
  ***********************************************************************/
 
 static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
+clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
 	struct nf_conn *ct;
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 563049f31aef..4bf3dc49ad1e 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
 }
 
 static unsigned int
-ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_ECN_info *einfo = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
index 3bd35f370817..5234f4f3499a 100644
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ b/net/ipv4/netfilter/ipt_LOG.c
@@ -425,7 +425,7 @@ ipt_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg(struct sk_buff *skb, const struct xt_target_param *par)
+log_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 02b1bc477998..d2ed9dc74ebc 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -44,7 +44,7 @@ static int masquerade_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	struct nf_conn_nat *nat;
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
index 708c7f8f7eea..f43867d1697f 100644
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ b/net/ipv4/netfilter/ipt_NETMAP.c
@@ -38,7 +38,7 @@ static int netmap_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
+netmap_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
index 3cf101916523..18a0656505a0 100644
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ b/net/ipv4/netfilter/ipt_REDIRECT.c
@@ -42,7 +42,7 @@ static int redirect_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
+redirect_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index a86135a28058..f5f4a888e4ec 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -136,7 +136,7 @@ static inline void send_unreach(struct sk_buff *skb_in, int code)
 }
 
 static unsigned int
-reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ipt_reject_info *reject = par->targinfo;
 
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 8f60749e87a3..446e0f467a17 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -276,7 +276,7 @@ alloc_failure:
 }
 
 static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
 	                par->targinfo, NULL);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index e4b8f2bf8aaa..24ec548515e4 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 }
 
 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info *info = par->matchinfo;
@@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 9f9810204892..48a8293bc1d1 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 32e24100d8d1..744d13ee296e 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,7 +67,8 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ecn_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
index b48a0fc3d9ed..98ed78281aee 100644
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ b/net/ipv4/netfilter/nf_nat_rule.c
@@ -39,7 +39,7 @@ static const struct xt_table nat_table = {
 
 /* Source NAT */
 static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
@@ -58,7 +58,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct nf_conn *ct;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 076308c1acd7..c3bc999a8bb6 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -197,7 +197,7 @@ ip6_checkentry(const struct ip6t_ip6 *ipv6)
 }
 
 static unsigned int
-ip6t_error(struct sk_buff *skb, const struct xt_target_param *par)
+ip6t_error(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	if (net_ratelimit())
 		pr_info("error: `%s'\n", (const char *)par->targinfo);
@@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp6_match(const struct sk_buff *skb, const struct xt_match_param *par)
+icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct icmp6hdr *ic;
 	struct icmp6hdr _icmph;
diff --git a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
index 1f47a525f484..af4ee11f2066 100644
--- a/net/ipv6/netfilter/ip6t_LOG.c
+++ b/net/ipv6/netfilter/ip6t_LOG.c
@@ -436,7 +436,7 @@ ip6t_log_packet(u_int8_t pf,
 }
 
 static unsigned int
-log_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+log_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_log_info *loginfo = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c
index af1d6494ac39..47d227713758 100644
--- a/net/ipv6/netfilter/ip6t_REJECT.c
+++ b/net/ipv6/netfilter/ip6t_REJECT.c
@@ -175,7 +175,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code,
 }
 
 static unsigned int
-reject_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_reject_info *reject = par->targinfo;
 	struct net *net = dev_net((par->in != NULL) ? par->in : par->out);
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 1580693c86c1..4fe71898381d 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index ca287f6d2bce..2fd2be1795e0 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,7 +20,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-eui64_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	unsigned char eui64[8];
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index a5daf0ffb4ec..8401aa82ea0b 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-frag_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index e424e7c8f824..d19d5cf47a38 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst");
 static struct xt_match hbh_mt6_reg[] __read_mostly;
 
 static bool
-hbh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 46fbabb493fa..8e88bb8311de 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index c9f443e0138f..4a60788873fd 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -32,7 +32,8 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 	return (type >= min && type <= max) ^ invert;
 }
 
-static bool mh_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool mh_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 09322720d2a6..793c27200881 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -36,7 +36,8 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 	return r;
 }
 
-static bool rt_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool rt_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
diff --git a/net/netfilter/xt_CLASSIFY.c b/net/netfilter/xt_CLASSIFY.c
index 011bc80dd2a1..c2c0e4abeb99 100644
--- a/net/netfilter/xt_CLASSIFY.c
+++ b/net/netfilter/xt_CLASSIFY.c
@@ -27,7 +27,7 @@ MODULE_ALIAS("ipt_CLASSIFY");
 MODULE_ALIAS("ip6t_CLASSIFY");
 
 static unsigned int
-classify_tg(struct sk_buff *skb, const struct xt_target_param *par)
+classify_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_classify_target_info *clinfo = par->targinfo;
 
diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c
index e953e302141d..e04dc282e3bb 100644
--- a/net/netfilter/xt_CONNSECMARK.c
+++ b/net/netfilter/xt_CONNSECMARK.c
@@ -64,7 +64,7 @@ static void secmark_restore(struct sk_buff *skb)
 }
 
 static unsigned int
-connsecmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+connsecmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connsecmark_target_info *info = par->targinfo;
 
diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c
index c8f547829bad..562bf3266e04 100644
--- a/net/netfilter/xt_CT.c
+++ b/net/netfilter/xt_CT.c
@@ -20,7 +20,7 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 
 static unsigned int xt_ct_target(struct sk_buff *skb,
-				 const struct xt_target_param *par)
+				 const struct xt_action_param *par)
 {
 	const struct xt_ct_target_info *info = par->targinfo;
 	struct nf_conn *ct = info->ct;
diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c
index 969634f293e5..0a229191e55b 100644
--- a/net/netfilter/xt_DSCP.c
+++ b/net/netfilter/xt_DSCP.c
@@ -28,7 +28,7 @@ MODULE_ALIAS("ipt_TOS");
 MODULE_ALIAS("ip6t_TOS");
 
 static unsigned int
-dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
+dscp_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -45,7 +45,7 @@ dscp_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-dscp_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+dscp_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_DSCP_info *dinfo = par->targinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -72,7 +72,7 @@ static int dscp_tg_check(const struct xt_tgchk_param *par)
 }
 
 static unsigned int
-tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
+tos_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tos_target_info *info = par->targinfo;
 	struct iphdr *iph = ip_hdr(skb);
@@ -92,7 +92,7 @@ tos_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-tos_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tos_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tos_target_info *info = par->targinfo;
 	struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c
index 77b99f732711..95b084800fcc 100644
--- a/net/netfilter/xt_HL.c
+++ b/net/netfilter/xt_HL.c
@@ -26,7 +26,7 @@ MODULE_DESCRIPTION("Xtables: Hoplimit/TTL Limit field modification target");
 MODULE_LICENSE("GPL");
 
 static unsigned int
-ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ttl_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct iphdr *iph;
 	const struct ipt_TTL_info *info = par->targinfo;
@@ -66,7 +66,7 @@ ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static unsigned int
-hl_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+hl_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ip6h;
 	const struct ip6t_HL_info *info = par->targinfo;
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index ab6f8ff9c9a7..a4140509eea1 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -49,7 +49,7 @@ struct xt_led_info_internal {
 };
 
 static unsigned int
-led_tg(struct sk_buff *skb, const struct xt_target_param *par)
+led_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_led_info *ledinfo = par->targinfo;
 	struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
diff --git a/net/netfilter/xt_NFLOG.c b/net/netfilter/xt_NFLOG.c
index 42dd8747b421..a17dd0f589b2 100644
--- a/net/netfilter/xt_NFLOG.c
+++ b/net/netfilter/xt_NFLOG.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ipt_NFLOG");
 MODULE_ALIAS("ip6t_NFLOG");
 
 static unsigned int
-nflog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+nflog_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_nflog_info *info = par->targinfo;
 	struct nf_loginfo li;
diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c
index f9217cb56fe3..039cce1bde3d 100644
--- a/net/netfilter/xt_NFQUEUE.c
+++ b/net/netfilter/xt_NFQUEUE.c
@@ -31,7 +31,7 @@ static u32 jhash_initval __read_mostly;
 static bool rnd_inited __read_mostly;
 
 static unsigned int
-nfqueue_tg(struct sk_buff *skb, const struct xt_target_param *par)
+nfqueue_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info *tinfo = par->targinfo;
 
@@ -65,7 +65,7 @@ static u32 hash_v6(const struct sk_buff *skb)
 #endif
 
 static unsigned int
-nfqueue_tg_v1(struct sk_buff *skb, const struct xt_target_param *par)
+nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_NFQ_info_v1 *info = par->targinfo;
 	u32 queue = info->queuenum;
diff --git a/net/netfilter/xt_NOTRACK.c b/net/netfilter/xt_NOTRACK.c
index e7a0a54fd4ea..512b9123252f 100644
--- a/net/netfilter/xt_NOTRACK.c
+++ b/net/netfilter/xt_NOTRACK.c
@@ -13,7 +13,7 @@ MODULE_ALIAS("ipt_NOTRACK");
 MODULE_ALIAS("ip6t_NOTRACK");
 
 static unsigned int
-notrack_tg(struct sk_buff *skb, const struct xt_target_param *par)
+notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	/* Previously seen (loopback)? Ignore. */
 	if (skb->nfct != NULL)
diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c
index a02193f06e39..69c01e10f8af 100644
--- a/net/netfilter/xt_RATEEST.c
+++ b/net/netfilter/xt_RATEEST.c
@@ -73,7 +73,7 @@ void xt_rateest_put(struct xt_rateest *est)
 EXPORT_SYMBOL_GPL(xt_rateest_put);
 
 static unsigned int
-xt_rateest_tg(struct sk_buff *skb, const struct xt_target_param *par)
+xt_rateest_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateest_target_info *info = par->targinfo;
 	struct gnet_stats_basic_packed *stats = &info->est->bstats;
diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c
index a91d4a7d5a2c..23b2d6c486b5 100644
--- a/net/netfilter/xt_SECMARK.c
+++ b/net/netfilter/xt_SECMARK.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_SECMARK");
 static u8 mode;
 
 static unsigned int
-secmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	u32 secmark = 0;
 	const struct xt_secmark_target_info *info = par->targinfo;
diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c
index d04606459c9d..62ec021fbd50 100644
--- a/net/netfilter/xt_TCPMSS.c
+++ b/net/netfilter/xt_TCPMSS.c
@@ -172,7 +172,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
 }
 
 static unsigned int
-tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct iphdr *iph = ip_hdr(skb);
 	__be16 newlen;
@@ -195,7 +195,7 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 
 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
 static unsigned int
-tcpmss_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	u8 nexthdr;
diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c
index e8b57609ddc0..9dc9ecfdd546 100644
--- a/net/netfilter/xt_TCPOPTSTRIP.c
+++ b/net/netfilter/xt_TCPOPTSTRIP.c
@@ -74,7 +74,7 @@ tcpoptstrip_mangle_packet(struct sk_buff *skb,
 }
 
 static unsigned int
-tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return tcpoptstrip_mangle_packet(skb, par->targinfo, ip_hdrlen(skb),
 	       sizeof(struct iphdr) + sizeof(struct tcphdr));
@@ -82,7 +82,7 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_target_param *par)
 
 #if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE)
 static unsigned int
-tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
 	int tcphoff;
diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c
index 49da6c05f4e0..d7920d9f49e9 100644
--- a/net/netfilter/xt_TEE.c
+++ b/net/netfilter/xt_TEE.c
@@ -84,7 +84,7 @@ tee_tg_route4(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 }
 
 static unsigned int
-tee_tg4(struct sk_buff *skb, const struct xt_target_param *par)
+tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 	struct iphdr *iph;
@@ -165,7 +165,7 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)
 }
 
 static unsigned int
-tee_tg6(struct sk_buff *skb, const struct xt_target_param *par)
+tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tee_tginfo *info = par->targinfo;
 
diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c
index 4f246ddc5c48..e1a0dedac258 100644
--- a/net/netfilter/xt_TPROXY.c
+++ b/net/netfilter/xt_TPROXY.c
@@ -25,7 +25,7 @@
 #include <net/netfilter/nf_tproxy_core.h>
 
 static unsigned int
-tproxy_tg(struct sk_buff *skb, const struct xt_target_param *par)
+tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct iphdr *iph = ip_hdr(skb);
 	const struct xt_tproxy_target_info *tgi = par->targinfo;
diff --git a/net/netfilter/xt_TRACE.c b/net/netfilter/xt_TRACE.c
index fbb04b86c46b..df48967af382 100644
--- a/net/netfilter/xt_TRACE.c
+++ b/net/netfilter/xt_TRACE.c
@@ -11,7 +11,7 @@ MODULE_ALIAS("ipt_TRACE");
 MODULE_ALIAS("ip6t_TRACE");
 
 static unsigned int
-trace_tg(struct sk_buff *skb, const struct xt_target_param *par)
+trace_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	skb->nf_trace = 1;
 	return XT_CONTINUE;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 6c941e1c6b9e..67fc317118c0 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 }
 
 static bool
-xt_cluster_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	const struct xt_cluster_match_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index e82179832acd..1dbb3e13c059 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+comment_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	/* We always match */
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index ff738a5f963a..5e5cf15f011f 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 326bc1b81681..6eec1913298e 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -173,7 +173,7 @@ static int count_them(struct net *net,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct xt_connlimit_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index ae1015484ae2..b43cfc7f1bb5 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -37,7 +37,7 @@ MODULE_ALIAS("ipt_connmark");
 MODULE_ALIAS("ip6t_connmark");
 
 static unsigned int
-connmark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connmark_tginfo1 *info = par->targinfo;
 	enum ip_conntrack_info ctinfo;
@@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 }
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 3348706ce56d..14a96f48001d 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
+conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
@@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_match_param *par,
 }
 
 static bool
-conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 
@@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-conntrack_mt_v2(const struct sk_buff *skb, const struct xt_match_param *par)
+conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 0d260aec487f..7a4d4e8edc10 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 9db51fddbdb8..2133b509d157 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -54,7 +54,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
-static bool tos_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool tos_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 143bfdc8e38f..39caafff9485 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -36,7 +36,8 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool esp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool esp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 0c366d387c8c..700c21e0804c 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index b8b3e13dc71e..e941bd26a519 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+helper_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index be53f7299623..335c34a4fd1c 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -25,7 +25,8 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ttl");
 MODULE_ALIAS("ip6t_hl");
 
-static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ttl_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ipt_ttl_info *info = par->matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -44,7 +45,8 @@ static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 	return false;
 }
 
-static bool hl_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool hl_mt6(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct ip6t_hl_info *info = par->matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 8471d9715bde..9578860a9217 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter/xt_iprange.h>
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct xt_match_param *par)
+iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index c4871ca6c86d..842149b4122c 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_match_param *par)
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct xt_match_param *par)
+length_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 88215dca19cb..7dcfe8602c83 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv = r->master;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index b971ce93773e..36c49644ce35 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -25,7 +25,8 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool mac_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool mac_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_mac_info *info = par->matchinfo;
 	bool ret;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 035c468a0040..3c8347076d55 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_MARK");
 MODULE_ALIAS("ip6t_MARK");
 
 static unsigned int
-mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
+mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_mark_tginfo2 *info = par->targinfo;
 
@@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_target_param *par)
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index b21f90432247..3140fd4049fc 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 8dcde13a0781..37aa55860a96 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -193,8 +193,8 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info
 	return ip->ttl == f_ttl;
 }
 
-static bool xt_osf_match_packet(const struct sk_buff *skb,
-		const struct xt_match_param *p)
+static bool
+xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p)
 {
 	const struct xt_osf_info *info = p->matchinfo;
 	const struct iphdr *ip = ip_hdr(skb);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index d24c76dffee2..3dd1391d385a 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/xt_owner.h>
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+owner_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index d0bdf3dd4d25..298cd290b06a 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev");
 
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct xt_physdev_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index 69da1d3a1d85..d95f2149df93 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 1fa239c1fb93..1abfc7ad4277 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+policy_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index 7c95d69f6f06..e79e07c75da8 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+quota_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct xt_quota_info *q = (void *)par->matchinfo;
 	struct xt_quota_priv *priv = q->master;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 23805f8a444b..53f7a4d12e1e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -15,7 +15,7 @@
 
 
 static bool
-xt_rateest_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index 484d1689bfde..b063c783901a 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+realm_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb_dst(skb);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index b88d63b9c76a..503b7f199f14 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-recent_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+recent_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	struct recent_net *recent_net = recent_pernet(net);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index c3694df54672..da4c3cb31403 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index a9b16867e1f7..2665e32d5db1 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
+socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_match_param *par,
 }
 
 static bool
-socket_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return socket_match(skb, par, NULL);
 }
 
 static bool
-socket_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
+socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index bb1271852d50..344cca661099 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+state_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index 5aeca1d023d8..ee4540a2cf33 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index f6d5112175e6..fd5dc5016a17 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+string_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 4809b34b10f8..5c8a7b435d66 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index efa2ede24ae6..19c31d4c2ba6 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -62,7 +62,8 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool tcp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool tcp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
@@ -128,7 +129,8 @@ static int tcp_mt_check(const struct xt_mtchk_param *par)
 	return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
 }
 
-static bool udp_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool udp_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index d8556fdda440..ffdb8fac0be1 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time)
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+time_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index d7c05f03a7e7..f62797b1ddb5 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -86,7 +86,8 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool u32_mt(const struct sk_buff *skb,
+		   const struct xt_action_param *par)
 {
 	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 03f80a0fa167..1f9595467c17 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -199,7 +199,7 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a,
 {
 	int ret = 0, result = 0;
 	struct tcf_ipt *ipt = a->priv;
-	struct xt_target_param par;
+	struct xt_action_param par;
 
 	if (skb_cloned(skb)) {
 		if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
-- 
cgit v1.2.3


From 62fc8051083a334578c3f4b3488808f210b4565f Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 7 Jul 2009 20:42:08 +0200
Subject: netfilter: xtables: deconstify struct xt_action_param for matches

In future, layer-3 matches will be an xt module of their own, and
need to set the fragoff and thoff fields. Adding more pointers would
needlessly increase memory requirements (esp. so for 64-bit, where
pointers are wider).

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h   | 2 +-
 net/bridge/netfilter/ebt_802_3.c     | 2 +-
 net/bridge/netfilter/ebt_among.c     | 2 +-
 net/bridge/netfilter/ebt_arp.c       | 2 +-
 net/bridge/netfilter/ebt_ip.c        | 2 +-
 net/bridge/netfilter/ebt_ip6.c       | 2 +-
 net/bridge/netfilter/ebt_limit.c     | 2 +-
 net/bridge/netfilter/ebt_mark_m.c    | 2 +-
 net/bridge/netfilter/ebt_pkttype.c   | 2 +-
 net/bridge/netfilter/ebt_stp.c       | 2 +-
 net/bridge/netfilter/ebt_vlan.c      | 2 +-
 net/ipv4/netfilter/ip_tables.c       | 2 +-
 net/ipv4/netfilter/ipt_addrtype.c    | 4 ++--
 net/ipv4/netfilter/ipt_ah.c          | 2 +-
 net/ipv4/netfilter/ipt_ecn.c         | 3 +--
 net/ipv6/netfilter/ip6_tables.c      | 2 +-
 net/ipv6/netfilter/ip6t_ah.c         | 3 +--
 net/ipv6/netfilter/ip6t_eui64.c      | 2 +-
 net/ipv6/netfilter/ip6t_frag.c       | 2 +-
 net/ipv6/netfilter/ip6t_hbh.c        | 2 +-
 net/ipv6/netfilter/ip6t_ipv6header.c | 2 +-
 net/ipv6/netfilter/ip6t_mh.c         | 3 +--
 net/ipv6/netfilter/ip6t_rt.c         | 3 +--
 net/netfilter/xt_cluster.c           | 2 +-
 net/netfilter/xt_comment.c           | 2 +-
 net/netfilter/xt_connbytes.c         | 2 +-
 net/netfilter/xt_connlimit.c         | 2 +-
 net/netfilter/xt_connmark.c          | 2 +-
 net/netfilter/xt_conntrack.c         | 6 +++---
 net/netfilter/xt_dccp.c              | 2 +-
 net/netfilter/xt_dscp.c              | 7 +++----
 net/netfilter/xt_esp.c               | 3 +--
 net/netfilter/xt_hashlimit.c         | 2 +-
 net/netfilter/xt_helper.c            | 2 +-
 net/netfilter/xt_hl.c                | 6 ++----
 net/netfilter/xt_iprange.c           | 4 ++--
 net/netfilter/xt_length.c            | 4 ++--
 net/netfilter/xt_limit.c             | 2 +-
 net/netfilter/xt_mac.c               | 3 +--
 net/netfilter/xt_mark.c              | 2 +-
 net/netfilter/xt_multiport.c         | 2 +-
 net/netfilter/xt_osf.c               | 2 +-
 net/netfilter/xt_owner.c             | 2 +-
 net/netfilter/xt_physdev.c           | 2 +-
 net/netfilter/xt_pkttype.c           | 2 +-
 net/netfilter/xt_policy.c            | 2 +-
 net/netfilter/xt_quota.c             | 2 +-
 net/netfilter/xt_rateest.c           | 2 +-
 net/netfilter/xt_realm.c             | 2 +-
 net/netfilter/xt_recent.c            | 2 +-
 net/netfilter/xt_sctp.c              | 2 +-
 net/netfilter/xt_socket.c            | 6 +++---
 net/netfilter/xt_state.c             | 2 +-
 net/netfilter/xt_statistic.c         | 2 +-
 net/netfilter/xt_string.c            | 2 +-
 net/netfilter/xt_tcpmss.c            | 2 +-
 net/netfilter/xt_tcpudp.c            | 6 ++----
 net/netfilter/xt_time.c              | 2 +-
 net/netfilter/xt_u32.c               | 3 +--
 59 files changed, 70 insertions(+), 82 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 6d1f9a638056..40c6a8d2a9ea 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -288,7 +288,7 @@ struct xt_match {
 	   non-linear skb, using skb_header_pointer and
 	   skb_ip_make_writable. */
 	bool (*match)(const struct sk_buff *skb,
-		      const struct xt_action_param *);
+		      struct xt_action_param *);
 
 	/* Called when user tries to insert an entry of this type. */
 	int (*checkentry)(const struct xt_mtchk_param *);
diff --git a/net/bridge/netfilter/ebt_802_3.c b/net/bridge/netfilter/ebt_802_3.c
index 4b0df00c82ec..2a449b7ab8fa 100644
--- a/net/bridge/netfilter/ebt_802_3.c
+++ b/net/bridge/netfilter/ebt_802_3.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_802_3.h>
 
 static bool
-ebt_802_3_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_802_3_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_802_3_info *info = par->matchinfo;
 	const struct ebt_802_3_hdr *hdr = ebt_802_3_hdr(skb);
diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c
index c04f9461f734..8b84c581be30 100644
--- a/net/bridge/netfilter/ebt_among.c
+++ b/net/bridge/netfilter/ebt_among.c
@@ -129,7 +129,7 @@ static int get_ip_src(const struct sk_buff *skb, __be32 *addr)
 }
 
 static bool
-ebt_among_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_among_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_among_info *info = par->matchinfo;
 	const char *dmac, *smac;
diff --git a/net/bridge/netfilter/ebt_arp.c b/net/bridge/netfilter/ebt_arp.c
index 6203f4dea2e2..cd457b891b27 100644
--- a/net/bridge/netfilter/ebt_arp.c
+++ b/net/bridge/netfilter/ebt_arp.c
@@ -16,7 +16,7 @@
 #include <linux/netfilter_bridge/ebt_arp.h>
 
 static bool
-ebt_arp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_arp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_arp_info *info = par->matchinfo;
 	const struct arphdr *ah;
diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c
index a0cde7442b55..23bca62d58d2 100644
--- a/net/bridge/netfilter/ebt_ip.c
+++ b/net/bridge/netfilter/ebt_ip.c
@@ -25,7 +25,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip_info *info = par->matchinfo;
 	const struct iphdr *ih;
diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c
index c451dc2ff822..50a46afc2bcc 100644
--- a/net/bridge/netfilter/ebt_ip6.c
+++ b/net/bridge/netfilter/ebt_ip6.c
@@ -28,7 +28,7 @@ struct tcpudphdr {
 };
 
 static bool
-ebt_ip6_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_ip6_info *info = par->matchinfo;
 	const struct ipv6hdr *ih6;
diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c
index 760923f08067..517e78befcb2 100644
--- a/net/bridge/netfilter/ebt_limit.c
+++ b/net/bridge/netfilter/ebt_limit.c
@@ -32,7 +32,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-ebt_limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ebt_limit_info *info = (void *)par->matchinfo;
 	unsigned long now = jiffies;
diff --git a/net/bridge/netfilter/ebt_mark_m.c b/net/bridge/netfilter/ebt_mark_m.c
index de901634fa31..d98baefc4c7e 100644
--- a/net/bridge/netfilter/ebt_mark_m.c
+++ b/net/bridge/netfilter/ebt_mark_m.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_mark_m.h>
 
 static bool
-ebt_mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_mark_m_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_pkttype.c b/net/bridge/netfilter/ebt_pkttype.c
index 7ba67c4b677a..496a56515307 100644
--- a/net/bridge/netfilter/ebt_pkttype.c
+++ b/net/bridge/netfilter/ebt_pkttype.c
@@ -13,7 +13,7 @@
 #include <linux/netfilter_bridge/ebt_pkttype.h>
 
 static bool
-ebt_pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_pkttype_info *info = par->matchinfo;
 
diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c
index 3cd6070a1137..5b33a2e634a6 100644
--- a/net/bridge/netfilter/ebt_stp.c
+++ b/net/bridge/netfilter/ebt_stp.c
@@ -120,7 +120,7 @@ static bool ebt_filter_config(const struct ebt_stp_info *info,
 }
 
 static bool
-ebt_stp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_stp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_stp_info *info = par->matchinfo;
 	const struct stp_header *sp;
diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c
index e4ab62533c74..87b53b3a921d 100644
--- a/net/bridge/netfilter/ebt_vlan.c
+++ b/net/bridge/netfilter/ebt_vlan.c
@@ -36,7 +36,7 @@ MODULE_LICENSE("GPL");
 #define EXIT_ON_MISMATCH(_MATCH_,_MASK_) {if (!((info->_MATCH_ == _MATCH_)^!!(info->invflags & _MASK_))) return false; }
 
 static bool
-ebt_vlan_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ebt_vlan_info *info = par->matchinfo;
 	const struct vlan_hdr *fp;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 3ab1b81e799b..4e674f2824a7 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -2138,7 +2138,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp_match(const struct sk_buff *skb, const struct xt_action_param *par)
+icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct icmphdr *ic;
 	struct icmphdr _icmph;
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
index 24ec548515e4..db8bff0fb86d 100644
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ b/net/ipv4/netfilter/ipt_addrtype.c
@@ -30,7 +30,7 @@ static inline bool match_type(struct net *net, const struct net_device *dev,
 }
 
 static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
+addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info *info = par->matchinfo;
@@ -48,7 +48,7 @@ addrtype_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct ipt_addrtype_info_v1 *info = par->matchinfo;
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 48a8293bc1d1..c9d83dc2d6fa 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -30,7 +30,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ahdr;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index 744d13ee296e..b79dddc9edd6 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -67,8 +67,7 @@ static inline bool match_tcp(const struct sk_buff *skb,
 	return true;
 }
 
-static bool ecn_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ipt_ecn_info *info = par->matchinfo;
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index c3bc999a8bb6..4549f8d6f88f 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -2154,7 +2154,7 @@ icmp6_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
 }
 
 static bool
-icmp6_match(const struct sk_buff *skb, const struct xt_action_param *par)
+icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct icmp6hdr *ic;
 	struct icmp6hdr _icmph;
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index 4fe71898381d..c89887f35a46 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool ah_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip_auth_hdr _ah;
 	const struct ip_auth_hdr *ah;
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index 2fd2be1795e0..f32fce34145a 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -20,7 +20,7 @@ MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-eui64_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	unsigned char eui64[8];
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index 8401aa82ea0b..fcc8c72f218e 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -35,7 +35,7 @@ id_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 }
 
 static bool
-frag_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct frag_hdr _frag;
 	const struct frag_hdr *fh;
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index d19d5cf47a38..f8aebc098d71 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -44,7 +44,7 @@ MODULE_ALIAS("ip6t_dst");
 static struct xt_match hbh_mt6_reg[] __read_mostly;
 
 static bool
-hbh_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ipv6_opt_hdr _optsh;
 	const struct ipv6_opt_hdr *oh;
diff --git a/net/ipv6/netfilter/ip6t_ipv6header.c b/net/ipv6/netfilter/ip6t_ipv6header.c
index 8e88bb8311de..54bd9790603f 100644
--- a/net/ipv6/netfilter/ip6t_ipv6header.c
+++ b/net/ipv6/netfilter/ip6t_ipv6header.c
@@ -27,7 +27,7 @@ MODULE_DESCRIPTION("Xtables: IPv6 header types match");
 MODULE_AUTHOR("Andras Kis-Szabo <kisza@sch.bme.hu>");
 
 static bool
-ipv6header_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+ipv6header_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip6t_ipv6header_info *info = par->matchinfo;
 	unsigned int temp;
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index 4a60788873fd..eb1c3d65271a 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -32,8 +32,7 @@ type_match(u_int8_t min, u_int8_t max, u_int8_t type, bool invert)
 	return (type >= min && type <= max) ^ invert;
 }
 
-static bool mh_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ip6_mh _mh;
 	const struct ip6_mh *mh;
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index 793c27200881..ee584693ee35 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -36,8 +36,7 @@ segsleft_match(u_int32_t min, u_int32_t max, u_int32_t id, bool invert)
 	return r;
 }
 
-static bool rt_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct ipv6_rt_hdr _route;
 	const struct ipv6_rt_hdr *rh;
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 67fc317118c0..30b95a1c1c89 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -86,7 +86,7 @@ xt_cluster_is_multicast_addr(const struct sk_buff *skb, u_int8_t family)
 }
 
 static bool
-xt_cluster_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct sk_buff *pskb = (struct sk_buff *)skb;
 	const struct xt_cluster_match_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_comment.c b/net/netfilter/xt_comment.c
index 1dbb3e13c059..5c861d2f21ca 100644
--- a/net/netfilter/xt_comment.c
+++ b/net/netfilter/xt_comment.c
@@ -16,7 +16,7 @@ MODULE_ALIAS("ipt_comment");
 MODULE_ALIAS("ip6t_comment");
 
 static bool
-comment_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+comment_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	/* We always match */
 	return true;
diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c
index 5e5cf15f011f..73517835303d 100644
--- a/net/netfilter/xt_connbytes.c
+++ b/net/netfilter/xt_connbytes.c
@@ -18,7 +18,7 @@ MODULE_ALIAS("ipt_connbytes");
 MODULE_ALIAS("ip6t_connbytes");
 
 static bool
-connbytes_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_connbytes_info *sinfo = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index 6eec1913298e..f130fd9817be 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -173,7 +173,7 @@ static int count_them(struct net *net,
 }
 
 static bool
-connlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	const struct xt_connlimit_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c
index b43cfc7f1bb5..7278145e6a68 100644
--- a/net/netfilter/xt_connmark.c
+++ b/net/netfilter/xt_connmark.c
@@ -91,7 +91,7 @@ static void connmark_tg_destroy(const struct xt_tgdtor_param *par)
 }
 
 static bool
-connmark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_connmark_mtinfo1 *info = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c
index 14a96f48001d..39681f10291c 100644
--- a/net/netfilter/xt_conntrack.c
+++ b/net/netfilter/xt_conntrack.c
@@ -113,7 +113,7 @@ ct_proto_port_check(const struct xt_conntrack_mtinfo2 *info,
 }
 
 static bool
-conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
+conntrack_mt(const struct sk_buff *skb, struct xt_action_param *par,
              u16 state_mask, u16 status_mask)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
@@ -191,7 +191,7 @@ conntrack_mt(const struct sk_buff *skb, const struct xt_action_param *par,
 }
 
 static bool
-conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+conntrack_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo1 *info = par->matchinfo;
 
@@ -199,7 +199,7 @@ conntrack_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-conntrack_mt_v2(const struct sk_buff *skb, const struct xt_action_param *par)
+conntrack_mt_v2(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_conntrack_mtinfo2 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index 7a4d4e8edc10..cc2c2919439f 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -96,7 +96,7 @@ match_option(u_int8_t option, const struct sk_buff *skb, unsigned int protoff,
 }
 
 static bool
-dccp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dccp_info *info = par->matchinfo;
 	const struct dccp_hdr *dh;
diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c
index 2133b509d157..64670fc5d0e1 100644
--- a/net/netfilter/xt_dscp.c
+++ b/net/netfilter/xt_dscp.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tos");
 MODULE_ALIAS("ip6t_tos");
 
 static bool
-dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+dscp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv4_get_dsfield(ip_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -34,7 +34,7 @@ dscp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-dscp_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+dscp_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_dscp_info *info = par->matchinfo;
 	u_int8_t dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> XT_DSCP_SHIFT;
@@ -54,8 +54,7 @@ static int dscp_mt_check(const struct xt_mtchk_param *par)
 	return 0;
 }
 
-static bool tos_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool tos_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_tos_match_info *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 39caafff9485..7c2d80020554 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -36,8 +36,7 @@ spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
 	return r;
 }
 
-static bool esp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip_esp_hdr *eh;
 	struct ip_esp_hdr _esp;
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 700c21e0804c..25a6e548ec27 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -516,7 +516,7 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
 }
 
 static bool
-hashlimit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_hashlimit_mtinfo1 *info = par->matchinfo;
 	struct xt_hashlimit_htable *hinfo = info->hinfo;
diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c
index e941bd26a519..9f4ab00c8050 100644
--- a/net/netfilter/xt_helper.c
+++ b/net/netfilter/xt_helper.c
@@ -24,7 +24,7 @@ MODULE_ALIAS("ip6t_helper");
 
 
 static bool
-helper_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+helper_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_helper_info *info = par->matchinfo;
 	const struct nf_conn *ct;
diff --git a/net/netfilter/xt_hl.c b/net/netfilter/xt_hl.c
index 335c34a4fd1c..7d12221ead89 100644
--- a/net/netfilter/xt_hl.c
+++ b/net/netfilter/xt_hl.c
@@ -25,8 +25,7 @@ MODULE_LICENSE("GPL");
 MODULE_ALIAS("ipt_ttl");
 MODULE_ALIAS("ip6t_hl");
 
-static bool ttl_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool ttl_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ipt_ttl_info *info = par->matchinfo;
 	const u8 ttl = ip_hdr(skb)->ttl;
@@ -45,8 +44,7 @@ static bool ttl_mt(const struct sk_buff *skb,
 	return false;
 }
 
-static bool hl_mt6(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool hl_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct ip6t_hl_info *info = par->matchinfo;
 	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_iprange.c b/net/netfilter/xt_iprange.c
index 9578860a9217..88f7c3511c72 100644
--- a/net/netfilter/xt_iprange.c
+++ b/net/netfilter/xt_iprange.c
@@ -17,7 +17,7 @@
 #include <linux/netfilter/xt_iprange.h>
 
 static bool
-iprange_mt4(const struct sk_buff *skb, const struct xt_action_param *par)
+iprange_mt4(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct iphdr *iph = ip_hdr(skb);
@@ -68,7 +68,7 @@ iprange_ipv6_sub(const struct in6_addr *a, const struct in6_addr *b)
 }
 
 static bool
-iprange_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+iprange_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_iprange_mtinfo *info = par->matchinfo;
 	const struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 842149b4122c..176e5570a999 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_length");
 MODULE_ALIAS("ip6t_length");
 
 static bool
-length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+length_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
@@ -30,7 +30,7 @@ length_mt(const struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-length_mt6(const struct sk_buff *skb, const struct xt_action_param *par)
+length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_length_info *info = par->matchinfo;
 	const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c
index 7dcfe8602c83..32b7a579a032 100644
--- a/net/netfilter/xt_limit.c
+++ b/net/netfilter/xt_limit.c
@@ -65,7 +65,7 @@ static DEFINE_SPINLOCK(limit_lock);
 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
 
 static bool
-limit_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+limit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateinfo *r = par->matchinfo;
 	struct xt_limit_priv *priv = r->master;
diff --git a/net/netfilter/xt_mac.c b/net/netfilter/xt_mac.c
index 36c49644ce35..8160f6b1435d 100644
--- a/net/netfilter/xt_mac.c
+++ b/net/netfilter/xt_mac.c
@@ -25,8 +25,7 @@ MODULE_DESCRIPTION("Xtables: MAC address match");
 MODULE_ALIAS("ipt_mac");
 MODULE_ALIAS("ip6t_mac");
 
-static bool mac_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool mac_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_mac_info *info = par->matchinfo;
 	bool ret;
diff --git a/net/netfilter/xt_mark.c b/net/netfilter/xt_mark.c
index 3c8347076d55..23345238711b 100644
--- a/net/netfilter/xt_mark.c
+++ b/net/netfilter/xt_mark.c
@@ -34,7 +34,7 @@ mark_tg(struct sk_buff *skb, const struct xt_action_param *par)
 }
 
 static bool
-mark_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+mark_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_mark_mtinfo1 *info = par->matchinfo;
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 3140fd4049fc..52beb68256c8 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -72,7 +72,7 @@ ports_match_v1(const struct xt_multiport_v1 *minfo,
 }
 
 static bool
-multiport_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const __be16 *pptr;
 	__be16 _ports[2];
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index 37aa55860a96..4327e101c047 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -194,7 +194,7 @@ static inline int xt_osf_ttl(const struct sk_buff *skb, const struct xt_osf_info
 }
 
 static bool
-xt_osf_match_packet(const struct sk_buff *skb, const struct xt_action_param *p)
+xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p)
 {
 	const struct xt_osf_info *info = p->matchinfo;
 	const struct iphdr *ip = ip_hdr(skb);
diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c
index 3dd1391d385a..772d7389b337 100644
--- a/net/netfilter/xt_owner.c
+++ b/net/netfilter/xt_owner.c
@@ -18,7 +18,7 @@
 #include <linux/netfilter/xt_owner.h>
 
 static bool
-owner_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+owner_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_owner_match_info *info = par->matchinfo;
 	const struct file *filp;
diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c
index 298cd290b06a..d7ca16b8b8df 100644
--- a/net/netfilter/xt_physdev.c
+++ b/net/netfilter/xt_physdev.c
@@ -22,7 +22,7 @@ MODULE_ALIAS("ip6t_physdev");
 
 
 static bool
-physdev_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+physdev_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct xt_physdev_info *info = par->matchinfo;
diff --git a/net/netfilter/xt_pkttype.c b/net/netfilter/xt_pkttype.c
index d95f2149df93..5b645cb598fc 100644
--- a/net/netfilter/xt_pkttype.c
+++ b/net/netfilter/xt_pkttype.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_pkttype");
 MODULE_ALIAS("ip6t_pkttype");
 
 static bool
-pkttype_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+pkttype_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_pkttype_info *info = par->matchinfo;
 	u_int8_t type;
diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c
index 1abfc7ad4277..f23e97bb42d7 100644
--- a/net/netfilter/xt_policy.c
+++ b/net/netfilter/xt_policy.c
@@ -110,7 +110,7 @@ match_policy_out(const struct sk_buff *skb, const struct xt_policy_info *info,
 }
 
 static bool
-policy_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+policy_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_policy_info *info = par->matchinfo;
 	int ret;
diff --git a/net/netfilter/xt_quota.c b/net/netfilter/xt_quota.c
index e79e07c75da8..b4f7dfea5980 100644
--- a/net/netfilter/xt_quota.c
+++ b/net/netfilter/xt_quota.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ip6t_quota");
 static DEFINE_SPINLOCK(quota_lock);
 
 static bool
-quota_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+quota_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct xt_quota_info *q = (void *)par->matchinfo;
 	struct xt_quota_priv *priv = q->master;
diff --git a/net/netfilter/xt_rateest.c b/net/netfilter/xt_rateest.c
index 53f7a4d12e1e..76a083184d8e 100644
--- a/net/netfilter/xt_rateest.c
+++ b/net/netfilter/xt_rateest.c
@@ -15,7 +15,7 @@
 
 
 static bool
-xt_rateest_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+xt_rateest_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_rateest_match_info *info = par->matchinfo;
 	struct gnet_stats_rate_est *r;
diff --git a/net/netfilter/xt_realm.c b/net/netfilter/xt_realm.c
index b063c783901a..459a7b256eb2 100644
--- a/net/netfilter/xt_realm.c
+++ b/net/netfilter/xt_realm.c
@@ -22,7 +22,7 @@ MODULE_DESCRIPTION("Xtables: Routing realm match");
 MODULE_ALIAS("ipt_realm");
 
 static bool
-realm_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+realm_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_realm_info *info = par->matchinfo;
 	const struct dst_entry *dst = skb_dst(skb);
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 503b7f199f14..2808a7e33947 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -224,7 +224,7 @@ static void recent_table_flush(struct recent_table *t)
 }
 
 static bool
-recent_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	struct net *net = dev_net(par->in ? par->in : par->out);
 	struct recent_net *recent_net = recent_pernet(net);
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index da4c3cb31403..94d8b5deb2d0 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -114,7 +114,7 @@ match_packet(const struct sk_buff *skb,
 }
 
 static bool
-sctp_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_sctp_info *info = par->matchinfo;
 	const sctp_sctphdr_t *sh;
diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c
index 2665e32d5db1..3d54c236a1ba 100644
--- a/net/netfilter/xt_socket.c
+++ b/net/netfilter/xt_socket.c
@@ -88,7 +88,7 @@ extract_icmp_fields(const struct sk_buff *skb,
 
 
 static bool
-socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
+socket_match(const struct sk_buff *skb, struct xt_action_param *par,
 	     const struct xt_socket_mtinfo1 *info)
 {
 	const struct iphdr *iph = ip_hdr(skb);
@@ -174,13 +174,13 @@ socket_match(const struct sk_buff *skb, const struct xt_action_param *par,
 }
 
 static bool
-socket_mt_v0(const struct sk_buff *skb, const struct xt_action_param *par)
+socket_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, NULL);
 }
 
 static bool
-socket_mt_v1(const struct sk_buff *skb, const struct xt_action_param *par)
+socket_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	return socket_match(skb, par, par->matchinfo);
 }
diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c
index 344cca661099..e12e053d3782 100644
--- a/net/netfilter/xt_state.c
+++ b/net/netfilter/xt_state.c
@@ -21,7 +21,7 @@ MODULE_ALIAS("ipt_state");
 MODULE_ALIAS("ip6t_state");
 
 static bool
-state_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+state_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_state_info *sinfo = par->matchinfo;
 	enum ip_conntrack_info ctinfo;
diff --git a/net/netfilter/xt_statistic.c b/net/netfilter/xt_statistic.c
index ee4540a2cf33..96e62b8fd6b1 100644
--- a/net/netfilter/xt_statistic.c
+++ b/net/netfilter/xt_statistic.c
@@ -30,7 +30,7 @@ MODULE_ALIAS("ip6t_statistic");
 static DEFINE_SPINLOCK(nth_lock);
 
 static bool
-statistic_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+statistic_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_statistic_info *info = par->matchinfo;
 	bool ret = info->flags & XT_STATISTIC_INVERT;
diff --git a/net/netfilter/xt_string.c b/net/netfilter/xt_string.c
index fd5dc5016a17..d3c48b14ab94 100644
--- a/net/netfilter/xt_string.c
+++ b/net/netfilter/xt_string.c
@@ -23,7 +23,7 @@ MODULE_ALIAS("ipt_string");
 MODULE_ALIAS("ip6t_string");
 
 static bool
-string_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+string_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_string_info *conf = par->matchinfo;
 	struct ts_state state;
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index 5c8a7b435d66..f90728f6b8a4 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -25,7 +25,7 @@ MODULE_ALIAS("ipt_tcpmss");
 MODULE_ALIAS("ip6t_tcpmss");
 
 static bool
-tcpmss_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+tcpmss_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_tcpmss_match_info *info = par->matchinfo;
 	const struct tcphdr *th;
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index 19c31d4c2ba6..dedde33c9db6 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -62,8 +62,7 @@ tcp_find_option(u_int8_t option,
 	return invert;
 }
 
-static bool tcp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct tcphdr *th;
 	struct tcphdr _tcph;
@@ -129,8 +128,7 @@ static int tcp_mt_check(const struct xt_mtchk_param *par)
 	return (tcpinfo->invflags & ~XT_TCP_INV_MASK) ? -EINVAL : 0;
 }
 
-static bool udp_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct udphdr *uh;
 	struct udphdr _udph;
diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c
index ffdb8fac0be1..79234bb19d05 100644
--- a/net/netfilter/xt_time.c
+++ b/net/netfilter/xt_time.c
@@ -152,7 +152,7 @@ static void localtime_3(struct xtm *r, time_t time)
 }
 
 static bool
-time_mt(const struct sk_buff *skb, const struct xt_action_param *par)
+time_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_time_info *info = par->matchinfo;
 	unsigned int packet_time;
diff --git a/net/netfilter/xt_u32.c b/net/netfilter/xt_u32.c
index f62797b1ddb5..a95b50342dbb 100644
--- a/net/netfilter/xt_u32.c
+++ b/net/netfilter/xt_u32.c
@@ -86,8 +86,7 @@ static bool u32_match_it(const struct xt_u32 *data,
 	return true;
 }
 
-static bool u32_mt(const struct sk_buff *skb,
-		   const struct xt_action_param *par)
+static bool u32_mt(const struct sk_buff *skb, struct xt_action_param *par)
 {
 	const struct xt_u32 *data = par->matchinfo;
 	bool ret;
-- 
cgit v1.2.3


From b4ba26119b06052888696491f614201817491a0d Mon Sep 17 00:00:00 2001
From: Jan Engelhardt <jengelh@medozas.de>
Date: Tue, 7 Jul 2009 20:54:30 +0200
Subject: netfilter: xtables: change hotdrop pointer to direct modification

Since xt_action_param is writable, let's use it. The pointer to
'bool hotdrop' always worried me (8 bytes (64-bit) to write 1 byte!).
Surprisingly, this results in a reduction in size:

   text    data     bss filename
5457066  692730  357892 vmlinux.o-prev
5456554  692730  357892 vmlinux.o

Signed-off-by: Jan Engelhardt <jengelh@medozas.de>
---
 include/linux/netfilter/x_tables.h |  5 ++++-
 net/bridge/netfilter/ebtables.c    |  5 ++---
 net/ipv4/netfilter/arp_tables.c    |  6 +++---
 net/ipv4/netfilter/ip_tables.c     |  9 ++++-----
 net/ipv4/netfilter/ipt_ah.c        |  2 +-
 net/ipv4/netfilter/ipt_ecn.c       |  2 +-
 net/ipv6/netfilter/ip6_tables.c    | 11 +++++------
 net/ipv6/netfilter/ip6t_ah.c       |  4 ++--
 net/ipv6/netfilter/ip6t_eui64.c    |  2 +-
 net/ipv6/netfilter/ip6t_frag.c     |  4 ++--
 net/ipv6/netfilter/ip6t_hbh.c      |  4 ++--
 net/ipv6/netfilter/ip6t_mh.c       |  4 ++--
 net/ipv6/netfilter/ip6t_rt.c       |  4 ++--
 net/netfilter/xt_connlimit.c       |  4 ++--
 net/netfilter/xt_dccp.c            |  4 ++--
 net/netfilter/xt_esp.c             |  2 +-
 net/netfilter/xt_hashlimit.c       |  2 +-
 net/netfilter/xt_multiport.c       |  2 +-
 net/netfilter/xt_recent.c          |  2 +-
 net/netfilter/xt_sctp.c            |  4 ++--
 net/netfilter/xt_tcpmss.c          |  2 +-
 net/netfilter/xt_tcpudp.c          | 10 +++++-----
 22 files changed, 47 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 40c6a8d2a9ea..c2ee5d8550cf 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -196,6 +196,9 @@ struct xt_counters_info {
  * @hook:	hook number given packet came from
  * @family:	Actual NFPROTO_* through which the function is invoked
  * 		(helpful when match->family == NFPROTO_UNSPEC)
+ *
+ * Fields written to by extensions:
+ *
  * @hotdrop:	drop packet if we had inspection problems
  * Network namespace obtainable using dev_net(in/out)
  */
@@ -212,7 +215,7 @@ struct xt_action_param {
 	unsigned int thoff;
 	unsigned int hooknum;
 	u_int8_t family;
-	bool *hotdrop;
+	bool hotdrop;
 };
 
 /**
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 290d43541d46..59ca00e40dec 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -186,13 +186,12 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 	struct ebt_entries *chaininfo;
 	const char *base;
 	const struct ebt_table_info *private;
-	bool hotdrop = false;
 	struct xt_action_param acpar;
 
 	acpar.family  = NFPROTO_BRIDGE;
 	acpar.in      = in;
 	acpar.out     = out;
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.hooknum = hook;
 
 	read_lock_bh(&table->lock);
@@ -216,7 +215,7 @@ unsigned int ebt_do_table (unsigned int hook, struct sk_buff *skb,
 
 		if (EBT_MATCH_ITERATE(point, ebt_do_match, skb, &acpar) != 0)
 			goto letscontinue;
-		if (hotdrop) {
+		if (acpar.hotdrop) {
 			read_unlock_bh(&table->lock);
 			return NF_DROP;
 		}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 9e7d089f168a..8cc56d26e937 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -260,7 +260,6 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
 	const struct arphdr *arp;
-	bool hotdrop = false;
 	struct arpt_entry *e, *back;
 	const char *indev, *outdev;
 	void *table_base;
@@ -284,6 +283,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	acpar.out     = out;
 	acpar.hooknum = hook;
 	acpar.family  = NFPROTO_ARP;
+	acpar.hotdrop = false;
 
 	arp = arp_hdr(skb);
 	do {
@@ -345,10 +345,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 	xt_info_rdunlock_bh();
 
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else
 		return verdict;
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 4e674f2824a7..607f89f16b76 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -308,7 +308,6 @@ ipt_do_table(struct sk_buff *skb,
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	const struct iphdr *ip;
-	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -330,7 +329,7 @@ ipt_do_table(struct sk_buff *skb,
 	 * match it. */
 	acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
 	acpar.thoff   = ip_hdrlen(skb);
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.in      = in;
 	acpar.out     = out;
 	acpar.family  = NFPROTO_IPV4;
@@ -432,7 +431,7 @@ ipt_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 	xt_info_rdunlock_bh();
 	pr_debug("Exiting %s; resetting sp from %u to %u\n",
 		 __func__, *stackptr, origptr);
@@ -440,7 +439,7 @@ ipt_do_table(struct sk_buff *skb,
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
 #endif
@@ -2154,7 +2153,7 @@ icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index c9d83dc2d6fa..14a2aa8b8a14 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -46,7 +46,7 @@ static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil AH tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return 0;
 	}
 
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
index b79dddc9edd6..af6e9c778345 100644
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ b/net/ipv4/netfilter/ipt_ecn.c
@@ -78,7 +78,7 @@ static bool ecn_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
 		if (ip_hdr(skb)->protocol != IPPROTO_TCP)
 			return false;
-		if (!match_tcp(skb, info, par->hotdrop))
+		if (!match_tcp(skb, info, &par->hotdrop))
 			return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 4549f8d6f88f..557fac9689c0 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -337,7 +337,6 @@ ip6t_do_table(struct sk_buff *skb,
 	      struct xt_table *table)
 {
 	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
-	bool hotdrop = false;
 	/* Initializing verdict to NF_DROP keeps gcc happy. */
 	unsigned int verdict = NF_DROP;
 	const char *indev, *outdev;
@@ -356,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb,
 	 * things we don't know, ie. tcp syn flag or ports).  If the
 	 * rule is also a fragment-specific rule, non-fragments won't
 	 * match it. */
-	acpar.hotdrop = &hotdrop;
+	acpar.hotdrop = false;
 	acpar.in      = in;
 	acpar.out     = out;
 	acpar.family  = NFPROTO_IPV6;
@@ -380,7 +379,7 @@ ip6t_do_table(struct sk_buff *skb,
 
 		IP_NF_ASSERT(e);
 		if (!ip6_packet_match(skb, indev, outdev, &e->ipv6,
-		    &acpar.thoff, &acpar.fragoff, &hotdrop)) {
+		    &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) {
  no_match:
 			e = ip6t_next_entry(e);
 			continue;
@@ -447,7 +446,7 @@ ip6t_do_table(struct sk_buff *skb,
 		else
 			/* Verdict */
 			break;
-	} while (!hotdrop);
+	} while (!acpar.hotdrop);
 
 	xt_info_rdunlock_bh();
 	*stackptr = origptr;
@@ -455,7 +454,7 @@ ip6t_do_table(struct sk_buff *skb,
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
 #else
-	if (hotdrop)
+	if (acpar.hotdrop)
 		return NF_DROP;
 	else return verdict;
 #endif
@@ -2170,7 +2169,7 @@ icmp6_match(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		duprintf("Dropping evil ICMP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_ah.c b/net/ipv6/netfilter/ip6t_ah.c
index c89887f35a46..89cccc5a9c92 100644
--- a/net/ipv6/netfilter/ip6t_ah.c
+++ b/net/ipv6/netfilter/ip6t_ah.c
@@ -48,13 +48,13 @@ static bool ah_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_AUTH, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	ah = skb_header_pointer(skb, ptr, sizeof(_ah), &_ah);
 	if (ah == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_eui64.c b/net/ipv6/netfilter/ip6t_eui64.c
index f32fce34145a..aab0706908c5 100644
--- a/net/ipv6/netfilter/ip6t_eui64.c
+++ b/net/ipv6/netfilter/ip6t_eui64.c
@@ -27,7 +27,7 @@ eui64_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	if (!(skb_mac_header(skb) >= skb->head &&
 	      skb_mac_header(skb) + ETH_HLEN <= skb->data) &&
 	    par->fragoff != 0) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_frag.c b/net/ipv6/netfilter/ip6t_frag.c
index fcc8c72f218e..eda898fda6ca 100644
--- a/net/ipv6/netfilter/ip6t_frag.c
+++ b/net/ipv6/netfilter/ip6t_frag.c
@@ -46,13 +46,13 @@ frag_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_FRAGMENT, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	fh = skb_header_pointer(skb, ptr, sizeof(_frag), &_frag);
 	if (fh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_hbh.c b/net/ipv6/netfilter/ip6t_hbh.c
index f8aebc098d71..59df051eaef6 100644
--- a/net/ipv6/netfilter/ip6t_hbh.c
+++ b/net/ipv6/netfilter/ip6t_hbh.c
@@ -65,13 +65,13 @@ hbh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 			    NEXTHDR_HOP : NEXTHDR_DEST, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh);
 	if (oh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_mh.c b/net/ipv6/netfilter/ip6t_mh.c
index eb1c3d65271a..0c90c66b1992 100644
--- a/net/ipv6/netfilter/ip6t_mh.c
+++ b/net/ipv6/netfilter/ip6t_mh.c
@@ -47,14 +47,14 @@ static bool mh_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil MH tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
 	if (mh->ip6mh_proto != IPPROTO_NONE) {
 		pr_debug("Dropping invalid MH Payload Proto: %u\n",
 			 mh->ip6mh_proto);
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/ipv6/netfilter/ip6t_rt.c b/net/ipv6/netfilter/ip6t_rt.c
index ee584693ee35..d8488c50a8e0 100644
--- a/net/ipv6/netfilter/ip6t_rt.c
+++ b/net/ipv6/netfilter/ip6t_rt.c
@@ -52,13 +52,13 @@ static bool rt_mt6(const struct sk_buff *skb, struct xt_action_param *par)
 	err = ipv6_find_hdr(skb, &ptr, NEXTHDR_ROUTING, NULL);
 	if (err < 0) {
 		if (err != -ENOENT)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		return false;
 	}
 
 	rh = skb_header_pointer(skb, ptr, sizeof(_route), &_route);
 	if (rh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c
index f130fd9817be..5c5b6b921b84 100644
--- a/net/netfilter/xt_connlimit.c
+++ b/net/netfilter/xt_connlimit.c
@@ -206,14 +206,14 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	if (connections < 0) {
 		/* kmalloc failed, drop it entirely */
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
 	return (connections > info->limit) ^ info->inverse;
 
  hotdrop:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_dccp.c b/net/netfilter/xt_dccp.c
index cc2c2919439f..b63d2a3d80ba 100644
--- a/net/netfilter/xt_dccp.c
+++ b/net/netfilter/xt_dccp.c
@@ -107,7 +107,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 
 	dh = skb_header_pointer(skb, par->thoff, sizeof(_dh), &_dh);
 	if (dh == NULL) {
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
@@ -120,7 +120,7 @@ dccp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		&& DCCHECK(match_types(dh, info->typemask),
 			   XT_DCCP_TYPE, info->flags, info->invflags)
 		&& DCCHECK(match_option(info->option, skb, par->thoff, dh,
-					par->hotdrop),
+					&par->hotdrop),
 			   XT_DCCP_OPTION, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_esp.c b/net/netfilter/xt_esp.c
index 7c2d80020554..171ba82b5902 100644
--- a/net/netfilter/xt_esp.c
+++ b/net/netfilter/xt_esp.c
@@ -52,7 +52,7 @@ static bool esp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil ESP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c
index 25a6e548ec27..b46a8390896d 100644
--- a/net/netfilter/xt_hashlimit.c
+++ b/net/netfilter/xt_hashlimit.c
@@ -562,7 +562,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	return info->cfg.mode & XT_HASHLIMIT_INVERT;
 
  hotdrop:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_multiport.c b/net/netfilter/xt_multiport.c
index 52beb68256c8..ac1d3c3d09e7 100644
--- a/net/netfilter/xt_multiport.c
+++ b/net/netfilter/xt_multiport.c
@@ -87,7 +87,7 @@ multiport_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		 * can't.  Hence, no choice but to drop.
 		 */
 		pr_debug("Dropping evil offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c
index 2808a7e33947..76aec6a44762 100644
--- a/net/netfilter/xt_recent.c
+++ b/net/netfilter/xt_recent.c
@@ -268,7 +268,7 @@ recent_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			goto out;
 		e = recent_entry_init(t, &addr, par->family, ttl);
 		if (e == NULL)
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		ret = !ret;
 		goto out;
 	}
diff --git a/net/netfilter/xt_sctp.c b/net/netfilter/xt_sctp.c
index 94d8b5deb2d0..c04fcf385c59 100644
--- a/net/netfilter/xt_sctp.c
+++ b/net/netfilter/xt_sctp.c
@@ -128,7 +128,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 	sh = skb_header_pointer(skb, par->thoff, sizeof(_sh), &_sh);
 	if (sh == NULL) {
 		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 	pr_debug("spt: %d\tdpt: %d\n", ntohs(sh->source), ntohs(sh->dest));
@@ -140,7 +140,7 @@ sctp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 			&& ntohs(sh->dest) <= info->dpts[1],
 			XT_SCTP_DEST_PORTS, info->flags, info->invflags)
 		&& SCCHECK(match_packet(skb, par->thoff + sizeof(sctp_sctphdr_t),
-					info, par->hotdrop),
+					info, &par->hotdrop),
 			   XT_SCTP_CHUNK_TYPES, info->flags, info->invflags);
 }
 
diff --git a/net/netfilter/xt_tcpmss.c b/net/netfilter/xt_tcpmss.c
index f90728f6b8a4..c53d4d18eadf 100644
--- a/net/netfilter/xt_tcpmss.c
+++ b/net/netfilter/xt_tcpmss.c
@@ -73,7 +73,7 @@ out:
 	return info->invert;
 
 dropit:
-	*par->hotdrop = true;
+	par->hotdrop = true;
 	return false;
 }
 
diff --git a/net/netfilter/xt_tcpudp.c b/net/netfilter/xt_tcpudp.c
index dedde33c9db6..c14d4645daa3 100644
--- a/net/netfilter/xt_tcpudp.c
+++ b/net/netfilter/xt_tcpudp.c
@@ -77,7 +77,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		*/
 		if (par->fragoff == 1) {
 			pr_debug("Dropping evil TCP offset=1 frag.\n");
-			*par->hotdrop = true;
+			par->hotdrop = true;
 		}
 		/* Must not be a fragment. */
 		return false;
@@ -90,7 +90,7 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil TCP offset=0 tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
@@ -108,13 +108,13 @@ static bool tcp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		return false;
 	if (tcpinfo->option) {
 		if (th->doff * 4 < sizeof(_tcph)) {
-			*par->hotdrop = true;
+			par->hotdrop = true;
 			return false;
 		}
 		if (!tcp_find_option(tcpinfo->option, skb, par->thoff,
 				     th->doff*4 - sizeof(_tcph),
 				     tcpinfo->invflags & XT_TCP_INV_OPTION,
-				     par->hotdrop))
+				     &par->hotdrop))
 			return false;
 	}
 	return true;
@@ -143,7 +143,7 @@ static bool udp_mt(const struct sk_buff *skb, struct xt_action_param *par)
 		/* We've been asked to examine this packet, and we
 		   can't.  Hence, no choice but to drop. */
 		pr_debug("Dropping evil UDP tinygram.\n");
-		*par->hotdrop = true;
+		par->hotdrop = true;
 		return false;
 	}
 
-- 
cgit v1.2.3


From 0350b6a0cbeaf46e0883d8c79ede2efd49965472 Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 11 May 2010 18:13:49 +0200
Subject: include/linux/usb/audio.h: add __attribute__((packed))

This was missing from the definition of struct uac_iso_endpoint_descriptor.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 57f20551939d..9fae6bdab338 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -456,7 +456,7 @@ struct uac_iso_endpoint_descriptor {
 	__u8  bmAttributes;
 	__u8  bLockDelayUnits;
 	__le16 wLockDelay;
-};
+} __attribute__((packed));
 #define UAC_ISO_ENDPOINT_DESC_SIZE	7
 
 #define UAC_EP_CS_ATTR_SAMPLE_RATE	0x01
-- 
cgit v1.2.3


From e213e9cf707c51808e372dabd1070a61af17e77b Mon Sep 17 00:00:00 2001
From: Daniel Mack <daniel@caiaq.de>
Date: Tue, 11 May 2010 18:13:50 +0200
Subject: ALSA: sound/usb: add preliminary support for UAC2 interrupts

For both UAC1 and UAC2, interrupt endpoint messages are now parsed with
structs rather than with anonymous buffer array accesses.

For UAC2, only CUR interrupt notifications are supported for now.

snd_usb_mixer_status_complete() was renamed to
snd_usb_mixer_interrupt().

Fixed one indentation flaw on the way.

Signed-off-by: Daniel Mack <daniel@caiaq.de>
Cc: Clemens Ladisch <clemens@ladisch.de>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/linux/usb/audio-v2.h | 12 ++++++
 include/linux/usb/audio.h    | 15 +++++++
 sound/usb/mixer.c            | 98 ++++++++++++++++++++++++++++++++++++++------
 3 files changed, 112 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h
index 0952231e6c3f..2389f93a28b5 100644
--- a/include/linux/usb/audio-v2.h
+++ b/include/linux/usb/audio-v2.h
@@ -105,6 +105,17 @@ struct uac_as_header_descriptor_v2 {
 	__u8 iChannelNames;
 } __attribute__((packed));
 
+/* 6.1 Interrupt Data Message */
+
+#define UAC2_INTERRUPT_DATA_MSG_VENDOR	(1 << 0)
+#define UAC2_INTERRUPT_DATA_MSG_EP	(1 << 1)
+
+struct uac2_interrupt_data_msg {
+	__u8 bInfo;
+	__u8 bAttribute;
+	__le16 wValue;
+	__le16 wIndex;
+} __attribute__((packed));
 
 /* A.7 Audio Function Category Codes */
 #define UAC2_FUNCTION_SUBCLASS_UNDEFINED	0x00
@@ -153,6 +164,7 @@ struct uac_as_header_descriptor_v2 {
 /* A.14 Audio Class-Specific Request Codes */
 #define UAC2_CS_CUR			0x01
 #define UAC2_CS_RANGE			0x02
+#define UAC2_CS_MEM			0x03
 
 /* A.15 Encoder Type Codes */
 #define UAC2_ENCODER_UNDEFINED		0x00
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 9fae6bdab338..c0ef18dc2da7 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -488,6 +488,21 @@ struct uac_iso_endpoint_descriptor {
 #define UAC_FU_BASS_BOOST	(1 << (UAC_BASS_BOOST_CONTROL - 1))
 #define UAC_FU_LOUDNESS		(1 << (UAC_LOUDNESS_CONTROL - 1))
 
+/* status word format (3.7.1.1) */
+
+#define UAC1_STATUS_TYPE_ORIG_MASK		0x0f
+#define UAC1_STATUS_TYPE_ORIG_AUDIO_CONTROL_IF	0x0
+#define UAC1_STATUS_TYPE_ORIG_AUDIO_STREAM_IF	0x1
+#define UAC1_STATUS_TYPE_ORIG_AUDIO_STREAM_EP	0x2
+
+#define UAC1_STATUS_TYPE_IRQ_PENDING		(1 << 7)
+#define UAC1_STATUS_TYPE_MEM_CHANGED		(1 << 6)
+
+struct uac1_status_word {
+	__u8 bStatusType;
+	__u8 bOriginator;
+} __attribute__((packed));
+
 #ifdef __KERNEL__
 
 struct usb_audio_control {
diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c
index e350f053440a..820dfe08ac22 100644
--- a/sound/usb/mixer.c
+++ b/sound/usb/mixer.c
@@ -1443,8 +1443,8 @@ static struct procunit_info procunits[] = {
  * predefined data for extension units
  */
 static struct procunit_value_info clock_rate_xu_info[] = {
-       { USB_XU_CLOCK_RATE_SELECTOR, "Selector", USB_MIXER_U8, 0 },
-       { 0 }
+	{ USB_XU_CLOCK_RATE_SELECTOR, "Selector", USB_MIXER_U8, 0 },
+	{ 0 }
 };
 static struct procunit_value_info clock_source_xu_info[] = {
 	{ USB_XU_CLOCK_SOURCE_SELECTOR, "External", USB_MIXER_BOOLEAN },
@@ -1967,26 +1967,98 @@ static void snd_usb_mixer_proc_read(struct snd_info_entry *entry,
 	}
 }
 
-static void snd_usb_mixer_status_complete(struct urb *urb)
+static void snd_usb_mixer_interrupt_v2(struct usb_mixer_interface *mixer,
+				       int attribute, int value, int index)
+{
+	struct usb_mixer_elem_info *info;
+	__u8 unitid = (index >> 8) & 0xff;
+	__u8 control = (value >> 8) & 0xff;
+	__u8 channel = value & 0xff;
+
+	if (channel >= MAX_CHANNELS) {
+		snd_printk(KERN_DEBUG "%s(): bogus channel number %d\n",
+				__func__, channel);
+		return;
+	}
+
+	for (info = mixer->id_elems[unitid]; info; info = info->next_id_elem) {
+		if (info->control != control)
+			continue;
+
+		switch (attribute) {
+		case UAC2_CS_CUR:
+			/* invalidate cache, so the value is read from the device */
+			if (channel)
+				info->cached &= ~(1 << channel);
+			else /* master channel */
+				info->cached = 0;
+
+			snd_ctl_notify(mixer->chip->card, SNDRV_CTL_EVENT_MASK_VALUE,
+					info->elem_id);
+			break;
+
+		case UAC2_CS_RANGE:
+			/* TODO */
+			break;
+
+		case UAC2_CS_MEM:
+			/* TODO */
+			break;
+
+		default:
+			snd_printk(KERN_DEBUG "unknown attribute %d in interrupt\n",
+						attribute);
+			break;
+		} /* switch */
+	}
+}
+
+static void snd_usb_mixer_interrupt(struct urb *urb)
 {
 	struct usb_mixer_interface *mixer = urb->context;
+	int len = urb->actual_length;
+
+	if (urb->status != 0)
+		goto requeue;
 
-	if (urb->status == 0) {
-		u8 *buf = urb->transfer_buffer;
-		int i;
+	if (mixer->protocol == UAC_VERSION_1) {
+		struct uac1_status_word *status;
 
-		for (i = urb->actual_length; i >= 2; buf += 2, i -= 2) {
+		for (status = urb->transfer_buffer;
+		     len >= sizeof(*status);
+		     len -= sizeof(*status), status++) {
 			snd_printd(KERN_DEBUG "status interrupt: %02x %02x\n",
-				   buf[0], buf[1]);
+						status->bStatusType,
+						status->bOriginator);
+
 			/* ignore any notifications not from the control interface */
-			if ((buf[0] & 0x0f) != 0)
+			if ((status->bStatusType & UAC1_STATUS_TYPE_ORIG_MASK) !=
+				UAC1_STATUS_TYPE_ORIG_AUDIO_CONTROL_IF)
 				continue;
-			if (!(buf[0] & 0x40))
-				snd_usb_mixer_notify_id(mixer, buf[1]);
+
+			if (status->bStatusType & UAC1_STATUS_TYPE_MEM_CHANGED)
+				snd_usb_mixer_rc_memory_change(mixer, status->bOriginator);
 			else
-				snd_usb_mixer_rc_memory_change(mixer, buf[1]);
+				snd_usb_mixer_notify_id(mixer, status->bOriginator);
+		}
+	} else { /* UAC_VERSION_2 */
+		struct uac2_interrupt_data_msg *msg;
+
+		for (msg = urb->transfer_buffer;
+		     len >= sizeof(*msg);
+		     len -= sizeof(*msg), msg++) {
+			/* drop vendor specific and endpoint requests */
+			if ((msg->bInfo & UAC2_INTERRUPT_DATA_MSG_VENDOR) ||
+			    (msg->bInfo & UAC2_INTERRUPT_DATA_MSG_EP))
+				continue;
+
+			snd_usb_mixer_interrupt_v2(mixer, msg->bAttribute,
+						   le16_to_cpu(msg->wValue),
+						   le16_to_cpu(msg->wIndex));
 		}
 	}
+
+requeue:
 	if (urb->status != -ENOENT && urb->status != -ECONNRESET) {
 		urb->dev = mixer->chip->dev;
 		usb_submit_urb(urb, GFP_ATOMIC);
@@ -2023,7 +2095,7 @@ static int snd_usb_mixer_status_create(struct usb_mixer_interface *mixer)
 	usb_fill_int_urb(mixer->urb, mixer->chip->dev,
 			 usb_rcvintpipe(mixer->chip->dev, epnum),
 			 transfer_buffer, buffer_length,
-			 snd_usb_mixer_status_complete, mixer, ep->bInterval);
+			 snd_usb_mixer_interrupt, mixer, ep->bInterval);
 	usb_submit_urb(mixer->urb, GFP_KERNEL);
 	return 0;
 }
-- 
cgit v1.2.3


From ec66841e495b9ab4f92bdf91efe8cf56e1471fbd Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Wed, 5 May 2010 14:44:55 -0600
Subject: viafb: move some include files to include/linux

These are the files which should be available to subdevices compiled
outside of drivers/video/via.

Cc: ScottFang@viatech.com.cn
Cc: JosephChan@via.com.tw
Cc: Harald Welte <laforge@gnumonks.org>
Acked-by: Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 drivers/video/via/accel.c           |   2 +-
 drivers/video/via/dvi.c             |   4 +-
 drivers/video/via/hw.c              |   3 +-
 drivers/video/via/lcd.c             |   4 +-
 drivers/video/via/via-core.c        |   6 +-
 drivers/video/via/via-core.h        | 219 ------------------------------------
 drivers/video/via/via-gpio.c        |   4 +-
 drivers/video/via/via-gpio.h        |  14 ---
 drivers/video/via/via_i2c.c         |   4 +-
 drivers/video/via/via_i2c.h         |  42 -------
 drivers/video/via/via_modesetting.c |   2 +-
 drivers/video/via/via_utility.c     |   2 +-
 drivers/video/via/viafbdev.c        |   4 +-
 drivers/video/via/viamode.c         |   2 +-
 drivers/video/via/vt1636.c          |   4 +-
 include/linux/via-core.h            | 219 ++++++++++++++++++++++++++++++++++++
 include/linux/via-gpio.h            |  14 +++
 include/linux/via_i2c.h             |  42 +++++++
 18 files changed, 296 insertions(+), 295 deletions(-)
 delete mode 100644 drivers/video/via/via-core.h
 delete mode 100644 drivers/video/via/via-gpio.h
 delete mode 100644 drivers/video/via/via_i2c.h
 create mode 100644 include/linux/via-core.h
 create mode 100644 include/linux/via-gpio.h
 create mode 100644 include/linux/via_i2c.h

(limited to 'include/linux')

diff --git a/drivers/video/via/accel.c b/drivers/video/via/accel.c
index 189aba41f9b9..e44893ea590d 100644
--- a/drivers/video/via/accel.c
+++ b/drivers/video/via/accel.c
@@ -18,7 +18,7 @@
  * Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
-#include "via-core.h"
+#include <linux/via-core.h>
 #include "global.h"
 
 /*
diff --git a/drivers/video/via/dvi.c b/drivers/video/via/dvi.c
index 6271b7696323..39b040bb3817 100644
--- a/drivers/video/via/dvi.c
+++ b/drivers/video/via/dvi.c
@@ -18,8 +18,8 @@
  * Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
-#include "via-core.h"
-#include "via_i2c.h"
+#include <linux/via-core.h>
+#include <linux/via_i2c.h>
 #include "global.h"
 
 static void tmds_register_write(int index, u8 data);
diff --git a/drivers/video/via/hw.c b/drivers/video/via/hw.c
index e356fe8d8a90..b996803ae2c1 100644
--- a/drivers/video/via/hw.c
+++ b/drivers/video/via/hw.c
@@ -18,7 +18,8 @@
  * Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
-#include "via-core.h"
+
+#include <linux/via-core.h>
 #include "global.h"
 
 static struct pll_map pll_value[] = {
diff --git a/drivers/video/via/lcd.c b/drivers/video/via/lcd.c
index 04eec31aa9a2..2ab0f156439a 100644
--- a/drivers/video/via/lcd.c
+++ b/drivers/video/via/lcd.c
@@ -18,8 +18,8 @@
  * Foundation, Inc.,
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
-#include "via-core.h"
-#include "via_i2c.h"
+#include <linux/via-core.h>
+#include <linux/via_i2c.h>
 #include "global.h"
 #include "lcdtbl.h"
 
diff --git a/drivers/video/via/via-core.c b/drivers/video/via/via-core.c
index 188c0022dcb4..e8cfe8392110 100644
--- a/drivers/video/via/via-core.c
+++ b/drivers/video/via/via-core.c
@@ -7,9 +7,9 @@
 /*
  * Core code for the Via multifunction framebuffer device.
  */
-#include "via-core.h"
-#include "via_i2c.h"
-#include "via-gpio.h"
+#include <linux/via-core.h>
+#include <linux/via_i2c.h>
+#include <linux/via-gpio.h>
 #include "global.h"
 
 #include <linux/module.h>
diff --git a/drivers/video/via/via-core.h b/drivers/video/via/via-core.h
deleted file mode 100644
index 7ffb521e1a7a..000000000000
--- a/drivers/video/via/via-core.h
+++ /dev/null
@@ -1,219 +0,0 @@
-/*
- * Copyright 1998-2009 VIA Technologies, Inc. All Rights Reserved.
- * Copyright 2001-2008 S3 Graphics, Inc. All Rights Reserved.
- * Copyright 2009-2010 Jonathan Corbet <corbet@lwn.net>
- * Copyright 2010 Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation;
- * either version 2, or (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTIES OR REPRESENTATIONS; without even
- * the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE.See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-
-#ifndef __VIA_CORE_H__
-#define __VIA_CORE_H__
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/spinlock.h>
-#include <linux/pci.h>
-
-/*
- * A description of each known serial I2C/GPIO port.
- */
-enum via_port_type {
-	VIA_PORT_NONE = 0,
-	VIA_PORT_I2C,
-	VIA_PORT_GPIO,
-};
-
-enum via_port_mode {
-	VIA_MODE_OFF = 0,
-	VIA_MODE_I2C,		/* Used as I2C port */
-	VIA_MODE_GPIO,	/* Two GPIO ports */
-};
-
-enum viafb_i2c_adap {
-	VIA_PORT_26 = 0,
-	VIA_PORT_31,
-	VIA_PORT_25,
-	VIA_PORT_2C,
-	VIA_PORT_3D,
-};
-#define VIAFB_NUM_PORTS 5
-
-struct via_port_cfg {
-	enum via_port_type	type;
-	enum via_port_mode	mode;
-	u16			io_port;
-	u8			ioport_index;
-};
-
-/*
- * This is the global viafb "device" containing stuff needed by
- * all subdevs.
- */
-struct viafb_dev {
-	struct pci_dev *pdev;
-	int chip_type;
-	struct via_port_cfg *port_cfg;
-	/*
-	 * Spinlock for access to device registers.  Not yet
-	 * globally used.
-	 */
-	spinlock_t reg_lock;
-	/*
-	 * The framebuffer MMIO region.  Little, if anything, touches
-	 * this memory directly, and certainly nothing outside of the
-	 * framebuffer device itself.  We *do* have to be able to allocate
-	 * chunks of this memory for other devices, though.
-	 */
-	unsigned long fbmem_start;
-	long fbmem_len;
-	void __iomem *fbmem;
-#if defined(CONFIG_FB_VIA_CAMERA) || defined(CONFIG_FB_VIA_CAMERA_MODULE)
-	long camera_fbmem_offset;
-	long camera_fbmem_size;
-#endif
-	/*
-	 * The MMIO region for device registers.
-	 */
-	unsigned long engine_start;
-	unsigned long engine_len;
-	void __iomem *engine_mmio;
-
-};
-
-/*
- * Interrupt management.
- */
-
-void viafb_irq_enable(u32 mask);
-void viafb_irq_disable(u32 mask);
-
-/*
- * The global interrupt control register and its bits.
- */
-#define VDE_INTERRUPT	0x200	/* Video interrupt flags/masks */
-#define   VDE_I_DVISENSE  0x00000001  /* DVI sense int status */
-#define   VDE_I_VBLANK    0x00000002  /* Vertical blank status */
-#define   VDE_I_MCCFI	  0x00000004  /* MCE compl. frame int status */
-#define   VDE_I_VSYNC	  0x00000008  /* VGA VSYNC int status */
-#define   VDE_I_DMA0DDONE 0x00000010  /* DMA 0 descr done */
-#define   VDE_I_DMA0TDONE 0x00000020  /* DMA 0 transfer done */
-#define   VDE_I_DMA1DDONE 0x00000040  /* DMA 1 descr done */
-#define   VDE_I_DMA1TDONE 0x00000080  /* DMA 1 transfer done */
-#define   VDE_I_C1AV      0x00000100  /* Cap Eng 1 act vid end */
-#define   VDE_I_HQV0	  0x00000200  /* First HQV engine */
-#define   VDE_I_HQV1      0x00000400  /* Second HQV engine */
-#define   VDE_I_HQV1EN	  0x00000800  /* Second HQV engine enable */
-#define   VDE_I_C0AV      0x00001000  /* Cap Eng 0 act vid end */
-#define   VDE_I_C0VBI     0x00002000  /* Cap Eng 0 VBI end */
-#define   VDE_I_C1VBI     0x00004000  /* Cap Eng 1 VBI end */
-#define   VDE_I_VSYNC2    0x00008000  /* Sec. Disp. VSYNC */
-#define   VDE_I_DVISNSEN  0x00010000  /* DVI sense enable */
-#define   VDE_I_VSYNC2EN  0x00020000  /* Sec Disp VSYNC enable */
-#define   VDE_I_MCCFIEN	  0x00040000  /* MC comp frame int mask enable */
-#define   VDE_I_VSYNCEN   0x00080000  /* VSYNC enable */
-#define   VDE_I_DMA0DDEN  0x00100000  /* DMA 0 descr done enable */
-#define   VDE_I_DMA0TDEN  0x00200000  /* DMA 0 trans done enable */
-#define   VDE_I_DMA1DDEN  0x00400000  /* DMA 1 descr done enable */
-#define   VDE_I_DMA1TDEN  0x00800000  /* DMA 1 trans done enable */
-#define   VDE_I_C1AVEN    0x01000000  /* cap 1 act vid end enable */
-#define   VDE_I_HQV0EN	  0x02000000  /* First hqv engine enable */
-#define   VDE_I_C1VBIEN	  0x04000000  /* Cap 1 VBI end enable */
-#define   VDE_I_LVDSSI    0x08000000  /* LVDS sense interrupt */
-#define   VDE_I_C0AVEN    0x10000000  /* Cap 0 act vid end enable */
-#define   VDE_I_C0VBIEN   0x20000000  /* Cap 0 VBI end enable */
-#define   VDE_I_LVDSSIEN  0x40000000  /* LVDS Sense enable */
-#define   VDE_I_ENABLE	  0x80000000  /* Global interrupt enable */
-
-/*
- * DMA management.
- */
-int viafb_request_dma(void);
-void viafb_release_dma(void);
-/* void viafb_dma_copy_out(unsigned int offset, dma_addr_t paddr, int len); */
-int viafb_dma_copy_out_sg(unsigned int offset, struct scatterlist *sg, int nsg);
-
-/*
- * DMA Controller registers.
- */
-#define VDMA_MR0	0xe00		/* Mod reg 0 */
-#define   VDMA_MR_CHAIN   0x01		/* Chaining mode */
-#define   VDMA_MR_TDIE    0x02		/* Transfer done int enable */
-#define VDMA_CSR0	0xe04		/* Control/status */
-#define	  VDMA_C_ENABLE	  0x01		  /* DMA Enable */
-#define	  VDMA_C_START	  0x02		  /* Start a transfer */
-#define	  VDMA_C_ABORT	  0x04		  /* Abort a transfer */
-#define	  VDMA_C_DONE	  0x08		  /* Transfer is done */
-#define VDMA_MARL0	0xe20		/* Mem addr low */
-#define VDMA_MARH0	0xe24		/* Mem addr high */
-#define VDMA_DAR0	0xe28		/* Device address */
-#define VDMA_DQWCR0	0xe2c		/* Count (16-byte) */
-#define VDMA_TMR0	0xe30		/* Tile mode reg */
-#define VDMA_DPRL0	0xe34		/* Not sure */
-#define	  VDMA_DPR_IN	  0x08		/* Inbound transfer to FB */
-#define VDMA_DPRH0	0xe38
-#define VDMA_PMR0	(0xe00 + 0x134) /* Pitch mode */
-
-/*
- * Useful stuff that probably belongs somewhere global.
- */
-#define VGA_WIDTH	640
-#define VGA_HEIGHT	480
-
-/*
- * Indexed port operations.  Note that these are all multi-op
- * functions; every invocation will be racy if you're not holding
- * reg_lock.
- */
-
-#define VIAStatus   0x3DA  /* Non-indexed port */
-#define VIACR       0x3D4
-#define VIASR       0x3C4
-#define VIAGR       0x3CE
-#define VIAAR       0x3C0
-
-static inline u8 via_read_reg(u16 port, u8 index)
-{
-	outb(index, port);
-	return inb(port + 1);
-}
-
-static inline void via_write_reg(u16 port, u8 index, u8 data)
-{
-	outb(index, port);
-	outb(data, port + 1);
-}
-
-static inline void via_write_reg_mask(u16 port, u8 index, u8 data, u8 mask)
-{
-	u8 old;
-
-	outb(index, port);
-	old = inb(port + 1);
-	outb((data & mask) | (old & ~mask), port + 1);
-}
-
-#define VIA_MISC_REG_READ	0x03CC
-#define VIA_MISC_REG_WRITE	0x03C2
-
-static inline void via_write_misc_reg_mask(u8 data, u8 mask)
-{
-	u8 old = inb(VIA_MISC_REG_READ);
-	outb((data & mask) | (old & ~mask), VIA_MISC_REG_WRITE);
-}
-
-
-#endif /* __VIA_CORE_H__ */
diff --git a/drivers/video/via/via-gpio.c b/drivers/video/via/via-gpio.c
index 67d699cbfd2c..595516aea691 100644
--- a/drivers/video/via/via-gpio.c
+++ b/drivers/video/via/via-gpio.c
@@ -8,8 +8,8 @@
 #include <linux/spinlock.h>
 #include <linux/gpio.h>
 #include <linux/platform_device.h>
-#include "via-core.h"
-#include "via-gpio.h"
+#include <linux/via-core.h>
+#include <linux/via-gpio.h>
 
 /*
  * The ports we know about.  Note that the port-25 gpios are not
diff --git a/drivers/video/via/via-gpio.h b/drivers/video/via/via-gpio.h
deleted file mode 100644
index 8281aea3dd6d..000000000000
--- a/drivers/video/via/via-gpio.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/*
- * Support for viafb GPIO ports.
- *
- * Copyright 2009 Jonathan Corbet <corbet@lwn.net>
- * Distributable under version 2 of the GNU General Public License.
- */
-
-#ifndef __VIA_GPIO_H__
-#define __VIA_GPIO_H__
-
-extern int viafb_gpio_lookup(const char *name);
-extern int viafb_gpio_init(void);
-extern void viafb_gpio_exit(void);
-#endif
diff --git a/drivers/video/via/via_i2c.c b/drivers/video/via/via_i2c.c
index 2291765f2f8e..da9e4ca94b17 100644
--- a/drivers/video/via/via_i2c.c
+++ b/drivers/video/via/via_i2c.c
@@ -23,8 +23,8 @@
 #include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/module.h>
-#include "via-core.h"
-#include "via_i2c.h"
+#include <linux/via-core.h>
+#include <linux/via_i2c.h>
 
 /*
  * There can only be one set of these, so there's no point in having
diff --git a/drivers/video/via/via_i2c.h b/drivers/video/via/via_i2c.h
deleted file mode 100644
index 44532e468c05..000000000000
--- a/drivers/video/via/via_i2c.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * Copyright 1998-2009 VIA Technologies, Inc. All Rights Reserved.
- * Copyright 2001-2008 S3 Graphics, Inc. All Rights Reserved.
-
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public
- * License as published by the Free Software Foundation;
- * either version 2, or (at your option) any later version.
-
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTIES OR REPRESENTATIONS; without even
- * the implied warranty of MERCHANTABILITY or FITNESS FOR
- * A PARTICULAR PURPOSE.See the GNU General Public License
- * for more details.
-
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc.,
- * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
- */
-#ifndef __VIA_I2C_H__
-#define __VIA_I2C_H__
-
-#include <linux/i2c.h>
-#include <linux/i2c-algo-bit.h>
-
-struct via_i2c_stuff {
-	u16 i2c_port;			/* GPIO or I2C port */
-	u16 is_active;			/* Being used as I2C? */
-	struct i2c_adapter adapter;
-	struct i2c_algo_bit_data algo;
-};
-
-
-int viafb_i2c_readbyte(u8 adap, u8 slave_addr, u8 index, u8 *pdata);
-int viafb_i2c_writebyte(u8 adap, u8 slave_addr, u8 index, u8 data);
-int viafb_i2c_readbytes(u8 adap, u8 slave_addr, u8 index, u8 *buff, int buff_len);
-struct i2c_adapter *viafb_find_i2c_adapter(enum viafb_i2c_adap which);
-
-extern int viafb_i2c_init(void);
-extern void viafb_i2c_exit(void);
-#endif /* __VIA_I2C_H__ */
diff --git a/drivers/video/via/via_modesetting.c b/drivers/video/via/via_modesetting.c
index b4e735cc350e..3cddcff88ab9 100644
--- a/drivers/video/via/via_modesetting.c
+++ b/drivers/video/via/via_modesetting.c
@@ -24,8 +24,8 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/via-core.h>
 #include "via_modesetting.h"
-#include "via-core.h"
 #include "share.h"
 #include "debug.h"
 
diff --git a/drivers/video/via/via_utility.c b/drivers/video/via/via_utility.c
index 575703141868..d05ccb62b55f 100644
--- a/drivers/video/via/via_utility.c
+++ b/drivers/video/via/via_utility.c
@@ -19,7 +19,7 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-#include "via-core.h"
+#include <linux/via-core.h>
 #include "global.h"
 
 void viafb_get_device_support_state(u32 *support_state)
diff --git a/drivers/video/via/viafbdev.c b/drivers/video/via/viafbdev.c
index 3d033186a822..d3dd2eb95586 100644
--- a/drivers/video/via/viafbdev.c
+++ b/drivers/video/via/viafbdev.c
@@ -22,9 +22,9 @@
 #include <linux/module.h>
 #include <linux/seq_file.h>
 #include <linux/stat.h>
-#define _MASTER_FILE
+#include <linux/via-core.h>
 
-#include "via-core.h"
+#define _MASTER_FILE
 #include "global.h"
 
 static char *viafb_name = "Via";
diff --git a/drivers/video/via/viamode.c b/drivers/video/via/viamode.c
index 2fdb9e6724a4..2dbad3c0f679 100644
--- a/drivers/video/via/viamode.c
+++ b/drivers/video/via/viamode.c
@@ -19,7 +19,7 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-#include "via-core.h"
+#include <linux/via-core.h>
 #include "global.h"
 struct res_map_refresh res_map_refresh_tbl[] = {
 /*hres, vres, vclock, vmode_refresh*/
diff --git a/drivers/video/via/vt1636.c b/drivers/video/via/vt1636.c
index e5f802472883..d65bf1aee87c 100644
--- a/drivers/video/via/vt1636.c
+++ b/drivers/video/via/vt1636.c
@@ -19,8 +19,8 @@
  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  */
 
-#include "via-core.h"
-#include "via_i2c.h"
+#include <linux/via-core.h>
+#include <linux/via_i2c.h>
 #include "global.h"
 
 u8 viafb_gpio_i2c_read_lvds(struct lvds_setting_information
diff --git a/include/linux/via-core.h b/include/linux/via-core.h
new file mode 100644
index 000000000000..7ffb521e1a7a
--- /dev/null
+++ b/include/linux/via-core.h
@@ -0,0 +1,219 @@
+/*
+ * Copyright 1998-2009 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2008 S3 Graphics, Inc. All Rights Reserved.
+ * Copyright 2009-2010 Jonathan Corbet <corbet@lwn.net>
+ * Copyright 2010 Florian Tobias Schandinat <FlorianSchandinat@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation;
+ * either version 2, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTIES OR REPRESENTATIONS; without even
+ * the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#ifndef __VIA_CORE_H__
+#define __VIA_CORE_H__
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/spinlock.h>
+#include <linux/pci.h>
+
+/*
+ * A description of each known serial I2C/GPIO port.
+ */
+enum via_port_type {
+	VIA_PORT_NONE = 0,
+	VIA_PORT_I2C,
+	VIA_PORT_GPIO,
+};
+
+enum via_port_mode {
+	VIA_MODE_OFF = 0,
+	VIA_MODE_I2C,		/* Used as I2C port */
+	VIA_MODE_GPIO,	/* Two GPIO ports */
+};
+
+enum viafb_i2c_adap {
+	VIA_PORT_26 = 0,
+	VIA_PORT_31,
+	VIA_PORT_25,
+	VIA_PORT_2C,
+	VIA_PORT_3D,
+};
+#define VIAFB_NUM_PORTS 5
+
+struct via_port_cfg {
+	enum via_port_type	type;
+	enum via_port_mode	mode;
+	u16			io_port;
+	u8			ioport_index;
+};
+
+/*
+ * This is the global viafb "device" containing stuff needed by
+ * all subdevs.
+ */
+struct viafb_dev {
+	struct pci_dev *pdev;
+	int chip_type;
+	struct via_port_cfg *port_cfg;
+	/*
+	 * Spinlock for access to device registers.  Not yet
+	 * globally used.
+	 */
+	spinlock_t reg_lock;
+	/*
+	 * The framebuffer MMIO region.  Little, if anything, touches
+	 * this memory directly, and certainly nothing outside of the
+	 * framebuffer device itself.  We *do* have to be able to allocate
+	 * chunks of this memory for other devices, though.
+	 */
+	unsigned long fbmem_start;
+	long fbmem_len;
+	void __iomem *fbmem;
+#if defined(CONFIG_FB_VIA_CAMERA) || defined(CONFIG_FB_VIA_CAMERA_MODULE)
+	long camera_fbmem_offset;
+	long camera_fbmem_size;
+#endif
+	/*
+	 * The MMIO region for device registers.
+	 */
+	unsigned long engine_start;
+	unsigned long engine_len;
+	void __iomem *engine_mmio;
+
+};
+
+/*
+ * Interrupt management.
+ */
+
+void viafb_irq_enable(u32 mask);
+void viafb_irq_disable(u32 mask);
+
+/*
+ * The global interrupt control register and its bits.
+ */
+#define VDE_INTERRUPT	0x200	/* Video interrupt flags/masks */
+#define   VDE_I_DVISENSE  0x00000001  /* DVI sense int status */
+#define   VDE_I_VBLANK    0x00000002  /* Vertical blank status */
+#define   VDE_I_MCCFI	  0x00000004  /* MCE compl. frame int status */
+#define   VDE_I_VSYNC	  0x00000008  /* VGA VSYNC int status */
+#define   VDE_I_DMA0DDONE 0x00000010  /* DMA 0 descr done */
+#define   VDE_I_DMA0TDONE 0x00000020  /* DMA 0 transfer done */
+#define   VDE_I_DMA1DDONE 0x00000040  /* DMA 1 descr done */
+#define   VDE_I_DMA1TDONE 0x00000080  /* DMA 1 transfer done */
+#define   VDE_I_C1AV      0x00000100  /* Cap Eng 1 act vid end */
+#define   VDE_I_HQV0	  0x00000200  /* First HQV engine */
+#define   VDE_I_HQV1      0x00000400  /* Second HQV engine */
+#define   VDE_I_HQV1EN	  0x00000800  /* Second HQV engine enable */
+#define   VDE_I_C0AV      0x00001000  /* Cap Eng 0 act vid end */
+#define   VDE_I_C0VBI     0x00002000  /* Cap Eng 0 VBI end */
+#define   VDE_I_C1VBI     0x00004000  /* Cap Eng 1 VBI end */
+#define   VDE_I_VSYNC2    0x00008000  /* Sec. Disp. VSYNC */
+#define   VDE_I_DVISNSEN  0x00010000  /* DVI sense enable */
+#define   VDE_I_VSYNC2EN  0x00020000  /* Sec Disp VSYNC enable */
+#define   VDE_I_MCCFIEN	  0x00040000  /* MC comp frame int mask enable */
+#define   VDE_I_VSYNCEN   0x00080000  /* VSYNC enable */
+#define   VDE_I_DMA0DDEN  0x00100000  /* DMA 0 descr done enable */
+#define   VDE_I_DMA0TDEN  0x00200000  /* DMA 0 trans done enable */
+#define   VDE_I_DMA1DDEN  0x00400000  /* DMA 1 descr done enable */
+#define   VDE_I_DMA1TDEN  0x00800000  /* DMA 1 trans done enable */
+#define   VDE_I_C1AVEN    0x01000000  /* cap 1 act vid end enable */
+#define   VDE_I_HQV0EN	  0x02000000  /* First hqv engine enable */
+#define   VDE_I_C1VBIEN	  0x04000000  /* Cap 1 VBI end enable */
+#define   VDE_I_LVDSSI    0x08000000  /* LVDS sense interrupt */
+#define   VDE_I_C0AVEN    0x10000000  /* Cap 0 act vid end enable */
+#define   VDE_I_C0VBIEN   0x20000000  /* Cap 0 VBI end enable */
+#define   VDE_I_LVDSSIEN  0x40000000  /* LVDS Sense enable */
+#define   VDE_I_ENABLE	  0x80000000  /* Global interrupt enable */
+
+/*
+ * DMA management.
+ */
+int viafb_request_dma(void);
+void viafb_release_dma(void);
+/* void viafb_dma_copy_out(unsigned int offset, dma_addr_t paddr, int len); */
+int viafb_dma_copy_out_sg(unsigned int offset, struct scatterlist *sg, int nsg);
+
+/*
+ * DMA Controller registers.
+ */
+#define VDMA_MR0	0xe00		/* Mod reg 0 */
+#define   VDMA_MR_CHAIN   0x01		/* Chaining mode */
+#define   VDMA_MR_TDIE    0x02		/* Transfer done int enable */
+#define VDMA_CSR0	0xe04		/* Control/status */
+#define	  VDMA_C_ENABLE	  0x01		  /* DMA Enable */
+#define	  VDMA_C_START	  0x02		  /* Start a transfer */
+#define	  VDMA_C_ABORT	  0x04		  /* Abort a transfer */
+#define	  VDMA_C_DONE	  0x08		  /* Transfer is done */
+#define VDMA_MARL0	0xe20		/* Mem addr low */
+#define VDMA_MARH0	0xe24		/* Mem addr high */
+#define VDMA_DAR0	0xe28		/* Device address */
+#define VDMA_DQWCR0	0xe2c		/* Count (16-byte) */
+#define VDMA_TMR0	0xe30		/* Tile mode reg */
+#define VDMA_DPRL0	0xe34		/* Not sure */
+#define	  VDMA_DPR_IN	  0x08		/* Inbound transfer to FB */
+#define VDMA_DPRH0	0xe38
+#define VDMA_PMR0	(0xe00 + 0x134) /* Pitch mode */
+
+/*
+ * Useful stuff that probably belongs somewhere global.
+ */
+#define VGA_WIDTH	640
+#define VGA_HEIGHT	480
+
+/*
+ * Indexed port operations.  Note that these are all multi-op
+ * functions; every invocation will be racy if you're not holding
+ * reg_lock.
+ */
+
+#define VIAStatus   0x3DA  /* Non-indexed port */
+#define VIACR       0x3D4
+#define VIASR       0x3C4
+#define VIAGR       0x3CE
+#define VIAAR       0x3C0
+
+static inline u8 via_read_reg(u16 port, u8 index)
+{
+	outb(index, port);
+	return inb(port + 1);
+}
+
+static inline void via_write_reg(u16 port, u8 index, u8 data)
+{
+	outb(index, port);
+	outb(data, port + 1);
+}
+
+static inline void via_write_reg_mask(u16 port, u8 index, u8 data, u8 mask)
+{
+	u8 old;
+
+	outb(index, port);
+	old = inb(port + 1);
+	outb((data & mask) | (old & ~mask), port + 1);
+}
+
+#define VIA_MISC_REG_READ	0x03CC
+#define VIA_MISC_REG_WRITE	0x03C2
+
+static inline void via_write_misc_reg_mask(u8 data, u8 mask)
+{
+	u8 old = inb(VIA_MISC_REG_READ);
+	outb((data & mask) | (old & ~mask), VIA_MISC_REG_WRITE);
+}
+
+
+#endif /* __VIA_CORE_H__ */
diff --git a/include/linux/via-gpio.h b/include/linux/via-gpio.h
new file mode 100644
index 000000000000..8281aea3dd6d
--- /dev/null
+++ b/include/linux/via-gpio.h
@@ -0,0 +1,14 @@
+/*
+ * Support for viafb GPIO ports.
+ *
+ * Copyright 2009 Jonathan Corbet <corbet@lwn.net>
+ * Distributable under version 2 of the GNU General Public License.
+ */
+
+#ifndef __VIA_GPIO_H__
+#define __VIA_GPIO_H__
+
+extern int viafb_gpio_lookup(const char *name);
+extern int viafb_gpio_init(void);
+extern void viafb_gpio_exit(void);
+#endif
diff --git a/include/linux/via_i2c.h b/include/linux/via_i2c.h
new file mode 100644
index 000000000000..44532e468c05
--- /dev/null
+++ b/include/linux/via_i2c.h
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1998-2009 VIA Technologies, Inc. All Rights Reserved.
+ * Copyright 2001-2008 S3 Graphics, Inc. All Rights Reserved.
+
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation;
+ * either version 2, or (at your option) any later version.
+
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTIES OR REPRESENTATIONS; without even
+ * the implied warranty of MERCHANTABILITY or FITNESS FOR
+ * A PARTICULAR PURPOSE.See the GNU General Public License
+ * for more details.
+
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc.,
+ * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+#ifndef __VIA_I2C_H__
+#define __VIA_I2C_H__
+
+#include <linux/i2c.h>
+#include <linux/i2c-algo-bit.h>
+
+struct via_i2c_stuff {
+	u16 i2c_port;			/* GPIO or I2C port */
+	u16 is_active;			/* Being used as I2C? */
+	struct i2c_adapter adapter;
+	struct i2c_algo_bit_data algo;
+};
+
+
+int viafb_i2c_readbyte(u8 adap, u8 slave_addr, u8 index, u8 *pdata);
+int viafb_i2c_writebyte(u8 adap, u8 slave_addr, u8 index, u8 data);
+int viafb_i2c_readbytes(u8 adap, u8 slave_addr, u8 index, u8 *buff, int buff_len);
+struct i2c_adapter *viafb_find_i2c_adapter(enum viafb_i2c_adap which);
+
+extern int viafb_i2c_init(void);
+extern void viafb_i2c_exit(void);
+#endif /* __VIA_I2C_H__ */
-- 
cgit v1.2.3


From 72d5a9f7a9542f88397558c65bcfc3b115a65e34 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Date: Mon, 10 May 2010 17:12:17 -0700
Subject: rcu: remove all rcu head initializations, except on_stack
 initializations

Remove all rcu head inits. We don't care about the RCU head state before passing
it to call_rcu() anyway. Only leave the "on_stack" variants so debugobjects can
keep track of objects on stack.

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
---
 include/linux/init_task.h | 1 -
 kernel/rcutiny.c          | 6 ++++++
 kernel/rcutorture.c       | 2 ++
 kernel/rcutree.c          | 4 ++++
 kernel/rcutree_plugin.h   | 2 ++
 5 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/init_task.h b/include/linux/init_task.h
index b1ed1cd8e2a8..7996fc2c9ba9 100644
--- a/include/linux/init_task.h
+++ b/include/linux/init_task.h
@@ -49,7 +49,6 @@ extern struct group_info init_groups;
 		{ .first = &init_task.pids[PIDTYPE_PGID].node },	\
 		{ .first = &init_task.pids[PIDTYPE_SID].node },		\
 	},								\
-	.rcu		= RCU_HEAD_INIT,				\
 	.level		= 0,						\
 	.numbers	= { {						\
 		.nr		= 0,					\
diff --git a/kernel/rcutiny.c b/kernel/rcutiny.c
index b1804ff83d5e..38729d3cd236 100644
--- a/kernel/rcutiny.c
+++ b/kernel/rcutiny.c
@@ -245,11 +245,13 @@ void rcu_barrier(void)
 {
 	struct rcu_synchronize rcu;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier);
 
@@ -257,11 +259,13 @@ void rcu_barrier_bh(void)
 {
 	struct rcu_synchronize rcu;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu_bh(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_bh);
 
@@ -269,11 +273,13 @@ void rcu_barrier_sched(void)
 {
 	struct rcu_synchronize rcu;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu_sched(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(rcu_barrier_sched);
 
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 58df55bf83ed..077defb34571 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -464,9 +464,11 @@ static void rcu_bh_torture_synchronize(void)
 {
 	struct rcu_bh_torture_synchronize rcu;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	call_rcu_bh(&rcu.head, rcu_bh_torture_wakeme_after_cb);
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 
 static struct rcu_torture_ops rcu_bh_ops = {
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index ba6996943e28..d4437345706f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1484,11 +1484,13 @@ void synchronize_sched(void)
 	if (rcu_blocking_is_gp())
 		return;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu_sched(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
@@ -1508,11 +1510,13 @@ void synchronize_rcu_bh(void)
 	if (rcu_blocking_is_gp())
 		return;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu_bh(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index ac7d80fa895c..0e4f420245d9 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -557,11 +557,13 @@ void synchronize_rcu(void)
 	if (!rcu_scheduler_active)
 		return;
 
+	init_rcu_head_on_stack(&rcu.head);
 	init_completion(&rcu.completion);
 	/* Will wake me after RCU finished. */
 	call_rcu(&rcu.head, wakeme_after_rcu);
 	/* Wait for it. */
 	wait_for_completion(&rcu.completion);
+	destroy_rcu_head_on_stack(&rcu.head);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu);
 
-- 
cgit v1.2.3


From 34441427aab4bdb3069a4ffcda69a99357abcb2e Mon Sep 17 00:00:00 2001
From: Robin Holt <holt@sgi.com>
Date: Tue, 11 May 2010 14:06:46 -0700
Subject: revert "procfs: provide stack information for threads" and its fixup
 commits

Originally, commit d899bf7b ("procfs: provide stack information for
threads") attempted to introduce a new feature for showing where the
threadstack was located and how many pages are being utilized by the
stack.

Commit c44972f1 ("procfs: disable per-task stack usage on NOMMU") was
applied to fix the NO_MMU case.

Commit 89240ba0 ("x86, fs: Fix x86 procfs stack information for threads on
64-bit") was applied to fix a bug in ia32 executables being loaded.

Commit 9ebd4eba7 ("procfs: fix /proc/<pid>/stat stack pointer for kernel
threads") was applied to fix a bug which had kernel threads printing a
userland stack address.

Commit 1306d603f ('proc: partially revert "procfs: provide stack
information for threads"') was then applied to revert the stack pages
being used to solve a significant performance regression.

This patch nearly undoes the effect of all these patches.

The reason for reverting these is that they provide an unusable value in
field 28.  For x86_64, a fork will result in the task->stack_start
value being updated to the current user top of stack and not the stack
start address.  This unpredictability of the stack_start value makes
it worthless.  That includes the intended use of showing how much stack
space a thread has.

Other architectures will get different values.  As an example, ia64
gets 0.  The do_fork() and copy_process() functions appear to treat the
stack_start and stack_size parameters as architecture specific.

I only partially reverted c44972f1 ("procfs: disable per-task stack usage
on NOMMU").  If I had completely reverted it, I would have had to change
mm/Makefile to only build pagewalk.o when CONFIG_PROC_PAGE_MONITOR is
configured.  Since I could not test the builds without significant effort,
I decided to not change mm/Makefile.

I only partially reverted 89240ba0 ("x86, fs: Fix x86 procfs stack
information for threads on 64-bit").  I left the KSTK_ESP() change in
place as that seemed worthwhile.

Signed-off-by: Robin Holt <holt@sgi.com>
Cc: Stefani Seibold <stefani@seibold.net>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Michal Simek <monstr@monstr.eu>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/filesystems/proc.txt |  3 +--
 fs/compat.c                        |  2 --
 fs/exec.c                          |  2 --
 fs/proc/array.c                    |  3 +--
 fs/proc/task_mmu.c                 | 19 -------------------
 include/linux/sched.h              |  1 -
 kernel/fork.c                      |  2 --
 7 files changed, 2 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index a4f30faa4f1f..1e359b62c40a 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -316,7 +316,7 @@ address           perms offset  dev   inode      pathname
 08049000-0804a000 rw-p 00001000 03:00 8312       /opt/test
 0804a000-0806b000 rw-p 00000000 00:00 0          [heap]
 a7cb1000-a7cb2000 ---p 00000000 00:00 0
-a7cb2000-a7eb2000 rw-p 00000000 00:00 0          [threadstack:001ff4b4]
+a7cb2000-a7eb2000 rw-p 00000000 00:00 0
 a7eb2000-a7eb3000 ---p 00000000 00:00 0
 a7eb3000-a7ed5000 rw-p 00000000 00:00 0
 a7ed5000-a8008000 r-xp 00000000 03:00 4222       /lib/libc.so.6
@@ -352,7 +352,6 @@ is not associated with a file:
  [stack]                  = the stack of the main process
  [vdso]                   = the "virtual dynamic shared object",
                             the kernel system call handler
- [threadstack:xxxxxxxx]   = the stack of the thread, xxxxxxxx is the stack size
 
  or if empty, the mapping is anonymous.
 
diff --git a/fs/compat.c b/fs/compat.c
index 4b6ed03cc478..05448730f840 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -1531,8 +1531,6 @@ int compat_do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
-	current->stack_start = current->mm->start_stack;
-
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/exec.c b/fs/exec.c
index 49cdaa19e5b9..e6e94c626c2c 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1387,8 +1387,6 @@ int do_execve(char * filename,
 	if (retval < 0)
 		goto out;
 
-	current->stack_start = current->mm->start_stack;
-
 	/* execve succeeded */
 	current->fs->in_exec = 0;
 	current->in_execve = 0;
diff --git a/fs/proc/array.c b/fs/proc/array.c
index e51f2ec2c5e5..885ab5513ac5 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -81,7 +81,6 @@
 #include <linux/pid_namespace.h>
 #include <linux/ptrace.h>
 #include <linux/tracehook.h>
-#include <linux/swapops.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -495,7 +494,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
 		rsslim,
 		mm ? mm->start_code : 0,
 		mm ? mm->end_code : 0,
-		(permitted && mm) ? task->stack_start : 0,
+		(permitted && mm) ? mm->start_stack : 0,
 		esp,
 		eip,
 		/* The signal information here is obsolete.
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 070553427dd5..47f5b145f56e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -247,25 +247,6 @@ static void show_map_vma(struct seq_file *m, struct vm_area_struct *vma)
 				} else if (vma->vm_start <= mm->start_stack &&
 					   vma->vm_end >= mm->start_stack) {
 					name = "[stack]";
-				} else {
-					unsigned long stack_start;
-					struct proc_maps_private *pmp;
-
-					pmp = m->private;
-					stack_start = pmp->task->stack_start;
-
-					if (vma->vm_start <= stack_start &&
-					    vma->vm_end >= stack_start) {
-						pad_len_spaces(m, len);
-						seq_printf(m,
-						 "[threadstack:%08lx]",
-#ifdef CONFIG_STACK_GROWSUP
-						 vma->vm_end - stack_start
-#else
-						 stack_start - vma->vm_start
-#endif
-						);
-					}
 				}
 			} else {
 				name = "[vdso]";
diff --git a/include/linux/sched.h b/include/linux/sched.h
index dad7f668ebf7..2b7b81df78b3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1497,7 +1497,6 @@ struct task_struct {
 	/* bitmask of trace recursion */
 	unsigned long trace_recursion;
 #endif /* CONFIG_TRACING */
-	unsigned long stack_start;
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */
 	struct memcg_batch_info {
 		int do_batch;	/* incremented when batch uncharge started */
diff --git a/kernel/fork.c b/kernel/fork.c
index 44b0791b0a2e..4c14942a0ee3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1114,8 +1114,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 
 	p->bts = NULL;
 
-	p->stack_start = stack_start;
-
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
 
-- 
cgit v1.2.3


From 1cd2620ca9332943c9fff84c0c9240982534d840 Mon Sep 17 00:00:00 2001
From: Peter Huewe <peterhuewe@gmx.de>
Date: Thu, 13 May 2010 00:06:54 +0200
Subject: mtd/nand/sh_flctl: Move function mtd_to_flctl to fix build failure

This patch fixes a build failure[1] by simply moving the function mtd_to_flctl
beneath the definition of sh_flctl which it uses.

Build failure introduced by patch
'mtd/nand/sh_flctl: Replace the dangerous mtd_to_flctl macro' (67026418)

Signed-off-by: Peter Huewe <peterhuewe@gmx.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/sh_flctl.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/mtd/sh_flctl.h b/include/linux/mtd/sh_flctl.h
index 178b5c26c995..9cf4c4c79555 100644
--- a/include/linux/mtd/sh_flctl.h
+++ b/include/linux/mtd/sh_flctl.h
@@ -93,11 +93,6 @@
 #define INIT_FL4ECCRESULT_VAL	0x03FF03FF
 #define LOOP_TIMEOUT_MAX	0x00010000
 
-static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
-{
-	return container_of(mtdinfo, struct sh_flctl, mtd);
-}
-
 struct sh_flctl {
 	struct mtd_info		mtd;
 	struct nand_chip	chip;
@@ -128,4 +123,9 @@ struct sh_flctl_platform_data {
 	unsigned has_hwecc:1;
 };
 
+static inline struct sh_flctl *mtd_to_flctl(struct mtd_info *mtdinfo)
+{
+	return container_of(mtdinfo, struct sh_flctl, mtd);
+}
+
 #endif	/* __SH_FLCTL_H__ */
-- 
cgit v1.2.3


From 107e7be628821dcb78c43adce0331e8ddb40eabd Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:08 +0000
Subject: tipc: Add support for "-s" configuration option

Provide initial support for displaying overall TIPC status/statistics
information at runtime.  Currently, only version info for the TIPC
kernel module is displayed.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc_config.h |  1 +
 net/tipc/config.c           | 40 +++++++++++++++++++++++++++++++++++++++-
 net/tipc/core.c             |  2 --
 net/tipc/core.h             |  3 +++
 4 files changed, 43 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc_config.h b/include/linux/tipc_config.h
index 2bc6fa4adeb5..9cde86c32412 100644
--- a/include/linux/tipc_config.h
+++ b/include/linux/tipc_config.h
@@ -74,6 +74,7 @@
 #define  TIPC_CMD_SHOW_NAME_TABLE   0x0005    /* tx name_tbl_query, rx ultra_string */
 #define  TIPC_CMD_SHOW_PORTS        0x0006    /* tx none, rx ultra_string */
 #define  TIPC_CMD_SHOW_LINK_STATS   0x000B    /* tx link_name, rx ultra_string */
+#define  TIPC_CMD_SHOW_STATS        0x000F    /* tx unsigned, rx ultra_string */
 
 #if 0
 #define  TIPC_CMD_SHOW_PORT_STATS   0x0008    /* tx port_ref, rx ultra_string */
diff --git a/net/tipc/config.c b/net/tipc/config.c
index 7370241412cb..961d1b097146 100644
--- a/net/tipc/config.c
+++ b/net/tipc/config.c
@@ -246,13 +246,48 @@ static void cfg_cmd_event(struct tipc_cmd_msg *msg,
 	default:
 		rv = tipc_cfg_cmd(msg, data, sz, (u32 *)&msg_sect[1].iov_len, orig);
 	}
-	exit:
+exit:
 	rmsg.result_len = htonl(msg_sect[1].iov_len);
 	rmsg.retval = htonl(rv);
 	tipc_cfg_respond(msg_sect, 2u, orig);
 }
 #endif
 
+#define MAX_STATS_INFO 2000
+
+static struct sk_buff *tipc_show_stats(void)
+{
+	struct sk_buff *buf;
+	struct tlv_desc *rep_tlv;
+	struct print_buf pb;
+	int str_len;
+	u32 value;
+
+	if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_UNSIGNED))
+		return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR);
+
+	value = ntohl(*(u32 *)TLV_DATA(req_tlv_area));
+	if (value != 0)
+		return tipc_cfg_reply_error_string("unsupported argument");
+
+	buf = tipc_cfg_reply_alloc(TLV_SPACE(MAX_STATS_INFO));
+	if (buf == NULL)
+		return NULL;
+
+	rep_tlv = (struct tlv_desc *)buf->data;
+	tipc_printbuf_init(&pb, (char *)TLV_DATA(rep_tlv), MAX_STATS_INFO);
+
+	tipc_printf(&pb, "TIPC version " TIPC_MOD_VER "\n");
+
+	/* Use additional tipc_printf()'s to return more info ... */
+
+	str_len = tipc_printbuf_validate(&pb);
+	skb_put(buf, TLV_SPACE(str_len));
+	TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len);
+
+	return buf;
+}
+
 static struct sk_buff *cfg_enable_bearer(void)
 {
 	struct tipc_bearer_config *args;
@@ -536,6 +571,9 @@ struct sk_buff *tipc_cfg_do_cmd(u32 orig_node, u16 cmd, const void *request_area
 	case TIPC_CMD_DUMP_LOG:
 		rep_tlv_buf = tipc_log_dump();
 		break;
+	case TIPC_CMD_SHOW_STATS:
+		rep_tlv_buf = tipc_show_stats();
+		break;
 	case TIPC_CMD_SET_LINK_TOL:
 	case TIPC_CMD_SET_LINK_PRI:
 	case TIPC_CMD_SET_LINK_WINDOW:
diff --git a/net/tipc/core.c b/net/tipc/core.c
index 4e84c8431f32..b47d1842a970 100644
--- a/net/tipc/core.c
+++ b/net/tipc/core.c
@@ -49,8 +49,6 @@
 #include "config.h"
 
 
-#define TIPC_MOD_VER "2.0.0"
-
 #ifndef CONFIG_TIPC_ZONES
 #define CONFIG_TIPC_ZONES 3
 #endif
diff --git a/net/tipc/core.h b/net/tipc/core.h
index c58a1d16563a..1e149f55f3e2 100644
--- a/net/tipc/core.h
+++ b/net/tipc/core.h
@@ -59,6 +59,9 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 
+
+#define TIPC_MOD_VER "2.0.0"
+
 /*
  * TIPC sanity test macros
  */
-- 
cgit v1.2.3


From 8e1c298c01d4596fa2837913e531a93a791a7bec Mon Sep 17 00:00:00 2001
From: Allan Stephens <allan.stephens@windriver.com>
Date: Tue, 11 May 2010 14:30:09 +0000
Subject: tipc: Update commenting in TIPC API

Eliminate comments in TIPC's main API files that are either obsolete,
incorrect, misleading, or unhelpful.  It also adds in one new comment.

Signed-off-by: Allan Stephens <allan.stephens@windriver.com>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tipc.h    |  6 +++---
 include/net/tipc/tipc.h | 16 ++++++++--------
 2 files changed, 11 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/tipc.h b/include/linux/tipc.h
index 9536d8aeadf1..181c8d0e6f73 100644
--- a/include/linux/tipc.h
+++ b/include/linux/tipc.h
@@ -107,7 +107,7 @@ static inline unsigned int tipc_node(__u32 addr)
  * Message importance levels
  */
 
-#define TIPC_LOW_IMPORTANCE		0  /* default */
+#define TIPC_LOW_IMPORTANCE		0
 #define TIPC_MEDIUM_IMPORTANCE		1
 #define TIPC_HIGH_IMPORTANCE		2
 #define TIPC_CRITICAL_IMPORTANCE	3
@@ -182,7 +182,7 @@ struct sockaddr_tipc {
 		struct tipc_name_seq nameseq;
 		struct {
 			struct tipc_name name;
-			__u32 domain; /* 0: own zone */
+			__u32 domain;
 		} name;
 	} addr;
 };
@@ -200,7 +200,7 @@ struct sockaddr_tipc {
  */
 
 #define TIPC_IMPORTANCE		127	/* Default: TIPC_LOW_IMPORTANCE */
-#define TIPC_SRC_DROPPABLE	128	/* Default: 0 (resend congested msg) */
+#define TIPC_SRC_DROPPABLE	128	/* Default: based on socket type */
 #define TIPC_DEST_DROPPABLE	129	/* Default: based on socket type */
 #define TIPC_CONN_TIMEOUT	130	/* Default: 8000 (ms)  */
 #define TIPC_NODE_RECVQ_DEPTH	131	/* Default: none (read only) */
diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h
index 9566608c88cf..15af6dca0b49 100644
--- a/include/net/tipc/tipc.h
+++ b/include/net/tipc/tipc.h
@@ -2,7 +2,7 @@
  * include/net/tipc/tipc.h: Main include file for TIPC users
  * 
  * Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005,2010 Wind River Systems
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -126,7 +126,7 @@ int tipc_createport(unsigned int tipc_user,
 		    tipc_msg_event message_cb, 
 		    tipc_named_msg_event named_message_cb, 
 		    tipc_conn_msg_event conn_message_cb, 
-		    tipc_continue_event continue_event_cb,/* May be zero */
+		    tipc_continue_event continue_event_cb,
 		    u32 *portref);
 
 int tipc_deleteport(u32 portref);
@@ -145,13 +145,13 @@ int tipc_set_portunreturnable(u32 portref, unsigned int isunreturnable);
 int tipc_publish(u32 portref, unsigned int scope, 
 		 struct tipc_name_seq const *name_seq);
 int tipc_withdraw(u32 portref, unsigned int scope,
-		  struct tipc_name_seq const *name_seq); /* 0: all */
+		  struct tipc_name_seq const *name_seq);
 
 int tipc_connect2port(u32 portref, struct tipc_portid const *port);
 
 int tipc_disconnect(u32 portref);
 
-int tipc_shutdown(u32 ref); /* Sends SHUTDOWN msg */
+int tipc_shutdown(u32 ref);
 
 int tipc_isconnected(u32 portref, int *isconnected);
 
@@ -176,7 +176,7 @@ int tipc_send_buf(u32 portref,
 
 int tipc_send2name(u32 portref, 
 		   struct tipc_name const *name, 
-		   u32 domain,	/* 0:own zone */
+		   u32 domain,
 		   unsigned int num_sect,
 		   struct iovec const *msg_sect);
 
@@ -188,7 +188,7 @@ int tipc_send_buf2name(u32 portref,
 
 int tipc_forward2name(u32 portref, 
 		      struct tipc_name const *name, 
-		      u32 domain,   /*0: own zone */
+		      u32 domain,
 		      unsigned int section_count,
 		      struct iovec const *msg_sect,
 		      struct tipc_portid const *origin,
@@ -228,14 +228,14 @@ int tipc_forward_buf2port(u32 portref,
 
 int tipc_multicast(u32 portref, 
 		   struct tipc_name_seq const *seq, 
-		   u32 domain,	/* 0:own zone */
+		   u32 domain,	/* currently unused */
 		   unsigned int section_count,
 		   struct iovec const *msg);
 
 #if 0
 int tipc_multicast_buf(u32 portref, 
 		       struct tipc_name_seq const *seq, 
-		       u32 domain,	/* 0:own zone */
+		       u32 domain,
 		       void *buf,
 		       unsigned int size);
 #endif
-- 
cgit v1.2.3


From d28bdf05f72238d626c8d06b61049f6df8d78e70 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 11 May 2010 13:29:17 +0000
Subject: sh: move sh asm/clock.h contents to linux/sh_clk.h V2

This patch is V2 of the clock framework move from
arch/sh/include/asm/clock.h to include/linux/sh_clk.h
and updates the include paths for files that will be
shared between SH and SH-Mobile ARM.

The file asm/clock.h is still kept in this version
in order to depend on as few files as possible at
this point. We keep SH-specific stuff in there.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/clock.h | 147 +-----------------------------------------
 include/linux/sh_clk.h      | 151 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 152 insertions(+), 146 deletions(-)
 create mode 100644 include/linux/sh_clk.h

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index 35d219616110..f387e5caee16 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -1,158 +1,13 @@
 #ifndef __ASM_SH_CLOCK_H
 #define __ASM_SH_CLOCK_H
 
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/cpufreq.h>
-#include <linux/clk.h>
-#include <linux/err.h>
-
-struct clk;
-
-struct clk_ops {
-	void (*init)(struct clk *clk);
-	int (*enable)(struct clk *clk);
-	void (*disable)(struct clk *clk);
-	unsigned long (*recalc)(struct clk *clk);
-	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
-	int (*set_parent)(struct clk *clk, struct clk *parent);
-	long (*round_rate)(struct clk *clk, unsigned long rate);
-};
-
-struct clk {
-	struct list_head	node;
-	const char		*name;
-	int			id;
-
-	struct clk		*parent;
-	struct clk_ops		*ops;
-
-	struct list_head	children;
-	struct list_head	sibling;	/* node for children */
-
-	int			usecount;
-
-	unsigned long		rate;
-	unsigned long		flags;
-
-	void __iomem		*enable_reg;
-	unsigned int		enable_bit;
-
-	unsigned long		arch_flags;
-	void			*priv;
-	struct dentry		*dentry;
-	struct cpufreq_frequency_table *freq_table;
-};
-
-#define CLK_ENABLE_ON_INIT	(1 << 0)
+#include <linux/sh_clk.h>
 
 /* Should be defined by processor-specific code */
 void __deprecated arch_init_clk_ops(struct clk_ops **, int type);
 int __init arch_clk_init(void);
 
-/* arch/sh/kernel/cpu/clock.c */
-int clk_init(void);
-unsigned long followparent_recalc(struct clk *);
-void recalculate_root_clocks(void);
-void propagate_rate(struct clk *);
-int clk_reparent(struct clk *child, struct clk *parent);
-int clk_register(struct clk *);
-void clk_unregister(struct clk *);
-
 /* arch/sh/kernel/cpu/clock-cpg.c */
 int __init __deprecated cpg_clk_init(void);
 
-/* the exported API, in addition to clk_set_rate */
-/**
- * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
- * @clk: clock source
- * @rate: desired clock rate in Hz
- * @algo_id: algorithm id to be passed down to ops->set_rate
- *
- * Returns success (0) or negative errno.
- */
-int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id);
-
-enum clk_sh_algo_id {
-	NO_CHANGE = 0,
-
-	IUS_N1_N1,
-	IUS_322,
-	IUS_522,
-	IUS_N11,
-
-	SB_N1,
-
-	SB3_N1,
-	SB3_32,
-	SB3_43,
-	SB3_54,
-
-	BP_N1,
-
-	IP_N1,
-};
-
-struct clk_div_mult_table {
-	unsigned int *divisors;
-	unsigned int nr_divisors;
-	unsigned int *multipliers;
-	unsigned int nr_multipliers;
-};
-
-struct cpufreq_frequency_table;
-void clk_rate_table_build(struct clk *clk,
-			  struct cpufreq_frequency_table *freq_table,
-			  int nr_freqs,
-			  struct clk_div_mult_table *src_table,
-			  unsigned long *bitmap);
-
-long clk_rate_table_round(struct clk *clk,
-			  struct cpufreq_frequency_table *freq_table,
-			  unsigned long rate);
-
-int clk_rate_table_find(struct clk *clk,
-			struct cpufreq_frequency_table *freq_table,
-			unsigned long rate);
-
-#define SH_CLK_MSTP32(_parent, _enable_reg, _enable_bit, _flags)	\
-{									\
-	.parent		= _parent,					\
-	.enable_reg	= (void __iomem *)_enable_reg,			\
-	.enable_bit	= _enable_bit,					\
-	.flags		= _flags,					\
-}
-
-int sh_clk_mstp32_register(struct clk *clks, int nr);
-
-#define SH_CLK_DIV4(_parent, _reg, _shift, _div_bitmap, _flags)	\
-{								\
-	.parent = _parent,					\
-	.enable_reg = (void __iomem *)_reg,			\
-	.enable_bit = _shift,					\
-	.arch_flags = _div_bitmap,				\
-	.flags = _flags,					\
-}
-
-struct clk_div4_table {
-	struct clk_div_mult_table *div_mult_table;
-	void (*kick)(struct clk *clk);
-};
-
-int sh_clk_div4_register(struct clk *clks, int nr,
-			 struct clk_div4_table *table);
-int sh_clk_div4_enable_register(struct clk *clks, int nr,
-			 struct clk_div4_table *table);
-int sh_clk_div4_reparent_register(struct clk *clks, int nr,
-			 struct clk_div4_table *table);
-
-#define SH_CLK_DIV6(_parent, _reg, _flags)	\
-{						\
-	.parent = _parent,			\
-	.enable_reg = (void __iomem *)_reg,	\
-	.flags = _flags,			\
-}
-
-int sh_clk_div6_register(struct clk *clks, int nr);
-
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
new file mode 100644
index 000000000000..de911451c216
--- /dev/null
+++ b/include/linux/sh_clk.h
@@ -0,0 +1,151 @@
+#ifndef __SH_CLOCK_H
+#define __SH_CLOCK_H
+
+#include <linux/list.h>
+#include <linux/seq_file.h>
+#include <linux/cpufreq.h>
+#include <linux/clk.h>
+#include <linux/err.h>
+
+struct clk;
+
+struct clk_ops {
+	void (*init)(struct clk *clk);
+	int (*enable)(struct clk *clk);
+	void (*disable)(struct clk *clk);
+	unsigned long (*recalc)(struct clk *clk);
+	int (*set_rate)(struct clk *clk, unsigned long rate, int algo_id);
+	int (*set_parent)(struct clk *clk, struct clk *parent);
+	long (*round_rate)(struct clk *clk, unsigned long rate);
+};
+
+struct clk {
+	struct list_head	node;
+	const char		*name;
+	int			id;
+
+	struct clk		*parent;
+	struct clk_ops		*ops;
+
+	struct list_head	children;
+	struct list_head	sibling;	/* node for children */
+
+	int			usecount;
+
+	unsigned long		rate;
+	unsigned long		flags;
+
+	void __iomem		*enable_reg;
+	unsigned int		enable_bit;
+
+	unsigned long		arch_flags;
+	void			*priv;
+	struct dentry		*dentry;
+	struct cpufreq_frequency_table *freq_table;
+};
+
+#define CLK_ENABLE_ON_INIT	(1 << 0)
+
+/* arch/sh/kernel/cpu/clock.c */
+int clk_init(void);
+unsigned long followparent_recalc(struct clk *);
+void recalculate_root_clocks(void);
+void propagate_rate(struct clk *);
+int clk_reparent(struct clk *child, struct clk *parent);
+int clk_register(struct clk *);
+void clk_unregister(struct clk *);
+
+/* the exported API, in addition to clk_set_rate */
+/**
+ * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
+ * @clk: clock source
+ * @rate: desired clock rate in Hz
+ * @algo_id: algorithm id to be passed down to ops->set_rate
+ *
+ * Returns success (0) or negative errno.
+ */
+int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id);
+
+enum clk_sh_algo_id {
+	NO_CHANGE = 0,
+
+	IUS_N1_N1,
+	IUS_322,
+	IUS_522,
+	IUS_N11,
+
+	SB_N1,
+
+	SB3_N1,
+	SB3_32,
+	SB3_43,
+	SB3_54,
+
+	BP_N1,
+
+	IP_N1,
+};
+
+struct clk_div_mult_table {
+	unsigned int *divisors;
+	unsigned int nr_divisors;
+	unsigned int *multipliers;
+	unsigned int nr_multipliers;
+};
+
+struct cpufreq_frequency_table;
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap);
+
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate);
+
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate);
+
+#define SH_CLK_MSTP32(_parent, _enable_reg, _enable_bit, _flags)	\
+{									\
+	.parent		= _parent,					\
+	.enable_reg	= (void __iomem *)_enable_reg,			\
+	.enable_bit	= _enable_bit,					\
+	.flags		= _flags,					\
+}
+
+int sh_clk_mstp32_register(struct clk *clks, int nr);
+
+#define SH_CLK_DIV4(_parent, _reg, _shift, _div_bitmap, _flags)	\
+{								\
+	.parent = _parent,					\
+	.enable_reg = (void __iomem *)_reg,			\
+	.enable_bit = _shift,					\
+	.arch_flags = _div_bitmap,				\
+	.flags = _flags,					\
+}
+
+struct clk_div4_table {
+	struct clk_div_mult_table *div_mult_table;
+	void (*kick)(struct clk *clk);
+};
+
+int sh_clk_div4_register(struct clk *clks, int nr,
+			 struct clk_div4_table *table);
+int sh_clk_div4_enable_register(struct clk *clks, int nr,
+			 struct clk_div4_table *table);
+int sh_clk_div4_reparent_register(struct clk *clks, int nr,
+			 struct clk_div4_table *table);
+
+#define SH_CLK_DIV6(_parent, _reg, _flags)	\
+{						\
+	.parent = _parent,			\
+	.enable_reg = (void __iomem *)_reg,	\
+	.flags = _flags,			\
+}
+
+int sh_clk_div6_register(struct clk *clks, int nr);
+
+#endif /* __SH_CLOCK_H */
-- 
cgit v1.2.3


From 8b5ee113e1b97097e992a0301d0cac2530b31fc2 Mon Sep 17 00:00:00 2001
From: Magnus Damm <damm@opensource.se>
Date: Tue, 11 May 2010 13:29:25 +0000
Subject: sh: move sh clock.c contents to drivers/sh/clk.c

This patch is V2 of the SH clock framework move from
arch/sh/kernel/cpu/clock.c to drivers/sh/clk.c. All
code except the following functions is moved:
clk_init(), clk_get() and clk_put().

The init function is still kept in clock.c since it
depends on the SH-specific machvec implementation.

The symbols clk_get() and clk_put() already exist in
the common ARM clkdev code, so those symbols are left in
the SH tree to avoid duplicating them for SH-Mobile ARM.

Signed-off-by: Magnus Damm <damm@opensource.se>
Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 arch/sh/include/asm/clock.h |   3 +
 arch/sh/kernel/cpu/clock.c  | 543 +------------------------------------------
 drivers/sh/Makefile         |   1 +
 drivers/sh/clk.c            | 548 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/sh_clk.h      |   2 +-
 5 files changed, 561 insertions(+), 536 deletions(-)
 create mode 100644 drivers/sh/clk.c

(limited to 'include/linux')

diff --git a/arch/sh/include/asm/clock.h b/arch/sh/include/asm/clock.h
index f387e5caee16..803d4c7f09dc 100644
--- a/arch/sh/include/asm/clock.h
+++ b/arch/sh/include/asm/clock.h
@@ -10,4 +10,7 @@ int __init arch_clk_init(void);
 /* arch/sh/kernel/cpu/clock-cpg.c */
 int __init __deprecated cpg_clk_init(void);
 
+/* arch/sh/kernel/cpu/clock.c */
+int clk_init(void);
+
 #endif /* __ASM_SH_CLOCK_H */
diff --git a/arch/sh/kernel/cpu/clock.c b/arch/sh/kernel/cpu/clock.c
index 8cc6935d91ae..50f887dda565 100644
--- a/arch/sh/kernel/cpu/clock.c
+++ b/arch/sh/kernel/cpu/clock.c
@@ -16,467 +16,10 @@
  */
 #include <linux/kernel.h>
 #include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/list.h>
-#include <linux/kobject.h>
-#include <linux/sysdev.h>
-#include <linux/seq_file.h>
-#include <linux/err.h>
-#include <linux/platform_device.h>
-#include <linux/debugfs.h>
-#include <linux/cpufreq.h>
 #include <linux/clk.h>
 #include <asm/clock.h>
 #include <asm/machvec.h>
 
-static LIST_HEAD(clock_list);
-static DEFINE_SPINLOCK(clock_lock);
-static DEFINE_MUTEX(clock_list_sem);
-
-void clk_rate_table_build(struct clk *clk,
-			  struct cpufreq_frequency_table *freq_table,
-			  int nr_freqs,
-			  struct clk_div_mult_table *src_table,
-			  unsigned long *bitmap)
-{
-	unsigned long mult, div;
-	unsigned long freq;
-	int i;
-
-	for (i = 0; i < nr_freqs; i++) {
-		div = 1;
-		mult = 1;
-
-		if (src_table->divisors && i < src_table->nr_divisors)
-			div = src_table->divisors[i];
-
-		if (src_table->multipliers && i < src_table->nr_multipliers)
-			mult = src_table->multipliers[i];
-
-		if (!div || !mult || (bitmap && !test_bit(i, bitmap)))
-			freq = CPUFREQ_ENTRY_INVALID;
-		else
-			freq = clk->parent->rate * mult / div;
-
-		freq_table[i].index = i;
-		freq_table[i].frequency = freq;
-	}
-
-	/* Termination entry */
-	freq_table[i].index = i;
-	freq_table[i].frequency = CPUFREQ_TABLE_END;
-}
-
-long clk_rate_table_round(struct clk *clk,
-			  struct cpufreq_frequency_table *freq_table,
-			  unsigned long rate)
-{
-	unsigned long rate_error, rate_error_prev = ~0UL;
-	unsigned long rate_best_fit = rate;
-	unsigned long highest, lowest;
-	int i;
-
-	highest = lowest = 0;
-
-	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		unsigned long freq = freq_table[i].frequency;
-
-		if (freq == CPUFREQ_ENTRY_INVALID)
-			continue;
-
-		if (freq > highest)
-			highest = freq;
-		if (freq < lowest)
-			lowest = freq;
-
-		rate_error = abs(freq - rate);
-		if (rate_error < rate_error_prev) {
-			rate_best_fit = freq;
-			rate_error_prev = rate_error;
-		}
-
-		if (rate_error == 0)
-			break;
-	}
-
-	if (rate >= highest)
-		rate_best_fit = highest;
-	if (rate <= lowest)
-		rate_best_fit = lowest;
-
-	return rate_best_fit;
-}
-
-int clk_rate_table_find(struct clk *clk,
-			struct cpufreq_frequency_table *freq_table,
-			unsigned long rate)
-{
-	int i;
-
-	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
-		unsigned long freq = freq_table[i].frequency;
-
-		if (freq == CPUFREQ_ENTRY_INVALID)
-			continue;
-
-		if (freq == rate)
-			return i;
-	}
-
-	return -ENOENT;
-}
-
-/* Used for clocks that always have same value as the parent clock */
-unsigned long followparent_recalc(struct clk *clk)
-{
-	return clk->parent ? clk->parent->rate : 0;
-}
-
-int clk_reparent(struct clk *child, struct clk *parent)
-{
-	list_del_init(&child->sibling);
-	if (parent)
-		list_add(&child->sibling, &parent->children);
-	child->parent = parent;
-
-	/* now do the debugfs renaming to reattach the child
-	   to the proper parent */
-
-	return 0;
-}
-
-/* Propagate rate to children */
-void propagate_rate(struct clk *tclk)
-{
-	struct clk *clkp;
-
-	list_for_each_entry(clkp, &tclk->children, sibling) {
-		if (clkp->ops && clkp->ops->recalc)
-			clkp->rate = clkp->ops->recalc(clkp);
-
-		propagate_rate(clkp);
-	}
-}
-
-static void __clk_disable(struct clk *clk)
-{
-	if (clk->usecount == 0) {
-		printk(KERN_ERR "Trying disable clock %s with 0 usecount\n",
-		       clk->name);
-		WARN_ON(1);
-		return;
-	}
-
-	if (!(--clk->usecount)) {
-		if (likely(clk->ops && clk->ops->disable))
-			clk->ops->disable(clk);
-		if (likely(clk->parent))
-			__clk_disable(clk->parent);
-	}
-}
-
-void clk_disable(struct clk *clk)
-{
-	unsigned long flags;
-
-	if (!clk)
-		return;
-
-	spin_lock_irqsave(&clock_lock, flags);
-	__clk_disable(clk);
-	spin_unlock_irqrestore(&clock_lock, flags);
-}
-EXPORT_SYMBOL_GPL(clk_disable);
-
-static int __clk_enable(struct clk *clk)
-{
-	int ret = 0;
-
-	if (clk->usecount++ == 0) {
-		if (clk->parent) {
-			ret = __clk_enable(clk->parent);
-			if (unlikely(ret))
-				goto err;
-		}
-
-		if (clk->ops && clk->ops->enable) {
-			ret = clk->ops->enable(clk);
-			if (ret) {
-				if (clk->parent)
-					__clk_disable(clk->parent);
-				goto err;
-			}
-		}
-	}
-
-	return ret;
-err:
-	clk->usecount--;
-	return ret;
-}
-
-int clk_enable(struct clk *clk)
-{
-	unsigned long flags;
-	int ret;
-
-	if (!clk)
-		return -EINVAL;
-
-	spin_lock_irqsave(&clock_lock, flags);
-	ret = __clk_enable(clk);
-	spin_unlock_irqrestore(&clock_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(clk_enable);
-
-static LIST_HEAD(root_clks);
-
-/**
- * recalculate_root_clocks - recalculate and propagate all root clocks
- *
- * Recalculates all root clocks (clocks with no parent), which if the
- * clock's .recalc is set correctly, should also propagate their rates.
- * Called at init.
- */
-void recalculate_root_clocks(void)
-{
-	struct clk *clkp;
-
-	list_for_each_entry(clkp, &root_clks, sibling) {
-		if (clkp->ops && clkp->ops->recalc)
-			clkp->rate = clkp->ops->recalc(clkp);
-		propagate_rate(clkp);
-	}
-}
-
-int clk_register(struct clk *clk)
-{
-	if (clk == NULL || IS_ERR(clk))
-		return -EINVAL;
-
-	/*
-	 * trap out already registered clocks
-	 */
-	if (clk->node.next || clk->node.prev)
-		return 0;
-
-	mutex_lock(&clock_list_sem);
-
-	INIT_LIST_HEAD(&clk->children);
-	clk->usecount = 0;
-
-	if (clk->parent)
-		list_add(&clk->sibling, &clk->parent->children);
-	else
-		list_add(&clk->sibling, &root_clks);
-
-	list_add(&clk->node, &clock_list);
-	if (clk->ops && clk->ops->init)
-		clk->ops->init(clk);
-	mutex_unlock(&clock_list_sem);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(clk_register);
-
-void clk_unregister(struct clk *clk)
-{
-	mutex_lock(&clock_list_sem);
-	list_del(&clk->sibling);
-	list_del(&clk->node);
-	mutex_unlock(&clock_list_sem);
-}
-EXPORT_SYMBOL_GPL(clk_unregister);
-
-static void clk_enable_init_clocks(void)
-{
-	struct clk *clkp;
-
-	list_for_each_entry(clkp, &clock_list, node)
-		if (clkp->flags & CLK_ENABLE_ON_INIT)
-			clk_enable(clkp);
-}
-
-unsigned long clk_get_rate(struct clk *clk)
-{
-	return clk->rate;
-}
-EXPORT_SYMBOL_GPL(clk_get_rate);
-
-int clk_set_rate(struct clk *clk, unsigned long rate)
-{
-	return clk_set_rate_ex(clk, rate, 0);
-}
-EXPORT_SYMBOL_GPL(clk_set_rate);
-
-int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
-{
-	int ret = -EOPNOTSUPP;
-	unsigned long flags;
-
-	spin_lock_irqsave(&clock_lock, flags);
-
-	if (likely(clk->ops && clk->ops->set_rate)) {
-		ret = clk->ops->set_rate(clk, rate, algo_id);
-		if (ret != 0)
-			goto out_unlock;
-	} else {
-		clk->rate = rate;
-		ret = 0;
-	}
-
-	if (clk->ops && clk->ops->recalc)
-		clk->rate = clk->ops->recalc(clk);
-
-	propagate_rate(clk);
-
-out_unlock:
-	spin_unlock_irqrestore(&clock_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(clk_set_rate_ex);
-
-int clk_set_parent(struct clk *clk, struct clk *parent)
-{
-	unsigned long flags;
-	int ret = -EINVAL;
-
-	if (!parent || !clk)
-		return ret;
-	if (clk->parent == parent)
-		return 0;
-
-	spin_lock_irqsave(&clock_lock, flags);
-	if (clk->usecount == 0) {
-		if (clk->ops->set_parent)
-			ret = clk->ops->set_parent(clk, parent);
-		else
-			ret = clk_reparent(clk, parent);
-
-		if (ret == 0) {
-			pr_debug("clock: set parent of %s to %s (new rate %ld)\n",
-				 clk->name, clk->parent->name, clk->rate);
-			if (clk->ops->recalc)
-				clk->rate = clk->ops->recalc(clk);
-			propagate_rate(clk);
-		}
-	} else
-		ret = -EBUSY;
-	spin_unlock_irqrestore(&clock_lock, flags);
-
-	return ret;
-}
-EXPORT_SYMBOL_GPL(clk_set_parent);
-
-struct clk *clk_get_parent(struct clk *clk)
-{
-	return clk->parent;
-}
-EXPORT_SYMBOL_GPL(clk_get_parent);
-
-long clk_round_rate(struct clk *clk, unsigned long rate)
-{
-	if (likely(clk->ops && clk->ops->round_rate)) {
-		unsigned long flags, rounded;
-
-		spin_lock_irqsave(&clock_lock, flags);
-		rounded = clk->ops->round_rate(clk, rate);
-		spin_unlock_irqrestore(&clock_lock, flags);
-
-		return rounded;
-	}
-
-	return clk_get_rate(clk);
-}
-EXPORT_SYMBOL_GPL(clk_round_rate);
-
-/*
- * Returns a clock. Note that we first try to use device id on the bus
- * and clock name. If this fails, we try to use clock name only.
- */
-struct clk *clk_get(struct device *dev, const char *con_id)
-{
-	const char *dev_id = dev ? dev_name(dev) : NULL;
-
-	return clk_get_sys(dev_id, con_id);
-}
-EXPORT_SYMBOL_GPL(clk_get);
-
-void clk_put(struct clk *clk)
-{
-}
-EXPORT_SYMBOL_GPL(clk_put);
-
-#ifdef CONFIG_PM
-static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state)
-{
-	static pm_message_t prev_state;
-	struct clk *clkp;
-
-	switch (state.event) {
-	case PM_EVENT_ON:
-		/* Resumeing from hibernation */
-		if (prev_state.event != PM_EVENT_FREEZE)
-			break;
-
-		list_for_each_entry(clkp, &clock_list, node) {
-			if (likely(clkp->ops)) {
-				unsigned long rate = clkp->rate;
-
-				if (likely(clkp->ops->set_parent))
-					clkp->ops->set_parent(clkp,
-						clkp->parent);
-				if (likely(clkp->ops->set_rate))
-					clkp->ops->set_rate(clkp,
-						rate, NO_CHANGE);
-				else if (likely(clkp->ops->recalc))
-					clkp->rate = clkp->ops->recalc(clkp);
-			}
-		}
-		break;
-	case PM_EVENT_FREEZE:
-		break;
-	case PM_EVENT_SUSPEND:
-		break;
-	}
-
-	prev_state = state;
-	return 0;
-}
-
-static int clks_sysdev_resume(struct sys_device *dev)
-{
-	return clks_sysdev_suspend(dev, PMSG_ON);
-}
-
-static struct sysdev_class clks_sysdev_class = {
-	.name = "clks",
-};
-
-static struct sysdev_driver clks_sysdev_driver = {
-	.suspend = clks_sysdev_suspend,
-	.resume = clks_sysdev_resume,
-};
-
-static struct sys_device clks_sysdev_dev = {
-	.cls = &clks_sysdev_class,
-};
-
-static int __init clk_sysdev_init(void)
-{
-	sysdev_class_register(&clks_sysdev_class);
-	sysdev_driver_register(&clks_sysdev_class, &clks_sysdev_driver);
-	sysdev_register(&clks_sysdev_dev);
-
-	return 0;
-}
-subsys_initcall(clk_sysdev_init);
-#endif
-
 int __init clk_init(void)
 {
 	int ret;
@@ -506,89 +49,19 @@ int __init clk_init(void)
 }
 
 /*
- *	debugfs support to trace clock tree hierarchy and attributes
+ * Returns a clock. Note that we first try to use device id on the bus
+ * and clock name. If this fails, we try to use clock name only.
  */
-static struct dentry *clk_debugfs_root;
-
-static int clk_debugfs_register_one(struct clk *c)
+struct clk *clk_get(struct device *dev, const char *con_id)
 {
-	int err;
-	struct dentry *d, *child, *child_tmp;
-	struct clk *pa = c->parent;
-	char s[255];
-	char *p = s;
-
-	p += sprintf(p, "%s", c->name);
-	if (c->id >= 0)
-		sprintf(p, ":%d", c->id);
-	d = debugfs_create_dir(s, pa ? pa->dentry : clk_debugfs_root);
-	if (!d)
-		return -ENOMEM;
-	c->dentry = d;
-
-	d = debugfs_create_u8("usecount", S_IRUGO, c->dentry, (u8 *)&c->usecount);
-	if (!d) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	d = debugfs_create_u32("rate", S_IRUGO, c->dentry, (u32 *)&c->rate);
-	if (!d) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	d = debugfs_create_x32("flags", S_IRUGO, c->dentry, (u32 *)&c->flags);
-	if (!d) {
-		err = -ENOMEM;
-		goto err_out;
-	}
-	return 0;
+	const char *dev_id = dev ? dev_name(dev) : NULL;
 
-err_out:
-	d = c->dentry;
-	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
-		debugfs_remove(child);
-	debugfs_remove(c->dentry);
-	return err;
+	return clk_get_sys(dev_id, con_id);
 }
+EXPORT_SYMBOL_GPL(clk_get);
 
-static int clk_debugfs_register(struct clk *c)
+void clk_put(struct clk *clk)
 {
-	int err;
-	struct clk *pa = c->parent;
-
-	if (pa && !pa->dentry) {
-		err = clk_debugfs_register(pa);
-		if (err)
-			return err;
-	}
-
-	if (!c->dentry && c->name) {
-		err = clk_debugfs_register_one(c);
-		if (err)
-			return err;
-	}
-	return 0;
 }
+EXPORT_SYMBOL_GPL(clk_put);
 
-static int __init clk_debugfs_init(void)
-{
-	struct clk *c;
-	struct dentry *d;
-	int err;
-
-	d = debugfs_create_dir("clock", NULL);
-	if (!d)
-		return -ENOMEM;
-	clk_debugfs_root = d;
-
-	list_for_each_entry(c, &clock_list, node) {
-		err = clk_debugfs_register(c);
-		if (err)
-			goto err_out;
-	}
-	return 0;
-err_out:
-	debugfs_remove_recursive(clk_debugfs_root);
-	return err;
-}
-late_initcall(clk_debugfs_init);
diff --git a/drivers/sh/Makefile b/drivers/sh/Makefile
index 4956bf1f2134..033a949c496a 100644
--- a/drivers/sh/Makefile
+++ b/drivers/sh/Makefile
@@ -4,4 +4,5 @@
 obj-$(CONFIG_SUPERHYWAY)	+= superhyway/
 obj-$(CONFIG_MAPLE)		+= maple/
 obj-$(CONFIG_GENERIC_GPIO)	+= pfc.o
+obj-$(CONFIG_SUPERH)		+= clk.o
 obj-y				+= intc.o
diff --git a/drivers/sh/clk.c b/drivers/sh/clk.c
new file mode 100644
index 000000000000..c90a3e1e1085
--- /dev/null
+++ b/drivers/sh/clk.c
@@ -0,0 +1,548 @@
+/*
+ * drivers/sh/clk.c - SuperH clock framework
+ *
+ *  Copyright (C) 2005 - 2009  Paul Mundt
+ *
+ * This clock framework is derived from the OMAP version by:
+ *
+ *	Copyright (C) 2004 - 2008 Nokia Corporation
+ *	Written by Tuukka Tikkanen <tuukka.tikkanen@elektrobit.com>
+ *
+ *  Modified for omap shared clock framework by Tony Lindgren <tony@atomide.com>
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/list.h>
+#include <linux/kobject.h>
+#include <linux/sysdev.h>
+#include <linux/seq_file.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/debugfs.h>
+#include <linux/cpufreq.h>
+#include <linux/clk.h>
+#include <linux/sh_clk.h>
+
+static LIST_HEAD(clock_list);
+static DEFINE_SPINLOCK(clock_lock);
+static DEFINE_MUTEX(clock_list_sem);
+
+void clk_rate_table_build(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  int nr_freqs,
+			  struct clk_div_mult_table *src_table,
+			  unsigned long *bitmap)
+{
+	unsigned long mult, div;
+	unsigned long freq;
+	int i;
+
+	for (i = 0; i < nr_freqs; i++) {
+		div = 1;
+		mult = 1;
+
+		if (src_table->divisors && i < src_table->nr_divisors)
+			div = src_table->divisors[i];
+
+		if (src_table->multipliers && i < src_table->nr_multipliers)
+			mult = src_table->multipliers[i];
+
+		if (!div || !mult || (bitmap && !test_bit(i, bitmap)))
+			freq = CPUFREQ_ENTRY_INVALID;
+		else
+			freq = clk->parent->rate * mult / div;
+
+		freq_table[i].index = i;
+		freq_table[i].frequency = freq;
+	}
+
+	/* Termination entry */
+	freq_table[i].index = i;
+	freq_table[i].frequency = CPUFREQ_TABLE_END;
+}
+
+long clk_rate_table_round(struct clk *clk,
+			  struct cpufreq_frequency_table *freq_table,
+			  unsigned long rate)
+{
+	unsigned long rate_error, rate_error_prev = ~0UL;
+	unsigned long rate_best_fit = rate;
+	unsigned long highest, lowest;
+	int i;
+
+	highest = lowest = 0;
+
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq > highest)
+			highest = freq;
+		if (freq < lowest)
+			lowest = freq;
+
+		rate_error = abs(freq - rate);
+		if (rate_error < rate_error_prev) {
+			rate_best_fit = freq;
+			rate_error_prev = rate_error;
+		}
+
+		if (rate_error == 0)
+			break;
+	}
+
+	if (rate >= highest)
+		rate_best_fit = highest;
+	if (rate <= lowest)
+		rate_best_fit = lowest;
+
+	return rate_best_fit;
+}
+
+int clk_rate_table_find(struct clk *clk,
+			struct cpufreq_frequency_table *freq_table,
+			unsigned long rate)
+{
+	int i;
+
+	for (i = 0; freq_table[i].frequency != CPUFREQ_TABLE_END; i++) {
+		unsigned long freq = freq_table[i].frequency;
+
+		if (freq == CPUFREQ_ENTRY_INVALID)
+			continue;
+
+		if (freq == rate)
+			return i;
+	}
+
+	return -ENOENT;
+}
+
+/* Used for clocks that always have same value as the parent clock */
+unsigned long followparent_recalc(struct clk *clk)
+{
+	return clk->parent ? clk->parent->rate : 0;
+}
+
+int clk_reparent(struct clk *child, struct clk *parent)
+{
+	list_del_init(&child->sibling);
+	if (parent)
+		list_add(&child->sibling, &parent->children);
+	child->parent = parent;
+
+	/* now do the debugfs renaming to reattach the child
+	   to the proper parent */
+
+	return 0;
+}
+
+/* Propagate rate to children */
+void propagate_rate(struct clk *tclk)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &tclk->children, sibling) {
+		if (clkp->ops && clkp->ops->recalc)
+			clkp->rate = clkp->ops->recalc(clkp);
+
+		propagate_rate(clkp);
+	}
+}
+
+static void __clk_disable(struct clk *clk)
+{
+	if (clk->usecount == 0) {
+		printk(KERN_ERR "Trying disable clock %s with 0 usecount\n",
+		       clk->name);
+		WARN_ON(1);
+		return;
+	}
+
+	if (!(--clk->usecount)) {
+		if (likely(clk->ops && clk->ops->disable))
+			clk->ops->disable(clk);
+		if (likely(clk->parent))
+			__clk_disable(clk->parent);
+	}
+}
+
+void clk_disable(struct clk *clk)
+{
+	unsigned long flags;
+
+	if (!clk)
+		return;
+
+	spin_lock_irqsave(&clock_lock, flags);
+	__clk_disable(clk);
+	spin_unlock_irqrestore(&clock_lock, flags);
+}
+EXPORT_SYMBOL_GPL(clk_disable);
+
+static int __clk_enable(struct clk *clk)
+{
+	int ret = 0;
+
+	if (clk->usecount++ == 0) {
+		if (clk->parent) {
+			ret = __clk_enable(clk->parent);
+			if (unlikely(ret))
+				goto err;
+		}
+
+		if (clk->ops && clk->ops->enable) {
+			ret = clk->ops->enable(clk);
+			if (ret) {
+				if (clk->parent)
+					__clk_disable(clk->parent);
+				goto err;
+			}
+		}
+	}
+
+	return ret;
+err:
+	clk->usecount--;
+	return ret;
+}
+
+int clk_enable(struct clk *clk)
+{
+	unsigned long flags;
+	int ret;
+
+	if (!clk)
+		return -EINVAL;
+
+	spin_lock_irqsave(&clock_lock, flags);
+	ret = __clk_enable(clk);
+	spin_unlock_irqrestore(&clock_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_enable);
+
+static LIST_HEAD(root_clks);
+
+/**
+ * recalculate_root_clocks - recalculate and propagate all root clocks
+ *
+ * Recalculates all root clocks (clocks with no parent), which if the
+ * clock's .recalc is set correctly, should also propagate their rates.
+ * Called at init.
+ */
+void recalculate_root_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &root_clks, sibling) {
+		if (clkp->ops && clkp->ops->recalc)
+			clkp->rate = clkp->ops->recalc(clkp);
+		propagate_rate(clkp);
+	}
+}
+
+int clk_register(struct clk *clk)
+{
+	if (clk == NULL || IS_ERR(clk))
+		return -EINVAL;
+
+	/*
+	 * trap out already registered clocks
+	 */
+	if (clk->node.next || clk->node.prev)
+		return 0;
+
+	mutex_lock(&clock_list_sem);
+
+	INIT_LIST_HEAD(&clk->children);
+	clk->usecount = 0;
+
+	if (clk->parent)
+		list_add(&clk->sibling, &clk->parent->children);
+	else
+		list_add(&clk->sibling, &root_clks);
+
+	list_add(&clk->node, &clock_list);
+	if (clk->ops && clk->ops->init)
+		clk->ops->init(clk);
+	mutex_unlock(&clock_list_sem);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(clk_register);
+
+void clk_unregister(struct clk *clk)
+{
+	mutex_lock(&clock_list_sem);
+	list_del(&clk->sibling);
+	list_del(&clk->node);
+	mutex_unlock(&clock_list_sem);
+}
+EXPORT_SYMBOL_GPL(clk_unregister);
+
+void clk_enable_init_clocks(void)
+{
+	struct clk *clkp;
+
+	list_for_each_entry(clkp, &clock_list, node)
+		if (clkp->flags & CLK_ENABLE_ON_INIT)
+			clk_enable(clkp);
+}
+
+unsigned long clk_get_rate(struct clk *clk)
+{
+	return clk->rate;
+}
+EXPORT_SYMBOL_GPL(clk_get_rate);
+
+int clk_set_rate(struct clk *clk, unsigned long rate)
+{
+	return clk_set_rate_ex(clk, rate, 0);
+}
+EXPORT_SYMBOL_GPL(clk_set_rate);
+
+int clk_set_rate_ex(struct clk *clk, unsigned long rate, int algo_id)
+{
+	int ret = -EOPNOTSUPP;
+	unsigned long flags;
+
+	spin_lock_irqsave(&clock_lock, flags);
+
+	if (likely(clk->ops && clk->ops->set_rate)) {
+		ret = clk->ops->set_rate(clk, rate, algo_id);
+		if (ret != 0)
+			goto out_unlock;
+	} else {
+		clk->rate = rate;
+		ret = 0;
+	}
+
+	if (clk->ops && clk->ops->recalc)
+		clk->rate = clk->ops->recalc(clk);
+
+	propagate_rate(clk);
+
+out_unlock:
+	spin_unlock_irqrestore(&clock_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_set_rate_ex);
+
+int clk_set_parent(struct clk *clk, struct clk *parent)
+{
+	unsigned long flags;
+	int ret = -EINVAL;
+
+	if (!parent || !clk)
+		return ret;
+	if (clk->parent == parent)
+		return 0;	/* already attached to this parent */
+
+	spin_lock_irqsave(&clock_lock, flags);
+	if (clk->usecount == 0) {
+		if (clk->ops && clk->ops->set_parent) /* ops may be NULL */
+			ret = clk->ops->set_parent(clk, parent);
+		else
+			ret = clk_reparent(clk, parent);
+
+		if (ret == 0) {
+			if (clk->ops && clk->ops->recalc) /* before the print */
+				clk->rate = clk->ops->recalc(clk);
+			pr_debug("clock: set parent of %s to %s (new rate %ld)\n",
+				 clk->name, clk->parent->name, clk->rate);
+			propagate_rate(clk);
+		}
+	} else
+		ret = -EBUSY;	/* clock has users; not reparented */
+	spin_unlock_irqrestore(&clock_lock, flags);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(clk_set_parent);
+
+struct clk *clk_get_parent(struct clk *clk)
+{
+	return clk->parent;
+}
+EXPORT_SYMBOL_GPL(clk_get_parent);
+
+long clk_round_rate(struct clk *clk, unsigned long rate)
+{
+	if (likely(clk->ops && clk->ops->round_rate)) {
+		unsigned long flags, rounded;
+
+		spin_lock_irqsave(&clock_lock, flags);
+		rounded = clk->ops->round_rate(clk, rate);
+		spin_unlock_irqrestore(&clock_lock, flags);
+
+		return rounded;
+	}
+
+	return clk_get_rate(clk);
+}
+EXPORT_SYMBOL_GPL(clk_round_rate);
+
+#ifdef CONFIG_PM
+static int clks_sysdev_suspend(struct sys_device *dev, pm_message_t state)
+{
+	static pm_message_t prev_state;
+	struct clk *clkp;
+
+	switch (state.event) {
+	case PM_EVENT_ON:
+		/* Resuming from hibernation */
+		if (prev_state.event != PM_EVENT_FREEZE)
+			break;
+
+		list_for_each_entry(clkp, &clock_list, node) {
+			if (likely(clkp->ops)) {
+				unsigned long rate = clkp->rate;
+
+				if (likely(clkp->ops->set_parent))
+					clkp->ops->set_parent(clkp,
+						clkp->parent);
+				if (likely(clkp->ops->set_rate))
+					clkp->ops->set_rate(clkp,
+						rate, NO_CHANGE);
+				else if (likely(clkp->ops->recalc))
+					clkp->rate = clkp->ops->recalc(clkp);
+			}
+		}
+		break;
+	case PM_EVENT_FREEZE:
+		break;
+	case PM_EVENT_SUSPEND:
+		break;
+	}
+
+	prev_state = state;
+	return 0;
+}
+
+static int clks_sysdev_resume(struct sys_device *dev)
+{
+	return clks_sysdev_suspend(dev, PMSG_ON);
+}
+
+static struct sysdev_class clks_sysdev_class = {
+	.name = "clks",
+};
+
+static struct sysdev_driver clks_sysdev_driver = {
+	.suspend = clks_sysdev_suspend,
+	.resume = clks_sysdev_resume,
+};
+
+static struct sys_device clks_sysdev_dev = {
+	.cls = &clks_sysdev_class,
+};
+
+static int __init clk_sysdev_init(void)
+{
+	sysdev_class_register(&clks_sysdev_class);
+	sysdev_driver_register(&clks_sysdev_class, &clks_sysdev_driver);
+	sysdev_register(&clks_sysdev_dev);
+
+	return 0;
+}
+subsys_initcall(clk_sysdev_init);
+#endif
+
+/*
+ *	debugfs support to trace clock tree hierarchy and attributes
+ */
+static struct dentry *clk_debugfs_root;
+
+static int clk_debugfs_register_one(struct clk *c)
+{
+	int err;
+	struct dentry *d, *child, *child_tmp;
+	struct clk *pa = c->parent;
+	char s[255];	/* directory name: "<name>" or "<name>:<id>" */
+	char *p = s;
+
+	p += scnprintf(p, sizeof(s), "%s", c->name); /* never overruns s[] */
+	if (c->id >= 0)
+		scnprintf(p, sizeof(s) - (p - s), ":%d", c->id);
+	d = debugfs_create_dir(s, pa ? pa->dentry : clk_debugfs_root);
+	if (!d)
+		return -ENOMEM;
+	c->dentry = d;
+
+	d = debugfs_create_u8("usecount", S_IRUGO, c->dentry, (u8 *)&c->usecount);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_u32("rate", S_IRUGO, c->dentry, (u32 *)&c->rate);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	d = debugfs_create_x32("flags", S_IRUGO, c->dentry, (u32 *)&c->flags);
+	if (!d) {
+		err = -ENOMEM;
+		goto err_out;
+	}
+	return 0;
+
+err_out:	/* remove any attribute files created so far, then the dir */
+	d = c->dentry;
+	list_for_each_entry_safe(child, child_tmp, &d->d_subdirs, d_u.d_child)
+		debugfs_remove(child);
+	debugfs_remove(c->dentry);
+	return err;
+}
+
+static int clk_debugfs_register(struct clk *c)
+{
+	int err;
+	struct clk *pa = c->parent;
+
+	if (pa && !pa->dentry) {
+		err = clk_debugfs_register(pa);
+		if (err)
+			return err;
+	}
+
+	if (!c->dentry && c->name) {
+		err = clk_debugfs_register_one(c);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
+static int __init clk_debugfs_init(void)
+{
+	struct clk *c;
+	struct dentry *d;
+	int err;
+
+	d = debugfs_create_dir("clock", NULL);
+	if (!d)
+		return -ENOMEM;
+	clk_debugfs_root = d;
+
+	list_for_each_entry(c, &clock_list, node) {
+		err = clk_debugfs_register(c);
+		if (err)
+			goto err_out;
+	}
+	return 0;
+err_out:
+	debugfs_remove_recursive(clk_debugfs_root);
+	return err;
+}
+late_initcall(clk_debugfs_init);
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index de911451c216..6d7de242be1d 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -47,13 +47,13 @@ struct clk {
 #define CLK_ENABLE_ON_INIT	(1 << 0)
 
 /* arch/sh/kernel/cpu/clock.c */
-int clk_init(void);
 unsigned long followparent_recalc(struct clk *);
 void recalculate_root_clocks(void);
 void propagate_rate(struct clk *);
 int clk_reparent(struct clk *child, struct clk *parent);
 int clk_register(struct clk *);
 void clk_unregister(struct clk *);
+void clk_enable_init_clocks(void);
 
 /* the exported API, in addition to clk_set_rate */
 /**
-- 
cgit v1.2.3


From a71ba09655d197f22938fffa6f5d210ff5134f98 Mon Sep 17 00:00:00 2001
From: Paul Mundt <lethal@linux-sh.org>
Date: Thu, 13 May 2010 18:42:25 +0900
Subject: sh: fixup the docbook paths for clock framework shuffling.

Now that the definitions have been consolidated in an alternate header,
update the template accordingly.

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
---
 Documentation/DocBook/sh.tmpl | 10 +++++++---
 include/linux/sh_clk.h        |  3 +--
 2 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/sh.tmpl b/Documentation/DocBook/sh.tmpl
index 0c3dc4c69dd1..d858d92cf6d9 100644
--- a/Documentation/DocBook/sh.tmpl
+++ b/Documentation/DocBook/sh.tmpl
@@ -19,13 +19,17 @@
   </authorgroup>
 
   <copyright>
-   <year>2008</year>
+   <year>2008-2010</year>
    <holder>Paul Mundt</holder>
   </copyright>
   <copyright>
-   <year>2008</year>
+   <year>2008-2010</year>
    <holder>Renesas Technology Corp.</holder>
   </copyright>
+  <copyright>
+   <year>2010</year>
+   <holder>Renesas Electronics Corp.</holder>
+  </copyright>
 
   <legalnotice>
    <para>
@@ -77,7 +81,7 @@
   </chapter>
   <chapter id="clk">
     <title>Clock Framework Extensions</title>
-!Iarch/sh/include/asm/clock.h
+!Iinclude/linux/sh_clk.h
   </chapter>
   <chapter id="mach">
     <title>Machine Specific Interfaces</title>
diff --git a/include/linux/sh_clk.h b/include/linux/sh_clk.h
index 6d7de242be1d..1636d1e2a5f1 100644
--- a/include/linux/sh_clk.h
+++ b/include/linux/sh_clk.h
@@ -46,7 +46,7 @@ struct clk {
 
 #define CLK_ENABLE_ON_INIT	(1 << 0)
 
-/* arch/sh/kernel/cpu/clock.c */
+/* drivers/sh/clk.c */
 unsigned long followparent_recalc(struct clk *);
 void recalculate_root_clocks(void);
 void propagate_rate(struct clk *);
@@ -55,7 +55,6 @@ int clk_register(struct clk *);
 void clk_unregister(struct clk *);
 void clk_enable_init_clocks(void);
 
-/* the exported API, in addition to clk_set_rate */
 /**
  * clk_set_rate_ex - set the clock rate for a clock source, with additional parameter
  * @clk: clock source
-- 
cgit v1.2.3


From 54b93a49d8dd90dfb658f21a3316527fe6195106 Mon Sep 17 00:00:00 2001
From: Guillaume LECERF <glecerf@gmail.com>
Date: Sat, 24 Apr 2010 17:58:02 +0200
Subject: mtd: cfi_probe: add support for SST 0x0701 vendorname

SST 39VF160x and 39VF320x chips use vendorname id 0x0701 and alternative
unlock addresses. Add support for them in cfi_probe.c.

Signed-off-by: Guillaume LECERF <glecerf@gmail.com>
Reviewed-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/cfi_probe.c | 8 ++++++++
 include/linux/mtd/cfi.h       | 1 +
 2 files changed, 9 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/cfi_probe.c b/drivers/mtd/chips/cfi_probe.c
index 045dc100496c..b2acd32f4fbf 100644
--- a/drivers/mtd/chips/cfi_probe.c
+++ b/drivers/mtd/chips/cfi_probe.c
@@ -206,6 +206,11 @@ static int __xipram cfi_chip_setup(struct map_info *map,
 #endif
 	}
 
+	if (cfi->cfiq->P_ID == P_ID_SST_OLD) {
+		addr_unlock1 = 0x5555;
+		addr_unlock2 = 0x2AAA;
+	}
+
 	/*
 	 * Note we put the device back into Read Mode BEFORE going into Auto
 	 * Select Mode, as some devices support nesting of modes, others
@@ -271,6 +276,9 @@ static char *vendorname(__u16 vendor)
 	case P_ID_SST_PAGE:
 		return "SST Page Write";
 
+	case P_ID_SST_OLD:
+		return "SST 39VF160x/39VF320x";
+
 	case P_ID_INTEL_PERFORMANCE:
 		return "Intel Performance Code";
 
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index cee05b1e62b1..5716fc78ca8e 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -253,6 +253,7 @@ struct cfi_bri_query {
 #define P_ID_MITSUBISHI_STD     0x0100
 #define P_ID_MITSUBISHI_EXT     0x0101
 #define P_ID_SST_PAGE           0x0102
+#define P_ID_SST_OLD            0x0701
 #define P_ID_INTEL_PERFORMANCE  0x0200
 #define P_ID_INTEL_DATA         0x0210
 #define P_ID_RESERVED           0xffff
-- 
cgit v1.2.3


From ae731822294468f213f2b56a0ddfc425148c873b Mon Sep 17 00:00:00 2001
From: Wolfram Sang <w.sang@pengutronix.de>
Date: Tue, 27 Apr 2010 04:19:34 +0200
Subject: mtd: chips: use common manufacturer codes in jedec_probe()

Factor out old manufacturers and use the generic ones from cfi.h

Signed-off-by: Wolfram Sang <w.sang@pengutronix.de>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/chips/jedec_probe.c | 252 +++++++++++++++++++---------------------
 include/linux/mtd/cfi.h         |  13 ++-
 2 files changed, 128 insertions(+), 137 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/chips/jedec_probe.c b/drivers/mtd/chips/jedec_probe.c
index 8db1148dfa47..04fb45cacc31 100644
--- a/drivers/mtd/chips/jedec_probe.c
+++ b/drivers/mtd/chips/jedec_probe.c
@@ -22,24 +22,6 @@
 #include <linux/mtd/cfi.h>
 #include <linux/mtd/gen_probe.h>
 
-/* Manufacturers */
-#define MANUFACTURER_AMD	0x0001
-#define MANUFACTURER_ATMEL	0x001f
-#define MANUFACTURER_EON	0x001c
-#define MANUFACTURER_FUJITSU	0x0004
-#define MANUFACTURER_HYUNDAI	0x00AD
-#define MANUFACTURER_INTEL	0x0089
-#define MANUFACTURER_MACRONIX	0x00C2
-#define MANUFACTURER_NEC	0x0010
-#define MANUFACTURER_PMC	0x009D
-#define MANUFACTURER_SHARP	0x00b0
-#define MANUFACTURER_SST	0x00BF
-#define MANUFACTURER_ST		0x0020
-#define MANUFACTURER_TOSHIBA	0x0098
-#define MANUFACTURER_WINBOND	0x00da
-#define CONTINUATION_CODE	0x007f
-
-
 /* AMD */
 #define AM29DL800BB	0x22CB
 #define AM29DL800BT	0x224A
@@ -309,7 +291,7 @@ struct amd_flash_info {
  */
 static const struct amd_flash_info jedec_table[] = {
 	{
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F032B,
 		.name		= "AMD AM29F032B",
 		.uaddr		= MTD_UADDR_0x0555_0x02AA,
@@ -321,7 +303,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,64)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV160DT,
 		.name		= "AMD AM29LV160DT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -336,7 +318,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV160DB,
 		.name		= "AMD AM29LV160DB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -351,7 +333,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV400BB,
 		.name		= "AMD AM29LV400BB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -366,7 +348,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,7)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV400BT,
 		.name		= "AMD AM29LV400BT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -381,7 +363,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV800BB,
 		.name		= "AMD AM29LV800BB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -397,7 +379,7 @@ static const struct amd_flash_info jedec_table[] = {
 		}
 	}, {
 /* add DL */
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29DL800BB,
 		.name		= "AMD AM29DL800BB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -414,7 +396,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,14)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29DL800BT,
 		.name		= "AMD AM29DL800BT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -431,7 +413,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F800BB,
 		.name		= "AMD AM29F800BB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -446,7 +428,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV800BT,
 		.name		= "AMD AM29LV800BT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -461,7 +443,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F800BT,
 		.name		= "AMD AM29F800BT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -476,7 +458,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F017D,
 		.name		= "AMD AM29F017D",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -488,7 +470,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F016D,
 		.name		= "AMD AM29F016D",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -500,7 +482,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F080,
 		.name		= "AMD AM29F080",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -512,7 +494,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F040,
 		.name		= "AMD AM29F040",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -524,7 +506,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29LV040B,
 		.name		= "AMD AM29LV040B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -536,7 +518,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29F002T,
 		.name		= "AMD AM29F002T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -551,7 +533,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29SL800DT,
 		.name		= "AMD AM29SL800DT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -566,7 +548,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_AMD,
+		.mfr_id		= CFI_MFR_AMD,
 		.dev_id		= AM29SL800DB,
 		.name		= "AMD AM29SL800DB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -581,7 +563,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT49BV512,
 		.name		= "Atmel AT49BV512",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -593,7 +575,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT29LV512,
 		.name		= "Atmel AT29LV512",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -606,7 +588,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x80,256)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT49BV16X,
 		.name		= "Atmel AT49BV16X",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -619,7 +601,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT49BV16XT,
 		.name		= "Atmel AT49BV16XT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -632,7 +614,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT49BV32X,
 		.name		= "Atmel AT49BV32X",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -645,7 +627,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,63)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ATMEL,
+		.mfr_id		= CFI_MFR_ATMEL,
 		.dev_id		= AT49BV32XT,
 		.name		= "Atmel AT49BV32XT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -658,7 +640,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_EON,
+		.mfr_id		= CFI_MFR_EON,
 		.dev_id		= EN29SL800BT,
 		.name		= "Eon EN29SL800BT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -673,7 +655,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_EON,
+		.mfr_id		= CFI_MFR_EON,
 		.dev_id		= EN29SL800BB,
 		.name		= "Eon EN29SL800BB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -688,7 +670,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29F040C,
 		.name		= "Fujitsu MBM29F040C",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -700,7 +682,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29F800BA,
 		.name		= "Fujitsu MBM29F800BA",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -715,7 +697,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV650UE,
 		.name		= "Fujitsu MBM29LV650UE",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -727,7 +709,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,128)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV320TE,
 		.name		= "Fujitsu MBM29LV320TE",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -740,7 +722,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV320BE,
 		.name		= "Fujitsu MBM29LV320BE",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -753,7 +735,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,63)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV160TE,
 		.name		= "Fujitsu MBM29LV160TE",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -768,7 +750,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV160BE,
 		.name		= "Fujitsu MBM29LV160BE",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -783,7 +765,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV800BA,
 		.name		= "Fujitsu MBM29LV800BA",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -798,7 +780,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV800TA,
 		.name		= "Fujitsu MBM29LV800TA",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -813,7 +795,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV400BC,
 		.name		= "Fujitsu MBM29LV400BC",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -828,7 +810,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,7)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_FUJITSU,
+		.mfr_id		= CFI_MFR_FUJITSU,
 		.dev_id		= MBM29LV400TC,
 		.name		= "Fujitsu MBM29LV400TC",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -843,7 +825,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_HYUNDAI,
+		.mfr_id		= CFI_MFR_HYUNDAI,
 		.dev_id		= HY29F002T,
 		.name		= "Hyundai HY29F002T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -858,7 +840,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F004B3B,
 		.name		= "Intel 28F004B3B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -871,7 +853,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 7),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F004B3T,
 		.name		= "Intel 28F004B3T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -884,7 +866,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F400B3B,
 		.name		= "Intel 28F400B3B",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -897,7 +879,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 7),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F400B3T,
 		.name		= "Intel 28F400B3T",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -910,7 +892,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F008B3B,
 		.name		= "Intel 28F008B3B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -923,7 +905,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F008B3T,
 		.name		= "Intel 28F008B3T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -936,7 +918,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F008S5,
 		.name		= "Intel 28F008S5",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -948,7 +930,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F016S5,
 		.name		= "Intel 28F016S5",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -960,7 +942,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F008SA,
 		.name		= "Intel 28F008SA",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -972,7 +954,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F800B3B,
 		.name		= "Intel 28F800B3B",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -985,7 +967,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F800B3T,
 		.name		= "Intel 28F800B3T",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -998,7 +980,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F016B3B,
 		.name		= "Intel 28F016B3B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1011,7 +993,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 31),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F016S3,
 		.name		= "Intel I28F016S3",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1023,7 +1005,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F016B3T,
 		.name		= "Intel 28F016B3T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1036,7 +1018,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F160B3B,
 		.name		= "Intel 28F160B3B",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1049,7 +1031,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 31),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F160B3T,
 		.name		= "Intel 28F160B3T",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1062,7 +1044,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F320B3B,
 		.name		= "Intel 28F320B3B",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1075,7 +1057,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 63),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F320B3T,
 		.name		= "Intel 28F320B3T",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1088,7 +1070,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F640B3B,
 		.name		= "Intel 28F640B3B",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1101,7 +1083,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 127),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F640B3T,
 		.name		= "Intel 28F640B3T",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1114,7 +1096,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000, 8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I28F640C3B,
 		.name		= "Intel 28F640C3B",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1127,7 +1109,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000, 127),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I82802AB,
 		.name		= "Intel 82802AB",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1139,7 +1121,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_INTEL,
+		.mfr_id		= CFI_MFR_INTEL,
 		.dev_id		= I82802AC,
 		.name		= "Intel 82802AC",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1151,7 +1133,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29LV040C,
 		.name		= "Macronix MX29LV040C",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1163,7 +1145,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29LV160T,
 		.name		= "MXIC MX29LV160T",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1178,7 +1160,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_NEC,
+		.mfr_id		= CFI_MFR_NEC,
 		.dev_id		= UPD29F064115,
 		.name		= "NEC uPD29F064115",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1192,7 +1174,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x2000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29LV160B,
 		.name		= "MXIC MX29LV160B",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1207,7 +1189,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29F040,
 		.name		= "Macronix MX29F040",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1219,7 +1201,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29F016,
 		.name		= "Macronix MX29F016",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1231,7 +1213,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29F004T,
 		.name		= "Macronix MX29F004T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1246,7 +1228,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29F004B,
 		.name		= "Macronix MX29F004B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1261,7 +1243,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,7),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_MACRONIX,
+		.mfr_id		= CFI_MFR_MACRONIX,
 		.dev_id		= MX29F002T,
 		.name		= "Macronix MX29F002T",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1276,7 +1258,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_PMC,
+		.mfr_id		= CFI_MFR_PMC,
 		.dev_id		= PM49FL002,
 		.name		= "PMC Pm49FL002",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1288,7 +1270,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO( 0x01000, 64 )
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_PMC,
+		.mfr_id		= CFI_MFR_PMC,
 		.dev_id		= PM49FL004,
 		.name		= "PMC Pm49FL004",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1300,7 +1282,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO( 0x01000, 128 )
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_PMC,
+		.mfr_id		= CFI_MFR_PMC,
 		.dev_id		= PM49FL008,
 		.name		= "PMC Pm49FL008",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1312,7 +1294,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO( 0x01000, 256 )
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SHARP,
+		.mfr_id		= CFI_MFR_SHARP,
 		.dev_id		= LH28F640BF,
 		.name		= "LH28F640BF",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1324,7 +1306,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x40000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39LF512,
 		.name		= "SST 39LF512",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1336,7 +1318,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39LF010,
 		.name		= "SST 39LF010",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1348,7 +1330,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
  		.dev_id 	= SST29EE020,
 		.name		= "SST 29EE020",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1359,7 +1341,7 @@ static const struct amd_flash_info jedec_table[] = {
 		.regions = {ERASEINFO(0x01000,64),
 		}
 	}, {
- 		.mfr_id		= MANUFACTURER_SST,
+ 		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST29LE020,
  		.name		= "SST 29LE020",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1370,7 +1352,7 @@ static const struct amd_flash_info jedec_table[] = {
 		.regions = {ERASEINFO(0x01000,64),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39LF020,
 		.name		= "SST 39LF020",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1382,7 +1364,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,64),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39LF040,
 		.name		= "SST 39LF040",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1394,7 +1376,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,128),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39SF010A,
 		.name		= "SST 39SF010A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1406,7 +1388,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39SF020A,
 		.name		= "SST 39SF020A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1418,7 +1400,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,64),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST39SF040,
 		.name		= "SST 39SF040",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1430,7 +1412,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,128),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF040B,
 		.name		= "SST 49LF040B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1443,7 +1425,7 @@ static const struct amd_flash_info jedec_table[] = {
 		}
 	}, {
 
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF004B,
 		.name		= "SST 49LF004B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1455,7 +1437,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,128),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF008A,
 		.name		= "SST 49LF008A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1467,7 +1449,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,256),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF030A,
 		.name		= "SST 49LF030A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1479,7 +1461,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,96),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF040A,
 		.name		= "SST 49LF040A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1491,7 +1473,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,128),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST49LF080A,
 		.name		= "SST 49LF080A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1503,7 +1485,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x01000,256),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,     /* should be CFI */
+		.mfr_id		= CFI_MFR_SST,     /* should be CFI */
 		.dev_id		= SST39LF160,
 		.name		= "SST 39LF160",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1516,7 +1498,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,256)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,     /* should be CFI */
+		.mfr_id		= CFI_MFR_SST,     /* should be CFI */
 		.dev_id		= SST39VF1601,
 		.name		= "SST 39VF1601",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1529,7 +1511,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,256)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,     /* should be CFI */
+		.mfr_id		= CFI_MFR_SST,     /* should be CFI */
 		.dev_id		= SST39VF3201,
 		.name		= "SST 39VF3201",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1544,7 +1526,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,256)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_SST,
+		.mfr_id		= CFI_MFR_SST,
 		.dev_id		= SST36VF3203,
 		.name		= "SST 36VF3203",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1556,7 +1538,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,64),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M29F800AB,
 		.name		= "ST M29F800AB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1571,7 +1553,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,	/* FIXME - CFI device? */
+		.mfr_id		= CFI_MFR_ST,	/* FIXME - CFI device? */
 		.dev_id		= M29W800DT,
 		.name		= "ST M29W800DT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1586,7 +1568,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,	/* FIXME - CFI device? */
+		.mfr_id		= CFI_MFR_ST,	/* FIXME - CFI device? */
 		.dev_id		= M29W800DB,
 		.name		= "ST M29W800DB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1601,7 +1583,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,15)
 		}
 	},  {
-		.mfr_id         = MANUFACTURER_ST,
+		.mfr_id         = CFI_MFR_ST,
 		.dev_id         = M29W400DT,
 		.name           = "ST M29W400DT",
 		.devtypes       = CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1616,7 +1598,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,1)
 		}
 	}, {
-		.mfr_id         = MANUFACTURER_ST,
+		.mfr_id         = CFI_MFR_ST,
 		.dev_id         = M29W400DB,
 		.name           = "ST M29W400DB",
 		.devtypes       = CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1631,7 +1613,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,7)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,	/* FIXME - CFI device? */
+		.mfr_id		= CFI_MFR_ST,	/* FIXME - CFI device? */
 		.dev_id		= M29W160DT,
 		.name		= "ST M29W160DT",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1646,7 +1628,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,	/* FIXME - CFI device? */
+		.mfr_id		= CFI_MFR_ST,	/* FIXME - CFI device? */
 		.dev_id		= M29W160DB,
 		.name		= "ST M29W160DB",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1661,7 +1643,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M29W040B,
 		.name		= "ST M29W040B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1673,7 +1655,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50FW040,
 		.name		= "ST M50FW040",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1685,7 +1667,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,8),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50FW080,
 		.name		= "ST M50FW080",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1697,7 +1679,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50FW016,
 		.name		= "ST M50FW016",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1709,7 +1691,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,32),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50LPW080,
 		.name		= "ST M50LPW080",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1721,7 +1703,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		},
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50FLW080A,
 		.name		= "ST M50FLW080A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1736,7 +1718,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_ST,
+		.mfr_id		= CFI_MFR_ST,
 		.dev_id		= M50FLW080B,
 		.name		= "ST M50FLW080B",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1751,7 +1733,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x1000,16),
 		}
 	}, {
-		.mfr_id		= 0xff00 | MANUFACTURER_ST,
+		.mfr_id		= 0xff00 | CFI_MFR_ST,
 		.dev_id		= 0xff00 | PSD4256G6V,
 		.name		= "ST PSD4256G6V",
 		.devtypes	= CFI_DEVICETYPE_X16,
@@ -1763,7 +1745,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,16),
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVT160,
 		.name		= "Toshiba TC58FVT160",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1778,7 +1760,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x04000,1)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVB160,
 		.name		= "Toshiba TC58FVB160",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1793,7 +1775,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,31)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVB321,
 		.name		= "Toshiba TC58FVB321",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1806,7 +1788,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,63)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVT321,
 		.name		= "Toshiba TC58FVT321",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1819,7 +1801,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVB641,
 		.name		= "Toshiba TC58FVB641",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1832,7 +1814,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x10000,127)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_TOSHIBA,
+		.mfr_id		= CFI_MFR_TOSHIBA,
 		.dev_id		= TC58FVT641,
 		.name		= "Toshiba TC58FVT641",
 		.devtypes	= CFI_DEVICETYPE_X16|CFI_DEVICETYPE_X8,
@@ -1845,7 +1827,7 @@ static const struct amd_flash_info jedec_table[] = {
 			ERASEINFO(0x02000,8)
 		}
 	}, {
-		.mfr_id		= MANUFACTURER_WINBOND,
+		.mfr_id		= CFI_MFR_WINBOND,
 		.dev_id		= W49V002A,
 		.name		= "Winbond W49V002A",
 		.devtypes	= CFI_DEVICETYPE_X8,
@@ -1878,7 +1860,7 @@ static inline u32 jedec_read_mfr(struct map_info *map, uint32_t base,
 		mask = (1 << (cfi->device_type * 8)) - 1;
 		result = map_read(map, base + ofs);
 		bank++;
-	} while ((result.x[0] & mask) == CONTINUATION_CODE);
+	} while ((result.x[0] & mask) == CFI_MFR_CONTINUATION);
 
 	return result.x[0] & mask;
 }
diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h
index 5716fc78ca8e..574d9ee066f1 100644
--- a/include/linux/mtd/cfi.h
+++ b/include/linux/mtd/cfi.h
@@ -516,16 +516,25 @@ struct cfi_fixup {
 	void* param;
 };
 
-#define CFI_MFR_ANY 0xffff
-#define CFI_ID_ANY  0xffff
+#define CFI_MFR_ANY		0xFFFF
+#define CFI_ID_ANY		0xFFFF
+#define CFI_MFR_CONTINUATION	0x007F
 
 #define CFI_MFR_AMD		0x0001
 #define CFI_MFR_ATMEL		0x001F
+#define CFI_MFR_EON		0x001C
+#define CFI_MFR_FUJITSU		0x0004
+#define CFI_MFR_HYUNDAI		0x00AD
 #define CFI_MFR_INTEL		0x0089
 #define CFI_MFR_MACRONIX	0x00C2
+#define CFI_MFR_NEC		0x0010
+#define CFI_MFR_PMC		0x009D
 #define CFI_MFR_SAMSUNG		0x00EC
+#define CFI_MFR_SHARP		0x00B0
 #define CFI_MFR_SST		0x00BF
 #define CFI_MFR_ST		0x0020 /* STMicroelectronics */
+#define CFI_MFR_TOSHIBA		0x0098
+#define CFI_MFR_WINBOND		0x00DA
 
 void cfi_fixup(struct mtd_info *mtd, struct cfi_fixup* fixups);
 
-- 
cgit v1.2.3


From 6a88c47bd528cb0f82692986a3ca57b3695d9c60 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Wed, 28 Apr 2010 17:46:45 +0200
Subject: mtd: onenand: add support for chips with 4KiB page size

This patch adds support for OneNAND chips that have 4KiB page size.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 32 +++++++++++++++++++-------------
 include/linux/mtd/onenand.h        |  4 ++++
 2 files changed, 23 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 32f0ed33afe0..1b26f50e159a 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -397,7 +397,8 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		value = onenand_bufferram_address(this, block);
 		this->write_word(value, this->base + ONENAND_REG_START_ADDRESS2);
 
-		if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this))
+		if (ONENAND_IS_MLC(this) || ONENAND_IS_2PLANE(this) ||
+		    ONENAND_IS_4KB_PAGE(this))
 			/* It is always BufferRAM0 */
 			ONENAND_SET_BUFFERRAM0(this);
 		else
@@ -426,7 +427,7 @@ static int onenand_command(struct mtd_info *mtd, int cmd, loff_t addr, size_t le
 		case FLEXONENAND_CMD_RECOVER_LSB:
 		case ONENAND_CMD_READ:
 		case ONENAND_CMD_READOOB:
-			if (ONENAND_IS_MLC(this))
+			if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
 				/* It is always BufferRAM0 */
 				dataram = ONENAND_SET_BUFFERRAM0(this);
 			else
@@ -466,11 +467,11 @@ static inline int onenand_read_ecc(struct onenand_chip *this)
 {
 	int ecc, i, result = 0;
 
-	if (!FLEXONENAND(this))
+	if (!FLEXONENAND(this) && !ONENAND_IS_4KB_PAGE(this))
 		return this->read_word(this->base + ONENAND_REG_ECC_STATUS);
 
 	for (i = 0; i < 4; i++) {
-		ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS + i);
+		ecc = this->read_word(this->base + ONENAND_REG_ECC_STATUS + i*2);
 		if (likely(!ecc))
 			continue;
 		if (ecc & FLEXONENAND_UNCORRECTABLE_ERROR)
@@ -1425,7 +1426,7 @@ static int onenand_read(struct mtd_info *mtd, loff_t from, size_t len,
 	int ret;
 
 	onenand_get_device(mtd, FL_READING);
-	ret = ONENAND_IS_MLC(this) ?
+	ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
 		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
 		onenand_read_ops_nolock(mtd, from, &ops);
 	onenand_release_device(mtd);
@@ -1460,7 +1461,7 @@ static int onenand_read_oob(struct mtd_info *mtd, loff_t from,
 
 	onenand_get_device(mtd, FL_READING);
 	if (ops->datbuf)
-		ret = ONENAND_IS_MLC(this) ?
+		ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
 			onenand_mlc_read_ops_nolock(mtd, from, ops) :
 			onenand_read_ops_nolock(mtd, from, ops);
 	else
@@ -1926,7 +1927,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 		 * 2 PLANE, MLC, and Flex-OneNAND do not support
 		 * write-while-program feature.
 		 */
-		if (!ONENAND_IS_2PLANE(this) && !first) {
+		if (!ONENAND_IS_2PLANE(this) && !ONENAND_IS_4KB_PAGE(this) && !first) {
 			ONENAND_SET_PREV_BUFFERRAM(this);
 
 			ret = this->wait(mtd, FL_WRITING);
@@ -1957,7 +1958,7 @@ static int onenand_write_ops_nolock(struct mtd_info *mtd, loff_t to,
 		/*
 		 * 2 PLANE, MLC, and Flex-OneNAND wait here
 		 */
-		if (ONENAND_IS_2PLANE(this)) {
+		if (ONENAND_IS_2PLANE(this) || ONENAND_IS_4KB_PAGE(this)) {
 			ret = this->wait(mtd, FL_WRITING);
 
 			/* In partial page write we don't update bufferram */
@@ -2084,7 +2085,7 @@ static int onenand_write_oob_nolock(struct mtd_info *mtd, loff_t to,
 			memcpy(oobbuf + column, buf, thislen);
 		this->write_bufferram(mtd, ONENAND_SPARERAM, oobbuf, 0, mtd->oobsize);
 
-		if (ONENAND_IS_MLC(this)) {
+		if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this)) {
 			/* Set main area of DataRAM to 0xff*/
 			memset(this->page_buf, 0xff, mtd->writesize);
 			this->write_bufferram(mtd, ONENAND_DATARAM,
@@ -3027,7 +3028,7 @@ static int do_otp_read(struct mtd_info *mtd, loff_t from, size_t len,
 	this->command(mtd, ONENAND_CMD_OTP_ACCESS, 0, 0);
 	this->wait(mtd, FL_OTPING);
 
-	ret = ONENAND_IS_MLC(this) ?
+	ret = ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this) ?
 		onenand_mlc_read_ops_nolock(mtd, from, &ops) :
 		onenand_read_ops_nolock(mtd, from, &ops);
 
@@ -3372,7 +3373,10 @@ static void onenand_check_features(struct mtd_info *mtd)
 	/* Lock scheme */
 	switch (density) {
 	case ONENAND_DEVICE_DENSITY_4Gb:
-		this->options |= ONENAND_HAS_2PLANE;
+		if (ONENAND_IS_DDP(this))
+			this->options |= ONENAND_HAS_2PLANE;
+		else
+			this->options |= ONENAND_HAS_4KB_PAGE;
 
 	case ONENAND_DEVICE_DENSITY_2Gb:
 		/* 2Gb DDP does not have 2 plane */
@@ -3393,7 +3397,7 @@ static void onenand_check_features(struct mtd_info *mtd)
 		break;
 	}
 
-	if (ONENAND_IS_MLC(this))
+	if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
 		this->options &= ~ONENAND_HAS_2PLANE;
 
 	if (FLEXONENAND(this)) {
@@ -3407,6 +3411,8 @@ static void onenand_check_features(struct mtd_info *mtd)
 		printk(KERN_DEBUG "Chip support all block unlock\n");
 	if (this->options & ONENAND_HAS_2PLANE)
 		printk(KERN_DEBUG "Chip has 2 plane\n");
+	if (this->options & ONENAND_HAS_4KB_PAGE)
+		printk(KERN_DEBUG "Chip has 4KiB pagesize\n");
 }
 
 /**
@@ -3799,7 +3805,7 @@ static int onenand_probe(struct mtd_info *mtd)
 	/* The data buffer size is equal to page size */
 	mtd->writesize = this->read_word(this->base + ONENAND_REG_DATA_BUFFER_SIZE);
 	/* We use the full BufferRAM */
-	if (ONENAND_IS_MLC(this))
+	if (ONENAND_IS_MLC(this) || ONENAND_IS_4KB_PAGE(this))
 		mtd->writesize <<= 1;
 
 	mtd->oobsize = mtd->writesize >> 5;
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 5509eb06b326..c9a3c3596b68 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -175,10 +175,14 @@ struct onenand_chip {
 #define ONENAND_HAS_CONT_LOCK		(0x0001)
 #define ONENAND_HAS_UNLOCK_ALL		(0x0002)
 #define ONENAND_HAS_2PLANE		(0x0004)
+#define ONENAND_HAS_4KB_PAGE		(0x0008)
 #define ONENAND_SKIP_UNLOCK_CHECK	(0x0100)
 #define ONENAND_PAGEBUF_ALLOC		(0x1000)
 #define ONENAND_OOBBUF_ALLOC		(0x2000)
 
+#define ONENAND_IS_4KB_PAGE(this)					\
+	(this->options & ONENAND_HAS_4KB_PAGE)
+
 /*
  * OneNAND Flash Manufacturer ID Codes
  */
-- 
cgit v1.2.3


From 4a8ce0b030716b95004a4ace969953bc3ad7d2fe Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Wed, 28 Apr 2010 17:46:46 +0200
Subject: mtd: onenand: allocate verify buffer in the core

This patch extends OneNAND core code with support for OneNAND verify
write check. This is done by allocating the buffer for verify read
directly from the core code.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 13 ++++++++++++-
 include/linux/mtd/onenand.h        |  3 +++
 2 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 1b26f50e159a..045811f21497 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -3932,6 +3932,13 @@ int onenand_scan(struct mtd_info *mtd, int maxchips)
 				__func__);
 			return -ENOMEM;
 		}
+#ifdef CONFIG_MTD_ONENAND_VERIFY_WRITE
+		this->verify_buf = kzalloc(mtd->writesize, GFP_KERNEL);
+		if (!this->verify_buf) {
+			kfree(this->page_buf);
+			return -ENOMEM;
+		}
+#endif
 		this->options |= ONENAND_PAGEBUF_ALLOC;
 	}
 	if (!this->oob_buf) {
@@ -4059,8 +4066,12 @@ void onenand_release(struct mtd_info *mtd)
 		kfree(this->bbm);
 	}
 	/* Buffers allocated by onenand_scan */
-	if (this->options & ONENAND_PAGEBUF_ALLOC)
+	if (this->options & ONENAND_PAGEBUF_ALLOC) {
 		kfree(this->page_buf);
+#ifdef CONFIG_MTD_ONENAND_VERIFY_WRITE
+		kfree(this->verify_buf);
+#endif
+	}
 	if (this->options & ONENAND_OOBBUF_ALLOC)
 		kfree(this->oob_buf);
 	kfree(mtd->eraseregions);
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index c9a3c3596b68..9b43268224a7 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -125,6 +125,9 @@ struct onenand_chip {
 	flstate_t		state;
 	unsigned char		*page_buf;
 	unsigned char		*oob_buf;
+#ifdef CONFIG_MTD_ONENAND_VERIFY_WRITE
+	unsigned char		*verify_buf;
+#endif
 
 	int			subpagesize;
 	struct nand_ecclayout	*ecclayout;
-- 
cgit v1.2.3


From 3328dc315914aa6db486da2ceb021b6f0b36b877 Mon Sep 17 00:00:00 2001
From: Kyungmin Park <kyungmin.park@samsung.com>
Date: Wed, 28 Apr 2010 17:46:47 +0200
Subject: mtd: onenand: add new callback for bufferram read

This patch adds a new callback for the underlying drivers, which is
called instead of accessing the buffer ram directly. This callback will
be used by Samsung OneNAND driver to implement DMA transfers on S5PC110
SoC.

Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/onenand/onenand_base.c | 6 ++----
 include/linux/mtd/onenand.h        | 2 ++
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/onenand/onenand_base.c b/drivers/mtd/onenand/onenand_base.c
index 045811f21497..9827ab779c08 100644
--- a/drivers/mtd/onenand/onenand_base.c
+++ b/drivers/mtd/onenand/onenand_base.c
@@ -1635,7 +1635,6 @@ static int onenand_verify_oob(struct mtd_info *mtd, const u_char *buf, loff_t to
 static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr, size_t len)
 {
 	struct onenand_chip *this = mtd->priv;
-	void __iomem *dataram;
 	int ret = 0;
 	int thislen, column;
 
@@ -1655,10 +1654,9 @@ static int onenand_verify(struct mtd_info *mtd, const u_char *buf, loff_t addr,
 
 		onenand_update_bufferram(mtd, addr, 1);
 
-		dataram = this->base + ONENAND_DATARAM;
-		dataram += onenand_bufferram_offset(mtd, ONENAND_DATARAM);
+		this->read_bufferram(mtd, ONENAND_DATARAM, this->verify_buf, 0, mtd->writesize);
 
-		if (memcmp(buf, dataram + column, thislen))
+		if (memcmp(buf, this->verify_buf, thislen))
 			return -EBADMSG;
 
 		len -= thislen;
diff --git a/include/linux/mtd/onenand.h b/include/linux/mtd/onenand.h
index 9b43268224a7..c26ff86ad08a 100644
--- a/include/linux/mtd/onenand.h
+++ b/include/linux/mtd/onenand.h
@@ -212,6 +212,8 @@ struct mtd_partition;
 
 struct onenand_platform_data {
 	void		(*mmcontrol)(struct mtd_info *mtd, int sync_read);
+	int		(*read_bufferram)(struct mtd_info *mtd, int area,
+			unsigned char *buffer, int offset, size_t count);
 	struct mtd_partition *parts;
 	unsigned int	nr_parts;
 };
-- 
cgit v1.2.3


From 709c4efb68cccd2de9a7d63b1f90276b1617e613 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 4 May 2010 12:51:34 -0700
Subject: mtd: map.h: add missing bug.h include

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/map.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index b981b8772217..01703d425986 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -7,6 +7,7 @@
 #include <linux/types.h>
 #include <linux/list.h>
 #include <linux/string.h>
+#include <linux/bug.h>
 
 #include <linux/mtd/compatmac.h>
 
-- 
cgit v1.2.3


From 9ea5973883bbe26372f45d99eb3a500f08d966f9 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Sat, 10 Apr 2010 11:18:58 -0700
Subject: mtd: suppress warnings in inline_map_read()

With gcc 4.4.3 -O2 on MIPS32:

drivers/mtd/chips/cfi_util.c: In function 'cfi_qry_present':
include/linux/mtd/map.h:390: warning: 'r' may be used uninitialized in this function
include/linux/mtd/map.h:375: note: 'r' was declared here
include/linux/mtd/map.h:390: warning: 'r' may be used uninitialized in this function
include/linux/mtd/map.h:375: note: 'r' was declared here

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: Artem Bityutskiy <Artem.Bityutskiy@nokia.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 include/linux/mtd/map.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h
index 01703d425986..de89eca864ce 100644
--- a/include/linux/mtd/map.h
+++ b/include/linux/mtd/map.h
@@ -387,6 +387,8 @@ static inline map_word inline_map_read(struct map_info *map, unsigned long ofs)
 #endif
 	else if (map_bankwidth_is_large(map))
 		memcpy_fromio(r.x, map->virt+ofs, map->bankwidth);
+	else
+		BUG();
 
 	return r;
 }
-- 
cgit v1.2.3


From 426c457a3216fac74e3d44dd39729b0689f4c7ab Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 4 May 2010 20:58:03 -0700
Subject: mtd: nand: extend NAND flash detection to new MLC chips

Some of the newer MLC devices have a 6-byte ID sequence in which
several field definitions differ from older chips in a manner that is
not backward compatible.  For instance:

Samsung K9GAG08U0M (5-byte sequence): ec d5 14 b6 74
4th byte, bits 1:0 encode the page size: 0=1KiB, 1=2KiB, 2=4KiB, 3=8KiB
4th byte, bits 5:4 encode the block size: 0=64KiB, 1=128KiB, ...
4th byte, bit 6 encodes the OOB size: 0=8B/512B, 1=16B/512B

Samsung K9GAG08U0D (6-byte sequence): ec d5 94 29 34 41
4th byte, bits 1:0 encode the page size: 0=2KiB, 1=4KiB, 2=8KiB, 3=rsvd
4th byte, bits 7;5:4 encode the block size: 0=128KiB, 1=256KiB, ...
4th byte, bits 6;3:2 encode the OOB size: 1=128B/page, 2=218B/page

This patch uses the new 6-byte scheme if the following conditions are
all true:

1) The ID code wraps around after exactly 6 bytes

2) Manufacturer is Samsung

3) 6th byte is nonzero

The patch also extends the maximum OOB size from 128B to 256B.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 64 ++++++++++++++++++++++++++++++--------------
 include/linux/mtd/nand.h     |  2 +-
 2 files changed, 45 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index b9dc65c7253c..85891dcc27ad 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -2774,8 +2774,8 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 						  int busw, int *maf_id,
 						  struct nand_flash_dev *type)
 {
-	int dev_id, maf_idx;
-	int tmp_id, tmp_manf;
+	int i, dev_id, maf_idx;
+	u8 id_data[8];
 
 	/* Select the device */
 	chip->select_chip(mtd, 0);
@@ -2801,15 +2801,15 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 
 	chip->cmdfunc(mtd, NAND_CMD_READID, 0x00, -1);
 
-	/* Read manufacturer and device IDs */
+	/* Read entire ID string */
 
-	tmp_manf = chip->read_byte(mtd);
-	tmp_id = chip->read_byte(mtd);
+	for (i = 0; i < 8; i++)
+		id_data[i] = chip->read_byte(mtd);
 
-	if (tmp_manf != *maf_id || tmp_id != dev_id) {
+	if (id_data[0] != *maf_id || id_data[1] != dev_id) {
 		printk(KERN_INFO "%s: second ID read did not match "
 		       "%02x,%02x against %02x,%02x\n", __func__,
-		       *maf_id, dev_id, tmp_manf, tmp_id);
+		       *maf_id, dev_id, id_data[0], id_data[1]);
 		return ERR_PTR(-ENODEV);
 	}
 
@@ -2832,21 +2832,45 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	if (!type->pagesize) {
 		int extid;
 		/* The 3rd id byte holds MLC / multichip data */
-		chip->cellinfo = chip->read_byte(mtd);
+		chip->cellinfo = id_data[2];
 		/* The 4th id byte is the important one */
-		extid = chip->read_byte(mtd);
-		/* Calc pagesize */
-		mtd->writesize = 1024 << (extid & 0x3);
-		extid >>= 2;
-		/* Calc oobsize */
-		mtd->oobsize = (8 << (extid & 0x01)) * (mtd->writesize >> 9);
-		extid >>= 2;
-		/* Calc blocksize. Blocksize is multiples of 64KiB */
-		mtd->erasesize = (64 * 1024) << (extid & 0x03);
-		extid >>= 2;
-		/* Get buswidth information */
-		busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0;
+		extid = id_data[3];
 
+		/*
+		 * Field definitions are in the following datasheets:
+		 * Old style (4,5 byte ID): Samsung K9GAG08U0M (p.32)
+		 * New style   (6 byte ID): Samsung K9GAG08U0D (p.40)
+		 *
+		 * Check for wraparound + Samsung ID + nonzero 6th byte
+		 * to decide what to do.
+		 */
+		if (id_data[0] == id_data[6] && id_data[1] == id_data[7] &&
+				id_data[0] == NAND_MFR_SAMSUNG &&
+				id_data[5] != 0x00) {
+			/* Calc pagesize */
+			mtd->writesize = 2048 << (extid & 0x03);
+			extid >>= 2;
+			/* Calc oobsize */
+			mtd->oobsize = (extid & 0x03) == 0x01 ? 128 : 218;
+			extid >>= 2;
+			/* Calc blocksize */
+			mtd->erasesize = (128 * 1024) <<
+				(((extid >> 1) & 0x04) | (extid & 0x03));
+			busw = 0;
+		} else {
+			/* Calc pagesize */
+			mtd->writesize = 1024 << (extid & 0x03);
+			extid >>= 2;
+			/* Calc oobsize */
+			mtd->oobsize = (8 << (extid & 0x01)) *
+				(mtd->writesize >> 9);
+			extid >>= 2;
+			/* Calc blocksize. Blocksize is multiples of 64KiB */
+			mtd->erasesize = (64 * 1024) << (extid & 0x03);
+			extid >>= 2;
+			/* Get buswidth information */
+			busw = (extid & 0x01) ? NAND_BUSWIDTH_16 : 0;
+		}
 	} else {
 		/*
 		 * Old devices have chip data hardcoded in the device id table
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 8bdacb885f90..50f3aa00a452 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -53,7 +53,7 @@ extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len);
  * is supported now. If you add a chip with bigger oobsize/page
  * adjust this accordingly.
  */
-#define NAND_MAX_OOBSIZE	128
+#define NAND_MAX_OOBSIZE	256
 #define NAND_MAX_PAGESIZE	4096
 
 /*
-- 
cgit v1.2.3


From b60b08b02ca8d9575985ae6711bd656dd67e9039 Mon Sep 17 00:00:00 2001
From: Kevin Cernekee <cernekee@gmail.com>
Date: Tue, 4 May 2010 20:58:10 -0700
Subject: mtd: nand: support alternate BB marker locations on MLC

This is a slightly modified version of a patch submitted last year by
Reuben Dowle <reuben.dowle@navico.com>.  His original comments follow:

This patch adds support for some MLC NAND flashes that place the BB
marker in the LAST page of the bad block rather than the FIRST page used
for SLC NAND and other types of MLC nand.

Lifted from Samsung datasheet for K9LG8G08U0A (1Gbyte MLC NAND):
"
Identifying Initial Invalid Block(s)
All device locations are erased(FFh) except locations where the initial
invalid block(s) information is written prior to shipping. The initial
invalid block(s) status is defined by the 1st byte in the spare area.
Samsung makes sure that the last page of every initial invalid block has
non-FFh data at the column address of 2,048.
...
"

As far as I can tell, this is the same for all Samsung MLC nand, and in
fact the samsung bsp for the processor used in our project (s3c6410)
actually contained a hack similar to this patch but less portable to
enable use of their NAND parts. I discovered this problem when trying to
use a Micron NAND which does not use this layout - I wish samsung would
put their stuff in main-line to avoid this type of problem.

Currently this patch causes all MLC nand with manufacturer codes from
Samsung and ST(Numonyx) to use this alternative location, since these
are the manufacturers that I know of that use this layout.

Signed-off-by: Kevin Cernekee <cernekee@gmail.com>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
---
 drivers/mtd/nand/nand_base.c | 15 +++++++++++++++
 drivers/mtd/nand/nand_bbt.c  |  3 +++
 include/linux/mtd/nand.h     |  2 ++
 3 files changed, 20 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c
index 85891dcc27ad..4a7b86423ee9 100644
--- a/drivers/mtd/nand/nand_base.c
+++ b/drivers/mtd/nand/nand_base.c
@@ -347,6 +347,9 @@ static int nand_block_bad(struct mtd_info *mtd, loff_t ofs, int getchip)
 	struct nand_chip *chip = mtd->priv;
 	u16 bad;
 
+	if (chip->options & NAND_BB_LAST_PAGE)
+		ofs += mtd->erasesize - mtd->writesize;
+
 	page = (int)(ofs >> chip->page_shift) & chip->pagemask;
 
 	if (getchip) {
@@ -396,6 +399,9 @@ static int nand_default_block_markbad(struct mtd_info *mtd, loff_t ofs)
 	uint8_t buf[2] = { 0, 0 };
 	int block, ret;
 
+	if (chip->options & NAND_BB_LAST_PAGE)
+		ofs += mtd->erasesize - mtd->writesize;
+
 	/* Get block number */
 	block = (int)(ofs >> chip->bbt_erase_shift);
 	if (chip->bbt)
@@ -2933,6 +2939,15 @@ static struct nand_flash_dev *nand_get_flash_type(struct mtd_info *mtd,
 	if (*maf_id != NAND_MFR_SAMSUNG && !type->pagesize)
 		chip->options &= ~NAND_SAMSUNG_LP_OPTIONS;
 
+	/*
+	 * Bad block marker is stored in the last page of each block
+	 * on Samsung and Hynix MLC devices
+	 */
+	if ((chip->cellinfo & NAND_CI_CELLTYPE_MSK) &&
+			(*maf_id == NAND_MFR_SAMSUNG ||
+			 *maf_id == NAND_MFR_HYNIX))
+		chip->options |= NAND_BB_LAST_PAGE;
+
 	/* Check for AND chips with 4 page planes */
 	if (chip->options & NAND_4PAGE_ARRAY)
 		chip->erase_cmd = multi_erase_cmd;
diff --git a/drivers/mtd/nand/nand_bbt.c b/drivers/mtd/nand/nand_bbt.c
index 387c45c366fe..ad97c0ce73b2 100644
--- a/drivers/mtd/nand/nand_bbt.c
+++ b/drivers/mtd/nand/nand_bbt.c
@@ -432,6 +432,9 @@ static int create_bbt(struct mtd_info *mtd, uint8_t *buf,
 		from = (loff_t)startblock << (this->bbt_erase_shift - 1);
 	}
 
+	if (this->options & NAND_BB_LAST_PAGE)
+		from += mtd->erasesize - (mtd->writesize * len);
+
 	for (i = startblock; i < numblocks;) {
 		int ret;
 
diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 50f3aa00a452..a81b185e23a7 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -181,6 +181,8 @@ typedef enum {
 #define NAND_NO_READRDY		0x00000100
 /* Chip does not allow subpage writes */
 #define NAND_NO_SUBPAGE_WRITE	0x00000200
+/* Chip stores bad block marker on the last page of the eraseblock */
+#define NAND_BB_LAST_PAGE	0x00000400
 
 /* Device is one of 'new' xD cards that expose fake nand command set */
 #define NAND_BROKEN_XD		0x00000400
-- 
cgit v1.2.3


From 725f2865d4df31ac0768b13ae763beadc4bb8ce9 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:46 -0400
Subject: gss_krb5: Introduce encryption type framework

Make the client and server code consistent regarding the extra buffer
space made available for the auth code when wrapping data.

Add some comments/documentation about the available buffer space
in the xdr_buf head and tail when gss_wrap is called.

Add a compile-time check to make sure we are not exceeding the available
buffer space.

Add a central function to shift head data.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       | 25 +++++++++++++++++++++++
 net/sunrpc/auth_gss/auth_gss.c        |  2 +-
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 38 +++++++++++++++++++++++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |  6 ++----
 4 files changed, 66 insertions(+), 5 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index e7bbdba474d5..31bb8a538bf1 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -40,6 +40,12 @@
 #include <linux/sunrpc/gss_err.h>
 #include <linux/sunrpc/gss_asn1.h>
 
+/* Maximum checksum function output for the supported crypto algorithms */
+#define GSS_KRB5_MAX_CKSUM_LEN  (20)
+
+/* Maximum blocksize for the supported crypto algorithms */
+#define GSS_KRB5_MAX_BLOCKSIZE  (16)
+
 struct krb5_ctx {
 	int			initiate; /* 1 = initiating, 0 = accepting */
 	struct crypto_blkcipher	*enc;
@@ -113,6 +119,22 @@ enum seal_alg {
 #define ENCTYPE_DES3_CBC_SHA1   0x0010
 #define ENCTYPE_UNKNOWN         0x01ff
 
+/*
+ * This compile-time check verifies that we will not exceed the
+ * slack space allotted by the client and server auth_gss code
+ * before they call gss_wrap().
+ */
+#define GSS_KRB5_MAX_SLACK_NEEDED \
+	(GSS_KRB5_TOK_HDR_LEN     /* gss token header */         \
+	+ GSS_KRB5_MAX_CKSUM_LEN  /* gss token checksum */       \
+	+ GSS_KRB5_MAX_BLOCKSIZE  /* confounder */               \
+	+ GSS_KRB5_MAX_BLOCKSIZE  /* possible padding */         \
+	+ GSS_KRB5_TOK_HDR_LEN    /* encrypted hdr in v2 token */\
+	+ GSS_KRB5_MAX_CKSUM_LEN  /* encryption hmac */          \
+	+ 4 + 4                   /* RPC verifier */             \
+	+ GSS_KRB5_TOK_HDR_LEN                                   \
+	+ GSS_KRB5_MAX_CKSUM_LEN)
+
 s32
 make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
 		   int body_offset, struct xdr_netobj *cksum);
@@ -157,3 +179,6 @@ s32
 krb5_get_seq_num(struct crypto_blkcipher *key,
 	       unsigned char *cksum,
 	       unsigned char *buf, int *direction, u32 *seqnum);
+
+int
+xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index c389ccf6437d..75602ece58eb 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -61,7 +61,7 @@ static const struct rpc_credops gss_nullops;
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
-#define GSS_CRED_SLACK		1024
+#define GSS_CRED_SLACK		(RPC_MAX_AUTH_SIZE * 2)
 /* length of a krb5 verifier (48), plus data added before arguments when
  * using integrity (two 4-byte integers): */
 #define GSS_VERF_SLACK		100
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index e9b636176687..746b3e139aed 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -325,3 +325,41 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
 
 	return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
 }
+
+/*
+ * This function makes the assumption that it was ultimately called
+ * from gss_wrap().
+ *
+ * The client auth_gss code moves any existing tail data into a
+ * separate page before calling gss_wrap.
+ * The server svcauth_gss code ensures that both the head and the
+ * tail have slack space of RPC_MAX_AUTH_SIZE before calling gss_wrap.
+ *
+ * Even with that guarantee, this function may be called more than
+ * once in the processing of gss_wrap().  The best we can do is
+ * verify at compile-time (see GSS_KRB5_MAX_SLACK_NEEDED) that the
+ * largest expected shift will fit within RPC_MAX_AUTH_SIZE.
+ * At run-time we can verify that a single invocation of this
+ * function doesn't attempt to use more than RPC_MAX_AUTH_SIZE.
+ */
+
+int
+xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
+{
+	u8 *p;
+
+	if (shiftlen == 0)
+		return 0;
+
+	BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
+	BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
+
+	p = buf->head[0].iov_base + base;
+
+	memmove(p + shiftlen, p, buf->head[0].iov_len - base);
+
+	buf->head[0].iov_len += shiftlen;
+	buf->len += shiftlen;
+
+	return 0;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index a6e905637e03..496281fabb91 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -155,11 +155,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 
 	ptr = buf->head[0].iov_base + offset;
 	/* shift data to make room for header. */
+	xdr_extend_head(buf, offset, headlen);
+
 	/* XXX Would be cleverer to encrypt while copying. */
-	/* XXX bounds checking, slack, etc. */
-	memmove(ptr + headlen, ptr, buf->head[0].iov_len - offset);
-	buf->head[0].iov_len += headlen;
-	buf->len += headlen;
 	BUG_ON((buf->len - offset - headlen) % blocksize);
 
 	g_make_token_header(&kctx->mech_used,
-- 
cgit v1.2.3


From 1ac3719a2214c545c7e19d34e272a148ca9a24f1 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:49 -0400
Subject: gss_krb5: split up functions in preparation of adding new enctypes

Add encryption type to the krb5 context structure and use it to switch
to the correct functions depending on the encryption type.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  1 +
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  1 +
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 20 +++++++++++++++---
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 21 ++++++++++++++++---
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 38 +++++++++++++++++++++++++++++------
 5 files changed, 69 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 31bb8a538bf1..5378e455482c 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -48,6 +48,7 @@
 
 struct krb5_ctx {
 	int			initiate; /* 1 = initiating, 0 = accepting */
+	u32			enctype;
 	struct crypto_blkcipher	*enc;
 	struct crypto_blkcipher	*seq;
 	s32			endtime;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 2deb0ed72ff4..0cd940e897ed 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -139,6 +139,7 @@ gss_import_sec_context_kerberos(const void *p,
 	p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
 	if (IS_ERR(p))
 		goto out_err_free_ctx;
+	ctx->enctype = ENCTYPE_DES_CBC_RAW;
 	/* The downcall format was designed before we completely understood
 	 * the uses of the context fields; so it includes some stuff we
 	 * just give some minimal sanity-checking, and some we ignore
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 88fe6e75ed7e..71c2014e7ebf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -70,11 +70,10 @@
 
 DEFINE_SPINLOCK(krb5_seq_lock);
 
-u32
-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
+static u32
+gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
 {
-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
 	char			cksumdata[16];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
 	unsigned char		*ptr, *msg_start;
@@ -120,3 +119,18 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 
 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
+
+u32
+gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
+		     struct xdr_netobj *token)
+{
+	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
+
+	switch (ctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+		return gss_get_mic_v1(ctx, text, token);
+	}
+}
+
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index ce6c247edad0..069d4b59807a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -70,11 +70,10 @@
 /* read_token is a mic token, and message_buffer is the data that the mic was
  * supposedly taken over. */
 
-u32
-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
+static u32
+gss_verify_mic_v1(struct krb5_ctx *ctx,
 		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
 {
-	struct krb5_ctx		*ctx = gss_ctx->internal_ctx_id;
 	int			signalg;
 	int			sealalg;
 	char			cksumdata[16];
@@ -135,3 +134,19 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 
 	return GSS_S_COMPLETE;
 }
+
+u32
+gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
+			struct xdr_buf *message_buffer,
+			struct xdr_netobj *read_token)
+{
+	struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
+
+	switch (ctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+		return gss_verify_mic_v1(ctx, message_buffer, read_token);
+	}
+}
+
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 5d6c3b12ea70..b45b59b17ae1 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -124,11 +124,10 @@ make_confounder(char *p, u32 conflen)
 
 /* XXX factor out common code with seal/unseal. */
 
-u32
-gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
+static u32
+gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 		struct xdr_buf *buf, struct page **pages)
 {
-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
 	char			cksumdata[16];
 	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
 	int			blocksize = 0, plainlen;
@@ -203,10 +202,9 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
 
-u32
-gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
+static u32
+gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 {
-	struct krb5_ctx		*kctx = ctx->internal_ctx_id;
 	int			signalg;
 	int			sealalg;
 	char			cksumdata[16];
@@ -294,3 +292,31 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
 
 	return GSS_S_COMPLETE;
 }
+
+u32
+gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
+		  struct xdr_buf *buf, struct page **pages)
+{
+	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
+
+	switch (kctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
+	}
+}
+
+u32
+gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
+{
+	struct krb5_ctx	*kctx = gctx->internal_ctx_id;
+
+	switch (kctx->enctype) {
+	default:
+		BUG();
+	case ENCTYPE_DES_CBC_RAW:
+		return gss_unwrap_kerberos_v1(kctx, offset, buf);
+	}
+}
+
-- 
cgit v1.2.3


From 81d4a4333a1dfd6070f046265d928bb4c79aff88 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:51 -0400
Subject: gss_krb5: introduce encryption type framework

Add enctype framework and change functions to use the generic
values from it rather than the values hard-coded for des.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       | 25 +++++++++-
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 18 +++----
 net/sunrpc/auth_gss/gss_krb5_mech.c   | 90 ++++++++++++++++++++++++++++-------
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 49 +++++++++++--------
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 15 +++---
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 79 ++++++++++++++++++++++--------
 6 files changed, 206 insertions(+), 70 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 5378e455482c..f94935599d13 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -4,7 +4,7 @@
  *  Adapted from MIT Kerberos 5-1.2.1 lib/include/krb5.h,
  *  lib/gssapi/krb5/gssapiP_krb5.h, and others
  *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  Andy Adamson   <andros@umich.edu>
@@ -36,6 +36,7 @@
  *
  */
 
+#include <linux/crypto.h>
 #include <linux/sunrpc/auth_gss.h>
 #include <linux/sunrpc/gss_err.h>
 #include <linux/sunrpc/gss_asn1.h>
@@ -46,9 +47,31 @@
 /* Maximum blocksize for the supported crypto algorithms */
 #define GSS_KRB5_MAX_BLOCKSIZE  (16)
 
+struct gss_krb5_enctype {
+	const u32		etype;		/* encryption (key) type */
+	const u32		ctype;		/* checksum type */
+	const char		*name;		/* "friendly" name */
+	const char		*encrypt_name;	/* crypto encrypt name */
+	const char		*cksum_name;	/* crypto checksum name */
+	const u16		signalg;	/* signing algorithm */
+	const u16		sealalg;	/* sealing algorithm */
+	const u32		blocksize;	/* encryption blocksize */
+	const u32		cksumlength;	/* checksum length */
+	const u32		keyed_cksum;	/* is it a keyed cksum? */
+	const u32		keybytes;	/* raw key len, in bytes */
+	const u32		keylength;	/* final key len, in bytes */
+	u32 (*encrypt) (struct crypto_blkcipher *tfm,
+			void *iv, void *in, void *out,
+			int length);		/* encryption function */
+	u32 (*decrypt) (struct crypto_blkcipher *tfm,
+			void *iv, void *in, void *out,
+			int length);		/* decryption function */
+};
+
 struct krb5_ctx {
 	int			initiate; /* 1 = initiating, 0 = accepting */
 	u32			enctype;
+	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
 	struct crypto_blkcipher	*enc;
 	struct crypto_blkcipher	*seq;
 	s32			endtime;
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 746b3e139aed..ccd5236953f7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -1,7 +1,7 @@
 /*
  *  linux/net/sunrpc/gss_krb5_crypto.c
  *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  Andy Adamson   <andros@umich.edu>
@@ -58,13 +58,13 @@ krb5_encrypt(
 {
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
-	u8 local_iv[16] = {0};
+	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_blkcipher_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5encrypt: tfm iv size too large %d\n",
 			crypto_blkcipher_ivsize(tfm));
 		goto out;
@@ -92,13 +92,13 @@ krb5_decrypt(
 {
 	u32 ret = -EINVAL;
 	struct scatterlist sg[1];
-	u8 local_iv[16] = {0};
+	u8 local_iv[GSS_KRB5_MAX_BLOCKSIZE] = {0};
 	struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
 	if (length % crypto_blkcipher_blocksize(tfm) != 0)
 		goto out;
 
-	if (crypto_blkcipher_ivsize(tfm) > 16) {
+	if (crypto_blkcipher_ivsize(tfm) > GSS_KRB5_MAX_BLOCKSIZE) {
 		dprintk("RPC:       gss_k5decrypt: tfm iv size too large %d\n",
 			crypto_blkcipher_ivsize(tfm));
 		goto out;
@@ -157,7 +157,7 @@ out:
 }
 
 struct encryptor_desc {
-	u8 iv[8]; /* XXX hard-coded blocksize */
+	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
 	struct blkcipher_desc desc;
 	int pos;
 	struct xdr_buf *outbuf;
@@ -198,7 +198,7 @@ encryptor(struct scatterlist *sg, void *data)
 	desc->fraglen += sg->length;
 	desc->pos += sg->length;
 
-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
@@ -256,7 +256,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
 }
 
 struct decryptor_desc {
-	u8 iv[8]; /* XXX hard-coded blocksize */
+	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
 	struct blkcipher_desc desc;
 	struct scatterlist frags[4];
 	int fragno;
@@ -278,7 +278,7 @@ decryptor(struct scatterlist *sg, void *data)
 	desc->fragno++;
 	desc->fraglen += sg->length;
 
-	fraglen = thislen & 7; /* XXX hardcoded blocksize */
+	fraglen = thislen & (crypto_blkcipher_blocksize(desc->desc.tfm) - 1);
 	thislen -= fraglen;
 
 	if (thislen == 0)
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index afe09108e1b0..a66eb706aeb7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -1,7 +1,7 @@
 /*
  *  linux/net/sunrpc/gss_krb5_mech.c
  *
- *  Copyright (c) 2001 The Regents of the University of Michigan.
+ *  Copyright (c) 2001-2008 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  Andy Adamson <andros@umich.edu>
@@ -48,6 +48,50 @@
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
+static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
+	/*
+	 * DES (All DES enctypes are mapped to the same gss functionality)
+	 */
+	{
+	  .etype = ENCTYPE_DES_CBC_RAW,
+	  .ctype = CKSUMTYPE_RSA_MD5,
+	  .name = "des-cbc-crc",
+	  .encrypt_name = "cbc(des)",
+	  .cksum_name = "md5",
+	  .encrypt = krb5_encrypt,
+	  .decrypt = krb5_decrypt,
+	  .signalg = SGN_ALG_DES_MAC_MD5,
+	  .sealalg = SEAL_ALG_DES,
+	  .keybytes = 7,
+	  .keylength = 8,
+	  .blocksize = 8,
+	  .cksumlength = 8,
+	},
+};
+
+static const int num_supported_enctypes =
+	ARRAY_SIZE(supported_gss_krb5_enctypes);
+
+static int
+supported_gss_krb5_enctype(int etype)
+{
+	int i;
+	for (i = 0; i < num_supported_enctypes; i++)
+		if (supported_gss_krb5_enctypes[i].etype == etype)
+			return 1;
+	return 0;
+}
+
+static const struct gss_krb5_enctype *
+get_gss_krb5_enctype(int etype)
+{
+	int i;
+	for (i = 0; i < num_supported_enctypes; i++)
+		if (supported_gss_krb5_enctypes[i].etype == etype)
+			return &supported_gss_krb5_enctypes[i];
+	return NULL;
+}
+
 static const void *
 simple_get_bytes(const void *p, const void *end, void *res, int len)
 {
@@ -78,35 +122,45 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 }
 
 static inline const void *
-get_key(const void *p, const void *end, struct crypto_blkcipher **res)
+get_key(const void *p, const void *end,
+	struct krb5_ctx *ctx, struct crypto_blkcipher **res)
 {
 	struct xdr_netobj	key;
 	int			alg;
-	char			*alg_name;
 
 	p = simple_get_bytes(p, end, &alg, sizeof(alg));
 	if (IS_ERR(p))
 		goto out_err;
+
+	switch (alg) {
+	case ENCTYPE_DES_CBC_CRC:
+	case ENCTYPE_DES_CBC_MD4:
+	case ENCTYPE_DES_CBC_MD5:
+		/* Map all these key types to ENCTYPE_DES_CBC_RAW */
+		alg = ENCTYPE_DES_CBC_RAW;
+		break;
+	}
+
+	if (!supported_gss_krb5_enctype(alg)) {
+		printk(KERN_WARNING "gss_kerberos_mech: unsupported "
+			"encryption key algorithm %d\n", alg);
+		goto out_err;
+	}
 	p = simple_get_netobj(p, end, &key);
 	if (IS_ERR(p))
 		goto out_err;
 
-	switch (alg) {
-		case ENCTYPE_DES_CBC_RAW:
-			alg_name = "cbc(des)";
-			break;
-		default:
-			printk("gss_kerberos_mech: unsupported algorithm %d\n", alg);
-			goto out_err_free_key;
-	}
-	*res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
+	*res = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
+							CRYPTO_ALG_ASYNC);
 	if (IS_ERR(*res)) {
-		printk("gss_kerberos_mech: unable to initialize crypto algorithm %s\n", alg_name);
+		printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
+			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		*res = NULL;
 		goto out_err_free_key;
 	}
 	if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
-		printk("gss_kerberos_mech: error setting key for crypto algorithm %s\n", alg_name);
+		printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
+			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
 		goto out_err_free_tfm;
 	}
 
@@ -134,6 +188,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	/* Old format supports only DES!  Any other enctype uses new format */
 	ctx->enctype = ENCTYPE_DES_CBC_RAW;
 
+	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
+	if (ctx->gk5e == NULL)
+		goto out_err;
+
 	/* The downcall format was designed before we completely understood
 	 * the uses of the context fields; so it includes some stuff we
 	 * just give some minimal sanity-checking, and some we ignore
@@ -164,10 +222,10 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	p = simple_get_netobj(p, end, &ctx->mech_used);
 	if (IS_ERR(p))
 		goto out_err;
-	p = get_key(p, end, &ctx->enc);
+	p = get_key(p, end, ctx, &ctx->enc);
 	if (IS_ERR(p))
 		goto out_err_free_mech;
-	p = get_key(p, end, &ctx->seq);
+	p = get_key(p, end, ctx, &ctx->seq);
 	if (IS_ERR(p))
 		goto out_err_free_key1;
 	if (p != end) {
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 71c2014e7ebf..46c6f44e5c3f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -3,7 +3,7 @@
  *
  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5seal.c
  *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  Andy Adamson	<andros@umich.edu>
@@ -70,36 +70,47 @@
 
 DEFINE_SPINLOCK(krb5_seq_lock);
 
+static char *
+setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
+{
+	__be16 *ptr, *krb5_hdr;
+	int body_size = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
+
+	token->len = g_token_size(&ctx->mech_used, body_size);
+
+	ptr = (__be16 *)token->data;
+	g_make_token_header(&ctx->mech_used, body_size, (unsigned char **)&ptr);
+
+	/* ptr now at start of header described in rfc 1964, section 1.2.1: */
+	krb5_hdr = ptr;
+	*ptr++ = KG_TOK_MIC_MSG;
+	*ptr++ = cpu_to_le16(ctx->gk5e->signalg);
+	*ptr++ = SEAL_ALG_NONE;
+	*ptr++ = 0xffff;
+
+	return (char *)krb5_hdr;
+}
+
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
 {
-	char			cksumdata[16];
-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
-	unsigned char		*ptr, *msg_start;
+	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
+					    .data = cksumdata};
+	void			*ptr;
 	s32			now;
 	u32			seq_send;
 
-	dprintk("RPC:       gss_krb5_seal\n");
+	dprintk("RPC:       %s\n", __func__);
 	BUG_ON(ctx == NULL);
 
 	now = get_seconds();
 
-	token->len = g_token_size(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8);
-
-	ptr = token->data;
-	g_make_token_header(&ctx->mech_used, GSS_KRB5_TOK_HDR_LEN + 8, &ptr);
-
-	/* ptr now at header described in rfc 1964, section 1.2.1: */
-	ptr[0] = (unsigned char) ((KG_TOK_MIC_MSG >> 8) & 0xff);
-	ptr[1] = (unsigned char) (KG_TOK_MIC_MSG & 0xff);
-
-	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8;
-
-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
-	memset(ptr + 4, 0xff, 4);
+	ptr = setup_token(ctx, token);
 
-	if (make_checksum("md5", ptr, 8, text, 0, &md5cksum))
+	if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8,
+						text, 0, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 069d4b59807a..10ee641a39d0 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -3,7 +3,7 @@
  *
  *  Adapted from MIT Kerberos 5-1.2.1 lib/gssapi/krb5/k5unseal.c
  *
- *  Copyright (c) 2000 The Regents of the University of Michigan.
+ *  Copyright (c) 2000-2008 The Regents of the University of Michigan.
  *  All rights reserved.
  *
  *  Andy Adamson   <andros@umich.edu>
@@ -76,8 +76,9 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 {
 	int			signalg;
 	int			sealalg;
-	char			cksumdata[16];
-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
+					    .data = cksumdata};
 	s32			now;
 	int			direction;
 	u32			seqnum;
@@ -97,7 +98,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 	/* XXX sanity-check bodysize?? */
 
 	signalg = ptr[2] + (ptr[3] << 8);
-	if (signalg != SGN_ALG_DES_MAC_MD5)
+	if (signalg != ctx->gk5e->signalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	sealalg = ptr[4] + (ptr[5] << 8);
@@ -107,13 +108,15 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum("md5", ptr, 8, message_buffer, 0, &md5cksum))
+	if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8,
+					message_buffer, 0, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
+	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN,
+					ctx->gk5e->cksumlength))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed.  Make sure the context is unexpired */
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index b45b59b17ae1..7188891bcc33 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -1,3 +1,33 @@
+/*
+ * COPYRIGHT (c) 2008
+ * The Regents of the University of Michigan
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of The University of
+ * Michigan is not used in any advertising or publicity
+ * pertaining to the use of distribution of this software
+ * without specific, written prior authorization.  If the
+ * above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
+ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
+ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
+ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
+ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
+ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
+ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES.
+ */
+
 #include <linux/types.h>
 #include <linux/jiffies.h>
 #include <linux/sunrpc/gss_krb5.h>
@@ -128,8 +158,9 @@ static u32
 gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 		struct xdr_buf *buf, struct page **pages)
 {
-	char			cksumdata[16];
-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
+					    .data = cksumdata};
 	int			blocksize = 0, plainlen;
 	unsigned char		*ptr, *msg_start;
 	s32			now;
@@ -137,7 +168,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	struct page		**tmp_pages;
 	u32			seq_send;
 
-	dprintk("RPC:       gss_wrap_kerberos\n");
+	dprintk("RPC:       %s\n", __func__);
 
 	now = get_seconds();
 
@@ -146,8 +177,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	BUG_ON((buf->len - offset) % blocksize);
 	plainlen = blocksize + buf->len - offset;
 
-	headlen = g_token_size(&kctx->mech_used, 24 + plainlen) -
-						(buf->len - offset);
+	headlen = g_token_size(&kctx->mech_used,
+		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
+		(buf->len - offset);
 
 	ptr = buf->head[0].iov_base + offset;
 	/* shift data to make room for header. */
@@ -157,25 +189,26 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	BUG_ON((buf->len - offset - headlen) % blocksize);
 
 	g_make_token_header(&kctx->mech_used,
-				GSS_KRB5_TOK_HDR_LEN + 8 + plainlen, &ptr);
+				GSS_KRB5_TOK_HDR_LEN +
+				kctx->gk5e->cksumlength + plainlen, &ptr);
 
 
 	/* ptr now at header described in rfc 1964, section 1.2.1: */
 	ptr[0] = (unsigned char) ((KG_TOK_WRAP_MSG >> 8) & 0xff);
 	ptr[1] = (unsigned char) (KG_TOK_WRAP_MSG & 0xff);
 
-	msg_start = ptr + 24;
+	msg_start = ptr + GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength;
 
-	*(__be16 *)(ptr + 2) = htons(SGN_ALG_DES_MAC_MD5);
+	*(__be16 *)(ptr + 2) = cpu_to_le16(kctx->gk5e->signalg);
 	memset(ptr + 4, 0xff, 4);
-	*(__be16 *)(ptr + 4) = htons(SEAL_ALG_DES);
+	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
 
 	make_confounder(msg_start, blocksize);
 
 	/* XXXJBF: UGH!: */
 	tmp_pages = buf->pages;
 	buf->pages = pages;
-	if (make_checksum("md5", ptr, 8, buf,
+	if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf,
 				offset + headlen - blocksize, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
@@ -207,8 +240,9 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 {
 	int			signalg;
 	int			sealalg;
-	char			cksumdata[16];
-	struct xdr_netobj	md5cksum = {.len = 0, .data = cksumdata};
+	char			cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj	md5cksum = {.len = sizeof(cksumdata),
+					    .data = cksumdata};
 	s32			now;
 	int			direction;
 	s32			seqnum;
@@ -217,6 +251,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	void			*data_start, *orig_start;
 	int			data_len;
 	int			blocksize;
+	int			crypt_offset;
 
 	dprintk("RPC:       gss_unwrap_kerberos\n");
 
@@ -234,22 +269,27 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	/* get the sign and seal algorithms */
 
 	signalg = ptr[2] + (ptr[3] << 8);
-	if (signalg != SGN_ALG_DES_MAC_MD5)
+	if (signalg != kctx->gk5e->signalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	sealalg = ptr[4] + (ptr[5] << 8);
-	if (sealalg != SEAL_ALG_DES)
+	if (sealalg != kctx->gk5e->sealalg)
 		return GSS_S_DEFECTIVE_TOKEN;
 
 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (gss_decrypt_xdr_buf(kctx->enc, buf,
-			ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base))
+	/*
+	 * Data starts after token header and checksum.  ptr points
+	 * to the beginning of the token header
+	 */
+	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
+					(unsigned char *)buf->head[0].iov_base;
+	if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum("md5", ptr, 8, buf,
-		 ptr + GSS_KRB5_TOK_HDR_LEN + 8 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+	if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf,
+						crypt_offset, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
@@ -280,7 +320,8 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	 * better to copy and encrypt at the same time. */
 
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
-	data_start = ptr + GSS_KRB5_TOK_HDR_LEN + 8 + blocksize;
+	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
+					blocksize;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
 	memmove(orig_start, data_start, data_len);
-- 
cgit v1.2.3


From e1f6c07b1160ef28e8754d12e6c03288dd9d5ca8 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:52 -0400
Subject: gss_krb5: add ability to have a keyed checksum (hmac)

Encryption types besides DES may use a keyed checksum (hmac).
Modify the make_checksum() function to allow for a key
and take care of enctype-specific processing such as truncating
the resulting hash.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       | 11 +++++--
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 54 +++++++++++++++++++++++++++++------
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  1 +
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 13 +++++----
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 13 +++++----
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 30 +++++++++++--------
 6 files changed, 88 insertions(+), 34 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index f94935599d13..abf26efd44ac 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -41,6 +41,9 @@
 #include <linux/sunrpc/gss_err.h>
 #include <linux/sunrpc/gss_asn1.h>
 
+/* Maximum key length (in bytes) for the supported crypto algorithms*/
+#define GSS_KRB5_MAX_KEYLEN (32)
+
 /* Maximum checksum function output for the supported crypto algorithms */
 #define GSS_KRB5_MAX_CKSUM_LEN  (20)
 
@@ -74,6 +77,7 @@ struct krb5_ctx {
 	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
 	struct crypto_blkcipher	*enc;
 	struct crypto_blkcipher	*seq;
+	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
 	u32			seq_send;
 	struct xdr_netobj	mech_used;
@@ -159,9 +163,10 @@ enum seal_alg {
 	+ GSS_KRB5_TOK_HDR_LEN                                   \
 	+ GSS_KRB5_MAX_CKSUM_LEN)
 
-s32
-make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
-		   int body_offset, struct xdr_netobj *cksum);
+u32
+make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
+		struct xdr_buf *body, int body_offset, u8 *cksumkey,
+		struct xdr_netobj *cksumout);
 
 u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
 		struct xdr_netobj *);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index ccd5236953f7..cae04d7a45a5 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -123,21 +123,42 @@ checksummer(struct scatterlist *sg, void *data)
 	return crypto_hash_update(desc, sg, sg->length);
 }
 
-/* checksum the plaintext data and hdrlen bytes of the token header */
-s32
-make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
-		   int body_offset, struct xdr_netobj *cksum)
+/*
+ * checksum the plaintext data and hdrlen bytes of the token header
+ * The checksum is performed over the first 8 bytes of the
+ * gss token header and then over the data body
+ */
+u32
+make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
+	      struct xdr_buf *body, int body_offset, u8 *cksumkey,
+	      struct xdr_netobj *cksumout)
 {
-	struct hash_desc                desc; /* XXX add to ctx? */
+	struct hash_desc                desc;
 	struct scatterlist              sg[1];
 	int err;
+	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	unsigned int checksumlen;
+
+	if (cksumout->len < kctx->gk5e->cksumlength) {
+		dprintk("%s: checksum buffer length, %u, too small for %s\n",
+			__func__, cksumout->len, kctx->gk5e->name);
+		return GSS_S_FAILURE;
+	}
 
-	desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(desc.tfm))
 		return GSS_S_FAILURE;
-	cksum->len = crypto_hash_digestsize(desc.tfm);
 	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
 
+	checksumlen = crypto_hash_digestsize(desc.tfm);
+
+	if (cksumkey != NULL) {
+		err = crypto_hash_setkey(desc.tfm, cksumkey,
+					 kctx->gk5e->keylength);
+		if (err)
+			goto out;
+	}
+
 	err = crypto_hash_init(&desc);
 	if (err)
 		goto out;
@@ -149,8 +170,25 @@ make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
 			      checksummer, &desc);
 	if (err)
 		goto out;
-	err = crypto_hash_final(&desc, cksum->data);
+	err = crypto_hash_final(&desc, checksumdata);
+	if (err)
+		goto out;
 
+	switch (kctx->gk5e->ctype) {
+	case CKSUMTYPE_RSA_MD5:
+		err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
+					  checksumdata, checksumlen);
+		if (err)
+			goto out;
+		memcpy(cksumout->data,
+		       checksumdata + checksumlen - kctx->gk5e->cksumlength,
+		       kctx->gk5e->cksumlength);
+		break;
+	default:
+		BUG();
+		break;
+	}
+	cksumout->len = kctx->gk5e->cksumlength;
 out:
 	crypto_free_hash(desc.tfm);
 	return err ? GSS_S_FAILURE : 0;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index a66eb706aeb7..6f93f4752be4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -66,6 +66,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .keylength = 8,
 	  .blocksize = 8,
 	  .cksumlength = 8,
+	  .keyed_cksum = 0,
 	},
 };
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 46c6f44e5c3f..cd512719092b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -101,6 +101,7 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 	void			*ptr;
 	s32			now;
 	u32			seq_send;
+	u8			*cksumkey;
 
 	dprintk("RPC:       %s\n", __func__);
 	BUG_ON(ctx == NULL);
@@ -109,15 +110,15 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 
 	ptr = setup_token(ctx, token);
 
-	if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8,
-						text, 0, &md5cksum))
-		return GSS_S_FAILURE;
+	if (ctx->gk5e->keyed_cksum)
+		cksumkey = ctx->cksum;
+	else
+		cksumkey = NULL;
 
-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
-			  md5cksum.data, md5cksum.len))
+	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, &md5cksum))
 		return GSS_S_FAILURE;
 
-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
 	spin_lock(&krb5_seq_lock);
 	seq_send = ctx->seq_send++;
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 10ee641a39d0..7515bffddf15 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -84,6 +84,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 	u32			seqnum;
 	unsigned char		*ptr = (unsigned char *)read_token->data;
 	int			bodysize;
+	u8			*cksumkey;
 
 	dprintk("RPC:       krb5_read_token\n");
 
@@ -108,14 +109,16 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 	if ((ptr[6] != 0xff) || (ptr[7] != 0xff))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum((char *)ctx->gk5e->cksum_name, ptr, 8,
-					message_buffer, 0, &md5cksum))
-		return GSS_S_FAILURE;
+	if (ctx->gk5e->keyed_cksum)
+		cksumkey = ctx->cksum;
+	else
+		cksumkey = NULL;
 
-	if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
+	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
+			  cksumkey, &md5cksum))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN,
+	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
 					ctx->gk5e->cksumlength))
 		return GSS_S_BAD_SIG;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 7188891bcc33..2eb3046a84ea 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -167,6 +167,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	int			headlen;
 	struct page		**tmp_pages;
 	u32			seq_send;
+	u8			*cksumkey;
 
 	dprintk("RPC:       %s\n", __func__);
 
@@ -205,18 +206,20 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	make_confounder(msg_start, blocksize);
 
+	if (kctx->gk5e->keyed_cksum)
+		cksumkey = kctx->cksum;
+	else
+		cksumkey = NULL;
+
 	/* XXXJBF: UGH!: */
 	tmp_pages = buf->pages;
 	buf->pages = pages;
-	if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf,
-				offset + headlen - blocksize, &md5cksum))
+	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize,
+					cksumkey, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
 
-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-			  md5cksum.data, md5cksum.len))
-		return GSS_S_FAILURE;
-	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data + md5cksum.len - 8, 8);
+	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
 
 	spin_lock(&krb5_seq_lock);
 	seq_send = kctx->seq_send++;
@@ -252,6 +255,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	int			data_len;
 	int			blocksize;
 	int			crypt_offset;
+	u8			*cksumkey;
 
 	dprintk("RPC:       gss_unwrap_kerberos\n");
 
@@ -288,15 +292,17 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
 		return GSS_S_DEFECTIVE_TOKEN;
 
-	if (make_checksum((char *)kctx->gk5e->cksum_name, ptr, 8, buf,
-						crypt_offset, &md5cksum))
-		return GSS_S_FAILURE;
+	if (kctx->gk5e->keyed_cksum)
+		cksumkey = kctx->cksum;
+	else
+		cksumkey = NULL;
 
-	if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-			   md5cksum.data, md5cksum.len))
+	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
+						cksumkey, &md5cksum))
 		return GSS_S_FAILURE;
 
-	if (memcmp(md5cksum.data + 8, ptr + GSS_KRB5_TOK_HDR_LEN, 8))
+	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
+						kctx->gk5e->cksumlength))
 		return GSS_S_BAD_SIG;
 
 	/* it got through unscathed.  Make sure the context is unexpired */
-- 
cgit v1.2.3


From 4891f2d008e4343eedea39ba1fe74864f1d32be0 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:53 -0400
Subject: gss_krb5: import functionality to derive keys into the kernel

Import the code to derive Kerberos keys from a base key into the
kernel.  This will allow us to change the format of the context
information sent down from gssd to include only a single key.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h     |  31 +++++
 net/sunrpc/auth_gss/Makefile        |   2 +-
 net/sunrpc/auth_gss/gss_krb5_keys.c | 252 ++++++++++++++++++++++++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_mech.c |   1 +
 4 files changed, 285 insertions(+), 1 deletion(-)
 create mode 100644 net/sunrpc/auth_gss/gss_krb5_keys.c

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index abf26efd44ac..d31ba0304d18 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -41,6 +41,9 @@
 #include <linux/sunrpc/gss_err.h>
 #include <linux/sunrpc/gss_asn1.h>
 
+/* Length of constant used in key derivation */
+#define GSS_KRB5_K5CLENGTH (5)
+
 /* Maximum key length (in bytes) for the supported crypto algorithms*/
 #define GSS_KRB5_MAX_KEYLEN (32)
 
@@ -69,6 +72,9 @@ struct gss_krb5_enctype {
 	u32 (*decrypt) (struct crypto_blkcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* decryption function */
+	u32 (*mk_key) (struct gss_krb5_enctype *gk5e,
+		       struct xdr_netobj *in,
+		       struct xdr_netobj *out);	/* complete key generation */
 };
 
 struct krb5_ctx {
@@ -147,6 +153,25 @@ enum seal_alg {
 #define ENCTYPE_DES3_CBC_SHA1   0x0010
 #define ENCTYPE_UNKNOWN         0x01ff
 
+/*
+ * Constants used for key derivation
+ */
+/* for 3DES */
+#define KG_USAGE_SEAL (22)
+#define KG_USAGE_SIGN (23)
+#define KG_USAGE_SEQ  (24)
+
+/* from rfc3961 */
+#define KEY_USAGE_SEED_CHECKSUM         (0x99)
+#define KEY_USAGE_SEED_ENCRYPTION       (0xAA)
+#define KEY_USAGE_SEED_INTEGRITY        (0x55)
+
+/* from rfc4121 */
+#define KG_USAGE_ACCEPTOR_SEAL  (22)
+#define KG_USAGE_ACCEPTOR_SIGN  (23)
+#define KG_USAGE_INITIATOR_SEAL (24)
+#define KG_USAGE_INITIATOR_SIGN (25)
+
 /*
  * This compile-time check verifies that we will not exceed the
  * slack space allotted by the client and server auth_gss code
@@ -211,3 +236,9 @@ krb5_get_seq_num(struct crypto_blkcipher *key,
 
 int
 xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
+
+u32
+krb5_derive_key(struct gss_krb5_enctype *gk5e,
+		const struct xdr_netobj *inkey,
+		struct xdr_netobj *outkey,
+		const struct xdr_netobj *in_constant);
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 4de8bcf26fa7..74a231735f67 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -10,7 +10,7 @@ auth_rpcgss-objs := auth_gss.o gss_generic_token.o \
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
 rpcsec_gss_krb5-objs := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
-	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o
+	gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
 
 obj-$(CONFIG_RPCSEC_GSS_SPKM3) += rpcsec_gss_spkm3.o
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
new file mode 100644
index 000000000000..832ce901bf68
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -0,0 +1,252 @@
+/*
+ * COPYRIGHT (c) 2008
+ * The Regents of the University of Michigan
+ * ALL RIGHTS RESERVED
+ *
+ * Permission is granted to use, copy, create derivative works
+ * and redistribute this software and such derivative works
+ * for any purpose, so long as the name of The University of
+ * Michigan is not used in any advertising or publicity
+ * pertaining to the use of distribution of this software
+ * without specific, written prior authorization.  If the
+ * above copyright notice or any other identification of the
+ * University of Michigan is included in any copy of any
+ * portion of this software, then the disclaimer below must
+ * also be included.
+ *
+ * THIS SOFTWARE IS PROVIDED AS IS, WITHOUT REPRESENTATION
+ * FROM THE UNIVERSITY OF MICHIGAN AS TO ITS FITNESS FOR ANY
+ * PURPOSE, AND WITHOUT WARRANTY BY THE UNIVERSITY OF
+ * MICHIGAN OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING
+ * WITHOUT LIMITATION THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE
+ * REGENTS OF THE UNIVERSITY OF MICHIGAN SHALL NOT BE LIABLE
+ * FOR ANY DAMAGES, INCLUDING SPECIAL, INDIRECT, INCIDENTAL, OR
+ * CONSEQUENTIAL DAMAGES, WITH RESPECT TO ANY CLAIM ARISING
+ * OUT OF OR IN CONNECTION WITH THE USE OF THE SOFTWARE, EVEN
+ * IF IT HAS BEEN OR IS HEREAFTER ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGES.
+ */
+
+/*
+ * Copyright (C) 1998 by the FundsXpress, INC.
+ *
+ * All rights reserved.
+ *
+ * Export of this software from the United States of America may require
+ * a specific license from the United States Government.  It is the
+ * responsibility of any person or organization contemplating export to
+ * obtain such a license before exporting.
+ *
+ * WITHIN THAT CONSTRAINT, permission to use, copy, modify, and
+ * distribute this software and its documentation for any purpose and
+ * without fee is hereby granted, provided that the above copyright
+ * notice appear in all copies and that both that copyright notice and
+ * this permission notice appear in supporting documentation, and that
+ * the name of FundsXpress. not be used in advertising or publicity pertaining
+ * to distribution of the software without specific, written prior
+ * permission.  FundsXpress makes no representations about the suitability of
+ * this software for any purpose.  It is provided "as is" without express
+ * or implied warranty.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include <linux/err.h>
+#include <linux/types.h>
+#include <linux/crypto.h>
+#include <linux/sunrpc/gss_krb5.h>
+#include <linux/sunrpc/xdr.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY        RPCDBG_AUTH
+#endif
+
+/*
+ * The n-fold function described in rfc3961, sec 5.1 (taken from MIT Kerberos
+ * and modified): folds 'in' (inbits bits long) into 'out' (outbits bits long).
+ */
+
+static void krb5_nfold(u32 inbits, const u8 *in,
+		       u32 outbits, u8 *out)
+{
+	int a, b, c, lcm;
+	int byte, i, msbit;
+
+	/* from here on both lengths are counted in bytes, not bits; the
+	   shifts below discard any length that is not a whole byte */
+
+	inbits >>= 3;
+	outbits >>= 3;
+
+	/* first compute lcm(n,k): Euclid's loop leaves the gcd in a */
+
+	a = outbits;
+	b = inbits;
+
+	while (b != 0) {
+		c = b;
+		b = a%b;
+		a = c;
+	}
+
+	lcm = outbits*inbits/a;
+
+	/* now do the real work: out[] accumulates a sum, so zero it first */
+
+	memset(out, 0, outbits);
+	byte = 0;	/* running sum; also carries between output bytes */
+
+	/* this will end up cycling through k lcm(k,n)/k times, which
+	   is correct */
+	for (i = lcm-1; i >= 0; i--) {
+		/* compute the msbit in k which gets added into this byte */
+		msbit = (
+			/* first, start with the msbit in the first,
+			 * unrotated byte */
+			 ((inbits << 3) - 1)
+			 /* then, for each byte, shift to the right
+			  * for each repetition */
+			 + (((inbits << 3) + 13) * (i/inbits))
+			 /* last, pick out the correct byte within
+			  * that shifted repetition */
+			 + ((inbits - (i % inbits)) << 3)
+			 ) % (inbits << 3);
+
+		/* pull out the byte value itself: two adjacent input bytes
+		 * are joined and shifted so the wanted 8 bits end up lowest */
+		byte += (((in[((inbits - 1) - (msbit >> 3)) % inbits] << 8)|
+				  (in[((inbits) - (msbit >> 3)) % inbits]))
+				 >> ((msbit & 7) + 1)) & 0xff;
+
+		/* do the addition */
+		byte += out[i % outbits];
+		out[i % outbits] = byte & 0xff;
+
+		/* keep around the carry bit, if any */
+		byte >>= 8;
+
+	}
+
+	/* if there's a carry bit left over, add it back in */
+	if (byte) {
+		for (i = outbits - 1; i >= 0; i--) {
+			/* do the addition */
+			byte += out[i];
+			out[i] = byte & 0xff;
+
+			/* keep around the carry bit, if any */
+			byte >>= 8;
+		}
+	}
+}
+
+/*
+ * This is the DK (derive_key) function as described in rfc3961, sec 5.1
+ * Taken from MIT Kerberos and modified.
+ */
+
+u32 krb5_derive_key(struct gss_krb5_enctype *gk5e,
+		    const struct xdr_netobj *inkey,
+		    struct xdr_netobj *outkey,
+		    const struct xdr_netobj *in_constant)
+{
+	size_t blocksize, keybytes, keylength, n;
+	unsigned char *inblockdata, *outblockdata, *rawkey;
+	struct xdr_netobj inblock, outblock;
+	struct crypto_blkcipher *cipher;
+	u32 ret = EINVAL;	/* NB: positive errno values are returned */
+
+	blocksize = gk5e->blocksize;
+	keybytes = gk5e->keybytes;
+	keylength = gk5e->keylength;
+	/* both keys must be exactly gk5e->keylength bytes long */
+	if ((inkey->len != keylength) || (outkey->len != keylength))
+		goto err_return;
+
+	cipher = crypto_alloc_blkcipher(gk5e->encrypt_name, 0,
+					CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		goto err_return;
+	if (crypto_blkcipher_setkey(cipher, inkey->data, inkey->len))
+		goto err_free_cipher;	/* cipher exists by now: release it */
+
+	/* allocate and set up buffers */
+
+	ret = ENOMEM;
+	inblockdata = kmalloc(blocksize, GFP_KERNEL);
+	if (inblockdata == NULL)
+		goto err_free_cipher;
+
+	outblockdata = kmalloc(blocksize, GFP_KERNEL);
+	if (outblockdata == NULL)
+		goto err_free_in;
+
+	rawkey = kmalloc(keybytes, GFP_KERNEL);
+	if (rawkey == NULL)
+		goto err_free_out;
+
+	inblock.data = (char *) inblockdata;
+	inblock.len = blocksize;
+
+	outblock.data = (char *) outblockdata;
+	outblock.len = blocksize;
+
+	/* initialize the input block, n-folding the constant if needed */
+
+	if (in_constant->len == inblock.len) {
+		memcpy(inblock.data, in_constant->data, inblock.len);
+	} else {
+		krb5_nfold(in_constant->len * 8, in_constant->data,
+			   inblock.len * 8, inblock.data);
+	}
+
+	/* loop encrypting the blocks until enough key bytes are generated */
+
+	n = 0;
+	while (n < keybytes) {
+		(*(gk5e->encrypt))(cipher, NULL, inblock.data,
+				   outblock.data, inblock.len);
+		/* NOTE(review): the encrypt() return value is not checked */
+		if ((keybytes - n) <= outblock.len) {
+			memcpy(rawkey + n, outblock.data, (keybytes - n));
+			break;
+		}
+
+		memcpy(rawkey + n, outblock.data, outblock.len);
+		memcpy(inblock.data, outblock.data, outblock.len);
+		n += outblock.len;
+	}
+
+	/* postprocess the key */
+
+	inblock.data = (char *) rawkey;
+	inblock.len = keybytes;
+
+	BUG_ON(gk5e->mk_key == NULL);
+	ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
+	if (ret) {
+		dprintk("%s: got %d from mk_key function for '%s'\n",
+			__func__, ret, gk5e->encrypt_name);
+		goto err_free_raw;
+	}
+
+	/* clean memory, free resources and exit */
+
+	ret = 0;
+
+err_free_raw:
+	memset(rawkey, 0, keybytes);
+	kfree(rawkey);
+err_free_out:
+	memset(outblockdata, 0, blocksize);
+	kfree(outblockdata);
+err_free_in:
+	memset(inblockdata, 0, blocksize);
+	kfree(inblockdata);
+err_free_cipher:
+	crypto_free_blkcipher(cipher);
+err_return:
+	return ret;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 6f93f4752be4..fdf0eb2057ab 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -60,6 +60,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .cksum_name = "md5",
 	  .encrypt = krb5_encrypt,
 	  .decrypt = krb5_decrypt,
+	  .mk_key = NULL,
 	  .signalg = SGN_ALG_DES_MAC_MD5,
 	  .sealalg = SEAL_ALG_DES,
 	  .keybytes = 7,
-- 
cgit v1.2.3


From 47d84807762966c3611c38adecec6ea703ddda7a Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:54 -0400
Subject: gss_krb5: handle new context format from gssd

For encryption types other than DES, gssd sends down context information
in a new format.  This new format includes the information needed to
support the new Kerberos GSS-API tokens defined in rfc4121.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h     |  23 +++-
 net/sunrpc/auth_gss/gss_krb5_keys.c |   2 +-
 net/sunrpc/auth_gss/gss_krb5_mech.c | 237 +++++++++++++++++++++++++++++++++++-
 3 files changed, 258 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index d31ba0304d18..04d5279f17df 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -72,21 +72,36 @@ struct gss_krb5_enctype {
 	u32 (*decrypt) (struct crypto_blkcipher *tfm,
 			void *iv, void *in, void *out,
 			int length);		/* decryption function */
-	u32 (*mk_key) (struct gss_krb5_enctype *gk5e,
+	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
 		       struct xdr_netobj *in,
 		       struct xdr_netobj *out);	/* complete key generation */
 };
 
+/* krb5_ctx flags definitions */
+#define KRB5_CTX_FLAG_INITIATOR         0x00000001
+#define KRB5_CTX_FLAG_CFX               0x00000002
+#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY   0x00000004
+
 struct krb5_ctx {
 	int			initiate; /* 1 = initiating, 0 = accepting */
 	u32			enctype;
+	u32			flags;
 	const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
 	struct crypto_blkcipher	*enc;
 	struct crypto_blkcipher	*seq;
+	struct crypto_blkcipher *acceptor_enc;
+	struct crypto_blkcipher *initiator_enc;
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
 	u32			seq_send;
+	u64			seq_send64;
 	struct xdr_netobj	mech_used;
+	u8			initiator_sign[GSS_KRB5_MAX_KEYLEN];
+	u8			acceptor_sign[GSS_KRB5_MAX_KEYLEN];
+	u8			initiator_seal[GSS_KRB5_MAX_KEYLEN];
+	u8			acceptor_seal[GSS_KRB5_MAX_KEYLEN];
+	u8			initiator_integ[GSS_KRB5_MAX_KEYLEN];
+	u8			acceptor_integ[GSS_KRB5_MAX_KEYLEN];
 };
 
 extern spinlock_t krb5_seq_lock;
@@ -151,6 +166,10 @@ enum seal_alg {
 #define ENCTYPE_DES3_CBC_RAW    0x0006	/* DES-3 cbc mode raw */
 #define ENCTYPE_DES_HMAC_SHA1   0x0008
 #define ENCTYPE_DES3_CBC_SHA1   0x0010
+#define ENCTYPE_AES128_CTS_HMAC_SHA1_96 0x0011
+#define ENCTYPE_AES256_CTS_HMAC_SHA1_96 0x0012
+#define ENCTYPE_ARCFOUR_HMAC            0x0017
+#define ENCTYPE_ARCFOUR_HMAC_EXP        0x0018
 #define ENCTYPE_UNKNOWN         0x01ff
 
 /*
@@ -238,7 +257,7 @@ int
 xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen);
 
 u32
-krb5_derive_key(struct gss_krb5_enctype *gk5e,
+krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 		const struct xdr_netobj *inkey,
 		struct xdr_netobj *outkey,
 		const struct xdr_netobj *in_constant);
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 832ce901bf68..253b4149584a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -147,7 +147,7 @@ static void krb5_nfold(u32 inbits, const u8 *in,
  * Taken from MIT Kerberos and modified.
  */
 
-u32 krb5_derive_key(struct gss_krb5_enctype *gk5e,
+u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 		    const struct xdr_netobj *inkey,
 		    struct xdr_netobj *outkey,
 		    const struct xdr_netobj *in_constant)
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index fdf0eb2057ab..8b612e733563 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -48,6 +48,8 @@
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
 
+static struct gss_api_mech gss_kerberos_mech;	/* forward declaration */
+
 static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	/*
 	 * DES (All DES enctypes are mapped to the same gss functionality)
@@ -247,6 +249,237 @@ out_err:
 	return PTR_ERR(p);
 }
 
+struct crypto_blkcipher *
+context_v2_alloc_cipher(struct krb5_ctx *ctx, u8 *key)
+{
+	struct crypto_blkcipher *cp;
+
+	cp = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name,
+					0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cp)) {
+		dprintk("gss_kerberos_mech: unable to initialize "
+			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+		return NULL;
+	}
+	if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
+		dprintk("gss_kerberos_mech: error setting key for "
+			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+		crypto_free_blkcipher(cp);
+		return NULL;
+	}
+	return cp;
+}
+
+static inline void
+set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
+{
+	cdata[0] = (usage>>24)&0xff;
+	cdata[1] = (usage>>16)&0xff;
+	cdata[2] = (usage>>8)&0xff;
+	cdata[3] = usage&0xff;
+	cdata[4] = seed;
+}
+
+static int
+context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
+{
+	struct xdr_netobj c, keyin, keyout;
+	u8 cdata[GSS_KRB5_K5CLENGTH];
+	u32 err;
+
+	c.len = GSS_KRB5_K5CLENGTH;
+	c.data = cdata;
+
+	keyin.data = rawkey;
+	keyin.len = keylen;
+	keyout.len = keylen;
+
+	/* seq uses the raw key */
+	ctx->seq = context_v2_alloc_cipher(ctx, rawkey);
+	if (ctx->seq == NULL)
+		goto out_err;
+
+	ctx->enc = context_v2_alloc_cipher(ctx, rawkey);
+	if (ctx->enc == NULL)
+		goto out_free_seq;
+
+	/* derive cksum */
+	set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
+	keyout.data = ctx->cksum;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving cksum key\n",
+			__func__, err);
+		goto out_free_enc;
+	}
+
+	return 0;
+
+out_free_enc:
+	crypto_free_blkcipher(ctx->enc);
+out_free_seq:
+	crypto_free_blkcipher(ctx->seq);
+out_err:
+	return -EINVAL;
+}
+
+static int
+context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
+{
+	struct xdr_netobj c, keyin, keyout;
+	u8 cdata[GSS_KRB5_K5CLENGTH];
+	u32 err;
+
+	c.len = GSS_KRB5_K5CLENGTH;
+	c.data = cdata;
+
+	keyin.data = rawkey;
+	keyin.len = keylen;
+	keyout.len = keylen;
+
+	/* initiator seal encryption */
+	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
+	keyout.data = ctx->initiator_seal;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving initiator_seal key\n",
+			__func__, err);
+		goto out_err;
+	}
+	ctx->initiator_enc = context_v2_alloc_cipher(ctx, ctx->initiator_seal);
+	if (ctx->initiator_enc == NULL)
+		goto out_err;
+
+	/* acceptor seal encryption */
+	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
+	keyout.data = ctx->acceptor_seal;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving acceptor_seal key\n",
+			__func__, err);
+		goto out_free_initiator_enc;
+	}
+	ctx->acceptor_enc = context_v2_alloc_cipher(ctx, ctx->acceptor_seal);
+	if (ctx->acceptor_enc == NULL)
+		goto out_free_initiator_enc;
+
+	/* initiator sign checksum */
+	set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
+	keyout.data = ctx->initiator_sign;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving initiator_sign key\n",
+			__func__, err);
+		goto out_free_acceptor_enc;
+	}
+
+	/* acceptor sign checksum */
+	set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
+	keyout.data = ctx->acceptor_sign;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving acceptor_sign key\n",
+			__func__, err);
+		goto out_free_acceptor_enc;
+	}
+
+	/* initiator seal integrity */
+	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
+	keyout.data = ctx->initiator_integ;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving initiator_integ key\n",
+			__func__, err);
+		goto out_free_acceptor_enc;
+	}
+
+	/* acceptor seal integrity */
+	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
+	keyout.data = ctx->acceptor_integ;
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	if (err) {
+		dprintk("%s: Error %d deriving acceptor_integ key\n",
+			__func__, err);
+		goto out_free_acceptor_enc;
+	}
+
+	return 0;
+
+out_free_acceptor_enc:
+	crypto_free_blkcipher(ctx->acceptor_enc);
+out_free_initiator_enc:
+	crypto_free_blkcipher(ctx->initiator_enc);
+out_err:
+	return -EINVAL;
+}
+
+static int
+gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
+{
+	u8 rawkey[GSS_KRB5_MAX_KEYLEN];
+	int keylen;
+
+	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
+	if (IS_ERR(p))
+		goto out_err;
+	ctx->initiate = ctx->flags & KRB5_CTX_FLAG_INITIATOR;
+
+	p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
+	if (IS_ERR(p))
+		goto out_err;
+	p = simple_get_bytes(p, end, &ctx->seq_send64, sizeof(ctx->seq_send64));
+	if (IS_ERR(p))
+		goto out_err;
+	/* set seq_send for use by "older" enctypes */
+	ctx->seq_send = ctx->seq_send64;
+	if (ctx->seq_send64 != ctx->seq_send) {
+		dprintk("%s: seq_send64 %lx, seq_send %x overflow?\n", __func__,
+			(long unsigned)ctx->seq_send64, ctx->seq_send);
+		return -EINVAL;	/* p holds no errno here */
+	}
+	p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
+	if (IS_ERR(p))
+		goto out_err;
+	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
+	if (ctx->gk5e == NULL) {
+		dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
+			ctx->enctype);
+		p = ERR_PTR(-EINVAL);
+		goto out_err;
+	}
+	keylen = ctx->gk5e->keylength;
+
+	p = simple_get_bytes(p, end, rawkey, keylen);
+	if (IS_ERR(p))
+		goto out_err;
+
+	if (p != end) {
+		p = ERR_PTR(-EINVAL);
+		goto out_err;
+	}
+
+	ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
+				      gss_kerberos_mech.gm_oid.len, GFP_KERNEL);
+	if (unlikely(ctx->mech_used.data == NULL)) {
+		p = ERR_PTR(-ENOMEM);
+		goto out_err;
+	}
+	ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
+
+	switch (ctx->enctype) {
+	case ENCTYPE_DES3_CBC_RAW:
+		return context_derive_keys_des3(ctx, rawkey, keylen);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return context_derive_keys_new(ctx, rawkey, keylen);
+	default:
+		return -EINVAL;
+	}
+
+out_err:
+	return PTR_ERR(p);
+}
+
 static int
 gss_import_sec_context_kerberos(const void *p, size_t len,
 				struct gss_ctx *ctx_id)
@@ -262,7 +495,7 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
 	if (len == 85)
 		ret = gss_import_v1_context(p, end, ctx);
 	else
-		ret = -EINVAL;
+		ret = gss_import_v2_context(p, end, ctx);
 
 	if (ret == 0)
 		ctx_id->internal_ctx_id = ctx;
@@ -279,6 +512,8 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
 
 	crypto_free_blkcipher(kctx->seq);
 	crypto_free_blkcipher(kctx->enc);
+	crypto_free_blkcipher(kctx->acceptor_enc);
+	crypto_free_blkcipher(kctx->initiator_enc);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
-- 
cgit v1.2.3


From 683ac6656cb05b6e83593770ffc049eee4a4d119 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 8 Apr 2010 14:09:58 -0400
Subject: gss_krb5: Add upcall info indicating supported kerberos enctypes

The text based upcall now indicates which Kerberos encryption types are
supported by the kernel rpcsecgss code.  This is used by gssd to
determine which encryption types it should attempt to negotiate
when creating a context with a server.

The server principal's database and keytab encryption types are
what limits what it should negotiate.  Therefore, its keytab
should be created with only the enctypes listed by this file.

Currently we support des-cbc-crc, des-cbc-md4 and des-cbc-md5

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_api.h      | 2 ++
 net/sunrpc/auth_gss/auth_gss.c      | 8 +++++++-
 net/sunrpc/auth_gss/gss_krb5_mech.c | 1 +
 3 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index 03f33330ece2..b22d7f189ceb 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -80,6 +80,8 @@ struct gss_api_mech {
 	/* pseudoflavors supported by this mechanism: */
 	int			gm_pf_num;
 	struct pf_desc *	gm_pfs;
+	/* Should the following be a callback operation instead? */
+	const char		*gm_upcall_enctypes;
 };
 
 /* and must provide the following operations: */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index d64a58b8ed33..6654c8534d32 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -377,11 +377,12 @@ static void gss_encode_v0_msg(struct gss_upcall_msg *gss_msg)
 static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 				struct rpc_clnt *clnt, int machine_cred)
 {
+	struct gss_api_mech *mech = gss_msg->auth->mech;
 	char *p = gss_msg->databuf;
 	int len = 0;
 
 	gss_msg->msg.len = sprintf(gss_msg->databuf, "mech=%s uid=%d ",
-				   gss_msg->auth->mech->gm_name,
+				   mech->gm_name,
 				   gss_msg->uid);
 	p += gss_msg->msg.len;
 	if (clnt->cl_principal) {
@@ -398,6 +399,11 @@ static void gss_encode_v1_msg(struct gss_upcall_msg *gss_msg,
 		p += len;
 		gss_msg->msg.len += len;
 	}
+	if (mech->gm_upcall_enctypes) {	/* appended verbatim */
+		len = sprintf(p, "%s", mech->gm_upcall_enctypes);
+		p += len;
+		gss_msg->msg.len += len;
+	}
 	len = sprintf(p, "\n");
 	gss_msg->msg.len += len;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 8b612e733563..03f1dcddbd29 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -552,6 +552,7 @@ static struct gss_api_mech gss_kerberos_mech = {
 	.gm_ops		= &gss_kerberos_ops,
 	.gm_pf_num	= ARRAY_SIZE(gss_kerberos_pfs),
 	.gm_pfs		= gss_kerberos_pfs,
+	.gm_upcall_enctypes = "enctypes=3,1,2 ",
 };
 
 static int __init init_kerberos_module(void)
-- 
cgit v1.2.3


From 958142e97e04d6c266ae093739bbbbd03afcd497 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:55 -0400
Subject: gss_krb5: add support for triple-des encryption

Add the final pieces to support the triple-des encryption type.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  5 ++++
 net/sunrpc/auth_gss/gss_krb5_crypto.c |  3 ++
 net/sunrpc/auth_gss/gss_krb5_keys.c   | 53 +++++++++++++++++++++++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_mech.c   | 23 +++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_seal.c   |  1 +
 net/sunrpc/auth_gss/gss_krb5_unseal.c |  1 +
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |  2 ++
 7 files changed, 88 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 04d5279f17df..db0522b4c4c9 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -261,3 +261,8 @@ krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 		const struct xdr_netobj *inkey,
 		struct xdr_netobj *outkey,
 		const struct xdr_netobj *in_constant);
+
+u32
+gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
+		       struct xdr_netobj *randombits,
+		       struct xdr_netobj *key);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index cae04d7a45a5..bb76873aa019 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -184,6 +184,9 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
 		       checksumdata + checksumlen - kctx->gk5e->cksumlength,
 		       kctx->gk5e->cksumlength);
 		break;
+	case CKSUMTYPE_HMAC_SHA1_DES3:
+		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
+		break;
 	default:
 		BUG();
 		break;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 253b4149584a..d54668790f0c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -250,3 +250,56 @@ err_free_cipher:
 err_return:
 	return ret;
 }
+
+#define smask(step) ((1<<(step))-1)
+#define pstep(x, step) (((x)&smask(step))^(((x)>>(step))&smask(step)))
+#define parity_char(x) pstep(pstep(pstep((x), 4), 2), 1) /* xor of 8 bits */
+
+static void mit_des_fixup_key_parity(u8 key[8])
+{
+	int i;
+	for (i = 0; i < 8; i++) {
+		key[i] &= 0xfe;
+		key[i] |= 1^parity_char(key[i]);
+	}
+}
+
+/*
+ * This is the des3 key derivation postprocess function
+ */
+u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
+			   struct xdr_netobj *randombits,
+			   struct xdr_netobj *key)
+{
+	int i;
+	u32 ret = EINVAL;
+
+	if (key->len != 24) {
+		dprintk("%s: key->len is %d\n", __func__, key->len);
+		goto err_out;
+	}
+	if (randombits->len != 21) {
+		dprintk("%s: randombits->len is %d\n",
+			__func__, randombits->len);
+		goto err_out;
+	}
+
+	/* take the seven bytes, move them around into the top 7 bits of the
+	   8 key bytes, then compute the parity bits.  Do this three times. */
+
+	for (i = 0; i < 3; i++) {
+		memcpy(key->data + i*8, randombits->data + i*7, 7);
+		key->data[i*8+7] = (((key->data[i*8]&1)<<1) |
+				    ((key->data[i*8+1]&1)<<2) |
+				    ((key->data[i*8+2]&1)<<3) |
+				    ((key->data[i*8+3]&1)<<4) |
+				    ((key->data[i*8+4]&1)<<5) |
+				    ((key->data[i*8+5]&1)<<6) |
+				    ((key->data[i*8+6]&1)<<7));
+
+		mit_des_fixup_key_parity(key->data + i*8);
+	}
+	ret = 0;
+err_out:
+	return ret;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 03f1dcddbd29..7cebdf843266 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -71,6 +71,26 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .cksumlength = 8,
 	  .keyed_cksum = 0,
 	},
+	/*
+	 * 3DES
+	 */
+	{
+	  .etype = ENCTYPE_DES3_CBC_RAW,
+	  .ctype = CKSUMTYPE_HMAC_SHA1_DES3,
+	  .name = "des3-hmac-sha1",
+	  .encrypt_name = "cbc(des3_ede)",
+	  .cksum_name = "hmac(sha1)",
+	  .encrypt = krb5_encrypt,
+	  .decrypt = krb5_decrypt,
+	  .mk_key = gss_krb5_des3_make_key,
+	  .signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
+	  .sealalg = SEAL_ALG_DES3KD,
+	  .keybytes = 21,
+	  .keylength = 24,
+	  .blocksize = 8,
+	  .cksumlength = 20,
+	  .keyed_cksum = 1,
+	},
 };
 
 static const int num_supported_enctypes =
@@ -440,6 +460,9 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	p = simple_get_bytes(p, end, &ctx->enctype, sizeof(ctx->enctype));
 	if (IS_ERR(p))
 		goto out_err;
+	/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
+	if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
+		ctx->enctype = ENCTYPE_DES3_CBC_RAW;
 	ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
 	if (ctx->gk5e == NULL) {
 		dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index cd512719092b..7ede900049a7 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -142,6 +142,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 	default:
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
 		return gss_get_mic_v1(ctx, text, token);
 	}
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 7515bffddf15..3e15bdb5a9eb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -152,6 +152,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 	default:
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
 		return gss_verify_mic_v1(ctx, message_buffer, read_token);
 	}
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 2eb3046a84ea..1c8ebd3dbd3c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -350,6 +350,7 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 	default:
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
 		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
 	}
 }
@@ -363,6 +364,7 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
 	default:
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
+	case ENCTYPE_DES3_CBC_RAW:
 		return gss_unwrap_kerberos_v1(kctx, offset, buf);
 	}
 }
-- 
cgit v1.2.3


From de9c17eb4a912c9028f7b470eb80815144883b26 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:02:59 -0400
Subject: gss_krb5: add support for new token formats in rfc4121

This is a step toward support for AES encryption types which are
required to use the new token formats defined in rfc4121.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
[SteveD: Fixed a typo in gss_verify_mic_v2()]
Signed-off-by: Steve Dickson <steved@redhat.com>
[Trond: Got rid of the TEST_ROTATE/TEST_EXTRA_COUNT crap]
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  28 ++++++
 net/sunrpc/auth_gss/gss_krb5_crypto.c |  74 +++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_seal.c   |  69 ++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_unseal.c |  61 ++++++++++++
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 174 ++++++++++++++++++++++++++++++++++
 5 files changed, 406 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index db0522b4c4c9..0085a30fd204 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -53,6 +53,8 @@
 /* Maximum blocksize for the supported crypto algorithms */
 #define GSS_KRB5_MAX_BLOCKSIZE  (16)
 
+struct krb5_ctx;
+
 struct gss_krb5_enctype {
 	const u32		etype;		/* encryption (key) type */
 	const u32		ctype;		/* checksum type */
@@ -75,6 +77,12 @@ struct gss_krb5_enctype {
 	u32 (*mk_key) (const struct gss_krb5_enctype *gk5e,
 		       struct xdr_netobj *in,
 		       struct xdr_netobj *out);	/* complete key generation */
+	u32 (*encrypt_v2) (struct krb5_ctx *kctx, u32 offset,
+			   struct xdr_buf *buf, int ec,
+			   struct page **pages); /* v2 encryption function */
+	u32 (*decrypt_v2) (struct krb5_ctx *kctx, u32 offset,
+			   struct xdr_buf *buf, u32 *headskip,
+			   u32 *tailskip);	/* v2 decryption function */
 };
 
 /* krb5_ctx flags definitions */
@@ -112,6 +120,18 @@ extern spinlock_t krb5_seq_lock;
 #define KG_TOK_MIC_MSG    0x0101
 #define KG_TOK_WRAP_MSG   0x0201
 
+#define KG2_TOK_INITIAL     0x0101
+#define KG2_TOK_RESPONSE    0x0202
+#define KG2_TOK_MIC         0x0404
+#define KG2_TOK_WRAP        0x0504
+
+#define KG2_TOKEN_FLAG_SENTBYACCEPTOR   0x01
+#define KG2_TOKEN_FLAG_SEALED           0x02
+#define KG2_TOKEN_FLAG_ACCEPTORSUBKEY   0x04
+
+#define KG2_RESP_FLAG_ERROR             0x0001
+#define KG2_RESP_FLAG_DELEG_OK          0x0002
+
 enum sgn_alg {
 	SGN_ALG_DES_MAC_MD5 = 0x0000,
 	SGN_ALG_MD2_5 = 0x0001,
@@ -136,6 +156,9 @@ enum seal_alg {
 #define CKSUMTYPE_RSA_MD5_DES		0x0008
 #define CKSUMTYPE_NIST_SHA		0x0009
 #define CKSUMTYPE_HMAC_SHA1_DES3	0x000c
+#define CKSUMTYPE_HMAC_SHA1_96_AES128   0x000f
+#define CKSUMTYPE_HMAC_SHA1_96_AES256   0x0010
+#define CKSUMTYPE_HMAC_MD5_ARCFOUR      -138 /* Microsoft md5 hmac cksumtype */
 
 /* from gssapi_err_krb5.h */
 #define KG_CCACHE_NOMATCH                        (39756032L)
@@ -212,6 +235,11 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
 		struct xdr_buf *body, int body_offset, u8 *cksumkey,
 		struct xdr_netobj *cksumout);
 
+u32
+make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen,
+		 struct xdr_buf *body, int body_offset, u8 *key,
+		 struct xdr_netobj *cksum);
+
 u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
 		struct xdr_netobj *);
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index bb76873aa019..ca52ac28a537 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -197,6 +197,80 @@ out:
 	return err ? GSS_S_FAILURE : 0;
 }
 
+/*
+ * checksum the plaintext data and hdrlen bytes of the token header
+ * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
+ * body then over the first 16 octets of the MIC token
+ * Inclusion of the header data in the calculation of the
+ * checksum is optional.
+ */
+u32
+make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
+		 struct xdr_buf *body, int body_offset, u8 *cksumkey,
+		 struct xdr_netobj *cksumout)
+{
+	struct hash_desc desc;
+	struct scatterlist sg[1];
+	int err;
+	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	unsigned int checksumlen;
+
+	if (kctx->gk5e->keyed_cksum == 0) {
+		dprintk("%s: expected keyed hash for %s\n",
+			__func__, kctx->gk5e->name);
+		return GSS_S_FAILURE;
+	}
+	if (cksumkey == NULL) {
+		dprintk("%s: no key supplied for %s\n",
+			__func__, kctx->gk5e->name);
+		return GSS_S_FAILURE;
+	}
+
+	desc.tfm = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
+							CRYPTO_ALG_ASYNC);
+	if (IS_ERR(desc.tfm))
+		return GSS_S_FAILURE;
+	checksumlen = crypto_hash_digestsize(desc.tfm);
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_hash_setkey(desc.tfm, cksumkey, kctx->gk5e->keylength);
+	if (err)
+		goto out;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out;
+	err = xdr_process_buf(body, body_offset, body->len - body_offset,
+			      checksummer, &desc);
+	if (err)
+		goto out;
+	if (header != NULL) {
+		sg_init_one(sg, header, hdrlen);
+		err = crypto_hash_update(&desc, sg, hdrlen);
+		if (err)
+			goto out;
+	}
+	err = crypto_hash_final(&desc, checksumdata);
+	if (err)
+		goto out;
+
+	cksumout->len = kctx->gk5e->cksumlength;
+
+	switch (kctx->gk5e->ctype) {
+	case CKSUMTYPE_HMAC_SHA1_96_AES128:
+	case CKSUMTYPE_HMAC_SHA1_96_AES256:
+		/* note that this truncates the hash */
+		memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
+		break;
+	default:
+		BUG();
+		break;
+	}
+out:
+	crypto_free_hash(desc.tfm);
+	return err ? GSS_S_FAILURE : 0;
+}
+
 struct encryptor_desc {
 	u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
 	struct blkcipher_desc desc;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 7ede900049a7..477a546d19bb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -91,6 +91,33 @@ setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
 	return (char *)krb5_hdr;
 }
 
+static void *
+setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
+{
+	__be16 *ptr, *krb5_hdr;
+	u8 *p, flags = 0x00;
+
+	if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
+		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
+	if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
+		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
+
+	/* Per rfc 4121, sec 4.2.6.1, there is no header,
+	 * just start the token */
+	krb5_hdr = ptr = (__be16 *)token->data;
+
+	*ptr++ = cpu_to_be16(KG2_TOK_MIC);	/* bytes 0-1: token id */
+	p = (u8 *)ptr;
+	*p++ = flags;				/* byte 2 */
+	*p++ = 0xff;				/* bytes 3-7 are all 0xff */
+	ptr = (__be16 *)p;
+	*ptr++ = cpu_to_be16(0xffff);
+	*ptr++ = cpu_to_be16(0xffff);
+
+	token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
+	return krb5_hdr;
+}
+
 static u32
 gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 		struct xdr_netobj *token)
@@ -132,6 +159,45 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
 
+u32
+gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+		struct xdr_netobj *token)
+{
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
+				       .data = cksumdata};
+	void *krb5_hdr;
+	s32 now;
+	u64 seq_send;
+	u8 *cksumkey;
+
+	dprintk("RPC:       %s\n", __func__);
+
+	krb5_hdr = setup_token_v2(ctx, token);
+
+	/* Set up the sequence number. Now 64-bits in clear
+	 * text and w/o direction indicator */
+	spin_lock(&krb5_seq_lock);
+	seq_send = ctx->seq_send64++;
+	spin_unlock(&krb5_seq_lock);
+	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
+
+	if (ctx->initiate)
+		cksumkey = ctx->initiator_sign;
+	else
+		cksumkey = ctx->acceptor_sign;
+
+	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
+			     text, 0, cksumkey, &cksumobj))
+		return GSS_S_FAILURE;
+
+	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
+
+	now = get_seconds();
+
+	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+}
+
 u32
 gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 		     struct xdr_netobj *token)
@@ -144,6 +210,9 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
 		return gss_get_mic_v1(ctx, text, token);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_get_mic_v2(ctx, text, token);
 	}
 }
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 3e15bdb5a9eb..4ede4cc4391f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -141,6 +141,64 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 	return GSS_S_COMPLETE;
 }
 
+static u32
+gss_verify_mic_v2(struct krb5_ctx *ctx,
+		struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+{
+	char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
+				      .data = cksumdata};
+	s32 now;
+	u64 seqnum;
+	u8 *ptr = read_token->data;
+	u8 *cksumkey;
+	u8 flags;
+	int i;
+
+	dprintk("RPC:       %s\n", __func__);
+
+	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_MIC)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	flags = ptr[2];
+	if ((!ctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
+	    (ctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
+		return GSS_S_BAD_SIG;
+
+	if (flags & KG2_TOKEN_FLAG_SEALED) {
+		dprintk("%s: token has unexpected sealed flag\n", __func__);
+		return GSS_S_FAILURE;
+	}
+
+	for (i = 3; i < 8; i++)
+		if (ptr[i] != 0xff)
+			return GSS_S_DEFECTIVE_TOKEN;
+
+	if (ctx->initiate)
+		cksumkey = ctx->acceptor_sign;
+	else
+		cksumkey = ctx->initiator_sign;
+
+	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
+			     cksumkey, &cksumobj))
+		return GSS_S_FAILURE;
+
+	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
+				ctx->gk5e->cksumlength))
+		return GSS_S_BAD_SIG;
+
+	/* it got through unscathed.  Make sure the context is unexpired */
+	now = get_seconds();
+	if (now > ctx->endtime)
+		return GSS_S_CONTEXT_EXPIRED;
+
+	/* do sequencing checks (seqnum is read here but not yet verified) */
+
+	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
+
+	return GSS_S_COMPLETE;
+}
+
 u32
 gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 			struct xdr_buf *message_buffer,
@@ -154,6 +212,9 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
 		return gss_verify_mic_v1(ctx, message_buffer, read_token);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_verify_mic_v2(ctx, message_buffer, read_token);
 	}
 }
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 1c8ebd3dbd3c..4aa46b28298c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -340,6 +340,174 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	return GSS_S_COMPLETE;
 }
 
+/*
+ * We cannot currently handle tokens with rotated data.  We need a
+ * generalized routine to rotate the data in place.  It is anticipated
+ * that we won't encounter rotated data in the general case.
+ */
+static u32
+rotate_left(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, u16 rrc)
+{
+	unsigned int datalen = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
+	unsigned int realrrc = datalen ? rrc % datalen : rrc; /* no div by 0 */
+
+	if (realrrc == 0)
+		return 0;
+	dprintk("%s: cannot process token with rotated data: "
+		"rrc %u, realrrc %u\n", __func__, rrc, realrrc);
+	return 1;
+}
+
+static u32
+gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
+		     struct xdr_buf *buf, struct page **pages)
+{
+	int		blocksize;
+	u8		*ptr, *plainhdr;
+	s32		now;
+	u8		flags = 0x00;
+	__be16		*be16ptr, ec = 0;
+	__be64		*be64ptr;
+	u32		err;
+
+	dprintk("RPC:       %s\n", __func__);
+
+	if (kctx->gk5e->encrypt_v2 == NULL)
+		return GSS_S_FAILURE;
+
+	/* make room for gss token header */
+	if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
+		return GSS_S_FAILURE;
+
+	/* construct gss token header */
+	ptr = plainhdr = buf->head[0].iov_base + offset;
+	*ptr++ = (unsigned char) ((KG2_TOK_WRAP>>8) & 0xff);
+	*ptr++ = (unsigned char) (KG2_TOK_WRAP & 0xff);
+
+	if ((kctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
+		flags |= KG2_TOKEN_FLAG_SENTBYACCEPTOR;
+	if ((kctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY) != 0)
+		flags |= KG2_TOKEN_FLAG_ACCEPTORSUBKEY;
+	/* We always do confidentiality in wrap tokens */
+	flags |= KG2_TOKEN_FLAG_SEALED;
+
+	*ptr++ = flags;
+	*ptr++ = 0xff;
+	be16ptr = (__be16 *)ptr;
+
+	blocksize = crypto_blkcipher_blocksize(kctx->acceptor_enc);
+	*be16ptr++ = cpu_to_be16(ec);
+	/* "inner" token header always uses 0 for RRC */
+	*be16ptr++ = cpu_to_be16(0);
+
+	be64ptr = (__be64 *)be16ptr;
+	spin_lock(&krb5_seq_lock);
+	*be64ptr = cpu_to_be64(kctx->seq_send64++);
+	spin_unlock(&krb5_seq_lock);
+
+	err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, ec, pages);
+	if (err)
+		return err;
+
+	now = get_seconds();
+	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
+}
+
+static u32
+gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
+{
+	s32		now;
+	u64		seqnum;
+	u8		*ptr;
+	u8		flags = 0x00;
+	u16		ec, rrc;
+	int		err;
+	u32		headskip, tailskip;
+	u8		decrypted_hdr[GSS_KRB5_TOK_HDR_LEN];
+	unsigned int	movelen;
+
+
+	dprintk("RPC:       %s\n", __func__);
+
+	if (kctx->gk5e->decrypt_v2 == NULL)
+		return GSS_S_FAILURE;
+
+	ptr = buf->head[0].iov_base + offset;
+
+	if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	flags = ptr[2];
+	if ((!kctx->initiate && (flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)) ||
+	    (kctx->initiate && !(flags & KG2_TOKEN_FLAG_SENTBYACCEPTOR)))
+		return GSS_S_BAD_SIG;
+
+	if ((flags & KG2_TOKEN_FLAG_SEALED) == 0) {
+		dprintk("%s: token missing expected sealed flag\n", __func__);
+		return GSS_S_DEFECTIVE_TOKEN;
+	}
+
+	if (ptr[3] != 0xff)
+		return GSS_S_DEFECTIVE_TOKEN;
+
+	ec = be16_to_cpup((__be16 *)(ptr + 4));
+	rrc = be16_to_cpup((__be16 *)(ptr + 6));
+
+	seqnum = be64_to_cpup((__be64 *)(ptr + 8));
+
+	if (rrc != 0) {
+		err = rotate_left(kctx, offset, buf, rrc);
+		if (err)
+			return GSS_S_FAILURE;
+	}
+
+	err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf,
+					&headskip, &tailskip);
+	if (err)
+		return GSS_S_FAILURE;
+
+	/*
+	 * Retrieve the decrypted gss token header and verify
+	 * it against the original
+	 */
+	err = read_bytes_from_xdr_buf(buf,
+				buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip,
+				decrypted_hdr, GSS_KRB5_TOK_HDR_LEN);
+	if (err) {
+		dprintk("%s: error %u getting decrypted_hdr\n", __func__, err);
+		return GSS_S_FAILURE;
+	}
+	if (memcmp(ptr, decrypted_hdr, 6)
+				|| memcmp(ptr + 8, decrypted_hdr + 8, 8)) {
+		dprintk("%s: token hdr, plaintext hdr mismatch!\n", __func__);
+		return GSS_S_FAILURE;
+	}
+
+	/* do sequencing checks */
+
+	/* it got through unscathed.  Make sure the context is unexpired */
+	now = get_seconds();
+	if (now > kctx->endtime)
+		return GSS_S_CONTEXT_EXPIRED;
+
+	/*
+	 * Move the head data back to the right position in xdr_buf.
+	 * We ignore any "ec" data since it might be in the head or
+	 * the tail, and we really don't need to deal with it.
+	 * Note that buf->head[0].iov_len may indicate the available
+	 * head buffer space rather than that actually occupied.
+	 */
+	movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len);
+	movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip;
+	BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen >
+							buf->head[0].iov_len);
+	memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen);
+	buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+	buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip;
+
+	return GSS_S_COMPLETE;
+}
+
 u32
 gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 		  struct xdr_buf *buf, struct page **pages)
@@ -352,6 +520,9 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
 		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
 	}
 }
 
@@ -366,6 +537,9 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
 		return gss_unwrap_kerberos_v1(kctx, offset, buf);
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		return gss_unwrap_kerberos_v2(kctx, offset, buf);
 	}
 }
 
-- 
cgit v1.2.3


From 934a95aa1c9c6ad77838800b79c306e982437605 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:00 -0400
Subject: gss_krb5: add remaining pieces to enable AES encryption support

Add the remaining pieces to enable support for Kerberos AES
encryption types.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  20 +++
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 248 ++++++++++++++++++++++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_keys.c   |  30 ++++
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  86 ++++++++++--
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |   6 +-
 5 files changed, 378 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 0085a30fd204..43148ec9a46c 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -99,6 +99,8 @@ struct krb5_ctx {
 	struct crypto_blkcipher	*seq;
 	struct crypto_blkcipher *acceptor_enc;
 	struct crypto_blkcipher *initiator_enc;
+	struct crypto_blkcipher *acceptor_enc_aux;
+	struct crypto_blkcipher *initiator_enc_aux;
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
 	u32			seq_send;
@@ -294,3 +296,21 @@ u32
 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
 		       struct xdr_netobj *randombits,
 		       struct xdr_netobj *key);
+
+u32
+gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
+		      struct xdr_netobj *randombits,
+		      struct xdr_netobj *key);
+
+u32
+gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
+		     struct xdr_buf *buf, int ec,
+		     struct page **pages);
+
+u32
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
+		     struct xdr_buf *buf, u32 *plainoffset,
+		     u32 *plainlen);
+
+void
+gss_krb5_make_confounder(char *p, u32 conflen);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index ca52ac28a537..967484a914f3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -41,6 +41,7 @@
 #include <linux/crypto.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/random.h>
 #include <linux/sunrpc/gss_krb5.h>
 #include <linux/sunrpc/xdr.h>
 
@@ -478,3 +479,250 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
 
 	return 0;
 }
+
+static u32
+gss_krb5_cts_crypt(struct crypto_blkcipher *cipher, struct xdr_buf *buf,
+		   u32 offset, u8 *iv, struct page **pages, int encrypt)
+{
+	u32 ret;
+	struct scatterlist sg[1];
+	struct blkcipher_desc desc = { .tfm = cipher, .info = iv };
+	u8 data[crypto_blkcipher_blocksize(cipher) * 2];
+	struct page **save_pages;
+	u32 len = buf->len - offset;
+
+	BUG_ON(len > crypto_blkcipher_blocksize(cipher) * 2);
+
+	/*
+	 * For encryption, read the cleartext from the page cache
+	 * pages in "pages" (swapped into buf just for this read);
+	 * the encrypted data is written to buf's own pages below.
+	 */
+	save_pages = buf->pages;
+	if (encrypt)
+		buf->pages = pages;
+
+	ret = read_bytes_from_xdr_buf(buf, offset, data, len);
+	buf->pages = save_pages;
+	if (ret)
+		goto out;
+
+	sg_init_one(sg, data, len);
+
+	if (encrypt)
+		ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, len);
+	else
+		ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, len);
+
+	if (ret)
+		goto out;
+
+	ret = write_bytes_to_xdr_buf(buf, offset, data, len);
+
+out:
+	return ret;
+}
+
+u32
+gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
+		     struct xdr_buf *buf, int ec, struct page **pages)
+{
+	u32 err;
+	struct xdr_netobj hmac;
+	u8 *cksumkey;
+	u8 *ecptr;
+	struct crypto_blkcipher *cipher, *aux_cipher;
+	int blocksize;
+	struct page **save_pages;
+	int nblocks, nbytes;
+	struct encryptor_desc desc;
+	u32 cbcbytes;
+
+	if (kctx->initiate) {
+		cipher = kctx->initiator_enc;
+		aux_cipher = kctx->initiator_enc_aux;
+		cksumkey = kctx->initiator_integ;
+	} else {
+		cipher = kctx->acceptor_enc;
+		aux_cipher = kctx->acceptor_enc_aux;
+		cksumkey = kctx->acceptor_integ;
+	}
+	blocksize = crypto_blkcipher_blocksize(cipher);
+
+	/* skip the gss token header; insert a confounder of blocksize bytes */
+	offset += GSS_KRB5_TOK_HDR_LEN;
+	if (xdr_extend_head(buf, offset, blocksize))
+		return GSS_S_FAILURE;
+	gss_krb5_make_confounder(buf->head[0].iov_base + offset, blocksize);
+	offset -= GSS_KRB5_TOK_HDR_LEN;
+
+	if (buf->tail[0].iov_base != NULL) {
+		ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
+	} else {
+		buf->tail[0].iov_base = buf->head[0].iov_base
+							+ buf->head[0].iov_len;
+		buf->tail[0].iov_len = 0;
+		ecptr = buf->tail[0].iov_base;
+	}
+
+	memset(ecptr, 'X', ec);
+	buf->tail[0].iov_len += ec;
+	buf->len += ec;
+
+	/* copy plaintext gss token header after filler (if any) */
+	memcpy(ecptr + ec, buf->head[0].iov_base + offset,
+						GSS_KRB5_TOK_HDR_LEN);
+	buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
+	buf->len += GSS_KRB5_TOK_HDR_LEN;
+
+	/* Do the HMAC; hmac.data points just past the current tail end */
+	hmac.len = GSS_KRB5_MAX_CKSUM_LEN;
+	hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+
+	/*
+	 * When we are called, pages points to the real page cache
+	 * data -- which we can't go and encrypt!  buf->pages points
+	 * to scratch pages which we are going to send off to the
+	 * client/server.  Swap in the plaintext pages to calculate
+	 * the hmac.
+	 */
+	save_pages = buf->pages;
+	buf->pages = pages;
+
+	err = make_checksum_v2(kctx, NULL, 0, buf,
+			       offset + GSS_KRB5_TOK_HDR_LEN, cksumkey, &hmac);
+	buf->pages = save_pages;
+	if (err)
+		return GSS_S_FAILURE;
+
+	nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
+	nblocks = (nbytes + blocksize - 1) / blocksize;
+	cbcbytes = 0;
+	if (nblocks > 2)
+		cbcbytes = (nblocks - 2) * blocksize;
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+
+	if (cbcbytes) {
+		desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
+		desc.fragno = 0;
+		desc.fraglen = 0;
+		desc.pages = pages;
+		desc.outbuf = buf;
+		desc.desc.info = desc.iv;
+		desc.desc.flags = 0;
+		desc.desc.tfm = aux_cipher;
+
+		sg_init_table(desc.infrags, 4);
+		sg_init_table(desc.outfrags, 4);
+
+		err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
+				      cbcbytes, encryptor, &desc);
+		if (err)
+			goto out_err;
+	}
+
+	/* CTS-crypt the rest; desc.iv is carried forward from any CBC pass. */
+	err = gss_krb5_cts_crypt(cipher, buf,
+				 offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
+				 desc.iv, pages, 1);
+	if (err) {
+		err = GSS_S_FAILURE;
+		goto out_err;
+	}
+
+	/* Now update buf to account for HMAC */
+	buf->tail[0].iov_len += kctx->gk5e->cksumlength;
+	buf->len += kctx->gk5e->cksumlength;
+
+out_err:
+	if (err)
+		err = GSS_S_FAILURE;
+	return err;
+}
+
+u32
+gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
+		     u32 *headskip, u32 *tailskip)
+{
+	struct xdr_buf subbuf;
+	u32 ret = 0;
+	u8 *cksum_key;
+	struct crypto_blkcipher *cipher, *aux_cipher;
+	struct xdr_netobj our_hmac_obj;
+	u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+	int nblocks, blocksize, cbcbytes;
+	struct decryptor_desc desc;
+
+	if (kctx->initiate) {
+		cipher = kctx->acceptor_enc;
+		aux_cipher = kctx->acceptor_enc_aux;
+		cksum_key = kctx->acceptor_integ;
+	} else {
+		cipher = kctx->initiator_enc;
+		aux_cipher = kctx->initiator_enc_aux;
+		cksum_key = kctx->initiator_integ;
+	}
+	blocksize = crypto_blkcipher_blocksize(cipher);
+
+
+	/* create a segment skipping the header and leaving out the checksum */
+	xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
+				    (buf->len - offset - GSS_KRB5_TOK_HDR_LEN -
+				     kctx->gk5e->cksumlength));
+
+	nblocks = (subbuf.len + blocksize - 1) / blocksize;
+
+	cbcbytes = 0;
+	if (nblocks > 2)
+		cbcbytes = (nblocks - 2) * blocksize;
+
+	memset(desc.iv, 0, sizeof(desc.iv));
+
+	if (cbcbytes) {
+		desc.fragno = 0;
+		desc.fraglen = 0;
+		desc.desc.info = desc.iv;
+		desc.desc.flags = 0;
+		desc.desc.tfm = aux_cipher;
+
+		sg_init_table(desc.frags, 4);
+
+		ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
+		if (ret)
+			goto out_err;
+	}
+
+	/* CTS-crypt the rest; desc.iv is carried forward from any CBC pass. */
+	ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
+	if (ret)
+		goto out_err;
+
+
+	/* Calculate our hmac over the plaintext data */
+	our_hmac_obj.len = sizeof(our_hmac);
+	our_hmac_obj.data = our_hmac;
+
+	ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
+			       cksum_key, &our_hmac_obj);
+	if (ret)
+		goto out_err;
+
+	/* Get the packet's hmac value: the last cksumlength bytes of buf */
+	ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength,
+				      pkt_hmac, kctx->gk5e->cksumlength);
+	if (ret)
+		goto out_err;
+
+	if (memcmp(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+		ret = GSS_S_BAD_SIG;
+		goto out_err;
+	}
+	*headskip = crypto_blkcipher_blocksize(cipher);
+	*tailskip = kctx->gk5e->cksumlength;
+out_err:
+	if (ret && ret != GSS_S_BAD_SIG)
+		ret = GSS_S_FAILURE;
+	return ret;
+}
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index d54668790f0c..33b87f04b30b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -303,3 +303,33 @@ u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
 err_out:
 	return ret;
 }
+
+/* AES key derivation postprocess: key and randombits must both be 16 or
+ * 32 bytes and equal in length; randombits is then copied to key as-is.
+ */
+u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
+			  struct xdr_netobj *randombits,
+			  struct xdr_netobj *key)
+{
+	u32 ret = EINVAL;
+
+	if (key->len != 16 && key->len != 32) {
+		dprintk("%s: key->len is %d\n", __func__, key->len);
+		goto err_out;
+	}
+	if (randombits->len != 16 && randombits->len != 32) {
+		dprintk("%s: randombits->len is %d\n",
+			__func__, randombits->len);
+		goto err_out;
+	}
+	if (randombits->len != key->len) {
+		dprintk("%s: randombits->len is %d, key->len is %d\n",
+			__func__, randombits->len, key->len);
+		goto err_out;
+	}
+	memcpy(key->data, randombits->data, key->len);
+	ret = 0;
+err_out:
+	return ret;
+}
+
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index ce80f996758a..694ad77c86bf 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -91,6 +91,50 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .cksumlength = 20,
 	  .keyed_cksum = 1,
 	},
+	/*
+	 * AES128
+	 */
+	{
+	  .etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
+	  .name = "aes128-cts",
+	  .encrypt_name = "cts(cbc(aes))",
+	  .cksum_name = "hmac(sha1)",
+	  .encrypt = krb5_encrypt,
+	  .decrypt = krb5_decrypt,
+	  .mk_key = gss_krb5_aes_make_key,
+	  .encrypt_v2 = gss_krb5_aes_encrypt,
+	  .decrypt_v2 = gss_krb5_aes_decrypt,
+	  .signalg = -1,
+	  .sealalg = -1,
+	  .keybytes = 16,
+	  .keylength = 16,
+	  .blocksize = 16,
+	  .cksumlength = 12,
+	  .keyed_cksum = 1,
+	},
+	/*
+	 * AES256
+	 */
+	{
+	  .etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
+	  .ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
+	  .name = "aes256-cts",
+	  .encrypt_name = "cts(cbc(aes))",
+	  .cksum_name = "hmac(sha1)",
+	  .encrypt = krb5_encrypt,
+	  .decrypt = krb5_decrypt,
+	  .mk_key = gss_krb5_aes_make_key,
+	  .encrypt_v2 = gss_krb5_aes_encrypt,
+	  .decrypt_v2 = gss_krb5_aes_decrypt,
+	  .signalg = -1,
+	  .sealalg = -1,
+	  .keybytes = 32,
+	  .keylength = 32,
+	  .blocksize = 16,
+	  .cksumlength = 12,
+	  .keyed_cksum = 1,
+	},
 };
 
 static const int num_supported_enctypes =
@@ -270,20 +314,19 @@ out_err:
 }
 
 struct crypto_blkcipher *
-context_v2_alloc_cipher(struct krb5_ctx *ctx, u8 *key)
+context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
 {
 	struct crypto_blkcipher *cp;
 
-	cp = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name,
-					0, CRYPTO_ALG_ASYNC);
+	cp = crypto_alloc_blkcipher(cname, 0, CRYPTO_ALG_ASYNC);
 	if (IS_ERR(cp)) {
 		dprintk("gss_kerberos_mech: unable to initialize "
-			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+			"crypto algorithm %s\n", cname);
 		return NULL;
 	}
 	if (crypto_blkcipher_setkey(cp, key, ctx->gk5e->keylength)) {
 		dprintk("gss_kerberos_mech: error setting key for "
-			"crypto algorithm %s\n", ctx->gk5e->encrypt_name);
+			"crypto algorithm %s\n", cname);
 		crypto_free_blkcipher(cp);
 		return NULL;
 	}
@@ -315,11 +358,13 @@ context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 	keyout.len = keylen;
 
 	/* seq uses the raw key */
-	ctx->seq = context_v2_alloc_cipher(ctx, rawkey);
+	ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
+					   rawkey);
 	if (ctx->seq == NULL)
 		goto out_err;
 
-	ctx->enc = context_v2_alloc_cipher(ctx, rawkey);
+	ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
+					   rawkey);
 	if (ctx->enc == NULL)
 		goto out_free_seq;
 
@@ -366,7 +411,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 			__func__, err);
 		goto out_err;
 	}
-	ctx->initiator_enc = context_v2_alloc_cipher(ctx, ctx->initiator_seal);
+	ctx->initiator_enc = context_v2_alloc_cipher(ctx,
+						     ctx->gk5e->encrypt_name,
+						     ctx->initiator_seal);
 	if (ctx->initiator_enc == NULL)
 		goto out_err;
 
@@ -379,7 +426,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 			__func__, err);
 		goto out_free_initiator_enc;
 	}
-	ctx->acceptor_enc = context_v2_alloc_cipher(ctx, ctx->acceptor_seal);
+	ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
+						    ctx->gk5e->encrypt_name,
+						    ctx->acceptor_seal);
 	if (ctx->acceptor_enc == NULL)
 		goto out_free_initiator_enc;
 
@@ -423,6 +472,23 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 		goto out_free_acceptor_enc;
 	}
 
+	switch (ctx->enctype) {
+	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
+	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
+		ctx->initiator_enc_aux =
+			context_v2_alloc_cipher(ctx, "cbc(aes)",
+						ctx->initiator_seal);
+		if (ctx->initiator_enc_aux == NULL)
+			goto out_free_acceptor_enc;
+		ctx->acceptor_enc_aux =
+			context_v2_alloc_cipher(ctx, "cbc(aes)",
+						ctx->acceptor_seal);
+		if (ctx->acceptor_enc_aux == NULL) {
+			crypto_free_blkcipher(ctx->initiator_enc_aux);
+			goto out_free_acceptor_enc;
+		}
+	}
+
 	return 0;
 
 out_free_acceptor_enc:
@@ -537,6 +603,8 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
 	crypto_free_blkcipher(kctx->enc);
 	crypto_free_blkcipher(kctx->acceptor_enc);
 	crypto_free_blkcipher(kctx->initiator_enc);
+	crypto_free_blkcipher(kctx->acceptor_enc_aux);
+	crypto_free_blkcipher(kctx->initiator_enc_aux);
 	kfree(kctx->mech_used.data);
 	kfree(kctx);
 }
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 4aa46b28298c..a1a3585fa761 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -113,8 +113,8 @@ out:
 	return 0;
 }
 
-static void
-make_confounder(char *p, u32 conflen)
+void
+gss_krb5_make_confounder(char *p, u32 conflen)
 {
 	static u64 i = 0;
 	u64 *q = (u64 *)p;
@@ -204,7 +204,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	memset(ptr + 4, 0xff, 4);
 	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
 
-	make_confounder(msg_start, blocksize);
+	gss_krb5_make_confounder(msg_start, blocksize);
 
 	if (kctx->gk5e->keyed_cksum)
 		cksumkey = kctx->cksum;
-- 
cgit v1.2.3


From 8b23707612cffdba694dcd18aa8a018918aa86dc Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:02 -0400
Subject: gssd_krb5: arcfour-hmac support

For arcfour-hmac support, the make_checksum function needs a usage
field to correctly calculate the checksum differently for MIC and
WRAP tokens.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  4 ++--
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 15 +++++++++++----
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 13 +++++++++----
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 12 ++++++++----
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |  4 ++--
 5 files changed, 32 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 43148ec9a46c..633f41f11a40 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -235,12 +235,12 @@ enum seal_alg {
 u32
 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
 		struct xdr_buf *body, int body_offset, u8 *cksumkey,
-		struct xdr_netobj *cksumout);
+		unsigned int usage, struct xdr_netobj *cksumout);
 
 u32
 make_checksum_v2(struct krb5_ctx *, char *header, int hdrlen,
 		 struct xdr_buf *body, int body_offset, u8 *key,
-		 struct xdr_netobj *cksum);
+		 unsigned int usage, struct xdr_netobj *cksum);
 
 u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
 		struct xdr_netobj *);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 967484a914f3..33ae7023cf3a 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -132,7 +132,7 @@ checksummer(struct scatterlist *sg, void *data)
 u32
 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
 	      struct xdr_buf *body, int body_offset, u8 *cksumkey,
-	      struct xdr_netobj *cksumout)
+	      unsigned int usage, struct xdr_netobj *cksumout)
 {
 	struct hash_desc                desc;
 	struct scatterlist              sg[1];
@@ -208,7 +208,7 @@ out:
 u32
 make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
 		 struct xdr_buf *body, int body_offset, u8 *cksumkey,
-		 struct xdr_netobj *cksumout)
+		 unsigned int usage, struct xdr_netobj *cksumout)
 {
 	struct hash_desc desc;
 	struct scatterlist sg[1];
@@ -537,15 +537,18 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	int nblocks, nbytes;
 	struct encryptor_desc desc;
 	u32 cbcbytes;
+	unsigned int usage;
 
 	if (kctx->initiate) {
 		cipher = kctx->initiator_enc;
 		aux_cipher = kctx->initiator_enc_aux;
 		cksumkey = kctx->initiator_integ;
+		usage = KG_USAGE_INITIATOR_SEAL;
 	} else {
 		cipher = kctx->acceptor_enc;
 		aux_cipher = kctx->acceptor_enc_aux;
 		cksumkey = kctx->acceptor_integ;
+		usage = KG_USAGE_ACCEPTOR_SEAL;
 	}
 	blocksize = crypto_blkcipher_blocksize(cipher);
 
@@ -590,7 +593,8 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 	buf->pages = pages;
 
 	err = make_checksum_v2(kctx, NULL, 0, buf,
-			       offset + GSS_KRB5_TOK_HDR_LEN, cksumkey, &hmac);
+			       offset + GSS_KRB5_TOK_HDR_LEN,
+			       cksumkey, usage, &hmac);
 	buf->pages = save_pages;
 	if (err)
 		return GSS_S_FAILURE;
@@ -654,15 +658,18 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
 	int nblocks, blocksize, cbcbytes;
 	struct decryptor_desc desc;
+	unsigned int usage;
 
 	if (kctx->initiate) {
 		cipher = kctx->acceptor_enc;
 		aux_cipher = kctx->acceptor_enc_aux;
 		cksum_key = kctx->acceptor_integ;
+		usage = KG_USAGE_ACCEPTOR_SEAL;
 	} else {
 		cipher = kctx->initiator_enc;
 		aux_cipher = kctx->initiator_enc_aux;
 		cksum_key = kctx->initiator_integ;
+		usage = KG_USAGE_INITIATOR_SEAL;
 	}
 	blocksize = crypto_blkcipher_blocksize(cipher);
 
@@ -705,7 +712,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 	our_hmac_obj.data = our_hmac;
 
 	ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
-			       cksum_key, &our_hmac_obj);
+			       cksum_key, usage, &our_hmac_obj);
 	if (ret)
 		goto out_err;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 477a546d19bb..e22fed3d9a1b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -142,7 +142,8 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 	else
 		cksumkey = NULL;
 
-	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey, &md5cksum))
+	if (make_checksum(ctx, ptr, 8, text, 0, cksumkey,
+			  KG_USAGE_SIGN, &md5cksum))
 		return GSS_S_FAILURE;
 
 	memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
@@ -170,6 +171,7 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 	s32 now;
 	u64 seq_send;
 	u8 *cksumkey;
+	unsigned int cksum_usage;
 
 	dprintk("RPC:       %s\n", __func__);
 
@@ -182,13 +184,16 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
 	spin_unlock(&krb5_seq_lock);
 	*((u64 *)(krb5_hdr + 8)) = cpu_to_be64(seq_send);
 
-	if (ctx->initiate)
+	if (ctx->initiate) {
 		cksumkey = ctx->initiator_sign;
-	else
+		cksum_usage = KG_USAGE_INITIATOR_SIGN;
+	} else {
 		cksumkey = ctx->acceptor_sign;
+		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
+	}
 
 	if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
-			     text, 0, cksumkey, &cksumobj))
+			     text, 0, cksumkey, cksum_usage, &cksumobj))
 		return GSS_S_FAILURE;
 
 	memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 4ede4cc4391f..ef91366e3dea 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -115,7 +115,7 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 		cksumkey = NULL;
 
 	if (make_checksum(ctx, ptr, 8, message_buffer, 0,
-			  cksumkey, &md5cksum))
+			  cksumkey, KG_USAGE_SIGN, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
@@ -154,6 +154,7 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
 	u8 *cksumkey;
 	u8 flags;
 	int i;
+	unsigned int cksum_usage;
 
 	dprintk("RPC:       %s\n", __func__);
 
@@ -174,13 +175,16 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
 		if (ptr[i] != 0xff)
 			return GSS_S_DEFECTIVE_TOKEN;
 
-	if (ctx->initiate)
+	if (ctx->initiate) {
 		cksumkey = ctx->acceptor_sign;
-	else
+		cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
+	} else {
 		cksumkey = ctx->initiator_sign;
+		cksum_usage = KG_USAGE_INITIATOR_SIGN;
+	}
 
 	if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
-			     cksumkey, &cksumobj))
+			     cksumkey, cksum_usage, &cksumobj))
 		return GSS_S_FAILURE;
 
 	if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index a1a3585fa761..097cc27494cc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -215,7 +215,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	tmp_pages = buf->pages;
 	buf->pages = pages;
 	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize,
-					cksumkey, &md5cksum))
+					cksumkey, KG_USAGE_SEAL, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
 
@@ -298,7 +298,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 		cksumkey = NULL;
 
 	if (make_checksum(kctx, ptr, 8, buf, crypt_offset,
-						cksumkey, &md5cksum))
+					cksumkey, KG_USAGE_SEAL, &md5cksum))
 		return GSS_S_FAILURE;
 
 	if (memcmp(md5cksum.data, ptr + GSS_KRB5_TOK_HDR_LEN,
-- 
cgit v1.2.3


From fc263a917afad3bda7b823a6edc803a40e7f6015 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:03 -0400
Subject: gss_krb5: Save the raw session key in the context

This is needed for deriving arcfour-hmac keys "on the fly"
using the sequence number or checksum.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h     |  1 +
 net/sunrpc/auth_gss/gss_krb5_mech.c | 27 +++++++++++++--------------
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 633f41f11a40..b0ab827add26 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -101,6 +101,7 @@ struct krb5_ctx {
 	struct crypto_blkcipher *initiator_enc;
 	struct crypto_blkcipher *acceptor_enc_aux;
 	struct crypto_blkcipher *initiator_enc_aux;
+	u8			Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
 	u8			cksum[GSS_KRB5_MAX_KEYLEN];
 	s32			endtime;
 	u32			seq_send;
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 506a2e7d4fad..893fad71e306 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -344,7 +344,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
 }
 
 static int
-context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
+context_derive_keys_des3(struct krb5_ctx *ctx)
 {
 	struct xdr_netobj c, keyin, keyout;
 	u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -353,18 +353,18 @@ context_derive_keys_des3(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 	c.len = GSS_KRB5_K5CLENGTH;
 	c.data = cdata;
 
-	keyin.data = rawkey;
-	keyin.len = keylen;
-	keyout.len = keylen;
+	keyin.data = ctx->Ksess;
+	keyin.len = ctx->gk5e->keylength;
+	keyout.len = ctx->gk5e->keylength;
 
 	/* seq uses the raw key */
 	ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
-					   rawkey);
+					   ctx->Ksess);
 	if (ctx->seq == NULL)
 		goto out_err;
 
 	ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
-					   rawkey);
+					   ctx->Ksess);
 	if (ctx->enc == NULL)
 		goto out_free_seq;
 
@@ -389,7 +389,7 @@ out_err:
 }
 
 static int
-context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
+context_derive_keys_new(struct krb5_ctx *ctx)
 {
 	struct xdr_netobj c, keyin, keyout;
 	u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -398,9 +398,9 @@ context_derive_keys_new(struct krb5_ctx *ctx, u8 *rawkey, u32 keylen)
 	c.len = GSS_KRB5_K5CLENGTH;
 	c.data = cdata;
 
-	keyin.data = rawkey;
-	keyin.len = keylen;
-	keyout.len = keylen;
+	keyin.data = ctx->Ksess;
+	keyin.len = ctx->gk5e->keylength;
+	keyout.len = ctx->gk5e->keylength;
 
 	/* initiator seal encryption */
 	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
@@ -502,7 +502,6 @@ out_err:
 static int
 gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 {
-	u8 rawkey[GSS_KRB5_MAX_KEYLEN];
 	int keylen;
 
 	p = simple_get_bytes(p, end, &ctx->flags, sizeof(ctx->flags));
@@ -538,7 +537,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	}
 	keylen = ctx->gk5e->keylength;
 
-	p = simple_get_bytes(p, end, rawkey, keylen);
+	p = simple_get_bytes(p, end, ctx->Ksess, keylen);
 	if (IS_ERR(p))
 		goto out_err;
 
@@ -557,10 +556,10 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 
 	switch (ctx->enctype) {
 	case ENCTYPE_DES3_CBC_RAW:
-		return context_derive_keys_des3(ctx, rawkey, keylen);
+		return context_derive_keys_des3(ctx);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
-		return context_derive_keys_new(ctx, rawkey, keylen);
+		return context_derive_keys_new(ctx);
 	default:
 		return -EINVAL;
 	}
-- 
cgit v1.2.3


From 1dbd9029f3024d058da1cf6c6658c28aac2e4e1c Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:04 -0400
Subject: gssd_krb5: More arcfour-hmac support

For the arcfour-hmac support, the make_seq_num and get_seq_num
functions need access to the kerberos context structure.
This will be used in a later patch.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       | 5 +++--
 net/sunrpc/auth_gss/gss_krb5_seal.c   | 5 ++---
 net/sunrpc/auth_gss/gss_krb5_seqnum.c | 6 ++++--
 net/sunrpc/auth_gss/gss_krb5_unseal.c | 3 ++-
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 6 +++---
 5 files changed, 14 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index b0ab827add26..d840856edecc 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -275,12 +275,13 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *inbuf,
 		    int offset);
 
 s32
-krb5_make_seq_num(struct crypto_blkcipher *key,
+krb5_make_seq_num(struct krb5_ctx *kctx,
+		struct crypto_blkcipher *key,
 		int direction,
 		u32 seqnum, unsigned char *cksum, unsigned char *buf);
 
 s32
-krb5_get_seq_num(struct crypto_blkcipher *key,
+krb5_get_seq_num(struct krb5_ctx *kctx,
 	       unsigned char *cksum,
 	       unsigned char *buf, int *direction, u32 *seqnum);
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index e22fed3d9a1b..36fe487d93d2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -152,9 +152,8 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
 	seq_send = ctx->seq_send++;
 	spin_unlock(&krb5_seq_lock);
 
-	if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN,
-			      ptr + 8))
+	if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
+			      seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
 		return GSS_S_FAILURE;
 
 	return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 6331cd6866ec..83b593084976 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -40,7 +40,8 @@
 #endif
 
 s32
-krb5_make_seq_num(struct crypto_blkcipher *key,
+krb5_make_seq_num(struct krb5_ctx *kctx,
+		struct crypto_blkcipher *key,
 		int direction,
 		u32 seqnum,
 		unsigned char *cksum, unsigned char *buf)
@@ -61,13 +62,14 @@ krb5_make_seq_num(struct crypto_blkcipher *key,
 }
 
 s32
-krb5_get_seq_num(struct crypto_blkcipher *key,
+krb5_get_seq_num(struct krb5_ctx *kctx,
 	       unsigned char *cksum,
 	       unsigned char *buf,
 	       int *direction, u32 *seqnum)
 {
 	s32 code;
 	unsigned char plain[8];
+	struct crypto_blkcipher *key = kctx->seq;
 
 	dprintk("RPC:       krb5_get_seq_num:\n");
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index ef91366e3dea..97eb91b8c70c 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -131,7 +131,8 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(ctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8, &direction, &seqnum))
+	if (krb5_get_seq_num(ctx, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
+			     &direction, &seqnum))
 		return GSS_S_FAILURE;
 
 	if ((ctx->initiate && direction != 0xff) ||
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 097cc27494cc..a95e7e0ac0e3 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -227,7 +227,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 
 	/* XXX would probably be more efficient to compute checksum
 	 * and encrypt at the same time: */
-	if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
+	if ((krb5_make_seq_num(kctx, kctx->seq, kctx->initiate ? 0 : 0xff,
 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
 		return GSS_S_FAILURE;
 
@@ -314,8 +314,8 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(kctx->seq, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8,
-				    &direction, &seqnum))
+	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
+				    ptr + 8, &direction, &seqnum))
 		return GSS_S_BAD_SIG;
 
 	if ((kctx->initiate && direction != 0xff) ||
-- 
cgit v1.2.3


From 5af46547ec451918f3ba51efe59b317d33adf701 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:05 -0400
Subject: gss_krb5: Use confounder length in wrap code

All encryption types use a confounder at the beginning of the
wrap token.  In all encryption types except arcfour-hmac, the
confounder length is the same as the blocksize.  arcfour-hmac has
a blocksize of one, but uses an eight-byte confounder.

Add an entry to the crypto framework definitions for the
confounder length and change the wrap/unwrap code to use
the confounder length rather than assuming it is always
the blocksize.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |  3 +++
 net/sunrpc/auth_gss/gss_krb5_crypto.c |  6 +++---
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  4 ++++
 net/sunrpc/auth_gss/gss_krb5_wrap.c   | 12 +++++++-----
 4 files changed, 17 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index d840856edecc..79f6ac2492f5 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -64,6 +64,9 @@ struct gss_krb5_enctype {
 	const u16		signalg;	/* signing algorithm */
 	const u16		sealalg;	/* sealing algorithm */
 	const u32		blocksize;	/* encryption blocksize */
+	const u32		conflen;	/* confounder length
+						   (normally the same as
+						   the blocksize) */
 	const u32		cksumlength;	/* checksum length */
 	const u32		keyed_cksum;	/* is it a keyed cksum? */
 	const u32		keybytes;	/* raw key len, in bytes */
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 33ae7023cf3a..ed4106a3daf2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -554,9 +554,9 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
 
 	/* hide the gss token header and insert the confounder */
 	offset += GSS_KRB5_TOK_HDR_LEN;
-	if (xdr_extend_head(buf, offset, blocksize))
+	if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
 		return GSS_S_FAILURE;
-	gss_krb5_make_confounder(buf->head[0].iov_base + offset, blocksize);
+	gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
 	offset -= GSS_KRB5_TOK_HDR_LEN;
 
 	if (buf->tail[0].iov_base != NULL) {
@@ -726,7 +726,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
 		ret = GSS_S_BAD_SIG;
 		goto out_err;
 	}
-	*headskip = crypto_blkcipher_blocksize(cipher);
+	*headskip = kctx->gk5e->conflen;
 	*tailskip = kctx->gk5e->cksumlength;
 out_err:
 	if (ret && ret != GSS_S_BAD_SIG)
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 893fad71e306..ef6b31349046 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -68,6 +68,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .keybytes = 7,
 	  .keylength = 8,
 	  .blocksize = 8,
+	  .conflen = 8,
 	  .cksumlength = 8,
 	  .keyed_cksum = 0,
 	},
@@ -88,6 +89,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .keybytes = 21,
 	  .keylength = 24,
 	  .blocksize = 8,
+	  .conflen = 8,
 	  .cksumlength = 20,
 	  .keyed_cksum = 1,
 	},
@@ -110,6 +112,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .keybytes = 16,
 	  .keylength = 16,
 	  .blocksize = 16,
+	  .conflen = 16,
 	  .cksumlength = 12,
 	  .keyed_cksum = 1,
 	},
@@ -132,6 +135,7 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .keybytes = 32,
 	  .keylength = 32,
 	  .blocksize = 16,
+	  .conflen = 16,
 	  .cksumlength = 12,
 	  .keyed_cksum = 1,
 	},
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index a95e7e0ac0e3..383db891c835 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -168,6 +168,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	struct page		**tmp_pages;
 	u32			seq_send;
 	u8			*cksumkey;
+	u32			conflen = kctx->gk5e->conflen;
 
 	dprintk("RPC:       %s\n", __func__);
 
@@ -176,7 +177,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	gss_krb5_add_padding(buf, offset, blocksize);
 	BUG_ON((buf->len - offset) % blocksize);
-	plainlen = blocksize + buf->len - offset;
+	plainlen = conflen + buf->len - offset;
 
 	headlen = g_token_size(&kctx->mech_used,
 		GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength + plainlen) -
@@ -204,7 +205,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	memset(ptr + 4, 0xff, 4);
 	*(__be16 *)(ptr + 4) = cpu_to_le16(kctx->gk5e->sealalg);
 
-	gss_krb5_make_confounder(msg_start, blocksize);
+	gss_krb5_make_confounder(msg_start, conflen);
 
 	if (kctx->gk5e->keyed_cksum)
 		cksumkey = kctx->cksum;
@@ -214,7 +215,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 	/* XXXJBF: UGH!: */
 	tmp_pages = buf->pages;
 	buf->pages = pages;
-	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - blocksize,
+	if (make_checksum(kctx, ptr, 8, buf, offset + headlen - conflen,
 					cksumkey, KG_USAGE_SEAL, &md5cksum))
 		return GSS_S_FAILURE;
 	buf->pages = tmp_pages;
@@ -231,7 +232,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
 		return GSS_S_FAILURE;
 
-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
+	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - conflen,
 									pages))
 		return GSS_S_FAILURE;
 
@@ -254,6 +255,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	void			*data_start, *orig_start;
 	int			data_len;
 	int			blocksize;
+	u32			conflen = kctx->gk5e->conflen;
 	int			crypt_offset;
 	u8			*cksumkey;
 
@@ -327,7 +329,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 
 	blocksize = crypto_blkcipher_blocksize(kctx->enc);
 	data_start = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) +
-					blocksize;
+					conflen;
 	orig_start = buf->head[0].iov_base + offset;
 	data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start;
 	memmove(orig_start, data_start, data_len);
-- 
cgit v1.2.3


From fffdaef2eb4a7333952e55cf97f1fc0fcc35f981 Mon Sep 17 00:00:00 2001
From: Kevin Coffman <kwc@citi.umich.edu>
Date: Wed, 17 Mar 2010 13:03:06 -0400
Subject: gss_krb5: Add support for rc4-hmac encryption

Add the changes necessary for kernel support of the rc4-hmac Kerberos
encryption type used by Microsoft and described in RFC 4757.

Signed-off-by: Kevin Coffman <kwc@citi.umich.edu>
Signed-off-by: Steve Dickson <steved@redhat.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_krb5.h       |   9 ++
 net/sunrpc/auth_gss/gss_krb5_crypto.c | 255 ++++++++++++++++++++++++++++++++++
 net/sunrpc/auth_gss/gss_krb5_mech.c   |  96 +++++++++++++
 net/sunrpc/auth_gss/gss_krb5_seal.c   |   1 +
 net/sunrpc/auth_gss/gss_krb5_seqnum.c |  77 ++++++++++
 net/sunrpc/auth_gss/gss_krb5_unseal.c |   1 +
 net/sunrpc/auth_gss/gss_krb5_wrap.c   |  66 +++++++--
 7 files changed, 492 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 79f6ac2492f5..5e774a5abf2c 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -317,5 +317,14 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset,
 		     struct xdr_buf *buf, u32 *plainoffset,
 		     u32 *plainlen);
 
+int
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx,
+		       struct crypto_blkcipher *cipher,
+		       unsigned char *cksum);
+
+int
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx,
+		       struct crypto_blkcipher *cipher,
+		       s32 seqnum);
 void
 gss_krb5_make_confounder(char *p, u32 conflen);
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index ed4106a3daf2..75ee993ea057 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -124,6 +124,114 @@ checksummer(struct scatterlist *sg, void *data)
 	return crypto_hash_update(desc, sg, sg->length);
 }
 
+static int
+arcfour_hmac_md5_usage_to_salt(unsigned int usage, u8 salt[4])
+{
+	unsigned int ms_usage;
+
+	switch (usage) {
+	case KG_USAGE_SIGN:
+		ms_usage = 15;
+		break;
+	case KG_USAGE_SEAL:
+		ms_usage = 13;
+		break;
+	default:
+		return EINVAL;;
+	}
+	salt[0] = (ms_usage >> 0) & 0xff;
+	salt[1] = (ms_usage >> 8) & 0xff;
+	salt[2] = (ms_usage >> 16) & 0xff;
+	salt[3] = (ms_usage >> 24) & 0xff;
+
+	return 0;
+}
+
+static u32
+make_checksum_hmac_md5(struct krb5_ctx *kctx, char *header, int hdrlen,
+		       struct xdr_buf *body, int body_offset, u8 *cksumkey,
+		       unsigned int usage, struct xdr_netobj *cksumout)
+{
+	struct hash_desc                desc;
+	struct scatterlist              sg[1];
+	int err;
+	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
+	u8 rc4salt[4];
+	struct crypto_hash *md5;
+	struct crypto_hash *hmac_md5;
+
+	if (cksumkey == NULL)
+		return GSS_S_FAILURE;
+
+	if (cksumout->len < kctx->gk5e->cksumlength) {
+		dprintk("%s: checksum buffer length, %u, too small for %s\n",
+			__func__, cksumout->len, kctx->gk5e->name);
+		return GSS_S_FAILURE;
+	}
+
+	if (arcfour_hmac_md5_usage_to_salt(usage, rc4salt)) {
+		dprintk("%s: invalid usage value %u\n", __func__, usage);
+		return GSS_S_FAILURE;
+	}
+
+	md5 = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(md5))
+		return GSS_S_FAILURE;
+
+	hmac_md5 = crypto_alloc_hash(kctx->gk5e->cksum_name, 0,
+				     CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hmac_md5)) {
+		crypto_free_hash(md5);
+		return GSS_S_FAILURE;
+	}
+
+	desc.tfm = md5;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out;
+	sg_init_one(sg, rc4salt, 4);
+	err = crypto_hash_update(&desc, sg, 4);
+	if (err)
+		goto out;
+
+	sg_init_one(sg, header, hdrlen);
+	err = crypto_hash_update(&desc, sg, hdrlen);
+	if (err)
+		goto out;
+	err = xdr_process_buf(body, body_offset, body->len - body_offset,
+			      checksummer, &desc);
+	if (err)
+		goto out;
+	err = crypto_hash_final(&desc, checksumdata);
+	if (err)
+		goto out;
+
+	desc.tfm = hmac_md5;
+	desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out;
+	err = crypto_hash_setkey(hmac_md5, cksumkey, kctx->gk5e->keylength);
+	if (err)
+		goto out;
+
+	sg_init_one(sg, checksumdata, crypto_hash_digestsize(md5));
+	err = crypto_hash_digest(&desc, sg, crypto_hash_digestsize(md5),
+				 checksumdata);
+	if (err)
+		goto out;
+
+	memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
+	cksumout->len = kctx->gk5e->cksumlength;
+out:
+	crypto_free_hash(md5);
+	crypto_free_hash(hmac_md5);
+	return err ? GSS_S_FAILURE : 0;
+}
+
 /*
  * checksum the plaintext data and hdrlen bytes of the token header
  * The checksum is performed over the first 8 bytes of the
@@ -140,6 +248,11 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
 	u8 checksumdata[GSS_KRB5_MAX_CKSUM_LEN];
 	unsigned int checksumlen;
 
+	if (kctx->gk5e->ctype == CKSUMTYPE_HMAC_MD5_ARCFOUR)
+		return make_checksum_hmac_md5(kctx, header, hdrlen,
+					      body, body_offset,
+					      cksumkey, usage, cksumout);
+
 	if (cksumout->len < kctx->gk5e->cksumlength) {
 		dprintk("%s: checksum buffer length, %u, too small for %s\n",
 			__func__, cksumout->len, kctx->gk5e->name);
@@ -733,3 +846,145 @@ out_err:
 		ret = GSS_S_FAILURE;
 	return ret;
 }
+
+/*
+ * Compute Kseq given the initial session key and the checksum.
+ * Set the key of the given cipher.
+ */
+int
+krb5_rc4_setup_seq_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
+		       unsigned char *cksum)
+{
+	struct crypto_hash *hmac;
+	struct hash_desc desc;
+	struct scatterlist sg[1];
+	u8 Kseq[GSS_KRB5_MAX_KEYLEN];
+	u32 zeroconstant = 0;
+	int err;
+
+	dprintk("%s: entered\n", __func__);
+
+	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hmac)) {
+		dprintk("%s: error %ld, allocating hash '%s'\n",
+			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
+		return PTR_ERR(hmac);
+	}
+
+	desc.tfm = hmac;
+	desc.flags = 0;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out_err;
+
+	/* Compute intermediate Kseq from session key */
+	err = crypto_hash_setkey(hmac, kctx->Ksess, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, &zeroconstant, 4);
+
+	err = crypto_hash_digest(&desc, sg, 4, Kseq);
+	if (err)
+		goto out_err;
+
+	/* Compute final Kseq from the checksum and intermediate Kseq */
+	err = crypto_hash_setkey(hmac, Kseq, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	sg_set_buf(sg, cksum, 8);
+
+	err = crypto_hash_digest(&desc, sg, 8, Kseq);
+	if (err)
+		goto out_err;
+
+	err = crypto_blkcipher_setkey(cipher, Kseq, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	err = 0;
+
+out_err:
+	crypto_free_hash(hmac);
+	dprintk("%s: returning %d\n", __func__, err);
+	return err;
+}
+
+/*
+ * Compute Kcrypt given the initial session key and the plaintext seqnum.
+ * Set the key of the given cipher.
+ */
+int
+krb5_rc4_setup_enc_key(struct krb5_ctx *kctx, struct crypto_blkcipher *cipher,
+		       s32 seqnum)
+{
+	struct crypto_hash *hmac;
+	struct hash_desc desc;
+	struct scatterlist sg[1];
+	u8 Kcrypt[GSS_KRB5_MAX_KEYLEN];
+	u8 zeroconstant[4] = {0};
+	u8 seqnumarray[4];
+	int err, i;
+
+	dprintk("%s: entered, seqnum %u\n", __func__, seqnum);
+
+	hmac = crypto_alloc_hash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hmac)) {
+		dprintk("%s: error %ld, allocating hash '%s'\n",
+			__func__, PTR_ERR(hmac), kctx->gk5e->cksum_name);
+		return PTR_ERR(hmac);
+	}
+
+	desc.tfm = hmac;
+	desc.flags = 0;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out_err;
+
+	/* Compute intermediate Kcrypt from session key */
+	for (i = 0; i < kctx->gk5e->keylength; i++)
+		Kcrypt[i] = kctx->Ksess[i] ^ 0xf0;
+
+	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, zeroconstant, 4);
+
+	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
+	if (err)
+		goto out_err;
+
+	/* Compute final Kcrypt from the seqnum and intermediate Kcrypt */
+	err = crypto_hash_setkey(hmac, Kcrypt, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	seqnumarray[0] = (unsigned char) ((seqnum >> 24) & 0xff);
+	seqnumarray[1] = (unsigned char) ((seqnum >> 16) & 0xff);
+	seqnumarray[2] = (unsigned char) ((seqnum >> 8) & 0xff);
+	seqnumarray[3] = (unsigned char) ((seqnum >> 0) & 0xff);
+
+	sg_set_buf(sg, seqnumarray, 4);
+
+	err = crypto_hash_digest(&desc, sg, 4, Kcrypt);
+	if (err)
+		goto out_err;
+
+	err = crypto_blkcipher_setkey(cipher, Kcrypt, kctx->gk5e->keylength);
+	if (err)
+		goto out_err;
+
+	err = 0;
+
+out_err:
+	crypto_free_hash(hmac);
+	dprintk("%s: returning %d\n", __func__, err);
+	return err;
+}
+
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index ef6b31349046..54eda5f0c58b 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -72,6 +72,27 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
 	  .cksumlength = 8,
 	  .keyed_cksum = 0,
 	},
+	/*
+	 * RC4-HMAC
+	 */
+	{
+	  .etype = ENCTYPE_ARCFOUR_HMAC,
+	  .ctype = CKSUMTYPE_HMAC_MD5_ARCFOUR,
+	  .name = "rc4-hmac",
+	  .encrypt_name = "ecb(arc4)",
+	  .cksum_name = "hmac(md5)",
+	  .encrypt = krb5_encrypt,
+	  .decrypt = krb5_decrypt,
+	  .mk_key = NULL,
+	  .signalg = SGN_ALG_HMAC_MD5,
+	  .sealalg = SEAL_ALG_MICROSOFT_RC4,
+	  .keybytes = 16,
+	  .keylength = 16,
+	  .blocksize = 1,
+	  .conflen = 8,
+	  .cksumlength = 8,
+	  .keyed_cksum = 1,
+	},
 	/*
 	 * 3DES
 	 */
@@ -392,6 +413,79 @@ out_err:
 	return -EINVAL;
 }
 
+/*
+ * Note that RC4 depends on deriving keys using the sequence
+ * number or the checksum of a token.  Therefore, the final keys
+ * cannot be calculated until the token is being constructed!
+ */
+static int
+context_derive_keys_rc4(struct krb5_ctx *ctx)
+{
+	struct crypto_hash *hmac;
+	char sigkeyconstant[] = "signaturekey";
+	int slen = strlen(sigkeyconstant) + 1;	/* include null terminator */
+	struct hash_desc desc;
+	struct scatterlist sg[1];
+	int err;
+
+	dprintk("RPC:       %s: entered\n", __func__);
+	/*
+	 * derive cksum (aka Ksign) key
+	 */
+	hmac = crypto_alloc_hash(ctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+	if (IS_ERR(hmac)) {
+		dprintk("%s: error %ld allocating hash '%s'\n",
+			__func__, PTR_ERR(hmac), ctx->gk5e->cksum_name);
+		err = PTR_ERR(hmac);
+		goto out_err;
+	}
+
+	err = crypto_hash_setkey(hmac, ctx->Ksess, ctx->gk5e->keylength);
+	if (err)
+		goto out_err_free_hmac;
+
+	sg_init_table(sg, 1);
+	sg_set_buf(sg, sigkeyconstant, slen);
+
+	desc.tfm = hmac;
+	desc.flags = 0;
+
+	err = crypto_hash_init(&desc);
+	if (err)
+		goto out_err_free_hmac;
+
+	err = crypto_hash_digest(&desc, sg, slen, ctx->cksum);
+	if (err)
+		goto out_err_free_hmac;
+	/*
+	 * allocate blkciphers for data and seqnum encryption
+	 */
+	ctx->enc = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
+					  CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ctx->enc)) {
+		err = PTR_ERR(ctx->enc);
+		goto out_err_free_hmac;
+	}
+
+	ctx->seq = crypto_alloc_blkcipher(ctx->gk5e->encrypt_name, 0,
+					  CRYPTO_ALG_ASYNC);
+	if (IS_ERR(ctx->seq)) {
+		crypto_free_blkcipher(ctx->enc);
+		err = PTR_ERR(ctx->seq);
+		goto out_err_free_hmac;
+	}
+
+	dprintk("RPC:       %s: returning success\n", __func__);
+
+	err = 0;
+
+out_err_free_hmac:
+	crypto_free_hash(hmac);
+out_err:
+	dprintk("RPC:       %s: returning %d\n", __func__, err);
+	return err;
+}
+
 static int
 context_derive_keys_new(struct krb5_ctx *ctx)
 {
@@ -561,6 +655,8 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	switch (ctx->enctype) {
 	case ENCTYPE_DES3_CBC_RAW:
 		return context_derive_keys_des3(ctx);
+	case ENCTYPE_ARCFOUR_HMAC:
+		return context_derive_keys_rc4(ctx);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
 		return context_derive_keys_new(ctx);
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 36fe487d93d2..d7941eab7796 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -213,6 +213,7 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
 		return gss_get_mic_v1(ctx, text, token);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 83b593084976..415c013ba382 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -39,6 +39,38 @@
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
+static s32
+krb5_make_rc4_seq_num(struct krb5_ctx *kctx, int direction, s32 seqnum,
+		      unsigned char *cksum, unsigned char *buf)
+{
+	struct crypto_blkcipher *cipher;
+	unsigned char plain[8];
+	s32 code;
+
+	dprintk("RPC:       %s:\n", __func__);
+	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+					CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	plain[0] = (unsigned char) ((seqnum >> 24) & 0xff);
+	plain[1] = (unsigned char) ((seqnum >> 16) & 0xff);
+	plain[2] = (unsigned char) ((seqnum >> 8) & 0xff);
+	plain[3] = (unsigned char) ((seqnum >> 0) & 0xff);
+	plain[4] = direction;
+	plain[5] = direction;
+	plain[6] = direction;
+	plain[7] = direction;
+
+	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
+	if (code)
+		goto out;
+
+	code = krb5_encrypt(cipher, cksum, plain, buf, 8);
+out:
+	crypto_free_blkcipher(cipher);
+	return code;
+}
 s32
 krb5_make_seq_num(struct krb5_ctx *kctx,
 		struct crypto_blkcipher *key,
@@ -48,6 +80,10 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
 {
 	unsigned char plain[8];
 
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
+		return krb5_make_rc4_seq_num(kctx, direction, seqnum,
+					     cksum, buf);
+
 	plain[0] = (unsigned char) (seqnum & 0xff);
 	plain[1] = (unsigned char) ((seqnum >> 8) & 0xff);
 	plain[2] = (unsigned char) ((seqnum >> 16) & 0xff);
@@ -61,6 +97,43 @@ krb5_make_seq_num(struct krb5_ctx *kctx,
 	return krb5_encrypt(key, cksum, plain, buf, 8);
 }
 
+static s32
+krb5_get_rc4_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
+		     unsigned char *buf, int *direction, s32 *seqnum)
+{
+	struct crypto_blkcipher *cipher;
+	unsigned char plain[8];
+	s32 code;
+
+	dprintk("RPC:       %s:\n", __func__);
+	cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+					CRYPTO_ALG_ASYNC);
+	if (IS_ERR(cipher))
+		return PTR_ERR(cipher);
+
+	code = krb5_rc4_setup_seq_key(kctx, cipher, cksum);
+	if (code)
+		goto out;
+
+	code = krb5_decrypt(cipher, cksum, buf, plain, 8);
+	if (code)
+		goto out;
+
+	if ((plain[4] != plain[5]) || (plain[4] != plain[6])
+				   || (plain[4] != plain[7])) {
+		code = (s32)KG_BAD_SEQ;
+		goto out;
+	}
+
+	*direction = plain[4];
+
+	*seqnum = ((plain[0] << 24) | (plain[1] << 16) |
+					(plain[2] << 8) | (plain[3]));
+out:
+	crypto_free_blkcipher(cipher);
+	return code;
+}
+
 s32
 krb5_get_seq_num(struct krb5_ctx *kctx,
 	       unsigned char *cksum,
@@ -73,6 +146,10 @@ krb5_get_seq_num(struct krb5_ctx *kctx,
 
 	dprintk("RPC:       krb5_get_seq_num:\n");
 
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC)
+		return krb5_get_rc4_seq_num(kctx, cksum, buf,
+					    direction, seqnum);
+
 	if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
 		return code;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index 97eb91b8c70c..6cd930f3678f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -216,6 +216,7 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
 		return gss_verify_mic_v1(ctx, message_buffer, read_token);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 383db891c835..2763e3e48db4 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -232,9 +232,26 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
 			       seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8)))
 		return GSS_S_FAILURE;
 
-	if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - conflen,
-									pages))
-		return GSS_S_FAILURE;
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
+		struct crypto_blkcipher *cipher;
+		int err;
+		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+						CRYPTO_ALG_ASYNC);
+		if (IS_ERR(cipher))
+			return GSS_S_FAILURE;
+
+		krb5_rc4_setup_enc_key(kctx, cipher, seq_send);
+
+		err = gss_encrypt_xdr_buf(cipher, buf,
+					  offset + headlen - conflen, pages);
+		crypto_free_blkcipher(cipher);
+		if (err)
+			return GSS_S_FAILURE;
+	} else {
+		if (gss_encrypt_xdr_buf(kctx->enc, buf,
+					offset + headlen - conflen, pages))
+			return GSS_S_FAILURE;
+	}
 
 	return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
@@ -291,8 +308,37 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 	 */
 	crypt_offset = ptr + (GSS_KRB5_TOK_HDR_LEN + kctx->gk5e->cksumlength) -
 					(unsigned char *)buf->head[0].iov_base;
-	if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
-		return GSS_S_DEFECTIVE_TOKEN;
+
+	/*
+	 * Need plaintext seqnum to derive encryption key for arcfour-hmac
+	 */
+	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
+			     ptr + 8, &direction, &seqnum))
+		return GSS_S_BAD_SIG;
+
+	if ((kctx->initiate && direction != 0xff) ||
+	    (!kctx->initiate && direction != 0))
+		return GSS_S_BAD_SIG;
+
+	if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) {
+		struct crypto_blkcipher *cipher;
+		int err;
+
+		cipher = crypto_alloc_blkcipher(kctx->gk5e->encrypt_name, 0,
+						CRYPTO_ALG_ASYNC);
+		if (IS_ERR(cipher))
+			return GSS_S_FAILURE;
+
+		krb5_rc4_setup_enc_key(kctx, cipher, seqnum);
+
+		err = gss_decrypt_xdr_buf(cipher, buf, crypt_offset);
+		crypto_free_blkcipher(cipher);
+		if (err)
+			return GSS_S_DEFECTIVE_TOKEN;
+	} else {
+		if (gss_decrypt_xdr_buf(kctx->enc, buf, crypt_offset))
+			return GSS_S_DEFECTIVE_TOKEN;
+	}
 
 	if (kctx->gk5e->keyed_cksum)
 		cksumkey = kctx->cksum;
@@ -316,14 +362,6 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf)
 
 	/* do sequencing checks */
 
-	if (krb5_get_seq_num(kctx, ptr + GSS_KRB5_TOK_HDR_LEN,
-				    ptr + 8, &direction, &seqnum))
-		return GSS_S_BAD_SIG;
-
-	if ((kctx->initiate && direction != 0xff) ||
-	    (!kctx->initiate && direction != 0))
-		return GSS_S_BAD_SIG;
-
 	/* Copy the data back to the right position.  XXX: Would probably be
 	 * better to copy and encrypt at the same time. */
 
@@ -521,6 +559,7 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
 		return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
@@ -538,6 +577,7 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf)
 		BUG();
 	case ENCTYPE_DES_CBC_RAW:
 	case ENCTYPE_DES3_CBC_RAW:
+	case ENCTYPE_ARCFOUR_HMAC:
 		return gss_unwrap_kerberos_v1(kctx, offset, buf);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
-- 
cgit v1.2.3


From 2d36bfde8565b315e624302d12da5a7c9d195522 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Apr 2010 16:22:45 -0400
Subject: NFS: Add helper functions for allocating filehandles and fattr
 structs

NFS Filehandles and struct fattr are really too large to be allocated on
the stack. This patch adds in a couple of helper functions to allocate them
dynamically instead.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/inode.c         | 20 ++++++++++++++++++++
 include/linux/nfs_fs.h | 14 ++++++++++++++
 2 files changed, 34 insertions(+)

(limited to 'include/linux')

diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 50a56edca0b5..0f9852ab87bb 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -916,6 +916,26 @@ void nfs_fattr_init(struct nfs_fattr *fattr)
 	fattr->gencount = nfs_inc_attr_generation_counter();
 }
 
+struct nfs_fattr *nfs_alloc_fattr(void)
+{
+	struct nfs_fattr *fattr;
+
+	fattr = kmalloc(sizeof(*fattr), GFP_NOFS);
+	if (fattr != NULL)
+		nfs_fattr_init(fattr);
+	return fattr;
+}
+
+struct nfs_fh *nfs_alloc_fhandle(void)
+{
+	struct nfs_fh *fh;
+
+	fh = kmalloc(sizeof(struct nfs_fh), GFP_NOFS);
+	if (fh != NULL)
+		fh->size = 0;
+	return fh;
+}
+
 /**
  * nfs_inode_attrs_need_update - check if the inode attributes need updating
  * @inode - pointer to inode
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 07ce4609fe50..77c2ae53431c 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -356,6 +356,20 @@ extern struct nfs_open_context *nfs_find_open_context(struct inode *inode, struc
 extern u64 nfs_compat_user_ino64(u64 fileid);
 extern void nfs_fattr_init(struct nfs_fattr *fattr);
 
+extern struct nfs_fattr *nfs_alloc_fattr(void);
+
+static inline void nfs_free_fattr(const struct nfs_fattr *fattr)
+{
+	kfree(fattr);
+}
+
+extern struct nfs_fh *nfs_alloc_fhandle(void);
+
+static inline void nfs_free_fhandle(const struct nfs_fh *fh)
+{
+	kfree(fh);
+}
+
 /* linux/net/ipv4/ipconfig.c: trims ip addr off front of name, too. */
 extern __be32 root_nfs_parse_addr(char *name); /*__init*/
 extern unsigned long nfs_inc_attr_generation_counter(void);
-- 
cgit v1.2.3


From d346890bea062d697e24fb4e34591428021ad011 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Apr 2010 16:22:50 -0400
Subject: NFS: Reduce stack footprint of nfs_proc_remove()

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs3proc.c       | 13 +++++++++----
 fs/nfs/nfs3xdr.c        |  2 +-
 fs/nfs/nfs4proc.c       | 13 +++++++++----
 fs/nfs/nfs4xdr.c        |  2 +-
 fs/nfs/unlink.c         |  4 +++-
 include/linux/nfs_xdr.h |  2 +-
 6 files changed, 24 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 088dceb513b8..80378d1283cb 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -406,12 +406,17 @@ nfs3_proc_remove(struct inode *dir, struct qstr *name)
 		.rpc_argp = &arg,
 		.rpc_resp = &res,
 	};
-	int			status;
+	int status = -ENOMEM;
 
 	dprintk("NFS call  remove %s\n", name->name);
-	nfs_fattr_init(&res.dir_attr);
+	res.dir_attr = nfs_alloc_fattr();
+	if (res.dir_attr == NULL)
+		goto out;
+
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
-	nfs_post_op_update_inode(dir, &res.dir_attr);
+	nfs_post_op_update_inode(dir, res.dir_attr);
+	nfs_free_fattr(res.dir_attr);
+out:
 	dprintk("NFS reply remove: %d\n", status);
 	return status;
 }
@@ -429,7 +434,7 @@ nfs3_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	if (nfs3_async_handle_jukebox(task, dir))
 		return 0;
 	res = task->tk_msg.rpc_resp;
-	nfs_post_op_update_inode(dir, &res->dir_attr);
+	nfs_post_op_update_inode(dir, res->dir_attr);
 	return 1;
 }
 
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 56a86f6ac8b5..75dcfc7da365 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -762,7 +762,7 @@ nfs3_xdr_wccstat(struct rpc_rqst *req, __be32 *p, struct nfs_fattr *fattr)
 static int
 nfs3_xdr_removeres(struct rpc_rqst *req, __be32 *p, struct nfs_removeres *res)
 {
-	return nfs3_xdr_wccstat(req, p, &res->dir_attr);
+	return nfs3_xdr_wccstat(req, p, res->dir_attr);
 }
 
 /*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 0ffd4cfd3b1f..d0cb6e163320 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2599,14 +2599,19 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
-	int			status;
+	int status = -ENOMEM;
+
+	res.dir_attr = nfs_alloc_fattr();
+	if (res.dir_attr == NULL)
+		goto out;
 
-	nfs_fattr_init(&res.dir_attr);
 	status = nfs4_call_sync(server, &msg, &args, &res, 1);
 	if (status == 0) {
 		update_changeattr(dir, &res.cinfo);
-		nfs_post_op_update_inode(dir, &res.dir_attr);
+		nfs_post_op_update_inode(dir, res.dir_attr);
 	}
+	nfs_free_fattr(res.dir_attr);
+out:
 	return status;
 }
 
@@ -2641,7 +2646,7 @@ static int nfs4_proc_unlink_done(struct rpc_task *task, struct inode *dir)
 	if (nfs4_async_handle_error(task, res->server, NULL) == -EAGAIN)
 		return 0;
 	update_changeattr(dir, &res->cinfo);
-	nfs_post_op_update_inode(dir, &res->dir_attr);
+	nfs_post_op_update_inode(dir, res->dir_attr);
 	return 1;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 38f3b582e7c2..890580642dc0 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -4815,7 +4815,7 @@ static int nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, __be32 *p, struct nfs_rem
 		goto out;
 	if ((status = decode_remove(&xdr, &res->cinfo)) != 0)
 		goto out;
-	decode_getfattr(&xdr, &res->dir_attr, res->server,
+	decode_getfattr(&xdr, res->dir_attr, res->server,
 			!RPC_IS_ASYNC(rqstp->rq_task));
 out:
 	return status;
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 6da3d3ff6edd..a2242af6a17d 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -23,6 +23,7 @@ struct nfs_unlinkdata {
 	struct nfs_removeres res;
 	struct inode *dir;
 	struct rpc_cred	*cred;
+	struct nfs_fattr dir_attr;
 };
 
 /**
@@ -169,7 +170,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
 	}
 	nfs_sb_active(dir->i_sb);
 	data->args.fh = NFS_FH(dir);
-	nfs_fattr_init(&data->res.dir_attr);
+	nfs_fattr_init(data->res.dir_attr);
 
 	NFS_PROTO(dir)->unlink_setup(&msg, dir);
 
@@ -259,6 +260,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
 		goto out_free;
 	}
 	data->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE;
+	data->res.dir_attr = &data->dir_attr;
 
 	status = -EBUSY;
 	spin_lock(&dentry->d_lock);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 89b28812ec24..76e11c663403 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -386,8 +386,8 @@ struct nfs_removeargs {
 
 struct nfs_removeres {
 	const struct nfs_server *server;
+	struct nfs_fattr	*dir_attr;
 	struct nfs4_change_info	cinfo;
-	struct nfs_fattr	dir_attr;
 	struct nfs4_sequence_res 	seq_res;
 };
 
-- 
cgit v1.2.3


From a8ce4a8f37fef0a09a1e920c2e09f67a80426c7e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Apr 2010 16:42:12 -0400
Subject: SUNRPC: Fail over more quickly on connect errors

We should not allow soft tasks to wait for longer than the major timeout
period when waiting for a reconnect to occur.

Remove the field xprt->connect_timeout since it has been obsoleted by
xprt->reestablish_timeout.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h     |  3 +--
 net/sunrpc/xprt.c               |  2 +-
 net/sunrpc/xprtrdma/transport.c |  1 -
 net/sunrpc/xprtsock.c           | 17 -----------------
 4 files changed, 2 insertions(+), 21 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 6f9457a75b8f..8263f7aefedf 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -174,8 +174,7 @@ struct rpc_xprt {
 	/*
 	 * Connection of transports
 	 */
-	unsigned long		connect_timeout,
-				bind_timeout,
+	unsigned long		bind_timeout,
 				reestablish_timeout;
 	unsigned int		connect_cookie;	/* A cookie that gets bumped
 						   every time the transport
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index c71d835165e2..6c9997ef386a 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -710,7 +710,7 @@ void xprt_connect(struct rpc_task *task)
 		if (task->tk_rqstp)
 			task->tk_rqstp->rq_bytes_sent = 0;
 
-		task->tk_timeout = xprt->connect_timeout;
+		task->tk_timeout = task->tk_rqstp->rq_timeout;
 		rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
 
 		if (test_bit(XPRT_CLOSING, &xprt->state))
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0607b9aaae91..3f3b38c5642f 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -305,7 +305,6 @@ xprt_setup_rdma(struct xprt_create *args)
 	/* 60 second timeout, no retries */
 	xprt->timeout = &xprt_rdma_default_timeout;
 	xprt->bind_timeout = (60U * HZ);
-	xprt->connect_timeout = (60U * HZ);
 	xprt->reestablish_timeout = (5U * HZ);
 	xprt->idle_timeout = (5U * 60 * HZ);
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d138afa3bb35..790a8f31b0bb 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -137,20 +137,6 @@ static ctl_table sunrpc_table[] = {
 
 #endif
 
-/*
- * Time out for an RPC UDP socket connect.  UDP socket connects are
- * synchronous, but we set a timeout anyway in case of resource
- * exhaustion on the local host.
- */
-#define XS_UDP_CONN_TO		(5U * HZ)
-
-/*
- * Wait duration for an RPC TCP connection to be established.  Solaris
- * NFS over TCP uses 60 seconds, for example, which is in line with how
- * long a server takes to reboot.
- */
-#define XS_TCP_CONN_TO		(60U * HZ)
-
 /*
  * Wait duration for a reply from the RPC portmapper.
  */
@@ -2324,7 +2310,6 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
 	xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
 	xprt->bind_timeout = XS_BIND_TO;
-	xprt->connect_timeout = XS_UDP_CONN_TO;
 	xprt->reestablish_timeout = XS_UDP_REEST_TO;
 	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
@@ -2399,7 +2384,6 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
 	xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
 	xprt->bind_timeout = XS_BIND_TO;
-	xprt->connect_timeout = XS_TCP_CONN_TO;
 	xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
 	xprt->idle_timeout = XS_IDLE_DISC_TO;
 
@@ -2475,7 +2459,6 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
 	/* backchannel */
 	xprt_set_bound(xprt);
 	xprt->bind_timeout = 0;
-	xprt->connect_timeout = 0;
 	xprt->reestablish_timeout = 0;
 	xprt->idle_timeout = 0;
 
-- 
cgit v1.2.3


From bb8b27e504c0f0463535fea31b42bcaa393c3fb0 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Fri, 16 Apr 2010 16:43:06 -0400
Subject: NFSv4: Clean up the NFSv4 setclientid operation

Reviewed-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 fs/nfs/nfs4_fs.h          |  4 ++--
 fs/nfs/nfs4proc.c         | 18 ++++++++++++------
 fs/nfs/nfs4state.c        | 15 ++++++++++-----
 fs/nfs/nfs4xdr.c          | 20 ++++++++++----------
 include/linux/nfs_fs_sb.h |  1 -
 include/linux/nfs_xdr.h   |  5 +++++
 6 files changed, 39 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index a187200a7aac..509930664d74 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -206,8 +206,8 @@ extern ssize_t nfs4_listxattr(struct dentry *, char *, size_t);
 
 
 /* nfs4proc.c */
-extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *);
-extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct rpc_cred *);
+extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *);
+extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *);
 extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
 extern int nfs4_proc_async_renew(struct nfs_client *, struct rpc_cred *);
 extern int nfs4_proc_renew(struct nfs_client *, struct rpc_cred *);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 45f64701d4a6..04f4b2b2506b 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3511,7 +3511,9 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
 	return _nfs4_async_handle_error(task, server, server->nfs_client, state);
 }
 
-int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short port, struct rpc_cred *cred)
+int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
+		unsigned short port, struct rpc_cred *cred,
+		struct nfs4_setclientid_res *res)
 {
 	nfs4_verifier sc_verifier;
 	struct nfs4_setclientid setclientid = {
@@ -3521,7 +3523,7 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID],
 		.rpc_argp = &setclientid,
-		.rpc_resp = clp,
+		.rpc_resp = res,
 		.rpc_cred = cred,
 	};
 	__be32 *p;
@@ -3564,12 +3566,14 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po
 	return status;
 }
 
-static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
+static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
+		struct nfs4_setclientid_res *arg,
+		struct rpc_cred *cred)
 {
 	struct nfs_fsinfo fsinfo;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETCLIENTID_CONFIRM],
-		.rpc_argp = clp,
+		.rpc_argp = arg,
 		.rpc_resp = &fsinfo,
 		.rpc_cred = cred,
 	};
@@ -3587,12 +3591,14 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cre
 	return status;
 }
 
-int nfs4_proc_setclientid_confirm(struct nfs_client *clp, struct rpc_cred *cred)
+int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
+		struct nfs4_setclientid_res *arg,
+		struct rpc_cred *cred)
 {
 	long timeout = 0;
 	int err;
 	do {
-		err = _nfs4_proc_setclientid_confirm(clp, cred);
+		err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
 		switch (err) {
 			case 0:
 				return err;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6c5ed51f105e..cd2d90400d46 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -62,6 +62,7 @@ static LIST_HEAD(nfs4_clientid_list);
 
 int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 {
+	struct nfs4_setclientid_res clid;
 	unsigned short port;
 	int status;
 
@@ -69,11 +70,15 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
 	if (clp->cl_addr.ss_family == AF_INET6)
 		port = nfs_callback_tcpport6;
 
-	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred);
-	if (status == 0)
-		status = nfs4_proc_setclientid_confirm(clp, cred);
-	if (status == 0)
-		nfs4_schedule_state_renewal(clp);
+	status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
+	if (status != 0)
+		goto out;
+	status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
+	if (status != 0)
+		goto out;
+	clp->cl_clientid = clid.clientid;
+	nfs4_schedule_state_renewal(clp);
+out:
 	return status;
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 890580642dc0..6bdef28efa33 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1504,14 +1504,14 @@ static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclie
 	hdr->replen += decode_setclientid_maxsz;
 }
 
-static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs_client *client_state, struct compound_hdr *hdr)
+static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4_setclientid_res *arg, struct compound_hdr *hdr)
 {
 	__be32 *p;
 
 	p = reserve_space(xdr, 12 + NFS4_VERIFIER_SIZE);
 	*p++ = cpu_to_be32(OP_SETCLIENTID_CONFIRM);
-	p = xdr_encode_hyper(p, client_state->cl_clientid);
-	xdr_encode_opaque_fixed(p, client_state->cl_confirm.data, NFS4_VERIFIER_SIZE);
+	p = xdr_encode_hyper(p, arg->clientid);
+	xdr_encode_opaque_fixed(p, arg->confirm.data, NFS4_VERIFIER_SIZE);
 	hdr->nops++;
 	hdr->replen += decode_setclientid_confirm_maxsz;
 }
@@ -2324,7 +2324,7 @@ static int nfs4_xdr_enc_setclientid(struct rpc_rqst *req, __be32 *p, struct nfs4
 /*
  * a SETCLIENTID_CONFIRM request
  */
-static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs_client *clp)
+static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, struct nfs4_setclientid_res *arg)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr = {
@@ -2334,7 +2334,7 @@ static int nfs4_xdr_enc_setclientid_confirm(struct rpc_rqst *req, __be32 *p, str
 
 	xdr_init_encode(&xdr, &req->rq_snd_buf, p);
 	encode_compound_hdr(&xdr, req, &hdr);
-	encode_setclientid_confirm(&xdr, clp, &hdr);
+	encode_setclientid_confirm(&xdr, arg, &hdr);
 	encode_putrootfh(&xdr, &hdr);
 	encode_fsinfo(&xdr, lease_bitmap, &hdr);
 	encode_nops(&hdr);
@@ -4397,7 +4397,7 @@ out_overflow:
 	return -EIO;
 }
 
-static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
+static int decode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid_res *res)
 {
 	__be32 *p;
 	uint32_t opnum;
@@ -4417,8 +4417,8 @@ static int decode_setclientid(struct xdr_stream *xdr, struct nfs_client *clp)
 		p = xdr_inline_decode(xdr, 8 + NFS4_VERIFIER_SIZE);
 		if (unlikely(!p))
 			goto out_overflow;
-		p = xdr_decode_hyper(p, &clp->cl_clientid);
-		memcpy(clp->cl_confirm.data, p, NFS4_VERIFIER_SIZE);
+		p = xdr_decode_hyper(p, &res->clientid);
+		memcpy(res->confirm.data, p, NFS4_VERIFIER_SIZE);
 	} else if (nfserr == NFSERR_CLID_INUSE) {
 		uint32_t len;
 
@@ -5498,7 +5498,7 @@ static int nfs4_xdr_dec_renew(struct rpc_rqst *rqstp, __be32 *p, void *dummy)
  * Decode SETCLIENTID response
  */
 static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
-		struct nfs_client *clp)
+		struct nfs4_setclientid_res *res)
 {
 	struct xdr_stream xdr;
 	struct compound_hdr hdr;
@@ -5507,7 +5507,7 @@ static int nfs4_xdr_dec_setclientid(struct rpc_rqst *req, __be32 *p,
 	xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
 	status = decode_compound_hdr(&xdr, &hdr);
 	if (!status)
-		status = decode_setclientid(&xdr, clp);
+		status = decode_setclientid(&xdr, res);
 	return status;
 }
 
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e82957acea56..d6e10a4c06e5 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -44,7 +44,6 @@ struct nfs_client {
 
 #ifdef CONFIG_NFS_V4
 	u64			cl_clientid;	/* constant */
-	nfs4_verifier		cl_confirm;
 	unsigned long		cl_state;
 
 	struct rb_root		cl_openowner_id;
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 76e11c663403..51914d7d6cc4 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -824,6 +824,11 @@ struct nfs4_setclientid {
 	u32				sc_cb_ident;
 };
 
+struct nfs4_setclientid_res {
+	u64				clientid;
+	nfs4_verifier			confirm;
+};
+
 struct nfs4_statfs_arg {
 	const struct nfs_fh *		fh;
 	const u32 *			bitmask;
-- 
cgit v1.2.3


From 9605a069f83d999e60cd57dc8010708fe08291c0 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 7 May 2010 13:33:30 -0400
Subject: SUNRPC: Trivial cleanups in include/linux/sunrpc/xdr.h

Clean up: Update the documenting comment, and fix some minor white
space issues.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xdr.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index f5cc0898bc53..35cf2e8cd7c6 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -1,7 +1,10 @@
 /*
- * include/linux/sunrpc/xdr.h
+ * XDR standard data types and function declarations
  *
  * Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * Based on:
+ *   RFC 4506 "XDR: External Data Representation Standard", May 2006
  */
 
 #ifndef _SUNRPC_XDR_H_
@@ -62,7 +65,6 @@ struct xdr_buf {
 
 	unsigned int	buflen,		/* Total length of storage buffer */
 			len;		/* Length of XDR encoded message */
-
 };
 
 /*
@@ -178,7 +180,7 @@ struct xdr_array2_desc {
 };
 
 extern int xdr_decode_array2(struct xdr_buf *buf, unsigned int base,
-                             struct xdr_array2_desc *desc);
+			     struct xdr_array2_desc *desc);
 extern int xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
 			     struct xdr_array2_desc *desc);
 
-- 
cgit v1.2.3


From bbc72cea58f671665b6362be0d4e391813ac0eee Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 7 May 2010 13:34:27 -0400
Subject: SUNRPC: RPC metrics and RTT estimator should use same RTT value

Compute an RPC request's RTT once, and use that value both for reporting
RPC metrics, and for adjusting the RTT context used by the RPC client's RTT
estimator algorithm.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/xprt.h |  1 -
 net/sunrpc/xprt.c           | 13 ++++---------
 net/sunrpc/xprtsock.c       |  1 -
 3 files changed, 4 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 8263f7aefedf..04fc342d9840 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -293,7 +293,6 @@ void			xprt_set_retrans_timeout_rtt(struct rpc_task *task);
 void			xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
 void			xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
 void			xprt_write_space(struct rpc_xprt *xprt);
-void			xprt_update_rtt(struct rpc_task *task);
 void			xprt_adjust_cwnd(struct rpc_task *task, int result);
 struct rpc_rqst *	xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
 void			xprt_complete_rqst(struct rpc_task *task, int copied);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 6c9997ef386a..698c62712294 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -774,12 +774,7 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
 }
 EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
 
-/**
- * xprt_update_rtt - update an RPC client's RTT state after receiving a reply
- * @task: RPC request that recently completed
- *
- */
-void xprt_update_rtt(struct rpc_task *task)
+static void xprt_update_rtt(struct rpc_task *task)
 {
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
@@ -787,12 +782,10 @@ void xprt_update_rtt(struct rpc_task *task)
 
 	if (timer) {
 		if (req->rq_ntrans == 1)
-			rpc_update_rtt(rtt, timer,
-					(long)jiffies - req->rq_xtime);
+			rpc_update_rtt(rtt, timer, task->tk_rtt);
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
-EXPORT_SYMBOL_GPL(xprt_update_rtt);
 
 /**
  * xprt_complete_rqst - called when reply processing is complete
@@ -811,6 +804,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 
 	xprt->stat.recvs++;
 	task->tk_rtt = (long)jiffies - req->rq_xtime;
+	if (xprt->ops->timer != NULL)
+		xprt_update_rtt(task);
 
 	list_del_init(&req->rq_list);
 	req->rq_private_buf.len = copied;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 790a8f31b0bb..3d1dcdf2aef1 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -844,7 +844,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
 	dst_confirm(skb_dst(skb));
 
 	xprt_adjust_cwnd(task, copied);
-	xprt_update_rtt(task);
 	xprt_complete_rqst(task, copied);
 
  out_unlock:
-- 
cgit v1.2.3


From f56916b97fe2031761ca611f0a342efd913afb33 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 7 May 2010 13:34:37 -0400
Subject: ktime: introduce ktime_to_ms()

To report ktime statistics to user space in milliseconds, a new helper
is required.

When considering how to do this conversion, I didn't immediately see
why the extra step of converting ktime to a timeval was needed.  To
make that more clear, introduce a couple of large comments.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/ktime.h | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/ktime.h b/include/linux/ktime.h
index ce5983225be4..e1ceaa9b36bb 100644
--- a/include/linux/ktime.h
+++ b/include/linux/ktime.h
@@ -130,7 +130,7 @@ static inline ktime_t timeval_to_ktime(struct timeval tv)
 /* Convert ktime_t to nanoseconds - NOP in the scalar storage format: */
 #define ktime_to_ns(kt)			((kt).tv64)
 
-#else
+#else	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
 
 /*
  * Helper macros/inlines to get the ktime_t math right in the timespec
@@ -275,7 +275,7 @@ static inline s64 ktime_to_ns(const ktime_t kt)
 	return (s64) kt.tv.sec * NSEC_PER_SEC + kt.tv.nsec;
 }
 
-#endif
+#endif	/* !((BITS_PER_LONG == 64) || defined(CONFIG_KTIME_SCALAR)) */
 
 /**
  * ktime_equal - Compares two ktime_t variables to see if they are equal
@@ -295,6 +295,12 @@ static inline s64 ktime_to_us(const ktime_t kt)
 	return (s64) tv.tv_sec * USEC_PER_SEC + tv.tv_usec;
 }
 
+static inline s64 ktime_to_ms(const ktime_t kt)
+{
+	struct timeval tv = ktime_to_timeval(kt);
+	return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
+}
+
 static inline s64 ktime_us_delta(const ktime_t later, const ktime_t earlier)
 {
        return ktime_to_us(ktime_sub(later, earlier));
-- 
cgit v1.2.3


From ff8399709e41bf72b4cb145612a0f9a9f7283c83 Mon Sep 17 00:00:00 2001
From: Chuck Lever <chuck.lever@oracle.com>
Date: Fri, 7 May 2010 13:34:47 -0400
Subject: SUNRPC: Replace jiffies-based metrics with ktime-based metrics

Currently RPC performance metrics that tabulate elapsed time use
jiffies time values.  This is problematic on systems that use slow
jiffies (for instance 100HZ systems built for paravirtualized
environments).  It is also a problem for computing precise latency
statistics for advanced network transports, such as InfiniBand,
that can have round-trip latencies significantly faster than a single
clock tick.

For the RPC client, adopt the high resolution time stamp mechanism
already used by the network layer and blktrace: ktime.

We use ktime format time stamps for all internal computations, and
convert to milliseconds for presentation.  As a result, we need only
addition operations in the performance critical paths; multiply/divide
is required only for presentation.

We could report RTT metrics in microseconds.  In fact the mountstats
format is versioned to accommodate exactly this kind of interface
improvement.

For now, however, we'll stay with millisecond precision for
presentation to maintain backwards compatibility with the handful of
currently deployed user space tools.  At a later point, we'll move to
an API such as BDI_STATS where a finer timestamp precision can be
reported.

Signed-off-by: Chuck Lever <chuck.lever@oracle.com>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/metrics.h |  7 ++++---
 include/linux/sunrpc/sched.h   |  5 +++--
 include/linux/sunrpc/xprt.h    |  3 ++-
 net/sunrpc/sched.c             |  2 +-
 net/sunrpc/stats.c             | 27 +++++++++------------------
 net/sunrpc/xprt.c              |  8 +++++---
 6 files changed, 24 insertions(+), 28 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h
index 77f78e56c481..b6edbc0ea83d 100644
--- a/include/linux/sunrpc/metrics.h
+++ b/include/linux/sunrpc/metrics.h
@@ -26,6 +26,7 @@
 #define _LINUX_SUNRPC_METRICS_H
 
 #include <linux/seq_file.h>
+#include <linux/ktime.h>
 
 #define RPC_IOSTATS_VERS	"1.0"
 
@@ -58,9 +59,9 @@ struct rpc_iostats {
 	 * and the total time the request spent from init to release
 	 * are measured.
 	 */
-	unsigned long long	om_queue,	/* jiffies queued for xmit */
-				om_rtt,		/* jiffies for RPC RTT */
-				om_execute;	/* jiffies for RPC execution */
+	ktime_t			om_queue,	/* queued for xmit */
+				om_rtt,		/* RPC RTT */
+				om_execute;	/* RPC execution */
 } ____cacheline_aligned;
 
 struct rpc_task;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 7bc7fd5291ce..76720d262ef2 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -10,6 +10,7 @@
 #define _LINUX_SUNRPC_SCHED_H_
 
 #include <linux/timer.h>
+#include <linux/ktime.h>
 #include <linux/sunrpc/types.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
@@ -80,8 +81,8 @@ struct rpc_task {
 
 	unsigned short		tk_timeouts;	/* maj timeouts */
 	size_t			tk_bytes_sent;	/* total bytes sent */
-	unsigned long		tk_start;	/* RPC task init timestamp */
-	long			tk_rtt;		/* round-trip time (jiffies) */
+	ktime_t			tk_start,	/* RPC task init timestamp */
+				tk_rtt;		/* round-trip time */
 
 	pid_t			tk_owner;	/* Process id for batching tasks */
 	unsigned char		tk_priority : 2;/* Task priority */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 04fc342d9840..f8851861b744 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -13,6 +13,7 @@
 #include <linux/socket.h>
 #include <linux/in.h>
 #include <linux/kref.h>
+#include <linux/ktime.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/xdr.h>
 #include <linux/sunrpc/msg_prot.h>
@@ -94,7 +95,7 @@ struct rpc_rqst {
 	 */
 	u32			rq_bytes_sent;	/* Bytes we have sent */
 
-	unsigned long		rq_xtime;	/* when transmitted */
+	ktime_t			rq_xtime;	/* transmit time stamp */
 	int			rq_ntrans;
 
 #if defined(CONFIG_NFS_V4_1)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index c8979ce5d88a..aa7b07ef5d55 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -834,7 +834,7 @@ static void rpc_init_task(struct rpc_task *task, const struct rpc_task_setup *ta
 	}
 
 	/* starting timestamp */
-	task->tk_start = jiffies;
+	task->tk_start = ktime_get();
 
 	dprintk("RPC:       new task initialized, procpid %u\n",
 				task_pid_nr(current));
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 5785d2037f45..aacd95f0dce5 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -144,7 +144,7 @@ void rpc_count_iostats(struct rpc_task *task)
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_iostats *stats;
 	struct rpc_iostats *op_metrics;
-	long rtt, execute, queue;
+	ktime_t delta;
 
 	if (!task->tk_client || !task->tk_client->cl_metrics || !req)
 		return;
@@ -159,20 +159,13 @@ void rpc_count_iostats(struct rpc_task *task)
 	op_metrics->om_bytes_sent += task->tk_bytes_sent;
 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
-	queue = (long)req->rq_xtime - task->tk_start;
-	if (queue < 0)
-		queue = -queue;
-	op_metrics->om_queue += queue;
+	delta = ktime_sub(req->rq_xtime, task->tk_start);
+	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
 
-	rtt = task->tk_rtt;
-	if (rtt < 0)
-		rtt = -rtt;
-	op_metrics->om_rtt += rtt;
+	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt);
 
-	execute = (long)jiffies - task->tk_start;
-	if (execute < 0)
-		execute = -execute;
-	op_metrics->om_execute += execute;
+	delta = ktime_sub(ktime_get(), task->tk_start);
+	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
 }
 
 static void _print_name(struct seq_file *seq, unsigned int op,
@@ -186,8 +179,6 @@ static void _print_name(struct seq_file *seq, unsigned int op,
 		seq_printf(seq, "\t%12u: ", op);
 }
 
-#define MILLISECS_PER_JIFFY	(1000 / HZ)
-
 void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 {
 	struct rpc_iostats *stats = clnt->cl_metrics;
@@ -214,9 +205,9 @@ void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt)
 				metrics->om_timeouts,
 				metrics->om_bytes_sent,
 				metrics->om_bytes_recv,
-				metrics->om_queue * MILLISECS_PER_JIFFY,
-				metrics->om_rtt * MILLISECS_PER_JIFFY,
-				metrics->om_execute * MILLISECS_PER_JIFFY);
+				ktime_to_ms(metrics->om_queue),
+				ktime_to_ms(metrics->om_rtt),
+				ktime_to_ms(metrics->om_execute));
 	}
 }
 EXPORT_SYMBOL_GPL(rpc_print_iostats);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 698c62712294..8986b1b82862 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -43,6 +43,7 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #include <linux/net.h>
+#include <linux/ktime.h>
 
 #include <linux/sunrpc/clnt.h>
 #include <linux/sunrpc/metrics.h>
@@ -779,10 +780,11 @@ static void xprt_update_rtt(struct rpc_task *task)
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
+	long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt));
 
 	if (timer) {
 		if (req->rq_ntrans == 1)
-			rpc_update_rtt(rtt, timer, task->tk_rtt);
+			rpc_update_rtt(rtt, timer, m);
 		rpc_set_timeo(rtt, timer, req->rq_ntrans - 1);
 	}
 }
@@ -803,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 			task->tk_pid, ntohl(req->rq_xid), copied);
 
 	xprt->stat.recvs++;
-	task->tk_rtt = (long)jiffies - req->rq_xtime;
+	task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime);
 	if (xprt->ops->timer != NULL)
 		xprt_update_rtt(task);
 
@@ -904,7 +906,7 @@ void xprt_transmit(struct rpc_task *task)
 		return;
 
 	req->rq_connect_cookie = xprt->connect_cookie;
-	req->rq_xtime = jiffies;
+	req->rq_xtime = ktime_get();
 	status = xprt->ops->send_request(task);
 	if (status != 0) {
 		task->tk_status = status;
-- 
cgit v1.2.3


From 1f4c86c0be9064ab4eebd9e67c84606c1cfeec4b Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:51:02 -0400
Subject: NFS: Don't use GFP_KERNEL in rpcsec_gss downcalls

Again, we can deadlock if the memory reclaim triggers a writeback that
requires a rpcsec_gss credential lookup.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/gss_api.h        |  6 ++++--
 include/linux/sunrpc/gss_krb5.h       |  3 ++-
 net/sunrpc/auth_gss/auth_gss.c        |  2 +-
 net/sunrpc/auth_gss/gss_krb5_keys.c   |  9 +++++----
 net/sunrpc/auth_gss/gss_krb5_mech.c   | 34 ++++++++++++++++++----------------
 net/sunrpc/auth_gss/gss_mech_switch.c |  7 ++++---
 net/sunrpc/auth_gss/gss_spkm3_mech.c  |  5 +++--
 net/sunrpc/auth_gss/svcauth_gss.c     |  2 +-
 8 files changed, 38 insertions(+), 30 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h
index b22d7f189ceb..5d8048beb051 100644
--- a/include/linux/sunrpc/gss_api.h
+++ b/include/linux/sunrpc/gss_api.h
@@ -35,7 +35,8 @@ int gss_import_sec_context(
 		const void*		input_token,
 		size_t			bufsize,
 		struct gss_api_mech	*mech,
-		struct gss_ctx		**ctx_id);
+		struct gss_ctx		**ctx_id,
+		gfp_t			gfp_mask);
 u32 gss_get_mic(
 		struct gss_ctx		*ctx_id,
 		struct xdr_buf		*message,
@@ -89,7 +90,8 @@ struct gss_api_ops {
 	int (*gss_import_sec_context)(
 			const void		*input_token,
 			size_t			bufsize,
-			struct gss_ctx		*ctx_id);
+			struct gss_ctx		*ctx_id,
+			gfp_t			gfp_mask);
 	u32 (*gss_get_mic)(
 			struct gss_ctx		*ctx_id,
 			struct xdr_buf		*message,
diff --git a/include/linux/sunrpc/gss_krb5.h b/include/linux/sunrpc/gss_krb5.h
index 5e774a5abf2c..5af2931cf58d 100644
--- a/include/linux/sunrpc/gss_krb5.h
+++ b/include/linux/sunrpc/gss_krb5.h
@@ -295,7 +295,8 @@ u32
 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 		const struct xdr_netobj *inkey,
 		struct xdr_netobj *outkey,
-		const struct xdr_netobj *in_constant);
+		const struct xdr_netobj *in_constant,
+		gfp_t gfp_mask);
 
 u32
 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 6654c8534d32..48a7939dc9e2 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -229,7 +229,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
 		p = ERR_PTR(-EFAULT);
 		goto err;
 	}
-	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx);
+	ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
 	if (ret < 0) {
 		p = ERR_PTR(ret);
 		goto err;
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 33b87f04b30b..76e42e6be755 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -150,7 +150,8 @@ static void krb5_nfold(u32 inbits, const u8 *in,
 u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 		    const struct xdr_netobj *inkey,
 		    struct xdr_netobj *outkey,
-		    const struct xdr_netobj *in_constant)
+		    const struct xdr_netobj *in_constant,
+		    gfp_t gfp_mask)
 {
 	size_t blocksize, keybytes, keylength, n;
 	unsigned char *inblockdata, *outblockdata, *rawkey;
@@ -175,15 +176,15 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
 	/* allocate and set up buffers */
 
 	ret = ENOMEM;
-	inblockdata = kmalloc(blocksize, GFP_KERNEL);
+	inblockdata = kmalloc(blocksize, gfp_mask);
 	if (inblockdata == NULL)
 		goto err_free_cipher;
 
-	outblockdata = kmalloc(blocksize, GFP_KERNEL);
+	outblockdata = kmalloc(blocksize, gfp_mask);
 	if (outblockdata == NULL)
 		goto err_free_in;
 
-	rawkey = kmalloc(keybytes, GFP_KERNEL);
+	rawkey = kmalloc(keybytes, gfp_mask);
 	if (rawkey == NULL)
 		goto err_free_out;
 
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 7c249a3f9a03..032644610524 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -369,7 +369,7 @@ set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
 }
 
 static int
-context_derive_keys_des3(struct krb5_ctx *ctx)
+context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
 {
 	struct xdr_netobj c, keyin, keyout;
 	u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -396,7 +396,7 @@ context_derive_keys_des3(struct krb5_ctx *ctx)
 	/* derive cksum */
 	set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
 	keyout.data = ctx->cksum;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving cksum key\n",
 			__func__, err);
@@ -487,7 +487,7 @@ out_err:
 }
 
 static int
-context_derive_keys_new(struct krb5_ctx *ctx)
+context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
 {
 	struct xdr_netobj c, keyin, keyout;
 	u8 cdata[GSS_KRB5_K5CLENGTH];
@@ -503,7 +503,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* initiator seal encryption */
 	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
 	keyout.data = ctx->initiator_seal;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving initiator_seal key\n",
 			__func__, err);
@@ -518,7 +518,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* acceptor seal encryption */
 	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
 	keyout.data = ctx->acceptor_seal;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving acceptor_seal key\n",
 			__func__, err);
@@ -533,7 +533,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* initiator sign checksum */
 	set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
 	keyout.data = ctx->initiator_sign;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving initiator_sign key\n",
 			__func__, err);
@@ -543,7 +543,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* acceptor sign checksum */
 	set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
 	keyout.data = ctx->acceptor_sign;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving acceptor_sign key\n",
 			__func__, err);
@@ -553,7 +553,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* initiator seal integrity */
 	set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
 	keyout.data = ctx->initiator_integ;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving initiator_integ key\n",
 			__func__, err);
@@ -563,7 +563,7 @@ context_derive_keys_new(struct krb5_ctx *ctx)
 	/* acceptor seal integrity */
 	set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
 	keyout.data = ctx->acceptor_integ;
-	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c);
+	err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
 	if (err) {
 		dprintk("%s: Error %d deriving acceptor_integ key\n",
 			__func__, err);
@@ -598,7 +598,8 @@ out_err:
 }
 
 static int
-gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
+gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
+		gfp_t gfp_mask)
 {
 	int keylen;
 
@@ -645,7 +646,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 	}
 
 	ctx->mech_used.data = kmemdup(gss_kerberos_mech.gm_oid.data,
-				      gss_kerberos_mech.gm_oid.len, GFP_KERNEL);
+				      gss_kerberos_mech.gm_oid.len, gfp_mask);
 	if (unlikely(ctx->mech_used.data == NULL)) {
 		p = ERR_PTR(-ENOMEM);
 		goto out_err;
@@ -654,12 +655,12 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx)
 
 	switch (ctx->enctype) {
 	case ENCTYPE_DES3_CBC_RAW:
-		return context_derive_keys_des3(ctx);
+		return context_derive_keys_des3(ctx, gfp_mask);
 	case ENCTYPE_ARCFOUR_HMAC:
 		return context_derive_keys_rc4(ctx);
 	case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
 	case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
-		return context_derive_keys_new(ctx);
+		return context_derive_keys_new(ctx, gfp_mask);
 	default:
 		return -EINVAL;
 	}
@@ -670,20 +671,21 @@ out_err:
 
 static int
 gss_import_sec_context_kerberos(const void *p, size_t len,
-				struct gss_ctx *ctx_id)
+				struct gss_ctx *ctx_id,
+				gfp_t gfp_mask)
 {
 	const void *end = (const void *)((const char *)p + len);
 	struct  krb5_ctx *ctx;
 	int ret;
 
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	ctx = kzalloc(sizeof(*ctx), gfp_mask);
 	if (ctx == NULL)
 		return -ENOMEM;
 
 	if (len == 85)
 		ret = gss_import_v1_context(p, end, ctx);
 	else
-		ret = gss_import_v2_context(p, end, ctx);
+		ret = gss_import_v2_context(p, end, ctx, gfp_mask);
 
 	if (ret == 0)
 		ctx_id->internal_ctx_id = ctx;
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 28a84ef41d13..2689de39dc78 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -249,14 +249,15 @@ EXPORT_SYMBOL_GPL(gss_mech_put);
 int
 gss_import_sec_context(const void *input_token, size_t bufsize,
 		       struct gss_api_mech	*mech,
-		       struct gss_ctx		**ctx_id)
+		       struct gss_ctx		**ctx_id,
+		       gfp_t gfp_mask)
 {
-	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), GFP_KERNEL)))
+	if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
 		return -ENOMEM;
 	(*ctx_id)->mech_type = gss_mech_get(mech);
 
 	return mech->gm_ops
-		->gss_import_sec_context(input_token, bufsize, *ctx_id);
+		->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
 }
 
 /* gss_get_mic: compute a mic over message and return mic_token. */
diff --git a/net/sunrpc/auth_gss/gss_spkm3_mech.c b/net/sunrpc/auth_gss/gss_spkm3_mech.c
index 035e1dd6af1b..dc3f1f5ed865 100644
--- a/net/sunrpc/auth_gss/gss_spkm3_mech.c
+++ b/net/sunrpc/auth_gss/gss_spkm3_mech.c
@@ -84,13 +84,14 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
 
 static int
 gss_import_sec_context_spkm3(const void *p, size_t len,
-				struct gss_ctx *ctx_id)
+				struct gss_ctx *ctx_id,
+				gfp_t gfp_mask)
 {
 	const void *end = (const void *)((const char *)p + len);
 	struct	spkm3_ctx *ctx;
 	int	version;
 
-	if (!(ctx = kzalloc(sizeof(*ctx), GFP_NOFS)))
+	if (!(ctx = kzalloc(sizeof(*ctx), gfp_mask)))
 		goto out_err;
 
 	p = simple_get_bytes(p, end, &version, sizeof(version));
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 1d9ac4ac818a..cc385b3a59c2 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -494,7 +494,7 @@ static int rsc_parse(struct cache_detail *cd,
 		len = qword_get(&mesg, buf, mlen);
 		if (len < 0)
 			goto out;
-		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx);
+		status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
 		if (status)
 			goto out;
 
-- 
cgit v1.2.3


From d60dbb20a74c2cfa142be0a34dac3c6547ea086c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:51:49 -0400
Subject: SUNRPC: Move the task->tk_bytes_sent and tk_rtt to struct rpc_rqst

It seems strange to maintain stats for bytes_sent in one structure, and
bytes received in another. Try to assemble all the RPC request-related
stats in struct rpc_rqst.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h    | 4 +---
 include/linux/sunrpc/xprt.h     | 6 ++++--
 net/sunrpc/stats.c              | 4 ++--
 net/sunrpc/xprt.c               | 4 ++--
 net/sunrpc/xprtrdma/transport.c | 2 +-
 net/sunrpc/xprtsock.c           | 4 ++--
 6 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 76720d262ef2..46ebef1788c6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -80,9 +80,7 @@ struct rpc_task {
 	} u;
 
 	unsigned short		tk_timeouts;	/* maj timeouts */
-	size_t			tk_bytes_sent;	/* total bytes sent */
-	ktime_t			tk_start,	/* RPC task init timestamp */
-				tk_rtt;		/* round-trip time */
+	ktime_t			tk_start;	/* RPC task init timestamp */
 
 	pid_t			tk_owner;	/* Process id for batching tasks */
 	unsigned char		tk_priority : 2;/* Task priority */
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index f8851861b744..b51470302399 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -66,8 +66,6 @@ struct rpc_rqst {
 	struct rpc_task *	rq_task;	/* RPC task data */
 	__be32			rq_xid;		/* request XID */
 	int			rq_cong;	/* has incremented xprt->cong */
-	int			rq_reply_bytes_recvd;	/* number of reply */
-							/* bytes received */
 	u32			rq_seqno;	/* gss seq no. used on req. */
 	int			rq_enc_pages_num;
 	struct page		**rq_enc_pages;	/* scratch pages for use by
@@ -78,12 +76,16 @@ struct rpc_rqst {
 	__u32 *			rq_buffer;	/* XDR encode buffer */
 	size_t			rq_callsize,
 				rq_rcvsize;
+	size_t			rq_xmit_bytes_sent;	/* total bytes sent */
+	size_t			rq_reply_bytes_recvd;	/* total reply bytes */
+							/* received */
 
 	struct xdr_buf		rq_private_buf;		/* The receive buffer
 							 * used in the softirq.
 							 */
 	unsigned long		rq_majortimeo;	/* major timeout alarm */
 	unsigned long		rq_timeout;	/* Current timeout value */
+	ktime_t			rq_rtt;		/* round-trip time */
 	unsigned int		rq_retries;	/* # of retries */
 	unsigned int		rq_connect_cookie;
 						/* A cookie used to track the
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index aacd95f0dce5..ea1046f3f9a3 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -156,13 +156,13 @@ void rpc_count_iostats(struct rpc_task *task)
 	op_metrics->om_ntrans += req->rq_ntrans;
 	op_metrics->om_timeouts += task->tk_timeouts;
 
-	op_metrics->om_bytes_sent += task->tk_bytes_sent;
+	op_metrics->om_bytes_sent += req->rq_xmit_bytes_sent;
 	op_metrics->om_bytes_recv += req->rq_reply_bytes_recvd;
 
 	delta = ktime_sub(req->rq_xtime, task->tk_start);
 	op_metrics->om_queue = ktime_add(op_metrics->om_queue, delta);
 
-	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, task->tk_rtt);
+	op_metrics->om_rtt = ktime_add(op_metrics->om_rtt, req->rq_rtt);
 
 	delta = ktime_sub(ktime_get(), task->tk_start);
 	op_metrics->om_execute = ktime_add(op_metrics->om_execute, delta);
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 8986b1b82862..65fe2e4e7cbf 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -780,7 +780,7 @@ static void xprt_update_rtt(struct rpc_task *task)
 	struct rpc_rqst *req = task->tk_rqstp;
 	struct rpc_rtt *rtt = task->tk_client->cl_rtt;
 	unsigned timer = task->tk_msg.rpc_proc->p_timer;
-	long m = usecs_to_jiffies(ktime_to_us(task->tk_rtt));
+	long m = usecs_to_jiffies(ktime_to_us(req->rq_rtt));
 
 	if (timer) {
 		if (req->rq_ntrans == 1)
@@ -805,7 +805,7 @@ void xprt_complete_rqst(struct rpc_task *task, int copied)
 			task->tk_pid, ntohl(req->rq_xid), copied);
 
 	xprt->stat.recvs++;
-	task->tk_rtt = ktime_sub(ktime_get(), req->rq_xtime);
+	req->rq_rtt = ktime_sub(ktime_get(), req->rq_xtime);
 	if (xprt->ops->timer != NULL)
 		xprt_update_rtt(task);
 
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 3f3b38c5642f..a85e866a77f7 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -674,7 +674,7 @@ xprt_rdma_send_request(struct rpc_task *task)
 	if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req))
 		goto drop_connection;
 
-	task->tk_bytes_sent += rqst->rq_snd_buf.len;
+	rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len;
 	rqst->rq_bytes_sent = 0;
 	return 0;
 
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index beefa7a3a90e..02fc7f04dd17 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -528,7 +528,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 			xdr->len - req->rq_bytes_sent, status);
 
 	if (status >= 0) {
-		task->tk_bytes_sent += status;
+		req->rq_xmit_bytes_sent += status;
 		if (status >= req->rq_slen)
 			return 0;
 		/* Still some bytes left; set up for a retry later. */
@@ -624,7 +624,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 		/* If we've sent the entire packet, immediately
 		 * reset the count of bytes sent. */
 		req->rq_bytes_sent += status;
-		task->tk_bytes_sent += status;
+		req->rq_xmit_bytes_sent += status;
 		if (likely(req->rq_bytes_sent >= req->rq_slen)) {
 			req->rq_bytes_sent = 0;
 			return 0;
-- 
cgit v1.2.3


From d72b6cec8d42eb7c2a249b613abf2c2b7a6eeb47 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:51:50 -0400
Subject: SUNRPC: Remove the 'tk_magic' debugging field

It has not triggered in almost a decade. Time to get rid of it...

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h |  3 ---
 net/sunrpc/sched.c           | 11 -----------
 2 files changed, 14 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 46ebef1788c6..41b9f971c9fb 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -41,9 +41,6 @@ struct rpc_wait {
  * This is the RPC task struct
  */
 struct rpc_task {
-#ifdef RPC_DEBUG
-	unsigned long		tk_magic;	/* 0xf00baa */
-#endif
 	atomic_t		tk_count;	/* Reference count */
 	struct list_head	tk_task;	/* global list of tasks */
 	struct rpc_clnt *	tk_client;	/* RPC client */
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index aa7b07ef5d55..4a843b883b89 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -25,7 +25,6 @@
 
 #ifdef RPC_DEBUG
 #define RPCDBG_FACILITY		RPCDBG_SCHED
-#define RPC_TASK_MAGIC_ID	0xf00baa
 #endif
 
 /*
@@ -237,7 +236,6 @@ static void rpc_task_set_debuginfo(struct rpc_task *task)
 {
 	static atomic_t rpc_pid;
 
-	task->tk_magic = RPC_TASK_MAGIC_ID;
 	task->tk_pid = atomic_inc_return(&rpc_pid);
 }
 #else
@@ -360,9 +358,6 @@ static void __rpc_do_wake_up_task(struct rpc_wait_queue *queue, struct rpc_task
 	dprintk("RPC: %5u __rpc_wake_up_task (now %lu)\n",
 			task->tk_pid, jiffies);
 
-#ifdef RPC_DEBUG
-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
 	/* Has the task been executed yet? If not, we cannot wake it up! */
 	if (!RPC_IS_ACTIVATED(task)) {
 		printk(KERN_ERR "RPC: Inactive task (%p) being woken up!\n", task);
@@ -916,9 +911,6 @@ EXPORT_SYMBOL_GPL(rpc_put_task);
 
 static void rpc_release_task(struct rpc_task *task)
 {
-#ifdef RPC_DEBUG
-	BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
-#endif
 	dprintk("RPC: %5u release task\n", task->tk_pid);
 
 	if (!list_empty(&task->tk_task)) {
@@ -930,9 +922,6 @@ static void rpc_release_task(struct rpc_task *task)
 	}
 	BUG_ON (RPC_IS_QUEUED(task));
 
-#ifdef RPC_DEBUG
-	task->tk_magic = 0;
-#endif
 	/* Wake up anyone who is waiting for task completion */
 	rpc_mark_complete_task(task);
 
-- 
cgit v1.2.3


From 9bb0b8136a7d5b50c5807af3bf12b758fb257814 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:51:50 -0400
Subject: SUNRPC: Reorder the struct rpc_task fields

This improves the packing of the rpc_task, and ensures that on 64-bit
platforms the size reduces to 216 bytes.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/sched.h | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index 41b9f971c9fb..7be4f3a6d246 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -45,14 +45,11 @@ struct rpc_task {
 	struct list_head	tk_task;	/* global list of tasks */
 	struct rpc_clnt *	tk_client;	/* RPC client */
 	struct rpc_rqst *	tk_rqstp;	/* RPC request */
-	int			tk_status;	/* result of last operation */
 
 	/*
 	 * RPC call state
 	 */
 	struct rpc_message	tk_msg;		/* RPC call info */
-	__u8			tk_garb_retry;
-	__u8			tk_cred_retry;
 
 	/*
 	 * callback	to be executed after waking up
@@ -65,7 +62,6 @@ struct rpc_task {
 	void *			tk_calldata;
 
 	unsigned long		tk_timeout;	/* timeout for rpc_sleep() */
-	unsigned short		tk_flags;	/* misc flags */
 	unsigned long		tk_runstate;	/* Task run status */
 	struct workqueue_struct	*tk_workqueue;	/* Normally rpciod, but could
 						 * be any workqueue
@@ -76,15 +72,19 @@ struct rpc_task {
 		struct rpc_wait		tk_wait;	/* RPC wait */
 	} u;
 
-	unsigned short		tk_timeouts;	/* maj timeouts */
 	ktime_t			tk_start;	/* RPC task init timestamp */
 
 	pid_t			tk_owner;	/* Process id for batching tasks */
-	unsigned char		tk_priority : 2;/* Task priority */
+	int			tk_status;	/* result of last operation */
+	unsigned short		tk_flags;	/* misc flags */
+	unsigned short		tk_timeouts;	/* maj timeouts */
 
 #ifdef RPC_DEBUG
 	unsigned short		tk_pid;		/* debugging aid */
 #endif
+	unsigned char		tk_priority : 2,/* Task priority */
+				tk_garb_retry : 2,
+				tk_cred_retry : 2;
 };
 #define tk_xprt			tk_client->cl_xprt
 
-- 
cgit v1.2.3


From 126e216a8730532dfb685205309275f87e3d133e Mon Sep 17 00:00:00 2001
From: Trond Myklebust <Trond.Myklebust@netapp.com>
Date: Thu, 13 May 2010 12:55:38 -0400
Subject: SUNRPC: Don't spam gssd with upcall requests when the kerberos key
 expired

Now that the rpc.gssd daemon can explicitly tell us that the key expired,
we should cache that information to avoid spamming gssd.

Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
---
 include/linux/sunrpc/auth.h     |  1 +
 include/linux/sunrpc/auth_gss.h |  1 +
 net/sunrpc/auth_gss/auth_gss.c  | 65 +++++++++++++++++++++++++++++++++--------
 3 files changed, 55 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/sunrpc/auth.h b/include/linux/sunrpc/auth.h
index 996df4dac7d4..87d7ec0bf779 100644
--- a/include/linux/sunrpc/auth.h
+++ b/include/linux/sunrpc/auth.h
@@ -54,6 +54,7 @@ struct rpc_cred {
 #define RPCAUTH_CRED_NEW	0
 #define RPCAUTH_CRED_UPTODATE	1
 #define RPCAUTH_CRED_HASHED	2
+#define RPCAUTH_CRED_NEGATIVE	3
 
 #define RPCAUTH_CRED_MAGIC	0x0f4aa4f0
 
diff --git a/include/linux/sunrpc/auth_gss.h b/include/linux/sunrpc/auth_gss.h
index d48d4e605f74..671538d25bc1 100644
--- a/include/linux/sunrpc/auth_gss.h
+++ b/include/linux/sunrpc/auth_gss.h
@@ -82,6 +82,7 @@ struct gss_cred {
 	enum rpc_gss_svc	gc_service;
 	struct gss_cl_ctx	*gc_ctx;
 	struct gss_upcall_msg	*gc_upcall;
+	unsigned long		gc_upcall_timestamp;
 	unsigned char		gc_machine_cred : 1;
 };
 
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 48a7939dc9e2..8da2a0e68574 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -57,6 +57,9 @@ static const struct rpc_authops authgss_ops;
 static const struct rpc_credops gss_credops;
 static const struct rpc_credops gss_nullops;
 
+#define GSS_RETRY_EXPIRED 5
+static unsigned int gss_expired_cred_retry_delay = GSS_RETRY_EXPIRED;
+
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY	RPCDBG_AUTH
 #endif
@@ -349,6 +352,24 @@ gss_unhash_msg(struct gss_upcall_msg *gss_msg)
 	spin_unlock(&inode->i_lock);
 }
 
+static void
+gss_handle_downcall_result(struct gss_cred *gss_cred, struct gss_upcall_msg *gss_msg)
+{
+	switch (gss_msg->msg.errno) {
+	case 0:
+		if (gss_msg->ctx == NULL)
+			break;
+		clear_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
+		gss_cred_set_ctx(&gss_cred->gc_base, gss_msg->ctx);
+		break;
+	case -EKEYEXPIRED:
+		set_bit(RPCAUTH_CRED_NEGATIVE, &gss_cred->gc_base.cr_flags);
+	}
+	gss_cred->gc_upcall_timestamp = jiffies;
+	gss_cred->gc_upcall = NULL;
+	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+}
+
 static void
 gss_upcall_callback(struct rpc_task *task)
 {
@@ -358,13 +379,9 @@ gss_upcall_callback(struct rpc_task *task)
 	struct inode *inode = &gss_msg->inode->vfs_inode;
 
 	spin_lock(&inode->i_lock);
-	if (gss_msg->ctx)
-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
-	else
-		task->tk_status = gss_msg->msg.errno;
-	gss_cred->gc_upcall = NULL;
-	rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
+	gss_handle_downcall_result(gss_cred, gss_msg);
 	spin_unlock(&inode->i_lock);
+	task->tk_status = gss_msg->msg.errno;
 	gss_release_msg(gss_msg);
 }
 
@@ -513,18 +530,16 @@ gss_refresh_upcall(struct rpc_task *task)
 	spin_lock(&inode->i_lock);
 	if (gss_cred->gc_upcall != NULL)
 		rpc_sleep_on(&gss_cred->gc_upcall->rpc_waitqueue, task, NULL);
-	else if (gss_msg->ctx != NULL) {
-		gss_cred_set_ctx(task->tk_msg.rpc_cred, gss_msg->ctx);
-		gss_cred->gc_upcall = NULL;
-		rpc_wake_up_status(&gss_msg->rpc_waitqueue, gss_msg->msg.errno);
-	} else if (gss_msg->msg.errno >= 0) {
+	else if (gss_msg->ctx == NULL && gss_msg->msg.errno >= 0) {
 		task->tk_timeout = 0;
 		gss_cred->gc_upcall = gss_msg;
 		/* gss_upcall_callback will release the reference to gss_upcall_msg */
 		atomic_inc(&gss_msg->count);
 		rpc_sleep_on(&gss_msg->rpc_waitqueue, task, gss_upcall_callback);
-	} else
+	} else {
+		gss_handle_downcall_result(gss_cred, gss_msg);
 		err = gss_msg->msg.errno;
+	}
 	spin_unlock(&inode->i_lock);
 	gss_release_msg(gss_msg);
 out:
@@ -1123,6 +1138,23 @@ static int gss_renew_cred(struct rpc_task *task)
 	return 0;
 }
 
+static int gss_cred_is_negative_entry(struct rpc_cred *cred)
+{
+	if (test_bit(RPCAUTH_CRED_NEGATIVE, &cred->cr_flags)) {
+		unsigned long now = jiffies;
+		unsigned long begin, expire;
+		struct gss_cred *gss_cred; 
+
+		gss_cred = container_of(cred, struct gss_cred, gc_base);
+		begin = gss_cred->gc_upcall_timestamp;
+		expire = begin + gss_expired_cred_retry_delay * HZ;
+
+		if (time_in_range_open(now, begin, expire))
+			return 1;
+	}
+	return 0;
+}
+
 /*
 * Refresh credentials. XXX - finish
 */
@@ -1132,6 +1164,9 @@ gss_refresh(struct rpc_task *task)
 	struct rpc_cred *cred = task->tk_msg.rpc_cred;
 	int ret = 0;
 
+	if (gss_cred_is_negative_entry(cred))
+		return -EKEYEXPIRED;
+
 	if (!test_bit(RPCAUTH_CRED_NEW, &cred->cr_flags) &&
 			!test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags)) {
 		ret = gss_renew_cred(task);
@@ -1585,5 +1620,11 @@ static void __exit exit_rpcsec_gss(void)
 }
 
 MODULE_LICENSE("GPL");
+module_param_named(expired_cred_retry_delay,
+		   gss_expired_cred_retry_delay,
+		   uint, 0644);
+MODULE_PARM_DESC(expired_cred_retry_delay, "Timeout (in seconds) until "
+		"the RPC engine retries an expired credential");
+
 module_init(init_rpcsec_gss)
 module_exit(exit_rpcsec_gss)
-- 
cgit v1.2.3


From 1c2a49f61785ebbcbfb481a2aab659020f0457f7 Mon Sep 17 00:00:00 2001
From: Anton Vorontsov <avorontsov@ru.mvista.com>
Date: Thu, 4 Mar 2010 20:06:06 +0300
Subject: ahci: Add platform driver

This can be used for AHCI-compatible interfaces implemented inside
System-On-Chip solutions, or AHCI devices connected via localbus.

Signed-off-by: Anton Vorontsov <avorontsov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/Kconfig           |   8 ++
 drivers/ata/Makefile          |   1 +
 drivers/ata/ahci_platform.c   | 191 ++++++++++++++++++++++++++++++++++++++++++
 include/linux/ahci_platform.h |  29 +++++++
 4 files changed, 229 insertions(+)
 create mode 100644 drivers/ata/ahci_platform.c
 create mode 100644 include/linux/ahci_platform.h

(limited to 'include/linux')

diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
index 01c52c415bdc..cbadb9fa1277 100644
--- a/drivers/ata/Kconfig
+++ b/drivers/ata/Kconfig
@@ -65,6 +65,14 @@ config SATA_AHCI
 
 	  If unsure, say N.
 
+config SATA_AHCI_PLATFORM
+	tristate "Platform AHCI SATA support"
+	help
+	  This option enables support for Platform AHCI Serial ATA
+	  controllers.
+
+	  If unsure, say N.
+
 config SATA_SIL24
 	tristate "Silicon Image 3124/3132 SATA support"
 	depends on PCI
diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
index 20c5251e7e41..d0a93c4ad3ec 100644
--- a/drivers/ata/Makefile
+++ b/drivers/ata/Makefile
@@ -2,6 +2,7 @@
 obj-$(CONFIG_ATA)		+= libata.o
 
 obj-$(CONFIG_SATA_AHCI)		+= ahci.o libahci.o
+obj-$(CONFIG_SATA_AHCI_PLATFORM) += ahci_platform.o libahci.o
 obj-$(CONFIG_SATA_SVW)		+= sata_svw.o
 obj-$(CONFIG_ATA_PIIX)		+= ata_piix.o
 obj-$(CONFIG_SATA_PROMISE)	+= sata_promise.o
diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c
new file mode 100644
index 000000000000..42cdd7363fad
--- /dev/null
+++ b/drivers/ata/ahci_platform.c
@@ -0,0 +1,191 @@
+/*
+ * AHCI SATA platform driver
+ *
+ * Copyright 2004-2005  Red Hat, Inc.
+ *   Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2010  MontaVista Software, LLC.
+ *   Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/device.h>
+#include <linux/platform_device.h>
+#include <linux/libata.h>
+#include <linux/ahci_platform.h>
+#include "ahci.h"
+
+static int __init ahci_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ahci_platform_data *pdata = dev->platform_data;
+	struct ata_port_info pi = {
+		.flags		= AHCI_FLAG_COMMON,
+		.pio_mask	= ATA_PIO4,
+		.udma_mask	= ATA_UDMA6,
+		.port_ops	= &ahci_ops,
+	};
+	const struct ata_port_info *ppi[] = { &pi, NULL };
+	struct ahci_host_priv *hpriv;
+	struct ata_host *host;
+	struct resource *mem;
+	int irq;
+	int n_ports;
+	int i;
+	int rc;
+
+	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!mem) {
+		dev_err(dev, "no mmio space\n");
+		return -EINVAL;
+	}
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq <= 0) {
+		dev_err(dev, "no irq\n");
+		return -EINVAL;
+	}
+
+	if (pdata && pdata->init) {
+		rc = pdata->init(dev);
+		if (rc)
+			return rc;
+	}
+
+	if (pdata && pdata->ata_port_info)
+		pi = *pdata->ata_port_info;
+
+	hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
+	if (!hpriv) {
+		rc = -ENOMEM;
+		goto err0;
+	}
+
+	hpriv->flags |= (unsigned long)pi.private_data;
+
+	hpriv->mmio = devm_ioremap(dev, mem->start, resource_size(mem));
+	if (!hpriv->mmio) {
+		dev_err(dev, "can't map %pR\n", mem);
+		rc = -ENOMEM;
+		goto err0;
+	}
+
+	ahci_save_initial_config(dev, hpriv,
+		pdata ? pdata->force_port_map : 0,
+		pdata ? pdata->mask_port_map  : 0);
+
+	/* prepare host */
+	if (hpriv->cap & HOST_CAP_NCQ)
+		pi.flags |= ATA_FLAG_NCQ;
+
+	if (hpriv->cap & HOST_CAP_PMP)
+		pi.flags |= ATA_FLAG_PMP;
+
+	ahci_set_em_messages(hpriv, &pi);
+
+	/* CAP.NP sometimes indicates the index of the last enabled
+	 * port, at other times, that of the last possible port, so
+	 * determining the maximum port number requires looking at
+	 * both CAP.NP and port_map.
+	 */
+	n_ports = max(ahci_nr_ports(hpriv->cap), fls(hpriv->port_map));
+
+	host = ata_host_alloc_pinfo(dev, ppi, n_ports);
+	if (!host) {
+		rc = -ENOMEM;
+		goto err0;
+	}
+
+	host->private_data = hpriv;
+
+	if (!(hpriv->cap & HOST_CAP_SSS) || ahci_ignore_sss)
+		host->flags |= ATA_HOST_PARALLEL_SCAN;
+	else
+		printk(KERN_INFO "ahci: SSS flag set, parallel bus scan disabled\n");
+
+	if (pi.flags & ATA_FLAG_EM)
+		ahci_reset_em(host);
+
+	for (i = 0; i < host->n_ports; i++) {
+		struct ata_port *ap = host->ports[i];
+
+		ata_port_desc(ap, "mmio %pR", mem);
+		ata_port_desc(ap, "port 0x%x", 0x100 + ap->port_no * 0x80);
+
+		/* set initial link pm policy */
+		ap->pm_policy = NOT_AVAILABLE;
+
+		/* set enclosure management message type */
+		if (ap->flags & ATA_FLAG_EM)
+			ap->em_message_type = ahci_em_messages;
+
+		/* disabled/not-implemented port */
+		if (!(hpriv->port_map & (1 << i)))
+			ap->ops = &ata_dummy_port_ops;
+	}
+
+	rc = ahci_reset_controller(host);
+	if (rc)
+		goto err0;
+
+	ahci_init_controller(host);
+	ahci_print_info(host, "platform");
+
+	rc = ata_host_activate(host, irq, ahci_interrupt, IRQF_SHARED,
+			       &ahci_sht);
+	if (rc)
+		goto err0;
+
+	return 0;
+err0:
+	if (pdata && pdata->exit)
+		pdata->exit(dev);
+	return rc;
+}
+
+static int __devexit ahci_remove(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ahci_platform_data *pdata = dev->platform_data;
+	struct ata_host *host = dev_get_drvdata(dev);
+
+	ata_host_detach(host);
+
+	if (pdata && pdata->exit)
+		pdata->exit(dev);
+
+	return 0;
+}
+
+static struct platform_driver ahci_driver = {
+	.probe = ahci_probe,
+	.remove = __devexit_p(ahci_remove),
+	.driver = {
+		.name = "ahci",
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init ahci_init(void)
+{
+	return platform_driver_probe(&ahci_driver, ahci_probe);
+}
+module_init(ahci_init);
+
+static void __exit ahci_exit(void)
+{
+	platform_driver_unregister(&ahci_driver);
+}
+module_exit(ahci_exit);
+
+MODULE_DESCRIPTION("AHCI SATA platform driver");
+MODULE_AUTHOR("Anton Vorontsov <avorontsov@ru.mvista.com>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:ahci");
diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h
new file mode 100644
index 000000000000..f7dd576dd5a4
--- /dev/null
+++ b/include/linux/ahci_platform.h
@@ -0,0 +1,29 @@
+/*
+ * AHCI SATA platform driver
+ *
+ * Copyright 2004-2005  Red Hat, Inc.
+ *   Jeff Garzik <jgarzik@pobox.com>
+ * Copyright 2010  MontaVista Software, LLC.
+ *   Anton Vorontsov <avorontsov@ru.mvista.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2, or (at your option)
+ * any later version.
+ */
+
+#ifndef _AHCI_PLATFORM_H
+#define _AHCI_PLATFORM_H
+
+struct device;
+struct ata_port_info;
+
+struct ahci_platform_data {
+	int (*init)(struct device *dev);
+	void (*exit)(struct device *dev);
+	const struct ata_port_info *ata_port_info;
+	unsigned int force_port_map;
+	unsigned int mask_port_map;
+};
+
+#endif /* _AHCI_PLATFORM_H */
-- 
cgit v1.2.3


From 294440887b32c58d220fb54b73b7a58079b78f20 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Thu, 22 Apr 2010 20:03:35 +0400
Subject: libata-sff: kill unused ata_bus_reset()

... since I see no callers of it.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 Documentation/DocBook/libata.tmpl |  6 +--
 drivers/ata/libata-sff.c          | 94 ---------------------------------------
 include/linux/libata.h            |  1 -
 3 files changed, 2 insertions(+), 99 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index ff3e5bec1c24..70b811e9f2ca 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -81,16 +81,14 @@ void (*port_disable) (struct ata_port *);
 	</programlisting>
 
 	<para>
-	Called from ata_bus_probe() and ata_bus_reset() error paths,
-	as well as when unregistering from the SCSI module (rmmod, hot
-	unplug).
+	Called from ata_bus_probe() error path, as well as when
+	unregistering from the SCSI module (rmmod, hot unplug).
 	This function should do whatever needs to be done to take the
 	port out of use.  In most cases, ata_port_disable() can be used
 	as this hook.
 	</para>
 	<para>
 	Called from ata_bus_probe() on a failed probe.
-	Called from ata_bus_reset() on a failed bus reset.
 	Called from ata_scsi_release().
 	</para>
 
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index e3877b6843c9..b31389605bee 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2631,100 +2631,6 @@ u8 ata_bmdma_status(struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ata_bmdma_status);
 
-/**
- *	ata_bus_reset - reset host port and associated ATA channel
- *	@ap: port to reset
- *
- *	This is typically the first time we actually start issuing
- *	commands to the ATA channel.  We wait for BSY to clear, then
- *	issue EXECUTE DEVICE DIAGNOSTIC command, polling for its
- *	result.  Determine what devices, if any, are on the channel
- *	by looking at the device 0/1 error register.  Look at the signature
- *	stored in each device's taskfile registers, to determine if
- *	the device is ATA or ATAPI.
- *
- *	LOCKING:
- *	PCI/etc. bus probe sem.
- *	Obtains host lock.
- *
- *	SIDE EFFECTS:
- *	Sets ATA_FLAG_DISABLED if bus reset fails.
- *
- *	DEPRECATED:
- *	This function is only for drivers which still use old EH and
- *	will be removed soon.
- */
-void ata_bus_reset(struct ata_port *ap)
-{
-	struct ata_device *device = ap->link.device;
-	struct ata_ioports *ioaddr = &ap->ioaddr;
-	unsigned int slave_possible = ap->flags & ATA_FLAG_SLAVE_POSS;
-	u8 err;
-	unsigned int dev0, dev1 = 0, devmask = 0;
-	int rc;
-
-	DPRINTK("ENTER, host %u, port %u\n", ap->print_id, ap->port_no);
-
-	/* determine if device 0/1 are present */
-	if (ap->flags & ATA_FLAG_SATA_RESET)
-		dev0 = 1;
-	else {
-		dev0 = ata_devchk(ap, 0);
-		if (slave_possible)
-			dev1 = ata_devchk(ap, 1);
-	}
-
-	if (dev0)
-		devmask |= (1 << 0);
-	if (dev1)
-		devmask |= (1 << 1);
-
-	/* select device 0 again */
-	ap->ops->sff_dev_select(ap, 0);
-
-	/* issue bus reset */
-	if (ap->flags & ATA_FLAG_SRST) {
-		rc = ata_bus_softreset(ap, devmask,
-				       ata_deadline(jiffies, 40000));
-		if (rc && rc != -ENODEV)
-			goto err_out;
-	}
-
-	/*
-	 * determine by signature whether we have ATA or ATAPI devices
-	 */
-	device[0].class = ata_sff_dev_classify(&device[0], dev0, &err);
-	if ((slave_possible) && (err != 0x81))
-		device[1].class = ata_sff_dev_classify(&device[1], dev1, &err);
-
-	/* is double-select really necessary? */
-	if (device[1].class != ATA_DEV_NONE)
-		ap->ops->sff_dev_select(ap, 1);
-	if (device[0].class != ATA_DEV_NONE)
-		ap->ops->sff_dev_select(ap, 0);
-
-	/* if no devices were detected, disable this port */
-	if ((device[0].class == ATA_DEV_NONE) &&
-	    (device[1].class == ATA_DEV_NONE))
-		goto err_out;
-
-	if (ap->flags & (ATA_FLAG_SATA_RESET | ATA_FLAG_SRST)) {
-		/* set up device control for ATA_FLAG_SATA_RESET */
-		iowrite8(ap->ctl, ioaddr->ctl_addr);
-		ap->last_ctl = ap->ctl;
-	}
-
-	DPRINTK("EXIT\n");
-	return;
-
-err_out:
-	ata_port_printk(ap, KERN_ERR, "disabling port\n");
-	ata_port_disable(ap);
-
-	DPRINTK("EXIT\n");
-}
-EXPORT_SYMBOL_GPL(ata_bus_reset);
-
 #ifdef CONFIG_PCI
 
 /**
diff --git a/include/linux/libata.h b/include/linux/libata.h
index b2f2003b92e5..4fa748e9bc01 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1631,7 +1631,6 @@ extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
 extern void ata_bmdma_start(struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
 extern u8 ata_bmdma_status(struct ata_port *ap);
-extern void ata_bus_reset(struct ata_port *ap);
 
 #ifdef CONFIG_PCI
 extern int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev);
-- 
cgit v1.2.3


From b48d58f55aa1d2d0d12378e45663842d4021916e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Fri, 9 Apr 2010 19:46:38 +0900
Subject: libata: use longer 0xff wait if parallel scan is enabled

There are some SATA devices which take relatively long to get out of
0xff status after reset.  In libata, this timeout is determined by
ATA_TMOUT_FF_WAIT.  Quantum GoVault is the worst, requiring about 2s for
reliable detection.  However, because 2s 0xff timeout can introduce
rather long spurious delay during boot, libata has been compromising
at the next longest timeout of 800ms for HHD424020F7SV00 iVDR drive.

Now that parallel scan is in place for common drivers, libata can
afford 2s 0xff timeout.  Use 2s 0xff timeout if parallel scan is
enabled.

Please note that the chance of spurious wait is pretty slim w/ working
SCR access so this will only affect SATA controllers w/o SCR access
which isn't too common these days.

Please read the following thread for more information on the GoVault
drive.

  http://thread.gmane.org/gmane.linux.ide/14545/focus=14663

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Gary Hade <garyhade@us.ibm.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 18 ++++++++++++------
 include/linux/libata.h    | 11 ++++++-----
 2 files changed, 18 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 49cffb6094a3..134b5df80ace 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -3631,9 +3631,15 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
 		   int (*check_ready)(struct ata_link *link))
 {
 	unsigned long start = jiffies;
-	unsigned long nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
+	unsigned long nodev_deadline;
 	int warned = 0;
 
+	/* choose which 0xff timeout to use, read comment in libata.h */
+	if (link->ap->host->flags & ATA_HOST_PARALLEL_SCAN)
+		nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT_LONG);
+	else
+		nodev_deadline = ata_deadline(start, ATA_TMOUT_FF_WAIT);
+
 	/* Slave readiness can't be tested separately from master.  On
 	 * M/S emulation configuration, this function should be called
 	 * only on the master and it will handle both master and slave.
@@ -3651,12 +3657,12 @@ int ata_wait_ready(struct ata_link *link, unsigned long deadline,
 		if (ready > 0)
 			return 0;
 
-		/* -ENODEV could be transient.  Ignore -ENODEV if link
+		/*
+		 * -ENODEV could be transient.  Ignore -ENODEV if link
 		 * is online.  Also, some SATA devices take a long
-		 * time to clear 0xff after reset.  For example,
-		 * HHD424020F7SV00 iVDR needs >= 800ms while Quantum
-		 * GoVault needs even more than that.  Wait for
-		 * ATA_TMOUT_FF_WAIT on -ENODEV if link isn't offline.
+		 * time to clear 0xff after reset.  Wait for
+		 * ATA_TMOUT_FF_WAIT[_LONG] on -ENODEV if link isn't
+		 * offline.
 		 *
 		 * Note that some PATA controllers (pata_ali) explode
 		 * if status register is read more than once when
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 4fa748e9bc01..242eb2646101 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -256,12 +256,13 @@ enum {
 	ATA_TMOUT_INTERNAL_QUICK = 5000,
 	ATA_TMOUT_MAX_PARK	= 30000,
 
-	/* FIXME: GoVault needs 2s but we can't afford that without
-	 * parallel probing.  800ms is enough for iVDR disk
-	 * HHD424020F7SV00.  Increase to 2secs when parallel probing
-	 * is in place.
+	/*
+	 * GoVault needs 2s and iVDR disk HHD424020F7SV00 800ms.  2s
+	 * is too much without parallel probing.  Use 2s if parallel
+	 * probing is available, 800ms otherwise.
 	 */
-	ATA_TMOUT_FF_WAIT	=  800,
+	ATA_TMOUT_FF_WAIT_LONG	=  2000,
+	ATA_TMOUT_FF_WAIT	=   800,
 
 	/* Spec mandates to wait for ">= 2ms" before checking status
 	 * after reset.  We wait 150ms, because that was the magic
-- 
cgit v1.2.3


From 41dec29bcb05eb8ec396f70ce791c6e3e4ce4712 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 7 May 2010 22:47:50 +0400
Subject: libata: introduce sff_set_devctl() method

The set of libata's taskfile access methods is clearly incomplete as
it lacks a method to write to the device control register -- which
forces drivers like 'pata_bf54x' and 'pata_scc' to implement more
"high level" (and more weighty) methods like freeze() and postreset().

So, introduce the optional sff_set_devctl() method which the drivers
only have to implement if the standard iowrite8() can't be used (just
like the existing sff_check_altstatus() method) and make use of it
in the freeze() and postreset() method implementations (I could also
have used it in softreset() method but it also reads other taskfile
registers without using tf_read() making that quite pointless);
this makes freeze() method implementations in the 'pata_bf54x' and
'pata_scc' drivers virtually identical to ata_sff_freeze(), so we
can get rid of them completely.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 Documentation/DocBook/libata.tmpl | 12 ++++++++++++
 drivers/ata/libata-sff.c          | 31 ++++++++++++++++++++++++------
 drivers/ata/pata_bf54x.c          | 40 +++++++++++++--------------------------
 drivers/ata/pata_scc.c            | 38 ++++++++++++-------------------------
 include/linux/libata.h            |  1 +
 5 files changed, 63 insertions(+), 59 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/libata.tmpl b/Documentation/DocBook/libata.tmpl
index 70b811e9f2ca..828748c4e78d 100644
--- a/Documentation/DocBook/libata.tmpl
+++ b/Documentation/DocBook/libata.tmpl
@@ -225,6 +225,18 @@ u8   (*sff_check_altstatus)(struct ata_port *ap);
 
 	</sect2>
 
+	<sect2><title>Write specific ATA shadow register</title>
+	<programlisting>
+void (*sff_set_devctl)(struct ata_port *ap, u8 ctl);
+	</programlisting>
+
+	<para>
+	Write the device control ATA shadow register to the hardware.
+	Most drivers don't need to define this.
+	</para>
+
+	</sect2>
+
 	<sect2><title>Select ATA device on bus</title>
 	<programlisting>
 void (*sff_dev_select)(struct ata_port *ap, unsigned int device);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index b31389605bee..31b495fcd969 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -445,6 +445,27 @@ int ata_sff_wait_ready(struct ata_link *link, unsigned long deadline)
 }
 EXPORT_SYMBOL_GPL(ata_sff_wait_ready);
 
+/**
+ *	ata_sff_set_devctl - Write device control reg
+ *	@ap: port where the device is
+ *	@ctl: value to write
+ *
+ *	Writes @ctl via ap->ops->sff_set_devctl() if set, else iowrite8().
+ *
+ *	Note: may NOT be used as the sff_set_devctl() entry in
+ *	ata_port_operations (it would call itself recursively).
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+static void ata_sff_set_devctl(struct ata_port *ap, u8 ctl)
+{
+	if (ap->ops->sff_set_devctl)
+		ap->ops->sff_set_devctl(ap, ctl);
+	else
+		iowrite8(ctl, ap->ioaddr.ctl_addr);
+}
+
 /**
  *	ata_sff_dev_select - Select device 0/1 on ATA bus
  *	@ap: ATA channel to manipulate
@@ -1895,13 +1916,11 @@ EXPORT_SYMBOL_GPL(ata_sff_lost_interrupt);
  */
 void ata_sff_freeze(struct ata_port *ap)
 {
-	struct ata_ioports *ioaddr = &ap->ioaddr;
-
 	ap->ctl |= ATA_NIEN;
 	ap->last_ctl = ap->ctl;
 
-	if (ioaddr->ctl_addr)
-		iowrite8(ap->ctl, ioaddr->ctl_addr);
+	if (ap->ops->sff_set_devctl || ap->ioaddr.ctl_addr)
+		ata_sff_set_devctl(ap, ap->ctl);
 
 	/* Under certain circumstances, some controllers raise IRQ on
 	 * ATA_NIEN manipulation.  Also, many controllers fail to mask
@@ -2301,8 +2320,8 @@ void ata_sff_postreset(struct ata_link *link, unsigned int *classes)
 	}
 
 	/* set up device control */
-	if (ap->ioaddr.ctl_addr) {
-		iowrite8(ap->ctl, ap->ioaddr.ctl_addr);
+	if (ap->ops->sff_set_devctl || ap->ioaddr.ctl_addr) {
+		ata_sff_set_devctl(ap, ap->ctl);
 		ap->last_ctl = ap->ctl;
 	}
 }
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 02c81f12c702..198307534d90 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -820,6 +820,18 @@ static void bfin_dev_select(struct ata_port *ap, unsigned int device)
 	ata_sff_pause(ap);
 }
 
+/**
+ *	bfin_set_devctl - Write device control reg
+ *	@ap: port where the device is
+ *	@ctl: value to write to ATA_REG_CTRL via write_atapi_register()
+ */
+
+static void bfin_set_devctl(struct ata_port *ap, u8 ctl)
+{
+	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
+	write_atapi_register(base, ATA_REG_CTRL, ctl);
+}
+
 /**
  *	bfin_bmdma_setup - Set up IDE DMA transaction
  *	@qc: Info associated with this ATA transaction.
@@ -1239,32 +1251,6 @@ static unsigned char bfin_irq_on(struct ata_port *ap)
 	return tmp;
 }
 
-/**
- *	bfin_freeze - Freeze DMA controller port
- *	@ap: port to freeze
- *
- *	Note: Original code is ata_sff_freeze().
- */
-
-static void bfin_freeze(struct ata_port *ap)
-{
-	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
-
-	dev_dbg(ap->dev, "in atapi dma freeze\n");
-	ap->ctl |= ATA_NIEN;
-	ap->last_ctl = ap->ctl;
-
-	write_atapi_register(base, ATA_REG_CTRL, ap->ctl);
-
-	/* Under certain circumstances, some controllers raise IRQ on
-	 * ATA_NIEN manipulation.  Also, many controllers fail to mask
-	 * previously pending IRQ on ATA_NIEN assertion.  Clear it.
-	 */
-	ap->ops->sff_check_status(ap);
-
-	bfin_irq_clear(ap);
-}
-
 /**
  *	bfin_thaw - Thaw DMA controller port
  *	@ap: port to thaw
@@ -1476,6 +1462,7 @@ static struct ata_port_operations bfin_pata_ops = {
 	.sff_check_status	= bfin_check_status,
 	.sff_check_altstatus	= bfin_check_altstatus,
 	.sff_dev_select		= bfin_dev_select,
+	.sff_set_devctl		= bfin_set_devctl,
 
 	.bmdma_setup		= bfin_bmdma_setup,
 	.bmdma_start		= bfin_bmdma_start,
@@ -1485,7 +1472,6 @@ static struct ata_port_operations bfin_pata_ops = {
 
 	.qc_prep		= ata_noop_qc_prep,
 
-	.freeze			= bfin_freeze,
 	.thaw			= bfin_thaw,
 	.softreset		= bfin_softreset,
 	.postreset		= bfin_postreset,
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 77ff829af176..86b25fef34a9 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -415,6 +415,17 @@ static void scc_dev_select (struct ata_port *ap, unsigned int device)
 	ata_sff_pause(ap);
 }
 
+/**
+ *	scc_set_devctl - Write device control reg
+ *	@ap: port where the device is
+ *	@ctl: value to write (via out_be32() to ap->ioaddr.ctl_addr)
+ */
+
+static void scc_set_devctl(struct ata_port *ap, u8 ctl)
+{
+	out_be32(ap->ioaddr.ctl_addr, ctl);
+}
+
 /**
  *	scc_bmdma_setup - Set up PCI IDE BMDMA transaction
  *	@qc: Info associated with this ATA transaction.
@@ -839,31 +850,6 @@ static u8 scc_irq_on (struct ata_port *ap)
 	return tmp;
 }
 
-/**
- *	scc_freeze - Freeze BMDMA controller port
- *	@ap: port to freeze
- *
- *	Note: Original code is ata_sff_freeze().
- */
-
-static void scc_freeze (struct ata_port *ap)
-{
-	struct ata_ioports *ioaddr = &ap->ioaddr;
-
-	ap->ctl |= ATA_NIEN;
-	ap->last_ctl = ap->ctl;
-
-	out_be32(ioaddr->ctl_addr, ap->ctl);
-
-	/* Under certain circumstances, some controllers raise IRQ on
-	 * ATA_NIEN manipulation.  Also, many controllers fail to mask
-	 * previously pending IRQ on ATA_NIEN assertion.  Clear it.
-	 */
-	ap->ops->sff_check_status(ap);
-
-	ap->ops->sff_irq_clear(ap);
-}
-
 /**
  *	scc_pata_prereset - prepare for reset
  *	@ap: ATA port to be reset
@@ -977,6 +963,7 @@ static struct ata_port_operations scc_pata_ops = {
 	.sff_check_status	= scc_check_status,
 	.sff_check_altstatus	= scc_check_altstatus,
 	.sff_dev_select		= scc_dev_select,
+	.sff_set_devctl		= scc_set_devctl,
 
 	.bmdma_setup		= scc_bmdma_setup,
 	.bmdma_start		= scc_bmdma_start,
@@ -984,7 +971,6 @@ static struct ata_port_operations scc_pata_ops = {
 	.bmdma_status		= scc_bmdma_status,
 	.sff_data_xfer		= scc_data_xfer,
 
-	.freeze			= scc_freeze,
 	.prereset		= scc_pata_prereset,
 	.softreset		= scc_softreset,
 	.postreset		= scc_postreset,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 242eb2646101..6207ec600722 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -850,6 +850,7 @@ struct ata_port_operations {
 	 * SFF / taskfile oriented ops
 	 */
 	void (*sff_dev_select)(struct ata_port *ap, unsigned int device);
+	void (*sff_set_devctl)(struct ata_port *ap, u8 ctl);
 	u8   (*sff_check_status)(struct ata_port *ap);
 	u8   (*sff_check_altstatus)(struct ata_port *ap);
 	void (*sff_tf_load)(struct ata_port *ap, const struct ata_taskfile *tf);
-- 
cgit v1.2.3


From e42a542ba9cca594897176020445023c54d903d6 Mon Sep 17 00:00:00 2001
From: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Date: Fri, 7 May 2010 22:49:02 +0400
Subject: libata: make sff_irq_on() method optional

Now, with the introduction of the sff_set_devctl() method, we can
use it in sff_irq_on() method too -- that way its implementations
in 'pata_bf54x' and 'pata_scc' become virtually identical to
ata_sff_irq_on().  The sff_irq_on() method now becomes quite
superfluous, and the only reason not to remove it completely is
the existence of the 'pata_octeon_cf' driver which implements it
as an empty function. Just make the method optional then, with
ata_sff_irq_on() becoming generic taskfile-bound function, still
global for the 'pata_bf54x' driver to be able to call it from its
thaw() and postreset() methods.

While at it, make the sff_irq_on() method and ata_sff_irq_on() return
'void' as the result is always ignored anyway.

Signed-off-by: Sergei Shtylyov <sshtylyov@ru.mvista.com>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c     | 26 +++++++++++++++-----------
 drivers/ata/pata_bf54x.c     | 29 ++---------------------------
 drivers/ata/pata_octeon_cf.c |  3 +--
 drivers/ata/pata_scc.c       | 24 ------------------------
 include/linux/libata.h       |  4 ++--
 5 files changed, 20 insertions(+), 66 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 31b495fcd969..b7f7f8557fee 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -63,7 +63,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 	.sff_tf_read		= ata_sff_tf_read,
 	.sff_exec_command	= ata_sff_exec_command,
 	.sff_data_xfer		= ata_sff_data_xfer,
-	.sff_irq_on		= ata_sff_irq_on,
 	.sff_irq_clear		= ata_sff_irq_clear,
 
 	.lost_interrupt		= ata_sff_lost_interrupt,
@@ -538,24 +537,29 @@ void ata_dev_select(struct ata_port *ap, unsigned int device,
  *	Enable interrupts on a legacy IDE device using MMIO or PIO,
  *	wait for idle, clear any pending interrupts.
  *
+ *	Note: may NOT be used as the sff_irq_on() entry in
+ *	ata_port_operations.
+ *
  *	LOCKING:
  *	Inherited from caller.
  */
-u8 ata_sff_irq_on(struct ata_port *ap)
+void ata_sff_irq_on(struct ata_port *ap)
 {
 	struct ata_ioports *ioaddr = &ap->ioaddr;
-	u8 tmp;
+
+	if (ap->ops->sff_irq_on) {
+		ap->ops->sff_irq_on(ap);
+		return;
+	}
 
 	ap->ctl &= ~ATA_NIEN;
 	ap->last_ctl = ap->ctl;
 
-	if (ioaddr->ctl_addr)
-		iowrite8(ap->ctl, ioaddr->ctl_addr);
-	tmp = ata_wait_idle(ap);
+	if (ap->ops->sff_set_devctl || ioaddr->ctl_addr)
+		ata_sff_set_devctl(ap, ap->ctl);
+	ata_wait_idle(ap);
 
 	ap->ops->sff_irq_clear(ap);
-
-	return tmp;
 }
 EXPORT_SYMBOL_GPL(ata_sff_irq_on);
 
@@ -1186,7 +1190,7 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 			qc = ata_qc_from_tag(ap, qc->tag);
 			if (qc) {
 				if (likely(!(qc->err_mask & AC_ERR_HSM))) {
-					ap->ops->sff_irq_on(ap);
+					ata_sff_irq_on(ap);
 					ata_qc_complete(qc);
 				} else
 					ata_port_freeze(ap);
@@ -1202,7 +1206,7 @@ static void ata_hsm_qc_complete(struct ata_queued_cmd *qc, int in_wq)
 	} else {
 		if (in_wq) {
 			spin_lock_irqsave(ap->lock, flags);
-			ap->ops->sff_irq_on(ap);
+			ata_sff_irq_on(ap);
 			ata_qc_complete(qc);
 			spin_unlock_irqrestore(ap->lock, flags);
 		} else
@@ -1946,7 +1950,7 @@ void ata_sff_thaw(struct ata_port *ap)
 	/* clear & re-enable interrupts */
 	ap->ops->sff_check_status(ap);
 	ap->ops->sff_irq_clear(ap);
-	ap->ops->sff_irq_on(ap);
+	ata_sff_irq_on(ap);
 }
 EXPORT_SYMBOL_GPL(ata_sff_thaw);
 
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 198307534d90..07c110470e25 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1227,30 +1227,6 @@ static void bfin_irq_clear(struct ata_port *ap)
 		| MULTI_TERM_INT | UDMAIN_TERM_INT | UDMAOUT_TERM_INT);
 }
 
-/**
- *	bfin_irq_on - Enable interrupts on a port.
- *	@ap: Port on which interrupts are enabled.
- *
- *	Note: Original code is ata_sff_irq_on().
- */
-
-static unsigned char bfin_irq_on(struct ata_port *ap)
-{
-	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
-	u8 tmp;
-
-	dev_dbg(ap->dev, "in atapi irq on\n");
-	ap->ctl &= ~ATA_NIEN;
-	ap->last_ctl = ap->ctl;
-
-	write_atapi_register(base, ATA_REG_CTRL, ap->ctl);
-	tmp = ata_wait_idle(ap);
-
-	bfin_irq_clear(ap);
-
-	return tmp;
-}
-
 /**
  *	bfin_thaw - Thaw DMA controller port
  *	@ap: port to thaw
@@ -1262,7 +1238,7 @@ void bfin_thaw(struct ata_port *ap)
 {
 	dev_dbg(ap->dev, "in atapi dma thaw\n");
 	bfin_check_status(ap);
-	bfin_irq_on(ap);
+	ata_sff_irq_on(ap);
 }
 
 /**
@@ -1279,7 +1255,7 @@ static void bfin_postreset(struct ata_link *link, unsigned int *classes)
 	void __iomem *base = (void __iomem *)ap->ioaddr.ctl_addr;
 
 	/* re-enable interrupts */
-	bfin_irq_on(ap);
+	ata_sff_irq_on(ap);
 
 	/* is double-select really necessary? */
 	if (classes[0] != ATA_DEV_NONE)
@@ -1477,7 +1453,6 @@ static struct ata_port_operations bfin_pata_ops = {
 	.postreset		= bfin_postreset,
 
 	.sff_irq_clear		= bfin_irq_clear,
-	.sff_irq_on		= bfin_irq_on,
 
 	.port_start		= bfin_port_start,
 	.port_stop		= bfin_port_stop,
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 005a44483a7b..303ca7e82408 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -489,9 +489,8 @@ static void octeon_cf_exec_command16(struct ata_port *ap,
 	ata_wait_idle(ap);
 }
 
-static u8 octeon_cf_irq_on(struct ata_port *ap)
+static void octeon_cf_irq_on(struct ata_port *ap)
 {
-	return 0;
 }
 
 static void octeon_cf_irq_clear(struct ata_port *ap)
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 86b25fef34a9..d3988991ca68 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -827,29 +827,6 @@ static unsigned int scc_data_xfer (struct ata_device *dev, unsigned char *buf,
 	return words << 1;
 }
 
-/**
- *	scc_irq_on - Enable interrupts on a port.
- *	@ap: Port on which interrupts are enabled.
- *
- *	Note: Original code is ata_sff_irq_on().
- */
-
-static u8 scc_irq_on (struct ata_port *ap)
-{
-	struct ata_ioports *ioaddr = &ap->ioaddr;
-	u8 tmp;
-
-	ap->ctl &= ~ATA_NIEN;
-	ap->last_ctl = ap->ctl;
-
-	out_be32(ioaddr->ctl_addr, ap->ctl);
-	tmp = ata_wait_idle(ap);
-
-	ap->ops->sff_irq_clear(ap);
-
-	return tmp;
-}
-
 /**
  *	scc_pata_prereset - prepare for reset
  *	@ap: ATA port to be reset
@@ -977,7 +954,6 @@ static struct ata_port_operations scc_pata_ops = {
 	.post_internal_cmd	= scc_bmdma_stop,
 
 	.sff_irq_clear		= scc_irq_clear,
-	.sff_irq_on		= scc_irq_on,
 
 	.port_start		= scc_port_start,
 	.port_stop		= scc_port_stop,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6207ec600722..af700923a393 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -859,7 +859,7 @@ struct ata_port_operations {
 				 const struct ata_taskfile *tf);
 	unsigned int (*sff_data_xfer)(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
-	u8   (*sff_irq_on)(struct ata_port *);
+	void (*sff_irq_on)(struct ata_port *);
 	bool (*sff_irq_check)(struct ata_port *);
 	void (*sff_irq_clear)(struct ata_port *);
 
@@ -1599,7 +1599,7 @@ extern unsigned int ata_sff_data_xfer32(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
 extern unsigned int ata_sff_data_xfer_noirq(struct ata_device *dev,
 			unsigned char *buf, unsigned int buflen, int rw);
-extern u8 ata_sff_irq_on(struct ata_port *ap);
+extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 			    u8 status, int in_wq);
-- 
cgit v1.2.3


From d83c49f3e36cecd2e8823b6c48ffba083b8a5704 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 30 Apr 2010 17:17:09 -0400
Subject: Fix the regression created by "set S_DEAD on unlink()..." commit

1) i_flags simply doesn't work for mount/unlink race prevention;
we may have many links to file and rm on one of those obviously
shouldn't prevent bind on top of another later on.  To fix it
the right way we need to mark _dentry_ as unsuitable for mounting
upon; new flag (DCACHE_CANT_MOUNT) is protected by d_lock and
i_mutex on the inode in question.  Set it (with dont_mount(dentry))
in unlink/rmdir/etc., check (with cant_mount(dentry)) in places
in namespace.c that used to check for S_DEAD.  Setting S_DEAD
is still needed in places where we used to set it (for directories
getting killed), since we rely on it for readdir/rmdir race
prevention.

2) rename()/mount() protection has another bogosity - we unhash
the target before we'd checked that it's not a mountpoint.  Fixed.

3) ancient bogosity in pivot_root() - we locked i_mutex on the
right directory, but checked S_DEAD on the different (and wrong)
one.  Noticed and fixed.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 drivers/usb/core/inode.c |  1 +
 fs/configfs/dir.c        |  4 ++++
 fs/namei.c               | 21 +++++++++++++--------
 fs/namespace.c           |  6 +++---
 include/linux/dcache.h   | 14 ++++++++++++++
 5 files changed, 35 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 4a6366a42129..111a01a747fc 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -380,6 +380,7 @@ static int usbfs_rmdir(struct inode *dir, struct dentry *dentry)
 	mutex_lock(&inode->i_mutex);
 	dentry_unhash(dentry);
 	if (usbfs_empty(dentry)) {
+		dont_mount(dentry);
 		drop_nlink(dentry->d_inode);
 		drop_nlink(dentry->d_inode);
 		dput(dentry);
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index 8e48b52205aa..0b502f80c691 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -645,6 +645,7 @@ static void detach_groups(struct config_group *group)
 
 		configfs_detach_group(sd->s_element);
 		child->d_inode->i_flags |= S_DEAD;
+		dont_mount(child);
 
 		mutex_unlock(&child->d_inode->i_mutex);
 
@@ -840,6 +841,7 @@ static int configfs_attach_item(struct config_item *parent_item,
 			mutex_lock(&dentry->d_inode->i_mutex);
 			configfs_remove_dir(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 			mutex_unlock(&dentry->d_inode->i_mutex);
 			d_delete(dentry);
 		}
@@ -882,6 +884,7 @@ static int configfs_attach_group(struct config_item *parent_item,
 		if (ret) {
 			configfs_detach_item(item);
 			dentry->d_inode->i_flags |= S_DEAD;
+			dont_mount(dentry);
 		}
 		configfs_adjust_dir_dirent_depth_after_populate(sd);
 		mutex_unlock(&dentry->d_inode->i_mutex);
@@ -1725,6 +1728,7 @@ void configfs_unregister_subsystem(struct configfs_subsystem *subsys)
 	mutex_unlock(&configfs_symlink_mutex);
 	configfs_detach_group(&group->cg_item);
 	dentry->d_inode->i_flags |= S_DEAD;
+	dont_mount(dentry);
 	mutex_unlock(&dentry->d_inode->i_mutex);
 
 	d_delete(dentry);
diff --git a/fs/namei.c b/fs/namei.c
index 16df7277a92e..b86b96fe1dc3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2176,8 +2176,10 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry)
 		error = security_inode_rmdir(dir, dentry);
 		if (!error) {
 			error = dir->i_op->rmdir(dir, dentry);
-			if (!error)
+			if (!error) {
 				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
+			}
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2261,7 +2263,7 @@ int vfs_unlink(struct inode *dir, struct dentry *dentry)
 		if (!error) {
 			error = dir->i_op->unlink(dir, dentry);
 			if (!error)
-				dentry->d_inode->i_flags |= S_DEAD;
+				dont_mount(dentry);
 		}
 	}
 	mutex_unlock(&dentry->d_inode->i_mutex);
@@ -2572,17 +2574,20 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
 		return error;
 
 	target = new_dentry->d_inode;
-	if (target) {
+	if (target)
 		mutex_lock(&target->i_mutex);
-		dentry_unhash(new_dentry);
-	}
 	if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
 		error = -EBUSY;
-	else 
+	else {
+		if (target)
+			dentry_unhash(new_dentry);
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
+	}
 	if (target) {
-		if (!error)
+		if (!error) {
 			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
+		}
 		mutex_unlock(&target->i_mutex);
 		if (d_unhashed(new_dentry))
 			d_rehash(new_dentry);
@@ -2614,7 +2619,7 @@ static int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
 		error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
 	if (!error) {
 		if (target)
-			target->i_flags |= S_DEAD;
+			dont_mount(new_dentry);
 		if (!(old_dir->i_sb->s_type->fs_flags & FS_RENAME_DOES_D_MOVE))
 			d_move(old_dentry, new_dentry);
 	}
diff --git a/fs/namespace.c b/fs/namespace.c
index 8174c8ab5c70..f20cb57d1067 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1432,7 +1432,7 @@ static int graft_tree(struct vfsmount *mnt, struct path *path)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out_unlock;
 
 	err = security_sb_check_sb(mnt, path);
@@ -1623,7 +1623,7 @@ static int do_move_mount(struct path *path, char *old_name)
 
 	err = -ENOENT;
 	mutex_lock(&path->dentry->d_inode->i_mutex);
-	if (IS_DEADDIR(path->dentry->d_inode))
+	if (cant_mount(path->dentry))
 		goto out1;
 
 	if (d_unlinked(path->dentry))
@@ -2234,7 +2234,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root,
 	if (!check_mnt(root.mnt))
 		goto out2;
 	error = -ENOENT;
-	if (IS_DEADDIR(new.dentry->d_inode))
+	if (cant_mount(old.dentry))
 		goto out2;
 	if (d_unlinked(new.dentry))
 		goto out2;
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 30b93b2a01a4..eebb617c17d8 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -186,6 +186,8 @@ d_iput:		no		no		no       yes
 
 #define DCACHE_FSNOTIFY_PARENT_WATCHED	0x0080 /* Parent inode is watched by some fsnotify listener */
 
+#define DCACHE_CANT_MOUNT	0x0100
+
 extern spinlock_t dcache_lock;
 extern seqlock_t rename_lock;
 
@@ -358,6 +360,18 @@ static inline int d_unlinked(struct dentry *dentry)
 	return d_unhashed(dentry) && !IS_ROOT(dentry);
 }
 
+static inline int cant_mount(struct dentry *dentry)
+{
+	return (dentry->d_flags & DCACHE_CANT_MOUNT);	/* nonzero if set */
+}
+
+static inline void dont_mount(struct dentry *dentry)
+{
+	spin_lock(&dentry->d_lock);	/* d_flags is updated under d_lock */
+	dentry->d_flags |= DCACHE_CANT_MOUNT;
+	spin_unlock(&dentry->d_lock);
+}
+
 static inline struct dentry *dget_parent(struct dentry *dentry)
 {
 	struct dentry *ret;
-- 
cgit v1.2.3


From 9f977fb7ae9ddf565b4800854212fb9a1ed6c2ea Mon Sep 17 00:00:00 2001
From: Octavian Purdila <opurdila@ixiacom.com>
Date: Wed, 5 May 2010 00:26:55 +0000
Subject: sysctl: add proc_do_large_bitmap

The new function can be used to read/write large bitmaps via /proc. A
comma separated range format is used for compact output and input
(e.g. 1,3-4,10-10).

Writing into the file will first reset the bitmap then update it
based on the given input.

Signed-off-by: Octavian Purdila <opurdila@ixiacom.com>
Signed-off-by: WANG Cong <amwang@redhat.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/sysctl.h |   2 +
 kernel/sysctl.c        | 161 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 163 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index f66014c90c9f..7bb5cb64f3b8 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -980,6 +980,8 @@ extern int proc_doulongvec_minmax(struct ctl_table *, int,
 				  void __user *, size_t *, loff_t *);
 extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
 				      void __user *, size_t *, loff_t *);
+extern int proc_do_large_bitmap(struct ctl_table *, int,
+				void __user *, size_t *, loff_t *);
 
 /*
  * Register a set of sysctl names by calling register_sysctl_table
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 4a976208de29..bcfb79e94ec7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -2049,6 +2049,16 @@ static size_t proc_skip_spaces(char **buf)
 	return ret;
 }
 
+static void proc_skip_char(char **buf, size_t *size, const char v)
+{
+	while (*size) {
+		if (**buf != v)
+			break;
+		(*size)--;
+		(*buf)++;
+	}
+}
+
 #define TMPBUFLEN 22
 /**
  * proc_get_long - reads an ASCII formated integer from a user buffer
@@ -2675,6 +2685,157 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
 	return 0;
 }
 
+/**
+ * proc_do_large_bitmap - read/write from/to a large bitmap
+ * @table: the sysctl table
+ * @write: %TRUE if this is a write to the sysctl file
+ * @buffer: the user buffer
+ * @lenp: the size of the user buffer
+ * @ppos: file position
+ *
+ * The bitmap is stored at table->data and the bitmap length (in bits)
+ * in table->maxlen.
+ *
+ * We use a range comma separated format (e.g. 1,3-4,10-10) so that
+ * large bitmaps may be represented in a compact manner. Writing into
+ * the file will clear the bitmap then update it with the given input.
+ *
+ * Returns 0 on success.
+ */
+int proc_do_large_bitmap(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	int err = 0;
+	bool first = 1;
+	size_t left = *lenp;
+	unsigned long bitmap_len = table->maxlen;
+	unsigned long *bitmap = (unsigned long *) table->data;
+	unsigned long *tmp_bitmap = NULL;
+	char tr_a[] = { '-', ',', '\n' }, tr_b[] = { ',', '\n', 0 }, c;
+
+	if (!bitmap_len || !left || (*ppos && !write)) {
+		*lenp = 0;
+		return 0;
+	}
+
+	if (write) {
+		unsigned long page = 0;
+		char *kbuf;
+
+		if (left > PAGE_SIZE - 1)
+			left = PAGE_SIZE - 1;
+
+		page = __get_free_page(GFP_TEMPORARY);
+		kbuf = (char *) page;
+		if (!kbuf)
+			return -ENOMEM;
+		if (copy_from_user(kbuf, buffer, left)) {
+			free_page(page);
+			return -EFAULT;
+                }
+		kbuf[left] = 0;
+
+		tmp_bitmap = kzalloc(BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long),
+				     GFP_KERNEL);
+		if (!tmp_bitmap) {
+			free_page(page);
+			return -ENOMEM;
+		}
+		proc_skip_char(&kbuf, &left, '\n');
+		while (!err && left) {
+			unsigned long val_a, val_b;
+			bool neg;
+
+			err = proc_get_long(&kbuf, &left, &val_a, &neg, tr_a,
+					     sizeof(tr_a), &c);
+			if (err)
+				break;
+			if (val_a >= bitmap_len || neg) {
+				err = -EINVAL;
+				break;
+			}
+
+			val_b = val_a;
+			if (left) {
+				kbuf++;
+				left--;
+			}
+
+			if (c == '-') {
+				err = proc_get_long(&kbuf, &left, &val_b,
+						     &neg, tr_b, sizeof(tr_b),
+						     &c);
+				if (err)
+					break;
+				if (val_b >= bitmap_len || neg ||
+				    val_a > val_b) {
+					err = -EINVAL;
+					break;
+				}
+				if (left) {
+					kbuf++;
+					left--;
+				}
+			}
+
+			while (val_a <= val_b)
+				set_bit(val_a++, tmp_bitmap);
+
+			first = 0;
+			proc_skip_char(&kbuf, &left, '\n');
+		}
+		free_page(page);
+	} else {
+		unsigned long bit_a, bit_b = 0;
+
+		while (left) {
+			bit_a = find_next_bit(bitmap, bitmap_len, bit_b);
+			if (bit_a >= bitmap_len)
+				break;
+			bit_b = find_next_zero_bit(bitmap, bitmap_len,
+						   bit_a + 1) - 1;
+
+			if (!first) {
+				err = proc_put_char(&buffer, &left, ',');
+				if (err)
+					break;
+			}
+			err = proc_put_long(&buffer, &left, bit_a, false);
+			if (err)
+				break;
+			if (bit_a != bit_b) {
+				err = proc_put_char(&buffer, &left, '-');
+				if (err)
+					break;
+				err = proc_put_long(&buffer, &left, bit_b, false);
+				if (err)
+					break;
+			}
+
+			first = 0; bit_b++;
+		}
+		if (!err)
+			err = proc_put_char(&buffer, &left, '\n');
+	}
+
+	if (!err) {
+		if (write) {
+			if (*ppos)
+				bitmap_or(bitmap, bitmap, tmp_bitmap, bitmap_len);
+			else
+				memcpy(bitmap, tmp_bitmap,
+					BITS_TO_LONGS(bitmap_len) * sizeof(unsigned long));
+		}
+		kfree(tmp_bitmap);
+		*lenp -= left;
+		*ppos += *lenp;
+		return 0;
+	} else {
+		kfree(tmp_bitmap);
+		return err;
+	}
+}
+
 #else /* CONFIG_PROC_FS */
 
 int proc_dostring(struct ctl_table *table, int write,
-- 
cgit v1.2.3


From a14462f1bd4d3962994f518459102000438665aa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jpirko@redhat.com>
Date: Thu, 6 May 2010 01:33:53 +0000
Subject: net: adjust handle_macvlan to pass port struct to hook

Now there's a null check here and again in the hook. Looking at the bridge bits,
which are similar, the port structure is rcu_dereferenced right away in
handle_bridge and passed to the hook. Looks nicer.

Signed-off-by: Jiri Pirko <jpirko@redhat.com>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c      |  8 ++------
 include/linux/if_macvlan.h |  3 ++-
 net/core/dev.c             | 10 +++++++---
 3 files changed, 11 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a939d828b47..1b78c0057a8d 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -145,19 +145,15 @@ static void macvlan_broadcast(struct sk_buff *skb,
 }
 
 /* called under rcu_read_lock() from netif_receive_skb */
-static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb)
+static struct sk_buff *macvlan_handle_frame(struct macvlan_port *port,
+					    struct sk_buff *skb)
 {
 	const struct ethhdr *eth = eth_hdr(skb);
-	const struct macvlan_port *port;
 	const struct macvlan_dev *vlan;
 	const struct macvlan_dev *src;
 	struct net_device *dev;
 	unsigned int len;
 
-	port = rcu_dereference(skb->dev->macvlan_port);
-	if (port == NULL)
-		return skb;
-
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		src = macvlan_hash_lookup(port, eth->h_source);
 		if (!src)
diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h
index b78a712247da..9ea047aca795 100644
--- a/include/linux/if_macvlan.h
+++ b/include/linux/if_macvlan.h
@@ -85,6 +85,7 @@ extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
 				      struct net_device *dev);
 
 
-extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *);
+extern struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *,
+						    struct sk_buff *);
 
 #endif /* _LINUX_IF_MACVLAN_H */
diff --git a/net/core/dev.c b/net/core/dev.c
index 3daee30a7c82..5cbba0927a8e 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2612,7 +2612,8 @@ static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
 #endif
 
 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
-struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
+struct sk_buff *(*macvlan_handle_frame_hook)(struct macvlan_port *p,
+					     struct sk_buff *skb) __read_mostly;
 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
 
 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
@@ -2620,14 +2621,17 @@ static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
 					     int *ret,
 					     struct net_device *orig_dev)
 {
-	if (skb->dev->macvlan_port == NULL)
+	struct macvlan_port *port;
+
+	port = rcu_dereference(skb->dev->macvlan_port);
+	if (!port)
 		return skb;
 
 	if (*pt_prev) {
 		*ret = deliver_skb(skb, *pt_prev, orig_dev);
 		*pt_prev = NULL;
 	}
-	return macvlan_handle_frame_hook(skb);
+	return macvlan_handle_frame_hook(port, skb);
 }
 #else
 #define handle_macvlan(skb, pt_prev, ret, orig_dev)	(skb)
-- 
cgit v1.2.3


From 3b098e2d7c693796cc4dffb07caa249fc0f70771 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Sat, 15 May 2010 23:57:10 -0700
Subject: net: Consistent skb timestamping

With RPS inclusion, skb timestamping is not consistent in RX path.

If netif_receive_skb() is used, it's deferred after RPS dispatch.

If netif_rx() is used, it's done before RPS dispatch.

This can give strange tcpdump timestamp results.

I think timestamping should be done as soon as possible in the receive
path, to get meaningful values (ie timestamps taken at the time packet
was delivered by NIC driver to our stack), even if NAPI already can
defer timestamping a bit (RPS can help to reduce the gap)

Tom Herbert prefers to sample timestamps after RPS dispatch. In case
sampling is expensive (HPET/acpi_pm on x86), this makes sense.

Let admins switch from one mode to another, using a new
sysctl, /proc/sys/net/core/netdev_tstamp_prequeue

Its default value (1) means timestamps are taken as soon as possible,
before backlog queueing, giving accurate timestamps.

Setting it to 0 allows timestamps to be sampled when processing the backlog,
after RPS dispatch, to lower the load on the pre-RPS cpu.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/sysctl/net.txt | 10 +++++++++
 include/linux/netdevice.h    |  1 +
 net/core/dev.c               | 50 +++++++++++++++++++++++++++-----------------
 net/core/sysctl_net_core.c   |  7 +++++++
 4 files changed, 49 insertions(+), 19 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt
index df38ef046f8d..cbd05ffc606b 100644
--- a/Documentation/sysctl/net.txt
+++ b/Documentation/sysctl/net.txt
@@ -84,6 +84,16 @@ netdev_max_backlog
 Maximum number  of  packets,  queued  on  the  INPUT  side, when the interface
 receives packets faster than kernel can process them.
 
+netdev_tstamp_prequeue
+----------------------
+
+If set to 0, RX packet timestamps can be sampled after RPS processing, when
+the target CPU processes packets. It might give some delay on timestamps, but
+permit to distribute the load on several cpus.
+
+If set to 1 (default), timestamps are sampled as soon as possible, before
+queueing.
+
 optmem_max
 ----------
 
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 69022d47d6f2..c1b2341897c2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2100,6 +2100,7 @@ extern const struct net_device_stats *dev_get_stats(struct net_device *dev);
 extern void		dev_txq_stats_fold(const struct net_device *dev, struct net_device_stats *stats);
 
 extern int		netdev_max_backlog;
+extern int		netdev_tstamp_prequeue;
 extern int		weight_p;
 extern int		netdev_set_master(struct net_device *dev, struct net_device *master);
 extern int skb_checksum_help(struct sk_buff *skb);
diff --git a/net/core/dev.c b/net/core/dev.c
index 5cbba0927a8e..988e42912e72 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1454,7 +1454,7 @@ void net_disable_timestamp(void)
 }
 EXPORT_SYMBOL(net_disable_timestamp);
 
-static inline void net_timestamp(struct sk_buff *skb)
+static inline void net_timestamp_set(struct sk_buff *skb)
 {
 	if (atomic_read(&netstamp_needed))
 		__net_timestamp(skb);
@@ -1462,6 +1462,12 @@ static inline void net_timestamp(struct sk_buff *skb)
 		skb->tstamp.tv64 = 0;
 }
 
+static inline void net_timestamp_check(struct sk_buff *skb)
+{
+	if (!skb->tstamp.tv64 && atomic_read(&netstamp_needed))
+		__net_timestamp(skb);
+}
+
 /**
  * dev_forward_skb - loopback an skb to another netif
  *
@@ -1508,9 +1514,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
 #ifdef CONFIG_NET_CLS_ACT
 	if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-		net_timestamp(skb);
+		net_timestamp_set(skb);
 #else
-	net_timestamp(skb);
+	net_timestamp_set(skb);
 #endif
 
 	rcu_read_lock();
@@ -2201,6 +2207,7 @@ EXPORT_SYMBOL(dev_queue_xmit);
   =======================================================================*/
 
 int netdev_max_backlog __read_mostly = 1000;
+int netdev_tstamp_prequeue __read_mostly = 1;
 int netdev_budget __read_mostly = 300;
 int weight_p __read_mostly = 64;            /* old backlog weight */
 
@@ -2465,8 +2472,8 @@ int netif_rx(struct sk_buff *skb)
 	if (netpoll_rx(skb))
 		return NET_RX_DROP;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 #ifdef CONFIG_RPS
 	{
@@ -2791,8 +2798,8 @@ static int __netif_receive_skb(struct sk_buff *skb)
 	int ret = NET_RX_DROP;
 	__be16 type;
 
-	if (!skb->tstamp.tv64)
-		net_timestamp(skb);
+	if (!netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
 
 	if (vlan_tx_tag_present(skb) && vlan_hwaccel_do_receive(skb))
 		return NET_RX_SUCCESS;
@@ -2910,23 +2917,28 @@ out:
  */
 int netif_receive_skb(struct sk_buff *skb)
 {
+	if (netdev_tstamp_prequeue)
+		net_timestamp_check(skb);
+
 #ifdef CONFIG_RPS
-	struct rps_dev_flow voidflow, *rflow = &voidflow;
-	int cpu, ret;
+	{
+		struct rps_dev_flow voidflow, *rflow = &voidflow;
+		int cpu, ret;
 
-	rcu_read_lock();
+		rcu_read_lock();
+
+		cpu = get_rps_cpu(skb->dev, skb, &rflow);
 
-	cpu = get_rps_cpu(skb->dev, skb, &rflow);
+		if (cpu >= 0) {
+			ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
+			rcu_read_unlock();
+		} else {
+			rcu_read_unlock();
+			ret = __netif_receive_skb(skb);
+		}
 
-	if (cpu >= 0) {
-		ret = enqueue_to_backlog(skb, cpu, &rflow->last_qtail);
-		rcu_read_unlock();
-	} else {
-		rcu_read_unlock();
-		ret = __netif_receive_skb(skb);
+		return ret;
 	}
-
-	return ret;
 #else
 	return __netif_receive_skb(skb);
 #endif
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index dcc7d25996ab..01eee5d984be 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -121,6 +121,13 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec
 	},
+	{
+		.procname	= "netdev_tstamp_prequeue",
+		.data		= &netdev_tstamp_prequeue,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
 	{
 		.procname	= "message_cost",
 		.data		= &net_ratelimit_state.interval,
-- 
cgit v1.2.3


From c02db8c6290bb992442fec1407643c94cc414375 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Sun, 16 May 2010 01:05:45 -0700
Subject: rtnetlink: make SR-IOV VF interface symmetric

Now we have a set of nested attributes:

  IFLA_VFINFO_LIST (NESTED)
    IFLA_VF_INFO (NESTED)
      IFLA_VF_MAC
      IFLA_VF_VLAN
      IFLA_VF_TX_RATE

This allows a single set to operate on multiple attributes if desired.
Among other things, it means a dump can be replayed to set state.

The current interface has yet to be released, so this seems like
something to consider for 2.6.34.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h |  23 +++++--
 net/core/rtnetlink.c    | 159 +++++++++++++++++++++++++++++++++---------------
 2 files changed, 129 insertions(+), 53 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c9bf92cd7653..d94963b379d9 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -79,10 +79,7 @@ enum {
 	IFLA_NET_NS_PID,
 	IFLA_IFALIAS,
 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
-	IFLA_VF_MAC,		/* Hardware queue specific attributes */
-	IFLA_VF_VLAN,
-	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
-	IFLA_VFINFO,
+	IFLA_VFINFO_LIST,
 	__IFLA_MAX
 };
 
@@ -203,6 +200,24 @@ enum macvlan_mode {
 
 /* SR-IOV virtual function managment section */
 
+enum {
+	IFLA_VF_INFO_UNSPEC,
+	IFLA_VF_INFO,
+	__IFLA_VF_INFO_MAX,
+};
+
+#define IFLA_VF_INFO_MAX (__IFLA_VF_INFO_MAX - 1)
+
+enum {
+	IFLA_VF_UNSPEC,
+	IFLA_VF_MAC,		/* Hardware queue specific attributes */
+	IFLA_VF_VLAN,
+	IFLA_VF_TX_RATE,	/* TX Bandwidth Allocation */
+	__IFLA_VF_MAX,
+};
+
+#define IFLA_VF_MAX (__IFLA_VF_MAX - 1)
+
 struct ifla_vf_mac {
 	__u32 vf;
 	__u8 mac[32]; /* MAX_ADDR_LEN */
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index fe776c9ddeca..31e85d327aa2 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -602,12 +602,19 @@ static void copy_rtnl_link_stats(struct rtnl_link_stats *a,
 	a->tx_compressed = b->tx_compressed;
 };
 
+/* All VF info */
 static inline int rtnl_vfinfo_size(const struct net_device *dev)
 {
-	if (dev->dev.parent && dev_is_pci(dev->dev.parent))
-		return dev_num_vf(dev->dev.parent) *
-			sizeof(struct ifla_vf_info);
-	else
+	if (dev->dev.parent && dev_is_pci(dev->dev.parent)) {
+
+		int num_vfs = dev_num_vf(dev->dev.parent);
+		size_t size = nlmsg_total_size(sizeof(struct nlattr));
+		size += nlmsg_total_size(num_vfs * sizeof(struct nlattr));
+		size += num_vfs * (sizeof(struct ifla_vf_mac) +
+				  sizeof(struct ifla_vf_vlan) +
+				  sizeof(struct ifla_vf_tx_rate));
+		return size;
+	} else
 		return 0;
 }
 
@@ -629,7 +636,7 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_OPERSTATE */
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
-	       + nla_total_size(rtnl_vfinfo_size(dev)) /* IFLA_VFINFO */
+	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
@@ -700,14 +707,37 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
-		struct ifla_vf_info ivi;
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
-		for (i = 0; i < dev_num_vf(dev->dev.parent); i++) {
+		struct nlattr *vfinfo, *vf;
+		int num_vfs = dev_num_vf(dev->dev.parent);
+
+		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
+		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
+		if (!vfinfo)
+			goto nla_put_failure;
+		for (i = 0; i < num_vfs; i++) {
+			struct ifla_vf_info ivi;
+			struct ifla_vf_mac vf_mac;
+			struct ifla_vf_vlan vf_vlan;
+			struct ifla_vf_tx_rate vf_tx_rate;
 			if (dev->netdev_ops->ndo_get_vf_config(dev, i, &ivi))
 				break;
-			NLA_PUT(skb, IFLA_VFINFO, sizeof(ivi), &ivi);
+			vf_mac.vf = vf_vlan.vf = vf_tx_rate.vf = ivi.vf;
+			memcpy(vf_mac.mac, ivi.mac, sizeof(ivi.mac));
+			vf_vlan.vlan = ivi.vlan;
+			vf_vlan.qos = ivi.qos;
+			vf_tx_rate.rate = ivi.tx_rate;
+			vf = nla_nest_start(skb, IFLA_VF_INFO);
+			if (!vf) {
+				nla_nest_cancel(skb, vfinfo);
+				goto nla_put_failure;
+			}
+			NLA_PUT(skb, IFLA_VF_MAC, sizeof(vf_mac), &vf_mac);
+			NLA_PUT(skb, IFLA_VF_VLAN, sizeof(vf_vlan), &vf_vlan);
+			NLA_PUT(skb, IFLA_VF_TX_RATE, sizeof(vf_tx_rate), &vf_tx_rate);
+			nla_nest_end(skb, vf);
 		}
+		nla_nest_end(skb, vfinfo);
 	}
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
@@ -769,12 +799,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_LINKINFO]		= { .type = NLA_NESTED },
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
-	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_mac) },
-	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_vlan) },
-	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
-				    .len = sizeof(struct ifla_vf_tx_rate) },
+	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -783,6 +808,19 @@ static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
 	[IFLA_INFO_DATA]	= { .type = NLA_NESTED },
 };
 
+static const struct nla_policy ifla_vfinfo_policy[IFLA_VF_INFO_MAX+1] = {
+	[IFLA_VF_INFO]		= { .type = NLA_NESTED },
+};
+
+static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
+	[IFLA_VF_MAC]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_mac) },
+	[IFLA_VF_VLAN]		= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_vlan) },
+	[IFLA_VF_TX_RATE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_vf_tx_rate) },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -812,6 +850,52 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[])
 	return 0;
 }
 
+static int do_setvfinfo(struct net_device *dev, struct nlattr *attr)
+{
+	int rem, err = -EINVAL;
+	struct nlattr *vf;
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	nla_for_each_nested(vf, attr, rem) {
+		switch (nla_type(vf)) {
+		case IFLA_VF_MAC: {
+			struct ifla_vf_mac *ivm;
+			ivm = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_mac)
+				err = ops->ndo_set_vf_mac(dev, ivm->vf,
+							  ivm->mac);
+			break;
+		}
+		case IFLA_VF_VLAN: {
+			struct ifla_vf_vlan *ivv;
+			ivv = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_vlan)
+				err = ops->ndo_set_vf_vlan(dev, ivv->vf,
+							   ivv->vlan,
+							   ivv->qos);
+			break;
+		}
+		case IFLA_VF_TX_RATE: {
+			struct ifla_vf_tx_rate *ivt;
+			ivt = nla_data(vf);
+			err = -EOPNOTSUPP;
+			if (ops->ndo_set_vf_tx_rate)
+				err = ops->ndo_set_vf_tx_rate(dev, ivt->vf,
+							      ivt->rate);
+			break;
+		}
+		default:
+			err = -EINVAL;
+			break;
+		}
+		if (err)
+			break;
+	}
+	return err;
+}
+
 static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		      struct nlattr **tb, char *ifname, int modified)
 {
@@ -942,40 +1026,17 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 		write_unlock_bh(&dev_base_lock);
 	}
 
-	if (tb[IFLA_VF_MAC]) {
-		struct ifla_vf_mac *ivm;
-		ivm = nla_data(tb[IFLA_VF_MAC]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_mac)
-			err = ops->ndo_set_vf_mac(dev, ivm->vf, ivm->mac);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-
-	if (tb[IFLA_VF_VLAN]) {
-		struct ifla_vf_vlan *ivv;
-		ivv = nla_data(tb[IFLA_VF_VLAN]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_vlan)
-			err = ops->ndo_set_vf_vlan(dev, ivv->vf,
-						   ivv->vlan,
-						   ivv->qos);
-		if (err < 0)
-			goto errout;
-		modified = 1;
-	}
-	err = 0;
-
-	if (tb[IFLA_VF_TX_RATE]) {
-		struct ifla_vf_tx_rate *ivt;
-		ivt = nla_data(tb[IFLA_VF_TX_RATE]);
-		err = -EOPNOTSUPP;
-		if (ops->ndo_set_vf_tx_rate)
-			err = ops->ndo_set_vf_tx_rate(dev, ivt->vf, ivt->rate);
-		if (err < 0)
-			goto errout;
-		modified = 1;
+	if (tb[IFLA_VFINFO_LIST]) {
+		struct nlattr *attr;
+		int rem;
+		nla_for_each_nested(attr, tb[IFLA_VFINFO_LIST], rem) {
+			if (nla_type(attr) != IFLA_VF_INFO)
+				goto errout;
+			err = do_setvfinfo(dev, attr);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
 	}
 	err = 0;
 
-- 
cgit v1.2.3


From 86f2552bbd0e17b19bb5e9881042533eaea553c7 Mon Sep 17 00:00:00 2001
From: Martin Schwidefsky <schwidefsky@de.ibm.com>
Date: Mon, 17 May 2010 10:00:05 +0200
Subject: [S390] add breaking event address for user space

Copy the last breaking event address from the lowcore to a new
field in the thread_info structure on each system entry. Add a new
ptrace request PTRACE_GET_LAST_BREAK and a new utrace regset
REGSET_LAST_BREAK to query the last breaking event.

This is useful for debugging wild branches in user space code.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
---
 arch/s390/include/asm/ptrace.h      |   3 +-
 arch/s390/include/asm/thread_info.h |   1 +
 arch/s390/kernel/asm-offsets.c      |   1 +
 arch/s390/kernel/entry.S            |   4 +-
 arch/s390/kernel/entry64.S          | 270 +++++++++++++++++++-----------------
 arch/s390/kernel/ptrace.c           |  68 +++++++++
 arch/s390/kernel/signal.c           |   2 +
 include/linux/elf.h                 |   1 +
 8 files changed, 223 insertions(+), 127 deletions(-)

(limited to 'include/linux')

diff --git a/arch/s390/include/asm/ptrace.h b/arch/s390/include/asm/ptrace.h
index fef9b33cdd59..e2c218dc68a6 100644
--- a/arch/s390/include/asm/ptrace.h
+++ b/arch/s390/include/asm/ptrace.h
@@ -328,8 +328,8 @@ struct pt_regs
 	psw_t psw;
 	unsigned long gprs[NUM_GPRS];
 	unsigned long orig_gpr2;
-	unsigned short svcnr;
 	unsigned short ilc;
+	unsigned short svcnr;
 };
 #endif
 
@@ -436,6 +436,7 @@ typedef struct
 #define PTRACE_PEEKDATA_AREA	      0x5003
 #define PTRACE_POKETEXT_AREA	      0x5004
 #define PTRACE_POKEDATA_AREA 	      0x5005
+#define PTRACE_GET_LAST_BREAK	      0x5006
 
 /*
  * PT_PROT definition is loosely based on hppa bsd definition in
diff --git a/arch/s390/include/asm/thread_info.h b/arch/s390/include/asm/thread_info.h
index 34f0873d6525..be3d3f91d86c 100644
--- a/arch/s390/include/asm/thread_info.h
+++ b/arch/s390/include/asm/thread_info.h
@@ -50,6 +50,7 @@ struct thread_info {
 	struct restart_block	restart_block;
 	__u64			user_timer;
 	__u64			system_timer;
+	unsigned long		last_break;	/* last breaking-event-address. */
 };
 
 /*
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index 44a4336d9a33..d9b490a2716e 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -39,6 +39,7 @@ int main(void)
 	DEFINE(__TI_precount, offsetof(struct thread_info, preempt_count));
 	DEFINE(__TI_user_timer, offsetof(struct thread_info, user_timer));
 	DEFINE(__TI_system_timer, offsetof(struct thread_info, system_timer));
+	DEFINE(__TI_last_break, offsetof(struct thread_info, last_break));
 	BLANK();
 	DEFINE(__PT_ARGS, offsetof(struct pt_regs, args));
 	DEFINE(__PT_PSW, offsetof(struct pt_regs, psw));
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 0e2b16241dfa..d5e3e6007447 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -180,9 +180,9 @@ STACK_SIZE  = 1 << STACK_SHIFT
 	s	%r15,BASED(.Lc_spsize)	# make room for registers & psw
 	mvc	SP_PSW(8,%r15),0(%r12)	# move user PSW to stack
 	st	%r2,SP_ORIG_R2(%r15)	# store original content of gpr 2
-	icm	%r12,3,__LC_SVC_ILC
+	icm	%r12,12,__LC_SVC_ILC
 	stm	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
-	st	%r12,SP_SVCNR(%r15)
+	st	%r12,SP_ILC(%r15)
 	mvc	SP_R12(16,%r15),\savearea # move %r12-%r15 to stack
 	la	%r12,0
 	st	%r12,__SF_BACKCHAIN(%r15)	# clear back chain
diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
index 829b759ba1e1..178d92536d90 100644
--- a/arch/s390/kernel/entry64.S
+++ b/arch/s390/kernel/entry64.S
@@ -126,31 +126,35 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
  *    R15 - kernel stack pointer
  */
 
-	.macro	SAVE_ALL_BASE savearea
-	stmg	%r12,%r15,\savearea
-	larl	%r13,system_call
-	.endm
-
 	.macro	SAVE_ALL_SVC psworg,savearea
-	la	%r12,\psworg
+	stmg	%r11,%r15,\savearea
 	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	lg	%r11,__LC_LAST_BREAK
 	.endm
 
-	.macro	SAVE_ALL_SYNC psworg,savearea
-	la	%r12,\psworg
+	.macro	SAVE_ALL_PGM psworg,savearea
+	stmg	%r11,%r15,\savearea
 	tm	\psworg+1,0x01		# test problem state bit
-	jz	2f			# skip stack setup save
-	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
 #ifdef CONFIG_CHECK_STACK
-	j	3f
-2:	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
-	jz	stack_overflow
-3:
+	jnz	1f
+	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
+	jnz	2f
+	la	%r12,\psworg
+	j	stack_overflow
+#else
+	jz	2f
 #endif
-2:
+1:	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+2:	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	.endm
 
 	.macro	SAVE_ALL_ASYNC psworg,savearea
+	stmg	%r11,%r15,\savearea
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	la	%r12,\psworg
 	tm	\psworg+1,0x01		# test problem state bit
 	jnz	1f			# from user -> load kernel stack
@@ -164,27 +168,23 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 0:	lg	%r14,__LC_ASYNC_STACK	# are we already on the async. stack ?
 	slgr	%r14,%r15
 	srag	%r14,%r14,STACK_SHIFT
-	jz	2f
-1:	lg	%r15,__LC_ASYNC_STACK	# load async stack
 #ifdef CONFIG_CHECK_STACK
-	j	3f
-2:	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
-	jz	stack_overflow
-3:
+	jnz	1f
+	tml	%r15,STACK_SIZE - CONFIG_STACK_GUARD
+	jnz	2f
+	j	stack_overflow
+#else
+	jz	2f
 #endif
-2:
+1:	lg	%r15,__LC_ASYNC_STACK	# load async stack
+2:	aghi	%r15,-SP_SIZE		# make room for registers & psw
 	.endm
 
-	.macro	CREATE_STACK_FRAME psworg,savearea
-	aghi	%r15,-SP_SIZE		# make room for registers & psw
-	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	.macro	CREATE_STACK_FRAME savearea
+	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 	stg	%r2,SP_ORIG_R2(%r15)	# store original content of gpr 2
-	icm	%r12,3,__LC_SVC_ILC
-	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
-	st	%r12,SP_SVCNR(%r15)
-	mvc	SP_R12(32,%r15),\savearea # move %r12-%r15 to stack
-	la	%r12,0
-	stg	%r12,__SF_BACKCHAIN(%r15)
+	mvc	SP_R11(40,%r15),\savearea # move %r11-%r15 to stack
+	stmg	%r0,%r10,SP_R0(%r15)	# store gprs %r0-%r10 to kernel stack
 	.endm
 
 	.macro	RESTORE_ALL psworg,sync
@@ -200,6 +200,13 @@ _TIF_SYSCALL = (_TIF_SYSCALL_TRACE>>8 | _TIF_SYSCALL_AUDIT>>8 | \
 	lpswe	\psworg			# back to caller
 	.endm
 
+	.macro	LAST_BREAK
+	srag	%r10,%r11,23
+	jz	0f
+	stg	%r11,__TI_last_break(%r12)
+0:
+	.endm
+
 /*
  * Scheduler resume function, called by switch_to
  *  gpr2 = (task_struct *) prev
@@ -245,37 +252,38 @@ __critical_start:
 system_call:
 	stpt	__LC_SYNC_ENTER_TIMER
 sysc_saveall:
-	SAVE_ALL_BASE __LC_SAVE_AREA
 	SAVE_ALL_SVC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	llgh	%r7,__LC_SVC_INT_CODE	# get svc number from lowcore
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	stg	%r7,SP_ARGS(%r15)
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 sysc_vtime:
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 sysc_stime:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 sysc_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 sysc_do_svc:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	ltgr	%r7,%r7		# test for svc 0
+	llgh	%r7,SP_SVCNR(%r15)
+	slag	%r7,%r7,2	# shift and test for svc 0
 	jnz	sysc_nr_ok
 	# svc 0: system call number in %r1
-	cl	%r1,BASED(.Lnr_syscalls)
+	llgfr	%r1,%r1		# clear high word in r1
+	cghi	%r1,NR_syscalls
 	jnl	sysc_nr_ok
-	lgfr	%r7,%r1 	# clear high word in r1
+	sth	%r1,SP_SVCNR(%r15)
+	slag	%r7,%r1,2	# shift and test for svc 0
 sysc_nr_ok:
-	mvc	SP_ARGS(8,%r15),SP_R7(%r15)
-sysc_do_restart:
-	sth	%r7,SP_SVCNR(%r15)
-	sllg	%r7,%r7,2	# svc number * 4
 	larl	%r10,sys_call_table
 #ifdef CONFIG_COMPAT
-	tm	__TI_flags+5(%r9),(_TIF_31BIT>>16)  # running in 31 bit mode ?
+	tm	__TI_flags+5(%r12),(_TIF_31BIT>>16)  # running in 31 bit mode ?
 	jno	sysc_noemu
 	larl	%r10,sys_call_table_emu  # use 31 bit emulation system calls
 sysc_noemu:
 #endif
-	tm	__TI_flags+6(%r9),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_SYSCALL
 	lgf	%r8,0(%r7,%r10) # load address of system call routine
 	jnz	sysc_tracesys
 	basr	%r14,%r8	# call sys_xxxx
@@ -284,7 +292,7 @@ sysc_noemu:
 sysc_return:
 	LOCKDEP_SYS_EXIT
 sysc_tif:
-	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
+	tm	__TI_flags+7(%r12),_TIF_WORK_SVC
 	jnz	sysc_work	# there is work to do (signals etc.)
 sysc_restore:
 	RESTORE_ALL __LC_RETURN_PSW,1
@@ -301,17 +309,17 @@ sysc_work:
 # One of the work bits is on. Find out which one.
 #
 sysc_work_tif:
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jo	sysc_mcck_pending
-	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	sysc_reschedule
-	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
 	jo	sysc_sigpending
-	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	sysc_notify_resume
-	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
+	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
 	j	sysc_return		# beware of critical section cleanup
 
@@ -333,12 +341,12 @@ sysc_mcck_pending:
 # _TIF_SIGPENDING is set, call do_signal
 #
 sysc_sigpending:
-	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP # clear TIF_SINGLE_STEP
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,do_signal		# call do_signal
-	tm	__TI_flags+7(%r9),_TIF_RESTART_SVC
+	tm	__TI_flags+7(%r12),_TIF_RESTART_SVC
 	jo	sysc_restart
-	tm	__TI_flags+7(%r9),_TIF_SINGLE_STEP
+	tm	__TI_flags+7(%r12),_TIF_SINGLE_STEP
 	jo	sysc_singlestep
 	j	sysc_return
 
@@ -354,17 +362,19 @@ sysc_notify_resume:
 # _TIF_RESTART_SVC is set, set up registers and restart svc
 #
 sysc_restart:
-	ni	__TI_flags+7(%r9),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
+	ni	__TI_flags+7(%r12),255-_TIF_RESTART_SVC # clear TIF_RESTART_SVC
 	lg	%r7,SP_R2(%r15)		# load new svc number
 	mvc	SP_R2(8,%r15),SP_ORIG_R2(%r15) # restore first argument
 	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
-	j	sysc_do_restart 	# restart svc
+	sth	%r7,SP_SVCNR(%r15)
+	slag	%r7,%r7,2
+	j	sysc_nr_ok		# restart svc
 
 #
 # _TIF_SINGLE_STEP is set, call do_single_step
 #
 sysc_singlestep:
-	ni	__TI_flags+7(%r9),255-_TIF_SINGLE_STEP	# clear TIF_SINGLE_STEP
+	ni	__TI_flags+7(%r12),255-_TIF_SINGLE_STEP	# clear TIF_SINGLE_STEP
 	xc	SP_SVCNR(2,%r15),SP_SVCNR(%r15)		# clear svc number
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
 	larl	%r14,sysc_return	# load adr. of system return
@@ -377,8 +387,8 @@ sysc_singlestep:
 sysc_tracesys:
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	la	%r3,0
-	srl	%r7,2
-	stg	%r7,SP_R2(%r15)
+	llgh	%r0,SP_SVCNR(%r15)
+	stg	%r0,SP_R2(%r15)
 	brasl	%r14,do_syscall_trace_enter
 	lghi	%r0,NR_syscalls
 	clgr	%r0,%r2
@@ -391,7 +401,7 @@ sysc_tracego:
 	basr	%r14,%r8		# call sys_xxx
 	stg	%r2,SP_R2(%r15)		# store return value
 sysc_tracenogo:
-	tm	__TI_flags+6(%r9),_TIF_SYSCALL
+	tm	__TI_flags+6(%r12),_TIF_SYSCALL
 	jz	sysc_return
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	larl	%r14,sysc_return	# return point is sysc_return
@@ -403,7 +413,7 @@ sysc_tracenogo:
 	.globl	ret_from_fork
 ret_from_fork:
 	lg	%r13,__LC_SVC_NEW_PSW+8
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# forking a kernel thread ?
 	jo	0f
 	stg	%r15,SP_R15(%r15)	# store stack pointer for new kthread
@@ -437,8 +447,8 @@ kernel_execve:
 	lg	%r15,__LC_KERNEL_STACK	# load ksp
 	aghi	%r15,-SP_SIZE		# make room for registers & psw
 	lg	%r13,__LC_SVC_NEW_PSW+8
-	lg	%r9,__LC_THREAD_INFO
 	mvc	SP_PTREGS(__PT_SIZE,%r15),0(%r12)	# copy pt_regs
+	lg	%r12,__LC_THREAD_INFO
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15)
 #	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
@@ -465,21 +475,23 @@ pgm_check_handler:
  * for LPSW?).
  */
 	stpt	__LC_SYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA
 	tm	__LC_PGM_INT_CODE+1,0x80 # check whether we got a per exception
 	jnz	pgm_per 		 # got per exception -> special case
-	SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	xc	SP_ILC(4,%r15),SP_ILC(%r15)
+	mvc	SP_PSW(16,%r15),__LC_PGM_OLD_PSW
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	pgm_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 pgm_no_vtime:
 	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_CHECK_OFF
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
+	stg	%r11,SP_ARGS(%r15)
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
 	ngr	%r8,%r3
@@ -503,31 +515,32 @@ pgm_per:
 	clc	__LC_PGM_OLD_PSW(16),__LC_SVC_NEW_PSW
 	je	pgm_svcper
 # no interesting special case, ignore PER event
-	lmg	%r12,%r15,__LC_SAVE_AREA
 	lpswe	__LC_PGM_OLD_PSW
 
 #
 # Normal per exception
 #
 pgm_per_std:
-	SAVE_ALL_SYNC __LC_PGM_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_PGM_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_PGM_OLD_PSW
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	pgm_no_vtime2
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
+	LAST_BREAK
 pgm_no_vtime2:
 	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_CHECK_OFF
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
-	lg	%r1,__TI_task(%r9)
+	lg	%r1,__TI_task(%r12)
 	tm	SP_PSW+1(%r15),0x01	# kernel per event ?
 	jz	kernel_per
 	mvc	__THREAD_per+__PER_atmid(2,%r1),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r1),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r1),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
 	lgf	%r3,__LC_PGM_ILC	# load program interruption code
 	lghi	%r8,0x7f
 	ngr	%r8,%r3			# clear per-event-bit and ilc
@@ -538,19 +551,21 @@ pgm_no_vtime2:
 # it was a single stepped SVC that is causing all the trouble
 #
 pgm_svcper:
-	SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
+	SAVE_ALL_PGM __LC_SVC_OLD_PSW,__LC_SAVE_AREA
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_SYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	llgh	%r7,__LC_SVC_INT_CODE	# get svc number from lowcore
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
+	LAST_BREAK
 	TRACE_IRQS_OFF
-	lg	%r8,__TI_task(%r9)
+	lg	%r8,__TI_task(%r12)
 	mvc	__THREAD_per+__PER_atmid(2,%r8),__LC_PER_ATMID
 	mvc	__THREAD_per+__PER_address(8,%r8),__LC_PER_ADDRESS
 	mvc	__THREAD_per+__PER_access_id(1,%r8),__LC_PER_ACCESS_ID
-	oi	__TI_flags+7(%r9),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
+	oi	__TI_flags+7(%r12),_TIF_SINGLE_STEP # set TIF_SINGLE_STEP
 	TRACE_IRQS_ON
 	stosm	__SF_EMPTY(%r15),0x03	# reenable interrupts
 	lmg	%r2,%r6,SP_R2(%r15)	# load svc arguments
@@ -572,16 +587,17 @@ kernel_per:
 io_int_handler:
 	stck	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA+32
-	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
-	CREATE_STACK_FRAME __LC_IO_OLD_PSW,__LC_SAVE_AREA+32
+	SAVE_ALL_ASYNC __LC_IO_OLD_PSW,__LC_SAVE_AREA+40
+	CREATE_STACK_FRAME __LC_SAVE_AREA+40
+	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	io_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK
 io_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
@@ -590,7 +606,7 @@ io_return:
 	LOCKDEP_SYS_EXIT
 	TRACE_IRQS_ON
 io_tif:
-	tm	__TI_flags+7(%r9),_TIF_WORK_INT
+	tm	__TI_flags+7(%r12),_TIF_WORK_INT
 	jnz	io_work 		# there is work to do (signals etc.)
 io_restore:
 	RESTORE_ALL __LC_RETURN_PSW,0
@@ -610,7 +626,7 @@ io_work:
 	jo	io_work_user		# yes -> do resched & signal
 #ifdef CONFIG_PREEMPT
 	# check for preemptive scheduling
-	icm	%r0,15,__TI_precount(%r9)
+	icm	%r0,15,__TI_precount(%r12)
 	jnz	io_restore		# preemption is disabled
 	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jno	io_restore
@@ -645,13 +661,13 @@ io_work_user:
 #	       and _TIF_MCCK_PENDING
 #
 io_work_tif:
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jo	io_mcck_pending
-	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
+	tm	__TI_flags+7(%r12),_TIF_NEED_RESCHED
 	jo	io_reschedule
-	tm	__TI_flags+7(%r9),_TIF_SIGPENDING
+	tm	__TI_flags+7(%r12),_TIF_SIGPENDING
 	jo	io_sigpending
-	tm	__TI_flags+7(%r9),_TIF_NOTIFY_RESUME
+	tm	__TI_flags+7(%r12),_TIF_NOTIFY_RESUME
 	jo	io_notify_resume
 	j	io_return		# beware of critical section cleanup
 
@@ -706,16 +722,17 @@ io_notify_resume:
 ext_int_handler:
 	stck	__LC_INT_CLOCK
 	stpt	__LC_ASYNC_ENTER_TIMER
-	SAVE_ALL_BASE __LC_SAVE_AREA+32
-	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
-	CREATE_STACK_FRAME __LC_EXT_OLD_PSW,__LC_SAVE_AREA+32
+	SAVE_ALL_ASYNC __LC_EXT_OLD_PSW,__LC_SAVE_AREA+40
+	CREATE_STACK_FRAME __LC_SAVE_AREA+40
+	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
 	jz	ext_no_vtime
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_ASYNC_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_ASYNC_ENTER_TIMER
+	LAST_BREAK
 ext_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
@@ -734,7 +751,9 @@ mcck_int_handler:
 	la	%r1,4095		# revalidate r1
 	spt	__LC_CPU_TIMER_SAVE_AREA-4095(%r1)	# revalidate cpu timer
 	lmg	%r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)# revalidate gprs
-	SAVE_ALL_BASE __LC_SAVE_AREA+64
+	stmg	%r11,%r15,__LC_SAVE_AREA+80
+	larl	%r13,system_call
+	lg	%r11,__LC_LAST_BREAK
 	la	%r12,__LC_MCK_OLD_PSW
 	tm	__LC_MCCK_CODE,0x80	# system damage?
 	jo	mcck_int_main		# yes -> rest of mcck code invalid
@@ -769,7 +788,10 @@ mcck_int_main:
 	srag	%r14,%r14,PAGE_SHIFT
 	jz	0f
 	lg	%r15,__LC_PANIC_STACK	# load panic stack
-0:	CREATE_STACK_FRAME __LC_MCK_OLD_PSW,__LC_SAVE_AREA+64
+0:	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	CREATE_STACK_FRAME __LC_SAVE_AREA+80
+	mvc	SP_PSW(16,%r15),0(%r12)
+	lg	%r12,__LC_THREAD_INFO	# load pointer to thread_info struct
 	tm	__LC_MCCK_CODE+2,0x08	# mwp of old psw valid?
 	jno	mcck_no_vtime		# no -> no timer update
 	tm	SP_PSW+1(%r15),0x01	# interrupting from user ?
@@ -777,8 +799,8 @@ mcck_int_main:
 	UPDATE_VTIME __LC_EXIT_TIMER,__LC_MCCK_ENTER_TIMER,__LC_USER_TIMER
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_MCCK_ENTER_TIMER
+	LAST_BREAK
 mcck_no_vtime:
-	lg	%r9,__LC_THREAD_INFO	# load pointer to thread_info struct
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
 	brasl	%r14,s390_do_machine_check
 	tm	SP_PSW+1(%r15),0x01	# returning to user ?
@@ -789,7 +811,7 @@ mcck_no_vtime:
 	xc	__SF_BACKCHAIN(8,%r1),__SF_BACKCHAIN(%r1) # clear back chain
 	lgr	%r15,%r1
 	stosm	__SF_EMPTY(%r15),0x04	# turn dat on
-	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+	tm	__TI_flags+7(%r12),_TIF_MCCK_PENDING
 	jno	mcck_return
 	HANDLE_SIE_INTERCEPT
 	TRACE_IRQS_OFF
@@ -803,6 +825,7 @@ mcck_return:
 	jno	0f
 	stpt	__LC_EXIT_TIMER
 0:	lpswe	__LC_RETURN_MCCK_PSW	# back to caller
+mcck_done:
 
 /*
  * Restart interruption handler, kick starter for additional CPUs
@@ -858,14 +881,14 @@ stack_overflow:
 	lg	%r15,__LC_PANIC_STACK	# change to panic stack
 	aghi	%r15,-SP_SIZE
 	mvc	SP_PSW(16,%r15),0(%r12)	# move user PSW to stack
-	stmg	%r0,%r11,SP_R0(%r15)	# store gprs %r0-%r11 to kernel stack
+	stmg	%r0,%r10,SP_R0(%r15)	# store gprs %r0-%r10 to kernel stack
 	la	%r1,__LC_SAVE_AREA
 	chi	%r12,__LC_SVC_OLD_PSW
 	je	0f
 	chi	%r12,__LC_PGM_OLD_PSW
 	je	0f
-	la	%r1,__LC_SAVE_AREA+32
-0:	mvc	SP_R12(32,%r15),0(%r1)	# move %r12-%r15 to stack
+	la	%r1,__LC_SAVE_AREA+40
+0:	mvc	SP_R11(40,%r15),0(%r1)	# move %r11-%r15 to stack
 	mvc	SP_ARGS(8,%r15),__LC_LAST_BREAK
 	xc	__SF_BACKCHAIN(8,%r15),__SF_BACKCHAIN(%r15) # clear back chain
 	la	%r2,SP_PTREGS(%r15)	# load pt_regs
@@ -920,21 +943,23 @@ cleanup_system_call:
 	je	0f
 	mvc	__LC_SYNC_ENTER_TIMER(8),__LC_ASYNC_ENTER_TIMER
 0:	cghi	%r12,__LC_MCK_OLD_PSW
-	la	%r12,__LC_SAVE_AREA+64
+	la	%r12,__LC_SAVE_AREA+80
 	je	0f
-	la	%r12,__LC_SAVE_AREA+32
+	la	%r12,__LC_SAVE_AREA+40
 0:	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+16)
 	jhe	cleanup_vtime
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn)
 	jh	0f
-	mvc	__LC_SAVE_AREA(32),0(%r12)
-0:	stg	%r13,8(%r12)
-	stg	%r12,__LC_SAVE_AREA+96	# argh
-	SAVE_ALL_SYNC __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	CREATE_STACK_FRAME __LC_SVC_OLD_PSW,__LC_SAVE_AREA
-	lg	%r12,__LC_SAVE_AREA+96	# argh
-	stg	%r15,24(%r12)
-	llgh	%r7,__LC_SVC_INT_CODE
+	mvc	__LC_SAVE_AREA(40),0(%r12)
+0:	lg	%r15,__LC_KERNEL_STACK	# problem state -> load ksp
+	aghi	%r15,-SP_SIZE		# make room for registers & psw
+	stg	%r15,32(%r12)
+	stg	%r11,0(%r12)
+	CREATE_STACK_FRAME __LC_SAVE_AREA
+	mvc	SP_PSW(16,%r15),__LC_SVC_OLD_PSW
+	mvc	SP_ILC(4,%r15),__LC_SVC_ILC
+	stg	%r7,SP_ARGS(%r15)
+	mvc	8(8,%r12),__LC_THREAD_INFO
 cleanup_vtime:
 	clc	__LC_RETURN_PSW+8(8),BASED(cleanup_system_call_insn+24)
 	jhe	cleanup_stime
@@ -945,7 +970,11 @@ cleanup_stime:
 	UPDATE_VTIME __LC_LAST_UPDATE_TIMER,__LC_EXIT_TIMER,__LC_SYSTEM_TIMER
 cleanup_update:
 	mvc	__LC_LAST_UPDATE_TIMER(8),__LC_SYNC_ENTER_TIMER
-	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8)
+	srag	%r12,%r11,23
+	lg	%r12,__LC_THREAD_INFO
+	jz	0f
+	stg	%r11,__TI_last_break(%r12)
+0:	mvc	__LC_RETURN_PSW+8(8),BASED(cleanup_table_system_call+8)
 	la	%r12,__LC_RETURN_PSW
 	br	%r14
 cleanup_system_call_insn:
@@ -972,11 +1001,11 @@ cleanup_sysc_restore:
 	mvc	__LC_EXIT_TIMER(8),__LC_ASYNC_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
 	cghi	%r12,__LC_MCK_OLD_PSW
-	la	%r12,__LC_SAVE_AREA+64
+	la	%r12,__LC_SAVE_AREA+80
 	je	1f
-	la	%r12,__LC_SAVE_AREA+32
-1:	mvc	0(32,%r12),SP_R12(%r15)
-	lmg	%r0,%r11,SP_R0(%r15)
+	la	%r12,__LC_SAVE_AREA+40
+1:	mvc	0(40,%r12),SP_R11(%r15)
+	lmg	%r0,%r10,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
 2:	la	%r12,__LC_RETURN_PSW
 	br	%r14
@@ -997,8 +1026,8 @@ cleanup_io_restore:
 	jhe	0f
 	mvc	__LC_EXIT_TIMER(8),__LC_MCCK_ENTER_TIMER
 0:	mvc	__LC_RETURN_PSW(16),SP_PSW(%r15)
-	mvc	__LC_SAVE_AREA+64(32),SP_R12(%r15)
-	lmg	%r0,%r11,SP_R0(%r15)
+	mvc	__LC_SAVE_AREA+80(40),SP_R11(%r15)
+	lmg	%r0,%r10,SP_R0(%r15)
 	lg	%r15,SP_R15(%r15)
 1:	la	%r12,__LC_RETURN_PSW
 	br	%r14
@@ -1010,13 +1039,6 @@ cleanup_io_restore_insn:
  * Integer constants
  */
 		.align	4
-.Lconst:
-.Lnr_syscalls:	.long	NR_syscalls
-.L0x0130:	.short	0x130
-.L0x0140:	.short	0x140
-.L0x0150:	.short	0x150
-.L0x0160:	.short	0x160
-.L0x0170:	.short	0x170
 .Lcritical_start:
 		.quad	__critical_start
 .Lcritical_end:
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 9f654da4cecc..83339d33c4b1 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -57,6 +57,7 @@
 enum s390_regset {
 	REGSET_GENERAL,
 	REGSET_FP,
+	REGSET_LAST_BREAK,
 	REGSET_GENERAL_EXTENDED,
 };
 
@@ -381,6 +382,10 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data)
 			copied += sizeof(unsigned long);
 		}
 		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		put_user(task_thread_info(child)->last_break,
+			 (unsigned long __user *) data);
+		return 0;
 	default:
 		/* Removing high order bit from addr (only for 31 bit). */
 		addr &= PSW_ADDR_INSN;
@@ -633,6 +638,10 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 			copied += sizeof(unsigned int);
 		}
 		return 0;
+	case PTRACE_GET_LAST_BREAK:
+		put_user(task_thread_info(child)->last_break,
+			 (unsigned int __user *) data);
+		return 0;
 	}
 	return compat_ptrace_request(child, request, addr, data);
 }
@@ -797,6 +806,28 @@ static int s390_fpregs_set(struct task_struct *target,
 	return rc;
 }
 
+#ifdef CONFIG_64BIT
+
+static int s390_last_break_get(struct task_struct *target,
+			       const struct user_regset *regset,
+			       unsigned int pos, unsigned int count,
+			       void *kbuf, void __user *ubuf)
+{	/* regset .get handler for last_break; regset and pos are not used */
+	if (count > 0) {	/* a zero-length request copies nothing */
+		if (kbuf) {	/* a non-NULL kbuf takes precedence over ubuf */
+			unsigned long *k = kbuf;
+			*k = task_thread_info(target)->last_break;
+		} else {	/* otherwise store through the __user pointer */
+			unsigned long  __user *u = ubuf;
+			if (__put_user(task_thread_info(target)->last_break, u))
+				return -EFAULT;	/* __put_user() returned nonzero */
+		}
+	}
+	return 0;
+}
+
+#endif
+
 static const struct user_regset s390_regsets[] = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
@@ -814,6 +845,15 @@ static const struct user_regset s390_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
+#ifdef CONFIG_64BIT
+	[REGSET_LAST_BREAK] = {
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_last_break_get,
+	},
+#endif
 };
 
 static const struct user_regset_view user_s390_view = {
@@ -948,6 +988,27 @@ static int s390_compat_regs_high_set(struct task_struct *target,
 	return rc;
 }
 
+static int s390_compat_last_break_get(struct task_struct *target,
+				      const struct user_regset *regset,
+				      unsigned int pos, unsigned int count,
+				      void *kbuf, void __user *ubuf)
+{	/* compat regset .get handler for last_break; regset and pos unused */
+	compat_ulong_t last_break;
+
+	if (count > 0) {	/* a zero-length request copies nothing */
+		last_break = task_thread_info(target)->last_break;	/* converted to compat_ulong_t */
+		if (kbuf) {	/* a non-NULL kbuf takes precedence over ubuf */
+			unsigned long *k = kbuf;
+			*k = last_break;	/* stored as a full unsigned long */
+		} else {	/* otherwise store through the __user pointer */
+			unsigned long  __user *u = ubuf;
+			if (__put_user(last_break, u))
+				return -EFAULT;	/* __put_user() returned nonzero */
+		}
+	}
+	return 0;
+}
+
 static const struct user_regset s390_compat_regsets[] = {
 	[REGSET_GENERAL] = {
 		.core_note_type = NT_PRSTATUS,
@@ -965,6 +1026,13 @@ static const struct user_regset s390_compat_regsets[] = {
 		.get = s390_fpregs_get,
 		.set = s390_fpregs_set,
 	},
+	[REGSET_LAST_BREAK] = {
+		.core_note_type = NT_S390_LAST_BREAK,
+		.n = 1,
+		.size = sizeof(long),
+		.align = sizeof(long),
+		.get = s390_compat_last_break_get,
+	},
 	[REGSET_GENERAL_EXTENDED] = {
 		.core_note_type = NT_S390_HIGH_GPRS,
 		.n = sizeof(s390_compat_regs_high) / sizeof(compat_long_t),
diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c
index 6289945562b0..ee7ac8b11782 100644
--- a/arch/s390/kernel/signal.c
+++ b/arch/s390/kernel/signal.c
@@ -313,6 +313,7 @@ static int setup_frame(int sig, struct k_sigaction *ka,
 	   To avoid breaking binary compatibility, they are passed as args. */
 	regs->gprs[4] = current->thread.trap_no;
 	regs->gprs[5] = current->thread.prot_addr;
+	regs->gprs[6] = task_thread_info(current)->last_break;
 
 	/* Place signal number on stack to allow backtrace from handler.  */
 	if (__put_user(regs->gprs[2], (int __user *) &frame->signo))
@@ -376,6 +377,7 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
 	regs->gprs[2] = map_signal(sig);
 	regs->gprs[3] = (unsigned long) &frame->info;
 	regs->gprs[4] = (unsigned long) &frame->uc;
+	regs->gprs[5] = task_thread_info(current)->last_break;
 	return 0;
 
 give_sigsegv:
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 597858418051..4d608014753a 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -394,6 +394,7 @@ typedef struct elf64_shdr {
 #define NT_S390_TODPREG	0x303		/* s390 TOD programmable register */
 #define NT_S390_CTRS	0x304		/* s390 control registers */
 #define NT_S390_PREFIX	0x305		/* s390 prefix register */
+#define NT_S390_LAST_BREAK	0x306	/* s390 breaking event address */
 
 
 /* Note header in a PT_NOTE section */
-- 
cgit v1.2.3


From 81880d603d00c645e0890d0a44d50711c503b72b Mon Sep 17 00:00:00 2001
From: Anton Blanchard <anton@samba.org>
Date: Mon, 17 May 2010 14:34:57 +1000
Subject: atomic_t: Remove volatile from atomic_t definition

When looking at a performance problem on PowerPC, I noticed some awful code
generation:

c00000000051fc98:       3b 60 00 01     li      r27,1
...
c00000000051fca0:       3b 80 00 00     li      r28,0
...
c00000000051fcdc:       93 61 00 70     stw     r27,112(r1)
c00000000051fce0:       93 81 00 74     stw     r28,116(r1)
c00000000051fce4:       81 21 00 70     lwz     r9,112(r1)
c00000000051fce8:       80 01 00 74     lwz     r0,116(r1)
c00000000051fcec:       7d 29 07 b4     extsw   r9,r9
c00000000051fcf0:       7c 00 07 b4     extsw   r0,r0

c00000000051fcf4:       7c 20 04 ac     lwsync
c00000000051fcf8:       7d 60 f8 28     lwarx   r11,0,r31
c00000000051fcfc:       7c 0b 48 00     cmpw    r11,r9
c00000000051fd00:       40 c2 00 10     bne-    c00000000051fd10
c00000000051fd04:       7c 00 f9 2d     stwcx.  r0,0,r31
c00000000051fd08:       40 c2 ff f0     bne+    c00000000051fcf8
c00000000051fd0c:       4c 00 01 2c     isync

We create two constants, write them out to the stack, read them straight back
in and sign extend them. What a mess.

It turns out this bad code is a result of us defining atomic_t as a
volatile int.

We removed the volatile attribute from the powerpc atomic_t definition years
ago, but commit ea435467500612636f8f4fb639ff6e76b2496e4b (atomic_t: unify all
arch definitions) added it back in.

To dig up an old quote from Linus:

> The fact is, volatile on data structures is a bug. It's a wart in the C
> language. It shouldn't be used.
>
> Volatile accesses in *code* can be ok, and if we have "atomic_read()"
> expand to a "*(volatile int *)&(x)->value", then I'd be ok with that.
>
> But marking data structures volatile just makes the compiler screw up
> totally, and makes code for initialization sequences etc much worse.

And screw up it does :)

With the volatile removed, we see much more reasonable code generation:

c00000000051f5b8:       3b 60 00 01     li      r27,1
...
c00000000051f5c0:       3b 80 00 00     li      r28,0
...

c00000000051fc7c:       7c 20 04 ac     lwsync
c00000000051fc80:       7c 00 f8 28     lwarx   r0,0,r31
c00000000051fc84:       7c 00 d8 00     cmpw    r0,r27
c00000000051fc88:       40 c2 00 10     bne-    c00000000051fc98
c00000000051fc8c:       7f 80 f9 2d     stwcx.  r28,0,r31
c00000000051fc90:       40 c2 ff f0     bne+    c00000000051fc80
c00000000051fc94:       4c 00 01 2c     isync

Six fewer instructions.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/types.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/types.h b/include/linux/types.h
index c42724f8c802..23d237a075e2 100644
--- a/include/linux/types.h
+++ b/include/linux/types.h
@@ -188,12 +188,12 @@ typedef u32 phys_addr_t;
 typedef phys_addr_t resource_size_t;
 
 typedef struct {
-	volatile int counter;
+	int counter;
 } atomic_t;
 
 #ifdef CONFIG_64BIT
 typedef struct {
-	volatile long counter;
+	long counter;
 } atomic64_t;
 #endif
 
-- 
cgit v1.2.3


From 0b7f1a7efb38b551f5948a13d0b36e876ba536db Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Wed, 28 Jan 2009 21:01:02 +0100
Subject: platform: Make platform resource input parameters const

Make the platform resource input parameters of platform_device_add_resources()
and platform_device_register_simple() const, as the resources are copied and
never modified.

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Acked-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/platform.c         | 4 ++--
 include/linux/platform_device.h | 6 ++++--
 2 files changed, 6 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index 4b4b565c835f..c5fbe198fbdb 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -187,7 +187,7 @@ EXPORT_SYMBOL_GPL(platform_device_alloc);
  * released.
  */
 int platform_device_add_resources(struct platform_device *pdev,
-				  struct resource *res, unsigned int num)
+				  const struct resource *res, unsigned int num)
 {
 	struct resource *r;
 
@@ -367,7 +367,7 @@ EXPORT_SYMBOL_GPL(platform_device_unregister);
  */
 struct platform_device *platform_device_register_simple(const char *name,
 							int id,
-							struct resource *res,
+							const struct resource *res,
 							unsigned int num)
 {
 	struct platform_device *pdev;
diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h
index 212da17d06af..5417944d3687 100644
--- a/include/linux/platform_device.h
+++ b/include/linux/platform_device.h
@@ -44,12 +44,14 @@ extern int platform_get_irq_byname(struct platform_device *, const char *);
 extern int platform_add_devices(struct platform_device **, int);
 
 extern struct platform_device *platform_device_register_simple(const char *, int id,
-					struct resource *, unsigned int);
+					const struct resource *, unsigned int);
 extern struct platform_device *platform_device_register_data(struct device *,
 		const char *, int, const void *, size_t);
 
 extern struct platform_device *platform_device_alloc(const char *name, int id);
-extern int platform_device_add_resources(struct platform_device *pdev, struct resource *res, unsigned int num);
+extern int platform_device_add_resources(struct platform_device *pdev,
+					 const struct resource *res,
+					 unsigned int num);
 extern int platform_device_add_data(struct platform_device *pdev, const void *data, size_t size);
 extern int platform_device_add(struct platform_device *pdev);
 extern void platform_device_del(struct platform_device *pdev);
-- 
cgit v1.2.3


From bf54a2b3c0dbf76136f00ff785bf6d8f6291311d Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Tue, 18 Nov 2008 21:13:53 +0100
Subject: m68k: amiga - Zorro bus modalias support

Add Amiga Zorro bus modalias and uevent support

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 drivers/net/a2065.c             |  1 +
 drivers/net/ariadne.c           |  1 +
 drivers/net/hydra.c             |  1 +
 drivers/net/zorro8390.c         |  1 +
 drivers/scsi/zorro7xx.c         |  1 +
 drivers/video/cirrusfb.c        |  1 +
 drivers/video/fm2fb.c           |  1 +
 drivers/zorro/zorro-driver.c    | 24 ++++++++++++++++++++++++
 drivers/zorro/zorro-sysfs.c     | 11 +++++++++++
 include/linux/mod_devicetable.h |  9 +++++++++
 include/linux/zorro.h           | 13 +------------
 scripts/mod/file2alias.c        | 14 ++++++++++++++
 12 files changed, 66 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/net/a2065.c b/drivers/net/a2065.c
index ed5e9742be2c..a8f0512bad38 100644
--- a/drivers/net/a2065.c
+++ b/drivers/net/a2065.c
@@ -674,6 +674,7 @@ static struct zorro_device_id a2065_zorro_tbl[] __devinitdata = {
 	{ ZORRO_PROD_AMERISTAR_A2065 },
 	{ 0 }
 };
+MODULE_DEVICE_TABLE(zorro, a2065_zorro_tbl);
 
 static struct zorro_driver a2065_driver = {
 	.name		= "a2065",
diff --git a/drivers/net/ariadne.c b/drivers/net/ariadne.c
index fa1a2354f5f9..4b30a46486e2 100644
--- a/drivers/net/ariadne.c
+++ b/drivers/net/ariadne.c
@@ -145,6 +145,7 @@ static struct zorro_device_id ariadne_zorro_tbl[] __devinitdata = {
     { ZORRO_PROD_VILLAGE_TRONIC_ARIADNE },
     { 0 }
 };
+MODULE_DEVICE_TABLE(zorro, ariadne_zorro_tbl);
 
 static struct zorro_driver ariadne_driver = {
     .name	= "ariadne",
diff --git a/drivers/net/hydra.c b/drivers/net/hydra.c
index 24724b4ad709..07d8e5b634f3 100644
--- a/drivers/net/hydra.c
+++ b/drivers/net/hydra.c
@@ -71,6 +71,7 @@ static struct zorro_device_id hydra_zorro_tbl[] __devinitdata = {
     { ZORRO_PROD_HYDRA_SYSTEMS_AMIGANET },
     { 0 }
 };
+MODULE_DEVICE_TABLE(zorro, hydra_zorro_tbl);
 
 static struct zorro_driver hydra_driver = {
     .name	= "hydra",
diff --git a/drivers/net/zorro8390.c b/drivers/net/zorro8390.c
index 81c753a617ab..9548cbb5012a 100644
--- a/drivers/net/zorro8390.c
+++ b/drivers/net/zorro8390.c
@@ -102,6 +102,7 @@ static struct zorro_device_id zorro8390_zorro_tbl[] __devinitdata = {
     { ZORRO_PROD_INDIVIDUAL_COMPUTERS_X_SURF, },
     { 0 }
 };
+MODULE_DEVICE_TABLE(zorro, zorro8390_zorro_tbl);
 
 static struct zorro_driver zorro8390_driver = {
     .name	= "zorro8390",
diff --git a/drivers/scsi/zorro7xx.c b/drivers/scsi/zorro7xx.c
index 105449c15fa9..e17764d71476 100644
--- a/drivers/scsi/zorro7xx.c
+++ b/drivers/scsi/zorro7xx.c
@@ -69,6 +69,7 @@ static struct zorro_device_id zorro7xx_zorro_tbl[] __devinitdata = {
 	},
 	{ 0 }
 };
+MODULE_DEVICE_TABLE(zorro, zorro7xx_zorro_tbl);
 
 static int __devinit zorro7xx_init_one(struct zorro_dev *z,
 				       const struct zorro_device_id *ent)
diff --git a/drivers/video/cirrusfb.c b/drivers/video/cirrusfb.c
index 8d8dfda2f868..6df7c54db0a3 100644
--- a/drivers/video/cirrusfb.c
+++ b/drivers/video/cirrusfb.c
@@ -299,6 +299,7 @@ static const struct zorro_device_id cirrusfb_zorro_table[] = {
 	},
 	{ 0 }
 };
+MODULE_DEVICE_TABLE(zorro, cirrusfb_zorro_table);
 
 static const struct {
 	zorro_id id2;
diff --git a/drivers/video/fm2fb.c b/drivers/video/fm2fb.c
index 6c91c61cdb63..1b0feb8e7244 100644
--- a/drivers/video/fm2fb.c
+++ b/drivers/video/fm2fb.c
@@ -219,6 +219,7 @@ static struct zorro_device_id fm2fb_devices[] __devinitdata = {
 	{ ZORRO_PROD_HELFRICH_RAINBOW_II },
 	{ 0 }
 };
+MODULE_DEVICE_TABLE(zorro, fm2fb_devices);
 
 static struct zorro_driver fm2fb_driver = {
 	.name		= "fm2fb",
diff --git a/drivers/zorro/zorro-driver.c b/drivers/zorro/zorro-driver.c
index 53180a37cc9a..7ee2b6e71786 100644
--- a/drivers/zorro/zorro-driver.c
+++ b/drivers/zorro/zorro-driver.c
@@ -137,10 +137,34 @@ static int zorro_bus_match(struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
+static int zorro_uevent(struct device *dev, struct kobj_uevent_env *env)
+{	/* bus uevent hook: adds the device's Zorro identity variables to env */
+#ifdef CONFIG_HOTPLUG
+	struct zorro_dev *z;
+
+	if (!dev)	/* no device to describe */
+		return -ENODEV;
+
+	z = to_zorro_dev(dev);
+	if (!z)
+		return -ENODEV;
+
+	if (add_uevent_var(env, "ZORRO_ID=%08X", z->id) ||	/* the || chain stops at the first nonzero result */
+	    add_uevent_var(env, "ZORRO_SLOT_NAME=%s", dev_name(dev)) ||
+	    add_uevent_var(env, "ZORRO_SLOT_ADDR=%04X", z->slotaddr) ||
+	    add_uevent_var(env, "MODALIAS=" ZORRO_DEVICE_MODALIAS_FMT, z->id))
+		return -ENOMEM;	/* any failed add is reported as -ENOMEM */
+
+	return 0;
+#else /* !CONFIG_HOTPLUG */
+	return -ENODEV;	/* without CONFIG_HOTPLUG no variables are added */
+#endif /* !CONFIG_HOTPLUG */
+}
 
 struct bus_type zorro_bus_type = {
 	.name	= "zorro",
 	.match	= zorro_bus_match,
+	.uevent	= zorro_uevent,
 	.probe	= zorro_device_probe,
 	.remove	= zorro_device_remove,
 };
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index 1d2a772ea14c..eb924e0a64ce 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -77,6 +77,16 @@ static struct bin_attribute zorro_config_attr = {
 	.read = zorro_read_config,
 };
 
+static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
+			     char *buf)
+{	/* show handler of the "modalias" device attribute; attr is unused */
+	struct zorro_dev *z = to_zorro_dev(dev);
+
+	return sprintf(buf, ZORRO_DEVICE_MODALIAS_FMT "\n", z->id);	/* modalias string plus newline; returns sprintf()'s count */
+}
+
+static DEVICE_ATTR(modalias, S_IRUGO, modalias_show, NULL);
+
 int zorro_create_sysfs_dev_files(struct zorro_dev *z)
 {
 	struct device *dev = &z->dev;
@@ -89,6 +99,7 @@ int zorro_create_sysfs_dev_files(struct zorro_dev *z)
 	    (error = device_create_file(dev, &dev_attr_slotaddr)) ||
 	    (error = device_create_file(dev, &dev_attr_slotsize)) ||
 	    (error = device_create_file(dev, &dev_attr_resource)) ||
+	    (error = device_create_file(dev, &dev_attr_modalias)) ||
 	    (error = sysfs_create_bin_file(&dev->kobj, &zorro_config_attr)))
 		return error;
 
diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h
index f58e9d836f32..56fde4364e4c 100644
--- a/include/linux/mod_devicetable.h
+++ b/include/linux/mod_devicetable.h
@@ -474,4 +474,13 @@ struct platform_device_id {
 			__attribute__((aligned(sizeof(kernel_ulong_t))));
 };
 
+struct zorro_device_id {	/* one entry of a driver's Zorro ID match table */
+	__u32 id;			/* Device ID or ZORRO_WILDCARD */
+	kernel_ulong_t driver_data;	/* Data private to the driver */
+};
+
+#define ZORRO_WILDCARD			(0xffffffff)	/* not official */
+
+#define ZORRO_DEVICE_MODALIAS_FMT	"zorro:i%08X"
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
diff --git a/include/linux/zorro.h b/include/linux/zorro.h
index 913bfc226dda..908db1b36d6c 100644
--- a/include/linux/zorro.h
+++ b/include/linux/zorro.h
@@ -38,8 +38,6 @@
 typedef __u32 zorro_id;
 
 
-#define ZORRO_WILDCARD		(0xffffffff)	/* not official */
-
 /* Include the ID list */
 #include <linux/zorro_ids.h>
 
@@ -116,6 +114,7 @@ struct ConfigDev {
 
 #include <linux/init.h>
 #include <linux/ioport.h>
+#include <linux/mod_devicetable.h>
 
 #include <asm/zorro.h>
 
@@ -154,16 +153,6 @@ extern struct zorro_bus zorro_bus;	/* single Zorro bus */
 extern struct bus_type zorro_bus_type;
 
 
-    /*
-     *  Zorro device IDs
-     */
-
-struct zorro_device_id {
-	zorro_id id;			/* Device ID or ZORRO_WILDCARD */
-	unsigned long driver_data;	/* Data private to the driver */
-};
-
-
     /*
      *  Zorro device drivers
      */
diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
index 220213e603db..df90f31d14bf 100644
--- a/scripts/mod/file2alias.c
+++ b/scripts/mod/file2alias.c
@@ -796,6 +796,16 @@ static int do_platform_entry(const char *filename,
 	return 1;
 }
 
+/* Looks like: zorro:iN. */
+static int do_zorro_entry(const char *filename, struct zorro_device_id *id,
+			  char *alias)
+{
+	id->id = TO_NATIVE(id->id);
+	strcpy(alias, "zorro:");
+	ADD(alias, "i", id->id != ZORRO_WILDCARD, id->id);
+	return 1;
+}
+
 /* Ignore any prefix, eg. some architectures prepend _ */
 static inline int sym_is(const char *symbol, const char *name)
 {
@@ -943,6 +953,10 @@ void handle_moddevtable(struct module *mod, struct elf_info *info,
 		do_table(symval, sym->st_size,
 			 sizeof(struct platform_device_id), "platform",
 			 do_platform_entry, mod);
+	else if (sym_is(symname, "__mod_zorro_device_table"))
+		do_table(symval, sym->st_size,
+			 sizeof(struct zorro_device_id), "zorro",
+			 do_zorro_entry, mod);
 	free(zeros);
 }
 
-- 
cgit v1.2.3


From 0d305464aefff342c85b4db8b3d7a4345246e5a1 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Sun, 5 Apr 2009 12:40:41 +0200
Subject: m68k: amiga - Zorro host bridge platform device conversion

Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
---
 arch/m68k/amiga/Makefile   |   2 +-
 arch/m68k/amiga/platform.c |  58 +++++++++++
 drivers/zorro/proc.c       |   6 +-
 drivers/zorro/zorro.c      | 243 ++++++++++++++++++++++++---------------------
 include/linux/zorro.h      |   9 --
 5 files changed, 190 insertions(+), 128 deletions(-)
 create mode 100644 arch/m68k/amiga/platform.c

(limited to 'include/linux')

diff --git a/arch/m68k/amiga/Makefile b/arch/m68k/amiga/Makefile
index 6a0d7650f980..11dd30b16b3b 100644
--- a/arch/m68k/amiga/Makefile
+++ b/arch/m68k/amiga/Makefile
@@ -2,6 +2,6 @@
 # Makefile for Linux arch/m68k/amiga source directory
 #
 
-obj-y		:= config.o amiints.o cia.o chipram.o amisound.o
+obj-y		:= config.o amiints.o cia.o chipram.o amisound.o platform.o
 
 obj-$(CONFIG_AMIGA_PCMCIA)	+= pcmcia.o
diff --git a/arch/m68k/amiga/platform.c b/arch/m68k/amiga/platform.c
new file mode 100644
index 000000000000..33a7669b4415
--- /dev/null
+++ b/arch/m68k/amiga/platform.c
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (C) 2007-2009 Geert Uytterhoeven
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive
+ * for more details.
+ */
+
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/zorro.h>
+
+#include <asm/amigahw.h>
+
+
+#ifdef CONFIG_ZORRO
+
+static const struct resource zorro_resources[] __initconst = {
+	/* Zorro II regions (on Zorro II/III) */
+	{
+		.name	= "Zorro II exp",
+		.start	= 0x00e80000,
+		.end	= 0x00efffff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Zorro II mem",
+		.start	= 0x00200000,
+		.end	= 0x009fffff,
+		.flags	= IORESOURCE_MEM,
+	},
+	/* Zorro III regions (on Zorro III only) */
+	{
+		.name	= "Zorro III exp",
+		.start	= 0xff000000,
+		.end	= 0xffffffff,
+		.flags	= IORESOURCE_MEM,
+	}, {
+		.name	= "Zorro III cfg",
+		.start	= 0x40000000,
+		.end	= 0x7fffffff,
+		.flags	= IORESOURCE_MEM,
+	}
+};
+
+
+static int __init amiga_init_bus(void)
+{
+	if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(ZORRO))
+		return -ENODEV;
+
+	platform_device_register_simple("amiga-zorro", -1, zorro_resources,
+					AMIGAHW_PRESENT(ZORRO3) ? 4 : 2);
+	return 0;
+}
+
+subsys_initcall(amiga_init_bus);
+
+#endif /* CONFIG_ZORRO */
diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c
index d47c47fc048f..3c7046d79654 100644
--- a/drivers/zorro/proc.c
+++ b/drivers/zorro/proc.c
@@ -97,7 +97,7 @@ static void zorro_seq_stop(struct seq_file *m, void *v)
 
 static int zorro_seq_show(struct seq_file *m, void *v)
 {
-	u_int slot = *(loff_t *)v;
+	unsigned int slot = *(loff_t *)v;
 	struct zorro_dev *z = &zorro_autocon[slot];
 
 	seq_printf(m, "%02x\t%08x\t%08lx\t%08lx\t%02x\n", slot, z->id,
@@ -129,7 +129,7 @@ static const struct file_operations zorro_devices_proc_fops = {
 
 static struct proc_dir_entry *proc_bus_zorro_dir;
 
-static int __init zorro_proc_attach_device(u_int slot)
+static int __init zorro_proc_attach_device(unsigned int slot)
 {
 	struct proc_dir_entry *entry;
 	char name[4];
@@ -146,7 +146,7 @@ static int __init zorro_proc_attach_device(u_int slot)
 
 static int __init zorro_proc_init(void)
 {
-	u_int slot;
+	unsigned int slot;
 
 	if (MACH_IS_AMIGA && AMIGAHW_PRESENT(ZORRO)) {
 		proc_bus_zorro_dir = proc_mkdir("bus/zorro", NULL);
diff --git a/drivers/zorro/zorro.c b/drivers/zorro/zorro.c
index d45fb34e2d23..6455f3a244c5 100644
--- a/drivers/zorro/zorro.c
+++ b/drivers/zorro/zorro.c
@@ -15,6 +15,8 @@
 #include <linux/zorro.h>
 #include <linux/bitops.h>
 #include <linux/string.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
 
 #include <asm/setup.h>
 #include <asm/amigahw.h>
@@ -26,24 +28,17 @@
      *  Zorro Expansion Devices
      */
 
-u_int zorro_num_autocon = 0;
+unsigned int zorro_num_autocon;
 struct zorro_dev zorro_autocon[ZORRO_NUM_AUTO];
 
 
     /*
-     *  Single Zorro bus
+     *  Zorro bus
      */
 
-struct zorro_bus zorro_bus = {\
-    .resources = {
-	/* Zorro II regions (on Zorro II/III) */
-	{ .name = "Zorro II exp", .start = 0x00e80000, .end = 0x00efffff },
-	{ .name = "Zorro II mem", .start = 0x00200000, .end = 0x009fffff },
-	/* Zorro III regions (on Zorro III only) */
-	{ .name = "Zorro III exp", .start = 0xff000000, .end = 0xffffffff },
-	{ .name = "Zorro III cfg", .start = 0x40000000, .end = 0x7fffffff }
-    },
-    .name = "Zorro bus"
+struct zorro_bus {
+	struct list_head devices;	/* list of devices on this bus */
+	struct device dev;
 };
 
 
@@ -53,18 +48,19 @@ struct zorro_bus zorro_bus = {\
 
 struct zorro_dev *zorro_find_device(zorro_id id, struct zorro_dev *from)
 {
-    struct zorro_dev *z;
+	struct zorro_dev *z;
 
-    if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(ZORRO))
-	return NULL;
+	if (!zorro_num_autocon)
+		return NULL;
 
-    for (z = from ? from+1 : &zorro_autocon[0];
-	 z < zorro_autocon+zorro_num_autocon;
-	 z++)
-	if (id == ZORRO_WILDCARD || id == z->id)
-	    return z;
-    return NULL;
+	for (z = from ? from+1 : &zorro_autocon[0];
+	     z < zorro_autocon+zorro_num_autocon;
+	     z++)
+		if (id == ZORRO_WILDCARD || id == z->id)
+			return z;
+	return NULL;
 }
+EXPORT_SYMBOL(zorro_find_device);
 
 
     /*
@@ -83,121 +79,138 @@ struct zorro_dev *zorro_find_device(zorro_id id, struct zorro_dev *from)
      */
 
 DECLARE_BITMAP(zorro_unused_z2ram, 128);
+EXPORT_SYMBOL(zorro_unused_z2ram);
 
 
 static void __init mark_region(unsigned long start, unsigned long end,
 			       int flag)
 {
-    if (flag)
-	start += Z2RAM_CHUNKMASK;
-    else
-	end += Z2RAM_CHUNKMASK;
-    start &= ~Z2RAM_CHUNKMASK;
-    end &= ~Z2RAM_CHUNKMASK;
-
-    if (end <= Z2RAM_START || start >= Z2RAM_END)
-	return;
-    start = start < Z2RAM_START ? 0x00000000 : start-Z2RAM_START;
-    end = end > Z2RAM_END ? Z2RAM_SIZE : end-Z2RAM_START;
-    while (start < end) {
-	u32 chunk = start>>Z2RAM_CHUNKSHIFT;
 	if (flag)
-	    set_bit(chunk, zorro_unused_z2ram);
+		start += Z2RAM_CHUNKMASK;
 	else
-	    clear_bit(chunk, zorro_unused_z2ram);
-	start += Z2RAM_CHUNKSIZE;
-    }
+		end += Z2RAM_CHUNKMASK;
+	start &= ~Z2RAM_CHUNKMASK;
+	end &= ~Z2RAM_CHUNKMASK;
+
+	if (end <= Z2RAM_START || start >= Z2RAM_END)
+		return;
+	start = start < Z2RAM_START ? 0x00000000 : start-Z2RAM_START;
+	end = end > Z2RAM_END ? Z2RAM_SIZE : end-Z2RAM_START;
+	while (start < end) {
+		u32 chunk = start>>Z2RAM_CHUNKSHIFT;
+		if (flag)
+			set_bit(chunk, zorro_unused_z2ram);
+		else
+			clear_bit(chunk, zorro_unused_z2ram);
+		start += Z2RAM_CHUNKSIZE;
+	}
 }
 
 
-static struct resource __init *zorro_find_parent_resource(struct zorro_dev *z)
+static struct resource __init *zorro_find_parent_resource(
+	struct platform_device *bridge, struct zorro_dev *z)
 {
-    int i;
+	int i;
 
-    for (i = 0; i < zorro_bus.num_resources; i++)
-	if (zorro_resource_start(z) >= zorro_bus.resources[i].start &&
-	    zorro_resource_end(z) <= zorro_bus.resources[i].end)
-		return &zorro_bus.resources[i];
-    return &iomem_resource;
+	for (i = 0; i < bridge->num_resources; i++) {
+		struct resource *r = &bridge->resource[i];
+		if (zorro_resource_start(z) >= r->start &&
+		    zorro_resource_end(z) <= r->end)
+			return r;
+	}
+	return &iomem_resource;
 }
 
 
-    /*
-     *  Initialization
-     */
 
-static int __init zorro_init(void)
+static int __init amiga_zorro_probe(struct platform_device *pdev)
 {
-    struct zorro_dev *z;
-    unsigned int i;
-    int error;
-
-    if (!MACH_IS_AMIGA || !AMIGAHW_PRESENT(ZORRO))
-	return 0;
-
-    pr_info("Zorro: Probing AutoConfig expansion devices: %d device%s\n",
-	   zorro_num_autocon, zorro_num_autocon == 1 ? "" : "s");
-
-    /* Initialize the Zorro bus */
-    INIT_LIST_HEAD(&zorro_bus.devices);
-    dev_set_name(&zorro_bus.dev, "zorro");
-    error = device_register(&zorro_bus.dev);
-    if (error) {
-	pr_err("Zorro: Error registering zorro_bus\n");
-	return error;
-    }
-
-    /* Request the resources */
-    zorro_bus.num_resources = AMIGAHW_PRESENT(ZORRO3) ? 4 : 2;
-    for (i = 0; i < zorro_bus.num_resources; i++)
-	request_resource(&iomem_resource, &zorro_bus.resources[i]);
-
-    /* Register all devices */
-    for (i = 0; i < zorro_num_autocon; i++) {
-	z = &zorro_autocon[i];
-	z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8);
-	if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
-	    /* GVP quirk */
-	    unsigned long magic = zorro_resource_start(z)+0x8000;
-	    z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK;
-	}
-	sprintf(z->name, "Zorro device %08x", z->id);
-	zorro_name_device(z);
-	z->resource.name = z->name;
-	if (request_resource(zorro_find_parent_resource(z), &z->resource))
-	    pr_err("Zorro: Address space collision on device %s %pR\n",
-		   z->name, &z->resource);
-	dev_set_name(&z->dev, "%02x", i);
-	z->dev.parent = &zorro_bus.dev;
-	z->dev.bus = &zorro_bus_type;
-	error = device_register(&z->dev);
+	struct zorro_bus *bus;
+	struct zorro_dev *z;
+	struct resource *r;
+	unsigned int i;
+	int error;
+
+	/* Initialize the Zorro bus */
+	bus = kzalloc(sizeof(*bus), GFP_KERNEL);
+	if (!bus)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&bus->devices);
+	bus->dev.parent = &pdev->dev;
+	dev_set_name(&bus->dev, "zorro");
+	error = device_register(&bus->dev);
 	if (error) {
-	    pr_err("Zorro: Error registering device %s\n", z->name);
-	    continue;
+		pr_err("Zorro: Error registering zorro_bus\n");
+		kfree(bus);
+		return error;
 	}
-	error = zorro_create_sysfs_dev_files(z);
-	if (error)
-	    dev_err(&z->dev, "Error creating sysfs files\n");
-    }
-
-    /* Mark all available Zorro II memory */
-    zorro_for_each_dev(z) {
-	if (z->rom.er_Type & ERTF_MEMLIST)
-	    mark_region(zorro_resource_start(z), zorro_resource_end(z)+1, 1);
-    }
-
-    /* Unmark all used Zorro II memory */
-    for (i = 0; i < m68k_num_memory; i++)
-	if (m68k_memory[i].addr < 16*1024*1024)
-	    mark_region(m68k_memory[i].addr,
-			m68k_memory[i].addr+m68k_memory[i].size, 0);
-
-    return 0;
+	platform_set_drvdata(pdev, bus);
+
+	/* Register all devices */
+	pr_info("Zorro: Probing AutoConfig expansion devices: %u device%s\n",
+		 zorro_num_autocon, zorro_num_autocon == 1 ? "" : "s");
+
+	for (i = 0; i < zorro_num_autocon; i++) {
+		z = &zorro_autocon[i];
+		z->id = (z->rom.er_Manufacturer<<16) | (z->rom.er_Product<<8);
+		if (z->id == ZORRO_PROD_GVP_EPC_BASE) {
+			/* GVP quirk */
+			unsigned long magic = zorro_resource_start(z)+0x8000;
+			z->id |= *(u16 *)ZTWO_VADDR(magic) & GVP_PRODMASK;
+		}
+		sprintf(z->name, "Zorro device %08x", z->id);
+		zorro_name_device(z);
+		z->resource.name = z->name;
+		r = zorro_find_parent_resource(pdev, z);
+		error = request_resource(r, &z->resource);
+		if (error)
+			dev_err(&bus->dev,
+				"Address space collision on device %s %pR\n",
+				z->name, &z->resource);
+		dev_set_name(&z->dev, "%02x", i);
+		z->dev.parent = &bus->dev;
+		z->dev.bus = &zorro_bus_type;
+		error = device_register(&z->dev);
+		if (error) {
+			dev_err(&bus->dev, "Error registering device %s\n",
+				z->name);
+			continue;
+		}
+		error = zorro_create_sysfs_dev_files(z);
+		if (error)
+			dev_err(&z->dev, "Error creating sysfs files\n");
+	}
+
+	/* Mark all available Zorro II memory */
+	zorro_for_each_dev(z) {
+		if (z->rom.er_Type & ERTF_MEMLIST)
+			mark_region(zorro_resource_start(z),
+				    zorro_resource_end(z)+1, 1);
+	}
+
+	/* Unmark all used Zorro II memory */
+	for (i = 0; i < m68k_num_memory; i++)
+		if (m68k_memory[i].addr < 16*1024*1024)
+			mark_region(m68k_memory[i].addr,
+				    m68k_memory[i].addr+m68k_memory[i].size,
+				    0);
+
+	return 0;
 }
 
-subsys_initcall(zorro_init);
+static struct platform_driver amiga_zorro_driver = {
+	.driver   = {
+		.name	= "amiga-zorro",
+		.owner	= THIS_MODULE,
+	},
+};
 
-EXPORT_SYMBOL(zorro_find_device);
-EXPORT_SYMBOL(zorro_unused_z2ram);
+static int __init amiga_zorro_init(void)
+{
+	return platform_driver_probe(&amiga_zorro_driver, amiga_zorro_probe);
+}
+
+module_init(amiga_zorro_init);
 
 MODULE_LICENSE("GPL");
diff --git a/include/linux/zorro.h b/include/linux/zorro.h
index 908db1b36d6c..7bf9db525e9e 100644
--- a/include/linux/zorro.h
+++ b/include/linux/zorro.h
@@ -141,15 +141,6 @@ struct zorro_dev {
      *  Zorro bus
      */
 
-struct zorro_bus {
-    struct list_head devices;		/* list of devices on this bus */
-    unsigned int num_resources;		/* number of resources */
-    struct resource resources[4];	/* address space routed to this bus */
-    struct device dev;
-    char name[10];
-};
-
-extern struct zorro_bus zorro_bus;	/* single Zorro bus */
 extern struct bus_type zorro_bus_type;
 
 
-- 
cgit v1.2.3


From 7fee226ad2397b635e2fd565a59ca3ae08a164cd Mon Sep 17 00:00:00 2001
From: Eric Dumazet <eric.dumazet@gmail.com>
Date: Tue, 11 May 2010 23:19:48 +0000
Subject: net: add a noref bit on skb dst

Use low order bit of skb->_skb_dst to tell dst is not refcounted.

Change _skb_dst to _skb_refdst to make sure all uses are caught.

skb_dst() returns the dst, regardless of noref bit set or not, but
with a lockdep check to make sure a noref dst is not given if current
user is not rcu protected.

New skb_dst_set_noref() helper to set a non-refcounted dst on a skb.
(with lockdep check)

skb_dst_drop() drops a reference only if skb dst was refcounted.

skb_dst_force() helper is used to force a refcount on dst, when skb
is queued and no longer RCU protected.

Use skb_dst_force() in __sk_add_backlog(), __dev_xmit_skb() if
!IFF_XMIT_DST_RELEASE or skb enqueued on qdisc queue, in
sock_queue_rcv_skb(), in __nf_queue().

Use skb_dst_force() in dev_requeue_skb().

Note: dst_use_noref() still dirties dst, we might transform it
later to do one dirtying per jiffies.

Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h   | 58 ++++++++++++++++++++++++++++++++++++++++++++----
 include/net/dst.h        | 48 ++++++++++++++++++++++++++++++++++++---
 include/net/sock.h       | 13 ++++++-----
 net/core/dev.c           |  3 +++
 net/core/skbuff.c        |  2 +-
 net/core/sock.c          |  6 +++++
 net/ipv4/icmp.c          |  6 ++---
 net/ipv4/ip_options.c    |  9 ++++----
 net/ipv4/netfilter.c     |  6 ++---
 net/ipv4/route.c         |  2 +-
 net/netfilter/nf_queue.c |  2 ++
 net/sched/sch_generic.c  |  4 +++-
 12 files changed, 134 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c9525bce80f6..7cdfb4d52847 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -264,7 +264,7 @@ typedef unsigned char *sk_buff_data_t;
  *	@transport_header: Transport layer header
  *	@network_header: Network layer header
  *	@mac_header: Link layer header
- *	@_skb_dst: destination entry
+ *	@_skb_refdst: destination entry (with norefcount bit)
  *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@len: Length of actual data
@@ -328,7 +328,7 @@ struct sk_buff {
 	 */
 	char			cb[48] __aligned(8);
 
-	unsigned long		_skb_dst;
+	unsigned long		_skb_refdst;
 #ifdef CONFIG_XFRM
 	struct	sec_path	*sp;
 #endif
@@ -419,14 +419,64 @@ struct sk_buff {
 
 #include <asm/system.h>
 
+/*
+ * skb might have a dst pointer attached, refcounted or not.
+ * _skb_refdst low order bit is set if refcount was _not_ taken
+ */
+#define SKB_DST_NOREF	1UL
+#define SKB_DST_PTRMASK	~(SKB_DST_NOREF)
+
+/**
+ * skb_dst - returns skb dst_entry
+ * @skb: buffer
+ *
+ * Returns skb dst_entry, regardless of reference taken or not.
+ */
 static inline struct dst_entry *skb_dst(const struct sk_buff *skb)
 {
-	return (struct dst_entry *)skb->_skb_dst;
+	/* If refdst was not refcounted, check we still are in a 
+	 * rcu_read_lock section
+	 */
+	WARN_ON((skb->_skb_refdst & SKB_DST_NOREF) &&
+		!rcu_read_lock_held() &&
+		!rcu_read_lock_bh_held());
+	return (struct dst_entry *)(skb->_skb_refdst & SKB_DST_PTRMASK);
 }
 
+/**
+ * skb_dst_set - sets skb dst
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was taken on dst and should
+ * be released by skb_dst_drop()
+ */
 static inline void skb_dst_set(struct sk_buff *skb, struct dst_entry *dst)
 {
-	skb->_skb_dst = (unsigned long)dst;
+	skb->_skb_refdst = (unsigned long)dst;
+}
+
+/**
+ * skb_dst_set_noref - sets skb dst, without a reference
+ * @skb: buffer
+ * @dst: dst entry
+ *
+ * Sets skb dst, assuming a reference was not taken on dst
+ * skb_dst_drop() should not dst_release() this dst
+ */
+static inline void skb_dst_set_noref(struct sk_buff *skb, struct dst_entry *dst)
+{
+	WARN_ON(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
+	skb->_skb_refdst = (unsigned long)dst | SKB_DST_NOREF;
+}
+
+/**
+ * skb_dst_is_noref - Test if skb dst isnt refcounted
+ * @skb: buffer
+ */
+static inline bool skb_dst_is_noref(const struct sk_buff *skb)
+{
+	return (skb->_skb_refdst & SKB_DST_NOREF) && skb_dst(skb);
 }
 
 static inline struct rtable *skb_rtable(const struct sk_buff *skb)
diff --git a/include/net/dst.h b/include/net/dst.h
index aac5a5fcfda9..27207a13f2a6 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -168,6 +168,12 @@ static inline void dst_use(struct dst_entry *dst, unsigned long time)
 	dst->lastuse = time;
 }
 
+static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
+{
+	dst->__use++;
+	dst->lastuse = time;
+}
+
 static inline
 struct dst_entry * dst_clone(struct dst_entry * dst)
 {
@@ -177,11 +183,47 @@ struct dst_entry * dst_clone(struct dst_entry * dst)
 }
 
 extern void dst_release(struct dst_entry *dst);
+
+static inline void refdst_drop(unsigned long refdst)
+{
+	if (!(refdst & SKB_DST_NOREF))
+		dst_release((struct dst_entry *)(refdst & SKB_DST_PTRMASK));
+}
+
+/**
+ * skb_dst_drop - drops skb dst
+ * @skb: buffer
+ *
+ * Drops dst reference count if a reference was taken.
+ */
 static inline void skb_dst_drop(struct sk_buff *skb)
 {
-	if (skb->_skb_dst)
-		dst_release(skb_dst(skb));
-	skb->_skb_dst = 0UL;
+	if (skb->_skb_refdst) {
+		refdst_drop(skb->_skb_refdst);
+		skb->_skb_refdst = 0UL;
+	}
+}
+
+static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb)
+{
+	nskb->_skb_refdst = oskb->_skb_refdst;
+	if (!(nskb->_skb_refdst & SKB_DST_NOREF))
+		dst_clone(skb_dst(nskb));
+}
+
+/**
+ * skb_dst_force - makes sure skb dst is refcounted
+ * @skb: buffer
+ *
+ * If dst is not yet refcounted, let's do it
+ */
+static inline void skb_dst_force(struct sk_buff *skb)
+{
+	if (skb_dst_is_noref(skb)) {
+		WARN_ON(!rcu_read_lock_held());
+		skb->_skb_refdst &= ~SKB_DST_NOREF;
+		dst_clone(skb_dst(skb));
+	}
 }
 
 /* Children define the path of the packet through the
diff --git a/include/net/sock.h b/include/net/sock.h
index aed16eb9db4b..5697caf8cc76 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -600,12 +600,15 @@ static inline int sk_stream_memory_free(struct sock *sk)
 /* OOB backlog add */
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (!sk->sk_backlog.tail) {
-		sk->sk_backlog.head = sk->sk_backlog.tail = skb;
-	} else {
+	/* dont let skb dst not refcounted, we are going to leave rcu lock */
+	skb_dst_force(skb);
+
+	if (!sk->sk_backlog.tail)
+		sk->sk_backlog.head = skb;
+	else
 		sk->sk_backlog.tail->next = skb;
-		sk->sk_backlog.tail = skb;
-	}
+
+	sk->sk_backlog.tail = skb;
 	skb->next = NULL;
 }
 
diff --git a/net/core/dev.c b/net/core/dev.c
index cdcb9cbedf41..6c820650b80f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2052,6 +2052,8 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 		 * waiting to be sent out; and the qdisc is not running -
 		 * xmit the skb directly.
 		 */
+		if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
+			skb_dst_force(skb);
 		__qdisc_update_bstats(q, skb->len);
 		if (sch_direct_xmit(skb, q, dev, txq, root_lock))
 			__qdisc_run(q);
@@ -2060,6 +2062,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
 
 		rc = NET_XMIT_SUCCESS;
 	} else {
+		skb_dst_force(skb);
 		rc = qdisc_enqueue_root(skb, q);
 		qdisc_run(q);
 	}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index a9b0e1f77806..c543dd252433 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -520,7 +520,7 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
 	new->transport_header	= old->transport_header;
 	new->network_header	= old->network_header;
 	new->mac_header		= old->mac_header;
-	skb_dst_set(new, dst_clone(skb_dst(old)));
+	skb_dst_copy(new, old);
 	new->rxhash		= old->rxhash;
 #ifdef CONFIG_XFRM
 	new->sp			= secpath_get(old->sp);
diff --git a/net/core/sock.c b/net/core/sock.c
index 63530a03b8c2..bf88a167c8f2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -307,6 +307,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 	 */
 	skb_len = skb->len;
 
+	/* we escape from rcu protected region, make sure we dont leak
+	 * a norefcounted dst
+	 */
+	skb_dst_force(skb);
+
 	spin_lock_irqsave(&list->lock, flags);
 	skb->dropcount = atomic_read(&sk->sk_drops);
 	__skb_queue_tail(list, skb);
@@ -1536,6 +1541,7 @@ static void __release_sock(struct sock *sk)
 		do {
 			struct sk_buff *next = skb->next;
 
+			WARN_ON_ONCE(skb_dst_is_noref(skb));
 			skb->next = NULL;
 			sk_backlog_rcv(sk, skb);
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index f3d339f728b0..d65e9215bcd7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -587,20 +587,20 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 			err = __ip_route_output_key(net, &rt2, &fl);
 		else {
 			struct flowi fl2 = {};
-			struct dst_entry *odst;
+			unsigned long orefdst;
 
 			fl2.fl4_dst = fl.fl4_src;
 			if (ip_route_output_key(net, &rt2, &fl2))
 				goto relookup_failed;
 
 			/* Ugh! */
-			odst = skb_dst(skb_in);
+			orefdst = skb_in->_skb_refdst; /* save old refdst */
 			err = ip_route_input(skb_in, fl.fl4_dst, fl.fl4_src,
 					     RT_TOS(tos), rt2->u.dst.dev);
 
 			dst_release(&rt2->u.dst);
 			rt2 = skb_rtable(skb_in);
-			skb_dst_set(skb_in, odst);
+			skb_in->_skb_refdst = orefdst; /* restore old refdst */
 		}
 
 		if (err)
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 4c09a31fd140..3244133c24f6 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -601,6 +601,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 	unsigned char *optptr = skb_network_header(skb) + opt->srr;
 	struct rtable *rt = skb_rtable(skb);
 	struct rtable *rt2;
+	unsigned long orefdst;
 	int err;
 
 	if (!opt->srr)
@@ -624,16 +625,16 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 		}
 		memcpy(&nexthop, &optptr[srrptr-1], 4);
 
-		rt = skb_rtable(skb);
+		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
 		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
-			ip_rt_put(rt2);
-			skb_dst_set(skb, &rt->u.dst);
+			skb_dst_drop(skb);
+			skb->_skb_refdst = orefdst;
 			return -EINVAL;
 		}
-		ip_rt_put(rt);
+		refdst_drop(orefdst);
 		if (rt2->rt_type != RTN_LOCAL)
 			break;
 		/* Superfast 8) loopback forward */
diff --git a/net/ipv4/netfilter.c b/net/ipv4/netfilter.c
index 82fb43c5c59e..07de855e2175 100644
--- a/net/ipv4/netfilter.c
+++ b/net/ipv4/netfilter.c
@@ -17,7 +17,7 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 	const struct iphdr *iph = ip_hdr(skb);
 	struct rtable *rt;
 	struct flowi fl = {};
-	struct dst_entry *odst;
+	unsigned long orefdst;
 	unsigned int hh_len;
 	unsigned int type;
 
@@ -51,14 +51,14 @@ int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type)
 		if (ip_route_output_key(net, &rt, &fl) != 0)
 			return -1;
 
-		odst = skb_dst(skb);
+		orefdst = skb->_skb_refdst;
 		if (ip_route_input(skb, iph->daddr, iph->saddr,
 				   RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
 			dst_release(&rt->u.dst);
 			return -1;
 		}
 		dst_release(&rt->u.dst);
-		dst_release(odst);
+		refdst_drop(orefdst);
 	}
 
 	if (skb_dst(skb)->error)
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index dea3f9264250..705eccfb4769 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3033,7 +3033,7 @@ int ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb)
 				continue;
 			if (rt_is_expired(rt))
 				continue;
-			skb_dst_set(skb, dst_clone(&rt->u.dst));
+			skb_dst_set_noref(skb, &rt->u.dst);
 			if (rt_fill_info(net, skb, NETLINK_CB(cb->skb).pid,
 					 cb->nlh->nlmsg_seq, RTM_NEWROUTE,
 					 1, NLM_F_MULTI) <= 0) {
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 0b1103c0b1f3..78b3cf9c519c 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -9,6 +9,7 @@
 #include <linux/rcupdate.h>
 #include <net/protocol.h>
 #include <net/netfilter/nf_queue.h>
+#include <net/dst.h>
 
 #include "nf_internals.h"
 
@@ -170,6 +171,7 @@ static int __nf_queue(struct sk_buff *skb,
 			dev_hold(physoutdev);
 	}
 #endif
+	skb_dst_force(skb);
 	afinfo->saveroute(skb, entry);
 	status = qh->outfn(entry, queuenum);
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a969b111bd76..a63029ef3edd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -26,6 +26,7 @@
 #include <linux/list.h>
 #include <linux/slab.h>
 #include <net/pkt_sched.h>
+#include <net/dst.h>
 
 /* Main transmission queue. */
 
@@ -40,6 +41,7 @@
 
 static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
 {
+	skb_dst_force(skb);
 	q->gso_skb = skb;
 	q->qstats.requeues++;
 	q->q.qlen++;	/* it's still part of the queue */
@@ -179,7 +181,7 @@ static inline int qdisc_restart(struct Qdisc *q)
 	skb = dequeue_skb(q);
 	if (unlikely(!skb))
 		return 0;
-
+	WARN_ON_ONCE(skb_dst_is_noref(skb));
 	root_lock = qdisc_lock(q);
 	dev = qdisc_dev(q);
 	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
-- 
cgit v1.2.3


From 3e4ec3443f70fbe144799ccf0b1c3797f78d1715 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:30 +0200
Subject: libata: kill ATA_FLAG_DISABLED

ATA_FLAG_DISABLED is only used by drivers which don't use
->error_handler framework and is largely broken.  Its only meaningful
function is to make irq handlers skip processing if the flag is set,
which is largely useless and even harmful as it makes those ports more
likely to cause IRQ storms.

Kill ATA_FLAG_DISABLED and make the callers disable attached devices
instead.  ata_port_probe() and ata_port_disable() which manipulate the
flag are also killed.

This simplifies condition check in IRQ handlers.  While updating IRQ
handlers, remove ap NULL check as libata guarantees consecutive port
allocation (unoccupied ports are initialized with dummies) and
long-obsolete ATA_QCFLAG_ACTIVE check (checked by ata_qc_from_tag()).

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c           |  66 +---------
 drivers/ata/libata-scsi.c           |   3 -
 drivers/ata/libata-sff.c            |  10 +-
 drivers/ata/pata_bf54x.c            |  16 +--
 drivers/ata/pata_octeon_cf.c        |   9 +-
 drivers/ata/pdc_adma.c              |  66 +++++-----
 drivers/ata/sata_inic162x.c         |  17 +--
 drivers/ata/sata_mv.c               |  18 +--
 drivers/ata/sata_nv.c               | 236 +++++++++++++++++-------------------
 drivers/ata/sata_promise.c          |   6 +-
 drivers/ata/sata_qstor.c            |  79 ++++++------
 drivers/ata/sata_sil.c              |   3 -
 drivers/ata/sata_sil24.c            |   9 +-
 drivers/ata/sata_sx4.c              |   3 +-
 drivers/ata/sata_vsc.c              |  10 +-
 drivers/scsi/ipr.c                  |   6 +-
 drivers/scsi/libsas/sas_scsi_host.c |   2 +-
 include/linux/libata.h              |   8 --
 18 files changed, 209 insertions(+), 358 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 86f405b4831c..3d8b62f7441c 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -1907,22 +1907,6 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 	ap->qc_active = preempted_qc_active;
 	ap->nr_active_links = preempted_nr_active_links;
 
-	/* XXX - Some LLDDs (sata_mv) disable port on command failure.
-	 * Until those drivers are fixed, we detect the condition
-	 * here, fail the command with AC_ERR_SYSTEM and reenable the
-	 * port.
-	 *
-	 * Note that this doesn't change any behavior as internal
-	 * command failure results in disabling the device in the
-	 * higher layer for LLDDs without new reset/EH callbacks.
-	 *
-	 * Kill the following code as soon as those drivers are fixed.
-	 */
-	if (ap->flags & ATA_FLAG_DISABLED) {
-		err_mask |= AC_ERR_SYSTEM;
-		ata_port_probe(ap);
-	}
-
 	spin_unlock_irqrestore(ap->lock, flags);
 
 	if ((err_mask & AC_ERR_TIMEOUT) && auto_timeout)
@@ -2768,8 +2752,6 @@ int ata_bus_probe(struct ata_port *ap)
 	int rc;
 	struct ata_device *dev;
 
-	ata_port_probe(ap);
-
 	ata_for_each_dev(dev, &ap->link, ALL)
 		tries[dev->devno] = ATA_PROBE_MAX_TRIES;
 
@@ -2797,8 +2779,7 @@ int ata_bus_probe(struct ata_port *ap)
 	ap->ops->phy_reset(ap);
 
 	ata_for_each_dev(dev, &ap->link, ALL) {
-		if (!(ap->flags & ATA_FLAG_DISABLED) &&
-		    dev->class != ATA_DEV_UNKNOWN)
+		if (dev->class != ATA_DEV_UNKNOWN)
 			classes[dev->devno] = dev->class;
 		else
 			classes[dev->devno] = ATA_DEV_NONE;
@@ -2806,8 +2787,6 @@ int ata_bus_probe(struct ata_port *ap)
 		dev->class = ATA_DEV_UNKNOWN;
 	}
 
-	ata_port_probe(ap);
-
 	/* read IDENTIFY page and configure devices. We have to do the identify
 	   specific sequence bass-ackwards so that PDIAG- is released by
 	   the slave device */
@@ -2857,8 +2836,6 @@ int ata_bus_probe(struct ata_port *ap)
 	ata_for_each_dev(dev, &ap->link, ENABLED)
 		return 0;
 
-	/* no device present, disable port */
-	ata_port_disable(ap);
 	return -ENODEV;
 
  fail:
@@ -2889,22 +2866,6 @@ int ata_bus_probe(struct ata_port *ap)
 	goto retry;
 }
 
-/**
- *	ata_port_probe - Mark port as enabled
- *	@ap: Port for which we indicate enablement
- *
- *	Modify @ap data structure such that the system
- *	thinks that the entire port is enabled.
- *
- *	LOCKING: host lock, or some other form of
- *	serialization.
- */
-
-void ata_port_probe(struct ata_port *ap)
-{
-	ap->flags &= ~ATA_FLAG_DISABLED;
-}
-
 /**
  *	sata_print_link_status - Print SATA link status
  *	@link: SATA link to printk link status about
@@ -2951,26 +2912,6 @@ struct ata_device *ata_dev_pair(struct ata_device *adev)
 	return pair;
 }
 
-/**
- *	ata_port_disable - Disable port.
- *	@ap: Port to be disabled.
- *
- *	Modify @ap data structure such that the system
- *	thinks that the entire port is disabled, and should
- *	never attempt to probe or communicate with devices
- *	on this port.
- *
- *	LOCKING: host lock, or some other form of
- *	serialization.
- */
-
-void ata_port_disable(struct ata_port *ap)
-{
-	ap->link.device[0].class = ATA_DEV_NONE;
-	ap->link.device[1].class = ATA_DEV_NONE;
-	ap->flags |= ATA_FLAG_DISABLED;
-}
-
 /**
  *	sata_down_spd_limit - adjust SATA spd limit downward
  *	@link: Link to adjust SATA spd limit for
@@ -5716,7 +5657,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 
 	ap->pflags |= ATA_PFLAG_INITIALIZING;
 	ap->lock = &host->lock;
-	ap->flags = ATA_FLAG_DISABLED;
 	ap->print_id = -1;
 	ap->ctl = ATA_DEVCTL_OBS;
 	ap->host = host;
@@ -6145,8 +6085,6 @@ static void async_port_probe(void *data, async_cookie_t cookie)
 		struct ata_eh_info *ehi = &ap->link.eh_info;
 		unsigned long flags;
 
-		ata_port_probe(ap);
-
 		/* kick EH for boot probing */
 		spin_lock_irqsave(ap->lock, flags);
 
@@ -6823,7 +6761,6 @@ EXPORT_SYMBOL_GPL(ata_port_start);
 EXPORT_SYMBOL_GPL(ata_do_set_mode);
 EXPORT_SYMBOL_GPL(ata_std_qc_defer);
 EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
-EXPORT_SYMBOL_GPL(ata_port_probe);
 EXPORT_SYMBOL_GPL(ata_dev_disable);
 EXPORT_SYMBOL_GPL(sata_set_spd);
 EXPORT_SYMBOL_GPL(ata_wait_after_reset);
@@ -6835,7 +6772,6 @@ EXPORT_SYMBOL_GPL(sata_std_hardreset);
 EXPORT_SYMBOL_GPL(ata_std_postreset);
 EXPORT_SYMBOL_GPL(ata_dev_classify);
 EXPORT_SYMBOL_GPL(ata_dev_pair);
-EXPORT_SYMBOL_GPL(ata_port_disable);
 EXPORT_SYMBOL_GPL(ata_ratelimit);
 EXPORT_SYMBOL_GPL(ata_wait_register);
 EXPORT_SYMBOL_GPL(ata_scsi_queuecmd);
diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 0088cdeb0b1e..cfa9dd3d7253 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -3345,9 +3345,6 @@ void ata_scsi_scan_host(struct ata_port *ap, int sync)
 	struct ata_link *link;
 	struct ata_device *dev;
 
-	if (ap->flags & ATA_FLAG_DISABLED)
-		return;
-
  repeat:
 	ata_for_each_link(link, ap, EDGE) {
 		ata_for_each_dev(dev, link, ENABLED) {
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index d5dd88d6dea0..0e2c972292cf 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1807,9 +1807,6 @@ retry:
 		struct ata_port *ap = host->ports[i];
 		struct ata_queued_cmd *qc;
 
-		if (unlikely(ap->flags & ATA_FLAG_DISABLED))
-			continue;
-
 		qc = ata_qc_from_tag(ap, ap->link.active_tag);
 		if (qc) {
 			if (!(qc->tf.flags & ATA_TFLAG_POLLING))
@@ -1884,11 +1881,8 @@ void ata_sff_lost_interrupt(struct ata_port *ap)
 
 	/* Only one outstanding command per SFF channel */
 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	/* Check we have a live one.. */
-	if (qc == NULL ||  !(qc->flags & ATA_QCFLAG_ACTIVE))
-		return;
-	/* We cannot lose an interrupt on a polled command */
-	if (qc->tf.flags & ATA_TFLAG_POLLING)
+	/* We cannot lose an interrupt on a non-existent or polled command */
+	if (!qc || qc->tf.flags & ATA_TFLAG_POLLING)
 		return;
 	/* See if the controller thinks it is still busy - if so the command
 	   isn't a lost IRQ but is still in progress */
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 07c110470e25..e9bf44cf4142 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1400,18 +1400,12 @@ static irqreturn_t bfin_ata_interrupt(int irq, void *dev_instance)
 	spin_lock_irqsave(&host->lock, flags);
 
 	for (i = 0; i < host->n_ports; i++) {
-		struct ata_port *ap;
+		struct ata_port *ap = host->ports[i];
+		struct ata_queued_cmd *qc;
 
-		ap = host->ports[i];
-		if (ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
-			struct ata_queued_cmd *qc;
-
-			qc = ata_qc_from_tag(ap, ap->link.active_tag);
-			if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) &&
-			    (qc->flags & ATA_QCFLAG_ACTIVE))
-				handled |= bfin_ata_host_intr(ap, qc);
-		}
+		qc = ata_qc_from_tag(ap, ap->link.active_tag);
+		if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)))
+			handled |= bfin_ata_host_intr(ap, qc);
 	}
 
 	spin_unlock_irqrestore(&host->lock, flags);
diff --git a/drivers/ata/pata_octeon_cf.c b/drivers/ata/pata_octeon_cf.c
index 303ca7e82408..3001109352ea 100644
--- a/drivers/ata/pata_octeon_cf.c
+++ b/drivers/ata/pata_octeon_cf.c
@@ -654,9 +654,6 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
 		ap = host->ports[i];
 		ocd = ap->dev->platform_data;
 
-		if (ap->flags & ATA_FLAG_DISABLED)
-			continue;
-
 		ocd = ap->dev->platform_data;
 		cf_port = ap->private_data;
 		dma_int.u64 =
@@ -666,8 +663,7 @@ static irqreturn_t octeon_cf_interrupt(int irq, void *dev_instance)
 
 		qc = ata_qc_from_tag(ap, ap->link.active_tag);
 
-		if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) &&
-		    (qc->flags & ATA_QCFLAG_ACTIVE)) {
+		if (qc && !(qc->tf.flags & ATA_TFLAG_POLLING)) {
 			if (dma_int.s.done && !dma_cfg.s.en) {
 				if (!sg_is_last(qc->cursg)) {
 					qc->cursg = sg_next(qc->cursg);
@@ -737,8 +733,7 @@ static void octeon_cf_delayed_finish(struct work_struct *work)
 		goto out;
 	}
 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)) &&
-	    (qc->flags & ATA_QCFLAG_ACTIVE))
+	if (qc && !(qc->tf.flags & ATA_TFLAG_POLLING))
 		octeon_cf_dma_finished(ap, qc);
 out:
 	spin_unlock_irqrestore(&host->lock, flags);
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index 5904cfdb8dbe..ed18d8b42c52 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -442,8 +442,6 @@ static inline unsigned int adma_intr_pkt(struct ata_host *host)
 			continue;
 		handled = 1;
 		adma_enter_reg_mode(ap);
-		if (ap->flags & ATA_FLAG_DISABLED)
-			continue;
 		pp = ap->private_data;
 		if (!pp || pp->state != adma_state_pkt)
 			continue;
@@ -484,42 +482,38 @@ static inline unsigned int adma_intr_mmio(struct ata_host *host)
 	unsigned int handled = 0, port_no;
 
 	for (port_no = 0; port_no < host->n_ports; ++port_no) {
-		struct ata_port *ap;
-		ap = host->ports[port_no];
-		if (ap && (!(ap->flags & ATA_FLAG_DISABLED))) {
-			struct ata_queued_cmd *qc;
-			struct adma_port_priv *pp = ap->private_data;
-			if (!pp || pp->state != adma_state_mmio)
+		struct ata_port *ap = host->ports[port_no];
+		struct adma_port_priv *pp = ap->private_data;
+		struct ata_queued_cmd *qc;
+
+		if (!pp || pp->state != adma_state_mmio)
+			continue;
+		qc = ata_qc_from_tag(ap, ap->link.active_tag);
+		if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING))) {
+
+			/* check main status, clearing INTRQ */
+			u8 status = ata_sff_check_status(ap);
+			if ((status & ATA_BUSY))
 				continue;
-			qc = ata_qc_from_tag(ap, ap->link.active_tag);
-			if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING))) {
-
-				/* check main status, clearing INTRQ */
-				u8 status = ata_sff_check_status(ap);
-				if ((status & ATA_BUSY))
-					continue;
-				DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n",
-					ap->print_id, qc->tf.protocol, status);
-
-				/* complete taskfile transaction */
-				pp->state = adma_state_idle;
-				qc->err_mask |= ac_err_mask(status);
-				if (!qc->err_mask)
-					ata_qc_complete(qc);
-				else {
-					struct ata_eh_info *ehi =
-						&ap->link.eh_info;
-					ata_ehi_clear_desc(ehi);
-					ata_ehi_push_desc(ehi,
-						"status 0x%02X", status);
-
-					if (qc->err_mask == AC_ERR_DEV)
-						ata_port_abort(ap);
-					else
-						ata_port_freeze(ap);
-				}
-				handled = 1;
+			DPRINTK("ata%u: protocol %d (dev_stat 0x%X)\n",
+				ap->print_id, qc->tf.protocol, status);
+
+			/* complete taskfile transaction */
+			pp->state = adma_state_idle;
+			qc->err_mask |= ac_err_mask(status);
+			if (!qc->err_mask)
+				ata_qc_complete(qc);
+			else {
+				struct ata_eh_info *ehi = &ap->link.eh_info;
+				ata_ehi_clear_desc(ehi);
+				ata_ehi_push_desc(ehi, "status 0x%02X", status);
+
+				if (qc->err_mask == AC_ERR_DEV)
+					ata_port_abort(ap);
+				else
+					ata_port_freeze(ap);
 			}
+			handled = 1;
 		}
 	}
 	return handled;
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index 27dc6c86a4cd..0ac6cdd9d1e8 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -415,22 +415,11 @@ static irqreturn_t inic_interrupt(int irq, void *dev_instance)
 
 	spin_lock(&host->lock);
 
-	for (i = 0; i < NR_PORTS; i++) {
-		struct ata_port *ap = host->ports[i];
-
-		if (!(host_irq_stat & (HIRQ_PORT0 << i)))
-			continue;
-
-		if (likely(ap && !(ap->flags & ATA_FLAG_DISABLED))) {
-			inic_host_intr(ap);
+	for (i = 0; i < NR_PORTS; i++)
+		if (host_irq_stat & (HIRQ_PORT0 << i)) {
+			inic_host_intr(host->ports[i]);
 			handled++;
-		} else {
-			if (ata_ratelimit())
-				dev_printk(KERN_ERR, host->dev, "interrupt "
-					   "from disabled port %d (0x%x)\n",
-					   i, host_irq_stat);
 		}
-	}
 
 	spin_unlock(&host->lock);
 
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index bd486462951c..318862e8a8a9 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2355,13 +2355,9 @@ static struct ata_queued_cmd *mv_get_active_qc(struct ata_port *ap)
 	if (pp->pp_flags & MV_PP_FLAG_NCQ_EN)
 		return NULL;
 	qc = ata_qc_from_tag(ap, ap->link.active_tag);
-	if (qc) {
-		if (qc->tf.flags & ATA_TFLAG_POLLING)
-			qc = NULL;
-		else if (!(qc->flags & ATA_QCFLAG_ACTIVE))
-			qc = NULL;
-	}
-	return qc;
+	if (qc && !(qc->tf.flags & ATA_TFLAG_POLLING))
+		return qc;
+	return NULL;
 }
 
 static void mv_pmp_error_handler(struct ata_port *ap)
@@ -2546,9 +2542,7 @@ static void mv_unexpected_intr(struct ata_port *ap, int edma_was_enabled)
 	char *when = "idle";
 
 	ata_ehi_clear_desc(ehi);
-	if (ap->flags & ATA_FLAG_DISABLED) {
-		when = "disabled";
-	} else if (edma_was_enabled) {
+	if (edma_was_enabled) {
 		when = "EDMA enabled";
 	} else {
 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
@@ -2782,10 +2776,6 @@ static void mv_port_intr(struct ata_port *ap, u32 port_cause)
 	struct mv_port_priv *pp;
 	int edma_was_enabled;
 
-	if (ap->flags & ATA_FLAG_DISABLED) {
-		mv_unexpected_intr(ap, 0);
-		return;
-	}
 	/*
 	 * Grab a snapshot of the EDMA_EN flag setting,
 	 * so that we have a consistent view for this port,
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 9d3dbc8a1184..20cc510595db 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -933,107 +933,108 @@ static irqreturn_t nv_adma_interrupt(int irq, void *dev_instance)
 
 	for (i = 0; i < host->n_ports; i++) {
 		struct ata_port *ap = host->ports[i];
+		struct nv_adma_port_priv *pp = ap->private_data;
+		void __iomem *mmio = pp->ctl_block;
+		u16 status;
+		u32 gen_ctl;
+		u32 notifier, notifier_error;
+
 		notifier_clears[i] = 0;
 
-		if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
-			struct nv_adma_port_priv *pp = ap->private_data;
-			void __iomem *mmio = pp->ctl_block;
-			u16 status;
-			u32 gen_ctl;
-			u32 notifier, notifier_error;
-
-			/* if ADMA is disabled, use standard ata interrupt handler */
-			if (pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) {
-				u8 irq_stat = readb(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_CK804)
-					>> (NV_INT_PORT_SHIFT * i);
-				handled += nv_host_intr(ap, irq_stat);
-				continue;
-			}
+		/* if ADMA is disabled, use standard ata interrupt handler */
+		if (pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) {
+			u8 irq_stat = readb(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_CK804)
+				>> (NV_INT_PORT_SHIFT * i);
+			handled += nv_host_intr(ap, irq_stat);
+			continue;
+		}
 
-			/* if in ATA register mode, check for standard interrupts */
-			if (pp->flags & NV_ADMA_PORT_REGISTER_MODE) {
-				u8 irq_stat = readb(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_CK804)
-					>> (NV_INT_PORT_SHIFT * i);
-				if (ata_tag_valid(ap->link.active_tag))
-					/** NV_INT_DEV indication seems unreliable at times
-					    at least in ADMA mode. Force it on always when a
-					    command is active, to prevent losing interrupts. */
-					irq_stat |= NV_INT_DEV;
-				handled += nv_host_intr(ap, irq_stat);
-			}
+		/* if in ATA register mode, check for standard interrupts */
+		if (pp->flags & NV_ADMA_PORT_REGISTER_MODE) {
+			u8 irq_stat = readb(host->iomap[NV_MMIO_BAR] + NV_INT_STATUS_CK804)
+				>> (NV_INT_PORT_SHIFT * i);
+			if (ata_tag_valid(ap->link.active_tag))
+				/** NV_INT_DEV indication seems unreliable
+				    at times at least in ADMA mode. Force it
+				    on always when a command is active, to
+				    prevent losing interrupts. */
+				irq_stat |= NV_INT_DEV;
+			handled += nv_host_intr(ap, irq_stat);
+		}
+
+		notifier = readl(mmio + NV_ADMA_NOTIFIER);
+		notifier_error = readl(mmio + NV_ADMA_NOTIFIER_ERROR);
+		notifier_clears[i] = notifier | notifier_error;
+
+		gen_ctl = readl(pp->gen_block + NV_ADMA_GEN_CTL);
+
+		if (!NV_ADMA_CHECK_INTR(gen_ctl, ap->port_no) && !notifier &&
+		    !notifier_error)
+			/* Nothing to do */
+			continue;
+
+		status = readw(mmio + NV_ADMA_STAT);
+
+		/*
+		 * Clear status. Ensure the controller sees the
+		 * clearing before we start looking at any of the CPB
+		 * statuses, so that any CPB completions after this
+		 * point in the handler will raise another interrupt.
+		 */
+		writew(status, mmio + NV_ADMA_STAT);
+		readw(mmio + NV_ADMA_STAT); /* flush posted write */
+		rmb();
+
+		handled++; /* irq handled if we got here */
 
-			notifier = readl(mmio + NV_ADMA_NOTIFIER);
-			notifier_error = readl(mmio + NV_ADMA_NOTIFIER_ERROR);
-			notifier_clears[i] = notifier | notifier_error;
-
-			gen_ctl = readl(pp->gen_block + NV_ADMA_GEN_CTL);
-
-			if (!NV_ADMA_CHECK_INTR(gen_ctl, ap->port_no) && !notifier &&
-			    !notifier_error)
-				/* Nothing to do */
-				continue;
-
-			status = readw(mmio + NV_ADMA_STAT);
-
-			/* Clear status. Ensure the controller sees the clearing before we start
-			   looking at any of the CPB statuses, so that any CPB completions after
-			   this point in the handler will raise another interrupt. */
-			writew(status, mmio + NV_ADMA_STAT);
-			readw(mmio + NV_ADMA_STAT); /* flush posted write */
-			rmb();
-
-			handled++; /* irq handled if we got here */
-
-			/* freeze if hotplugged or controller error */
-			if (unlikely(status & (NV_ADMA_STAT_HOTPLUG |
-					       NV_ADMA_STAT_HOTUNPLUG |
-					       NV_ADMA_STAT_TIMEOUT |
-					       NV_ADMA_STAT_SERROR))) {
-				struct ata_eh_info *ehi = &ap->link.eh_info;
-
-				ata_ehi_clear_desc(ehi);
-				__ata_ehi_push_desc(ehi, "ADMA status 0x%08x: ", status);
-				if (status & NV_ADMA_STAT_TIMEOUT) {
-					ehi->err_mask |= AC_ERR_SYSTEM;
-					ata_ehi_push_desc(ehi, "timeout");
-				} else if (status & NV_ADMA_STAT_HOTPLUG) {
-					ata_ehi_hotplugged(ehi);
-					ata_ehi_push_desc(ehi, "hotplug");
-				} else if (status & NV_ADMA_STAT_HOTUNPLUG) {
-					ata_ehi_hotplugged(ehi);
-					ata_ehi_push_desc(ehi, "hot unplug");
-				} else if (status & NV_ADMA_STAT_SERROR) {
-					/* let libata analyze SError and figure out the cause */
-					ata_ehi_push_desc(ehi, "SError");
-				} else
-					ata_ehi_push_desc(ehi, "unknown");
-				ata_port_freeze(ap);
-				continue;
+		/* freeze if hotplugged or controller error */
+		if (unlikely(status & (NV_ADMA_STAT_HOTPLUG |
+				       NV_ADMA_STAT_HOTUNPLUG |
+				       NV_ADMA_STAT_TIMEOUT |
+				       NV_ADMA_STAT_SERROR))) {
+			struct ata_eh_info *ehi = &ap->link.eh_info;
+
+			ata_ehi_clear_desc(ehi);
+			__ata_ehi_push_desc(ehi, "ADMA status 0x%08x: ", status);
+			if (status & NV_ADMA_STAT_TIMEOUT) {
+				ehi->err_mask |= AC_ERR_SYSTEM;
+				ata_ehi_push_desc(ehi, "timeout");
+			} else if (status & NV_ADMA_STAT_HOTPLUG) {
+				ata_ehi_hotplugged(ehi);
+				ata_ehi_push_desc(ehi, "hotplug");
+			} else if (status & NV_ADMA_STAT_HOTUNPLUG) {
+				ata_ehi_hotplugged(ehi);
+				ata_ehi_push_desc(ehi, "hot unplug");
+			} else if (status & NV_ADMA_STAT_SERROR) {
+				/* let EH analyze SError and figure out cause */
+				ata_ehi_push_desc(ehi, "SError");
+			} else
+				ata_ehi_push_desc(ehi, "unknown");
+			ata_port_freeze(ap);
+			continue;
+		}
+
+		if (status & (NV_ADMA_STAT_DONE |
+			      NV_ADMA_STAT_CPBERR |
+			      NV_ADMA_STAT_CMD_COMPLETE)) {
+			u32 check_commands = notifier_clears[i];
+			int pos, error = 0;
+
+			if (status & NV_ADMA_STAT_CPBERR) {
+				/* check all active commands */
+				if (ata_tag_valid(ap->link.active_tag))
+					check_commands = 1 <<
+						ap->link.active_tag;
+				else
+					check_commands = ap->link.sactive;
 			}
 
-			if (status & (NV_ADMA_STAT_DONE |
-				      NV_ADMA_STAT_CPBERR |
-				      NV_ADMA_STAT_CMD_COMPLETE)) {
-				u32 check_commands = notifier_clears[i];
-				int pos, error = 0;
-
-				if (status & NV_ADMA_STAT_CPBERR) {
-					/* Check all active commands */
-					if (ata_tag_valid(ap->link.active_tag))
-						check_commands = 1 <<
-							ap->link.active_tag;
-					else
-						check_commands = ap->
-							link.sactive;
-				}
-
-				/** Check CPBs for completed commands */
-				while ((pos = ffs(check_commands)) && !error) {
-					pos--;
-					error = nv_adma_check_cpb(ap, pos,
+			/* check CPBs for completed commands */
+			while ((pos = ffs(check_commands)) && !error) {
+				pos--;
+				error = nv_adma_check_cpb(ap, pos,
 						notifier_error & (1 << pos));
-					check_commands &= ~(1 << pos);
-				}
+				check_commands &= ~(1 << pos);
 			}
 		}
 	}
@@ -1498,22 +1499,19 @@ static irqreturn_t nv_generic_interrupt(int irq, void *dev_instance)
 	spin_lock_irqsave(&host->lock, flags);
 
 	for (i = 0; i < host->n_ports; i++) {
-		struct ata_port *ap;
-
-		ap = host->ports[i];
-		if (ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
-			struct ata_queued_cmd *qc;
+		struct ata_port *ap = host->ports[i];
+		struct ata_queued_cmd *qc;
 
-			qc = ata_qc_from_tag(ap, ap->link.active_tag);
-			if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING)))
-				handled += ata_sff_host_intr(ap, qc);
-			else
-				// No request pending?  Clear interrupt status
-				// anyway, in case there's one pending.
-				ap->ops->sff_check_status(ap);
+		qc = ata_qc_from_tag(ap, ap->link.active_tag);
+		if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING))) {
+			handled += ata_sff_host_intr(ap, qc);
+		} else {
+			/*
+			 * No request pending?  Clear interrupt status
+			 * anyway, in case there's one pending.
+			 */
+			ap->ops->sff_check_status(ap);
 		}
-
 	}
 
 	spin_unlock_irqrestore(&host->lock, flags);
@@ -1526,11 +1524,7 @@ static irqreturn_t nv_do_interrupt(struct ata_host *host, u8 irq_stat)
 	int i, handled = 0;
 
 	for (i = 0; i < host->n_ports; i++) {
-		struct ata_port *ap = host->ports[i];
-
-		if (ap && !(ap->flags & ATA_FLAG_DISABLED))
-			handled += nv_host_intr(ap, irq_stat);
-
+		handled += nv_host_intr(host->ports[i], irq_stat);
 		irq_stat >>= NV_INT_PORT_SHIFT;
 	}
 
@@ -2380,16 +2374,14 @@ static irqreturn_t nv_swncq_interrupt(int irq, void *dev_instance)
 	for (i = 0; i < host->n_ports; i++) {
 		struct ata_port *ap = host->ports[i];
 
-		if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
-			if (ap->link.sactive) {
-				nv_swncq_host_interrupt(ap, (u16)irq_stat);
-				handled = 1;
-			} else {
-				if (irq_stat)	/* reserve Hotplug */
-					nv_swncq_irq_clear(ap, 0xfff0);
+		if (ap->link.sactive) {
+			nv_swncq_host_interrupt(ap, (u16)irq_stat);
+			handled = 1;
+		} else {
+			if (irq_stat)	/* reserve Hotplug */
+				nv_swncq_irq_clear(ap, 0xfff0);
 
-				handled += nv_host_intr(ap, (u8)irq_stat);
-			}
+			handled += nv_host_intr(ap, (u8)irq_stat);
 		}
 		irq_stat >>= NV_INT_PORT_SHIFT_MCP55;
 	}
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 5356ec00d2b4..2c029ea12c54 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -984,8 +984,7 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance)
 		/* check for a plug or unplug event */
 		ata_no = pdc_port_no_to_ata_no(i, is_sataii_tx4);
 		tmp = hotplug_status & (0x11 << ata_no);
-		if (tmp && ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
+		if (tmp) {
 			struct ata_eh_info *ehi = &ap->link.eh_info;
 			ata_ehi_clear_desc(ehi);
 			ata_ehi_hotplugged(ehi);
@@ -997,8 +996,7 @@ static irqreturn_t pdc_interrupt(int irq, void *dev_instance)
 
 		/* check for a packet interrupt */
 		tmp = mask & (1 << (i + 1));
-		if (tmp && ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
+		if (tmp) {
 			struct ata_queued_cmd *qc;
 
 			qc = ata_qc_from_tag(ap, ap->link.active_tag);
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index 92ba45e6689b..febc6e748420 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -404,26 +404,24 @@ static inline unsigned int qs_intr_pkt(struct ata_host *host)
 			u8 sHST = sff1 & 0x3f;	/* host status */
 			unsigned int port_no = (sff1 >> 8) & 0x03;
 			struct ata_port *ap = host->ports[port_no];
+			struct qs_port_priv *pp = ap->private_data;
+			struct ata_queued_cmd *qc;
 
 			DPRINTK("SFF=%08x%08x: sCHAN=%u sHST=%d sDST=%02x\n",
 					sff1, sff0, port_no, sHST, sDST);
 			handled = 1;
-			if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
-				struct ata_queued_cmd *qc;
-				struct qs_port_priv *pp = ap->private_data;
-				if (!pp || pp->state != qs_state_pkt)
-					continue;
-				qc = ata_qc_from_tag(ap, ap->link.active_tag);
-				if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING))) {
-					switch (sHST) {
-					case 0: /* successful CPB */
-					case 3: /* device error */
-						qs_enter_reg_mode(qc->ap);
-						qs_do_or_die(qc, sDST);
-						break;
-					default:
-						break;
-					}
+			if (!pp || pp->state != qs_state_pkt)
+				continue;
+			qc = ata_qc_from_tag(ap, ap->link.active_tag);
+			if (qc && (!(qc->tf.flags & ATA_TFLAG_POLLING))) {
+				switch (sHST) {
+				case 0: /* successful CPB */
+				case 3: /* device error */
+					qs_enter_reg_mode(qc->ap);
+					qs_do_or_die(qc, sDST);
+					break;
+				default:
+					break;
 				}
 			}
 		}
@@ -436,33 +434,30 @@ static inline unsigned int qs_intr_mmio(struct ata_host *host)
 	unsigned int handled = 0, port_no;
 
 	for (port_no = 0; port_no < host->n_ports; ++port_no) {
-		struct ata_port *ap;
-		ap = host->ports[port_no];
-		if (ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
-			struct ata_queued_cmd *qc;
-			struct qs_port_priv *pp;
-			qc = ata_qc_from_tag(ap, ap->link.active_tag);
-			if (!qc || !(qc->flags & ATA_QCFLAG_ACTIVE)) {
-				/*
-				 * The qstor hardware generates spurious
-				 * interrupts from time to time when switching
-				 * in and out of packet mode.
-				 * There's no obvious way to know if we're
-				 * here now due to that, so just ack the irq
-				 * and pretend we knew it was ours.. (ugh).
-				 * This does not affect packet mode.
-				 */
-				ata_sff_check_status(ap);
-				handled = 1;
-				continue;
-			}
-			pp = ap->private_data;
-			if (!pp || pp->state != qs_state_mmio)
-				continue;
-			if (!(qc->tf.flags & ATA_TFLAG_POLLING))
-				handled |= ata_sff_host_intr(ap, qc);
+		struct ata_port *ap = host->ports[port_no];
+		struct qs_port_priv *pp = ap->private_data;
+		struct ata_queued_cmd *qc;
+
+		qc = ata_qc_from_tag(ap, ap->link.active_tag);
+		if (!qc) {
+			/*
+			 * The qstor hardware generates spurious
+			 * interrupts from time to time when switching
+			 * in and out of packet mode.  There's no
+			 * obvious way to know if we're here now due
+			 * to that, so just ack the irq and pretend we
+			 * knew it was ours.. (ugh).  This does not
+			 * affect packet mode.
+			 */
+			ata_sff_check_status(ap);
+			handled = 1;
+			continue;
 		}
+
+		if (!pp || pp->state != qs_state_mmio)
+			continue;
+		if (!(qc->tf.flags & ATA_TFLAG_POLLING))
+			handled |= ata_sff_host_intr(ap, qc);
 	}
 	return handled;
 }
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 3cb69d5fb817..9c367f71c627 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -532,9 +532,6 @@ static irqreturn_t sil_interrupt(int irq, void *dev_instance)
 		struct ata_port *ap = host->ports[i];
 		u32 bmdma2 = readl(mmio_base + sil_port[ap->port_no].bmdma2);
 
-		if (unlikely(ap->flags & ATA_FLAG_DISABLED))
-			continue;
-
 		/* turn off SATA_IRQ if not supported */
 		if (ap->flags & SIL_FLAG_NO_SATA_IRQ)
 			bmdma2 &= ~SIL_DMA_SATA_IRQ;
diff --git a/drivers/ata/sata_sil24.c b/drivers/ata/sata_sil24.c
index 433b6b89c795..e9250514734b 100644
--- a/drivers/ata/sata_sil24.c
+++ b/drivers/ata/sata_sil24.c
@@ -1160,13 +1160,8 @@ static irqreturn_t sil24_interrupt(int irq, void *dev_instance)
 
 	for (i = 0; i < host->n_ports; i++)
 		if (status & (1 << i)) {
-			struct ata_port *ap = host->ports[i];
-			if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
-				sil24_host_intr(ap);
-				handled++;
-			} else
-				printk(KERN_ERR DRV_NAME
-				       ": interrupt from disabled port %d\n", i);
+			sil24_host_intr(host->ports[i]);
+			handled++;
 		}
 
 	spin_unlock(&host->lock);
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 232468f2ea90..7e3a25ec93b1 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -840,8 +840,7 @@ static irqreturn_t pdc20621_interrupt(int irq, void *dev_instance)
 			ap = host->ports[port_no];
 		tmp = mask & (1 << i);
 		VPRINTK("seq %u, port_no %u, ap %p, tmp %x\n", i, port_no, ap, tmp);
-		if (tmp && ap &&
-		    !(ap->flags & ATA_FLAG_DISABLED)) {
+		if (tmp && ap) {
 			struct ata_queued_cmd *qc;
 
 			qc = ata_qc_from_tag(ap, ap->link.active_tag);
diff --git a/drivers/ata/sata_vsc.c b/drivers/ata/sata_vsc.c
index 8b2a278b2547..2107952ebff1 100644
--- a/drivers/ata/sata_vsc.c
+++ b/drivers/ata/sata_vsc.c
@@ -284,14 +284,8 @@ static irqreturn_t vsc_sata_interrupt(int irq, void *dev_instance)
 	for (i = 0; i < host->n_ports; i++) {
 		u8 port_status = (status >> (8 * i)) & 0xff;
 		if (port_status) {
-			struct ata_port *ap = host->ports[i];
-
-			if (ap && !(ap->flags & ATA_FLAG_DISABLED)) {
-				vsc_port_intr(port_status, ap);
-				handled++;
-			} else
-				dev_printk(KERN_ERR, host->dev,
-					"interrupt from disabled port %d\n", i);
+			vsc_port_intr(port_status, host->ports[i]);
+			handled++;
 		}
 	}
 
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index 520461b9bc09..b90c118119d7 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -4295,7 +4295,7 @@ static void ipr_slave_destroy(struct scsi_device *sdev)
 	res = (struct ipr_resource_entry *) sdev->hostdata;
 	if (res) {
 		if (res->sata_port)
-			ata_port_disable(res->sata_port->ap);
+			res->sata_port->ap->link.device[0].class = ATA_DEV_NONE;
 		sdev->hostdata = NULL;
 		res->sdev = NULL;
 		res->sata_port = NULL;
@@ -5751,13 +5751,13 @@ static void ipr_ata_phy_reset(struct ata_port *ap)
 	rc = ipr_device_reset(ioa_cfg, res);
 
 	if (rc) {
-		ata_port_disable(ap);
+		ap->link.device[0].class = ATA_DEV_NONE;
 		goto out_unlock;
 	}
 
 	ap->link.device[0].class = res->ata_class;
 	if (ap->link.device[0].class == ATA_DEV_UNKNOWN)
-		ata_port_disable(ap);
+		ap->link.device[0].class = ATA_DEV_NONE;
 
 out_unlock:
 	spin_unlock_irqrestore(ioa_cfg->host->host_lock, flags);
diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c
index 822835055cef..b71b6d41baa1 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -818,7 +818,7 @@ void sas_slave_destroy(struct scsi_device *scsi_dev)
 	struct domain_device *dev = sdev_to_domain_dev(scsi_dev);
 
 	if (dev_is_sata(dev))
-		ata_port_disable(dev->sata_dev.ap);
+		dev->sata_dev.ap->link.device[0].class = ATA_DEV_NONE;
 }
 
 int sas_change_queue_depth(struct scsi_device *scsi_dev, int new_depth,
diff --git a/include/linux/libata.h b/include/linux/libata.h
index af700923a393..bd1b2b49e121 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -202,12 +202,6 @@ enum {
 	ATA_FLAG_SW_ACTIVITY	= (1 << 22), /* driver supports sw activity
 					      * led */
 
-	/* The following flag belongs to ap->pflags but is kept in
-	 * ap->flags because it's referenced in many LLDs and will be
-	 * removed in not-too-distant future.
-	 */
-	ATA_FLAG_DISABLED	= (1 << 23), /* port is disabled, ignore it */
-
 	/* bits 24:31 of ap->flags are reserved for LLD specific flags */
 
 
@@ -937,7 +931,6 @@ static inline int ata_port_is_dummy(struct ata_port *ap)
 	return ap->ops == &ata_dummy_port_ops;
 }
 
-extern void ata_port_probe(struct ata_port *);
 extern int sata_set_spd(struct ata_link *link);
 extern int ata_std_prereset(struct ata_link *link, unsigned long deadline);
 extern int ata_wait_after_reset(struct ata_link *link, unsigned long deadline,
@@ -952,7 +945,6 @@ extern int sata_link_hardreset(struct ata_link *link,
 extern int sata_std_hardreset(struct ata_link *link, unsigned int *class,
 			      unsigned long deadline);
 extern void ata_std_postreset(struct ata_link *link, unsigned int *classes);
-extern void ata_port_disable(struct ata_port *);
 
 extern struct ata_host *ata_host_alloc(struct device *dev, int max_ports);
 extern struct ata_host *ata_host_alloc_pinfo(struct device *dev,
-- 
cgit v1.2.3


From 9f2f72107ff621fdf3066e5a1b5ecb03ee587ebc Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:32 +0200
Subject: libata-sff: reorder SFF/BMDMA functions

Reorder functions such that SFF and BMDMA functions are grouped.
While at it, s/BMDMA/SFF in a few comments where it actually meant
SFF.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c | 448 ++++++++++++++++++++++++-----------------------
 include/linux/libata.h   |  21 ++-
 2 files changed, 238 insertions(+), 231 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 0e2c972292cf..6400e8751391 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -71,26 +71,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 };
 EXPORT_SYMBOL_GPL(ata_sff_port_ops);
 
-const struct ata_port_operations ata_bmdma_port_ops = {
-	.inherits		= &ata_sff_port_ops,
-
-	.mode_filter		= ata_bmdma_mode_filter,
-
-	.bmdma_setup		= ata_bmdma_setup,
-	.bmdma_start		= ata_bmdma_start,
-	.bmdma_stop		= ata_bmdma_stop,
-	.bmdma_status		= ata_bmdma_status,
-};
-EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
-
-const struct ata_port_operations ata_bmdma32_port_ops = {
-	.inherits		= &ata_bmdma_port_ops,
-
-	.sff_data_xfer		= ata_sff_data_xfer32,
-	.port_start		= ata_sff_port_start32,
-};
-EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
-
 /**
  *	ata_fill_sg - Fill PCI IDE PRD table
  *	@qc: Metadata associated with taskfile to be transferred
@@ -1750,7 +1730,7 @@ unsigned int ata_sff_host_intr(struct ata_port *ap,
 			goto idle_irq;
 	}
 
-	/* ack bmdma irq events */
+	/* clear irq events */
 	ap->ops->sff_irq_clear(ap);
 
 	ata_sff_hsm_move(ap, qc, status, 0);
@@ -1904,7 +1884,7 @@ EXPORT_SYMBOL_GPL(ata_sff_lost_interrupt);
  *	ata_sff_freeze - Freeze SFF controller port
  *	@ap: port to freeze
  *
- *	Freeze BMDMA controller port.
+ *	Freeze SFF controller port.
  *
  *	LOCKING:
  *	Inherited from caller.
@@ -2533,208 +2513,8 @@ void ata_sff_std_ports(struct ata_ioports *ioaddr)
 }
 EXPORT_SYMBOL_GPL(ata_sff_std_ports);
 
-unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
-				    unsigned long xfer_mask)
-{
-	/* Filter out DMA modes if the device has been configured by
-	   the BIOS as PIO only */
-
-	if (adev->link->ap->ioaddr.bmdma_addr == NULL)
-		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
-	return xfer_mask;
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
-
-/**
- *	ata_bmdma_setup - Set up PCI IDE BMDMA transaction
- *	@qc: Info associated with this ATA transaction.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_bmdma_setup(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
-	u8 dmactl;
-
-	/* load PRD table addr. */
-	mb();	/* make sure PRD table writes are visible to controller */
-	iowrite32(ap->prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
-
-	/* specify data direction, triple-check start bit is clear */
-	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
-	dmactl &= ~(ATA_DMA_WR | ATA_DMA_START);
-	if (!rw)
-		dmactl |= ATA_DMA_WR;
-	iowrite8(dmactl, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
-
-	/* issue r/w command */
-	ap->ops->sff_exec_command(ap, &qc->tf);
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_setup);
-
-/**
- *	ata_bmdma_start - Start a PCI IDE BMDMA transaction
- *	@qc: Info associated with this ATA transaction.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_bmdma_start(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	u8 dmactl;
-
-	/* start host DMA transaction */
-	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
-	iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
-
-	/* Strictly, one may wish to issue an ioread8() here, to
-	 * flush the mmio write.  However, control also passes
-	 * to the hardware at this point, and it will interrupt
-	 * us when we are to resume control.  So, in effect,
-	 * we don't care when the mmio write flushes.
-	 * Further, a read of the DMA status register _immediately_
-	 * following the write may not be what certain flaky hardware
-	 * is expected, so I think it is best to not add a readb()
-	 * without first all the MMIO ATA cards/mobos.
-	 * Or maybe I'm just being paranoid.
-	 *
-	 * FIXME: The posting of this write means I/O starts are
-	 * unneccessarily delayed for MMIO
-	 */
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_start);
-
-/**
- *	ata_bmdma_stop - Stop PCI IDE BMDMA transfer
- *	@qc: Command we are ending DMA for
- *
- *	Clears the ATA_DMA_START flag in the dma control register
- *
- *	May be used as the bmdma_stop() entry in ata_port_operations.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_bmdma_stop(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	void __iomem *mmio = ap->ioaddr.bmdma_addr;
-
-	/* clear start/stop bit */
-	iowrite8(ioread8(mmio + ATA_DMA_CMD) & ~ATA_DMA_START,
-		 mmio + ATA_DMA_CMD);
-
-	/* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
-	ata_sff_dma_pause(ap);
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_stop);
-
-/**
- *	ata_bmdma_status - Read PCI IDE BMDMA status
- *	@ap: Port associated with this ATA transaction.
- *
- *	Read and return BMDMA status register.
- *
- *	May be used as the bmdma_status() entry in ata_port_operations.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-u8 ata_bmdma_status(struct ata_port *ap)
-{
-	return ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_status);
-
 #ifdef CONFIG_PCI
 
-/**
- *	ata_pci_bmdma_clear_simplex -	attempt to kick device out of simplex
- *	@pdev: PCI device
- *
- *	Some PCI ATA devices report simplex mode but in fact can be told to
- *	enter non simplex mode. This implements the necessary logic to
- *	perform the task on such devices. Calling it on other devices will
- *	have -undefined- behaviour.
- */
-int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev)
-{
-	unsigned long bmdma = pci_resource_start(pdev, 4);
-	u8 simplex;
-
-	if (bmdma == 0)
-		return -ENOENT;
-
-	simplex = inb(bmdma + 0x02);
-	outb(simplex & 0x60, bmdma + 0x02);
-	simplex = inb(bmdma + 0x02);
-	if (simplex & 0x80)
-		return -EOPNOTSUPP;
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
-
-/**
- *	ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host
- *	@host: target ATA host
- *
- *	Acquire PCI BMDMA resources and initialize @host accordingly.
- *
- *	LOCKING:
- *	Inherited from calling layer (may sleep).
- *
- *	RETURNS:
- *	0 on success, -errno otherwise.
- */
-int ata_pci_bmdma_init(struct ata_host *host)
-{
-	struct device *gdev = host->dev;
-	struct pci_dev *pdev = to_pci_dev(gdev);
-	int i, rc;
-
-	/* No BAR4 allocation: No DMA */
-	if (pci_resource_start(pdev, 4) == 0)
-		return 0;
-
-	/* TODO: If we get no DMA mask we should fall back to PIO */
-	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
-	if (rc)
-		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
-	if (rc)
-		return rc;
-
-	/* request and iomap DMA region */
-	rc = pcim_iomap_regions(pdev, 1 << 4, dev_driver_string(gdev));
-	if (rc) {
-		dev_printk(KERN_ERR, gdev, "failed to request/iomap BAR4\n");
-		return -ENOMEM;
-	}
-	host->iomap = pcim_iomap_table(pdev);
-
-	for (i = 0; i < 2; i++) {
-		struct ata_port *ap = host->ports[i];
-		void __iomem *bmdma = host->iomap[4] + 8 * i;
-
-		if (ata_port_is_dummy(ap))
-			continue;
-
-		ap->ioaddr.bmdma_addr = bmdma;
-		if ((!(ap->flags & ATA_FLAG_IGN_SIMPLEX)) &&
-		    (ioread8(bmdma + 2) & 0x80))
-			host->flags |= ATA_HOST_SIMPLEX;
-
-		ata_port_desc(ap, "bmdma 0x%llx",
-		    (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
-
 static int ata_resources_present(struct pci_dev *pdev, int port)
 {
 	int i;
@@ -3059,3 +2839,227 @@ out:
 EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
 
 #endif /* CONFIG_PCI */
+
+const struct ata_port_operations ata_bmdma_port_ops = {
+	.inherits		= &ata_sff_port_ops,
+
+	.mode_filter		= ata_bmdma_mode_filter,
+
+	.bmdma_setup		= ata_bmdma_setup,
+	.bmdma_start		= ata_bmdma_start,
+	.bmdma_stop		= ata_bmdma_stop,
+	.bmdma_status		= ata_bmdma_status,
+};
+EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
+
+const struct ata_port_operations ata_bmdma32_port_ops = {
+	.inherits		= &ata_bmdma_port_ops,
+
+	.sff_data_xfer		= ata_sff_data_xfer32,
+	.port_start		= ata_sff_port_start32,
+};
+EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
+
+unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
+				    unsigned long xfer_mask)
+{
+	/* Filter out DMA modes if the device has been configured by
+	   the BIOS as PIO only */
+
+	if (adev->link->ap->ioaddr.bmdma_addr == NULL)
+		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
+	return xfer_mask;
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
+
+/**
+ *	ata_bmdma_setup - Set up PCI IDE BMDMA transaction
+ *	@qc: Info associated with this ATA transaction.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_bmdma_setup(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	unsigned int rw = (qc->tf.flags & ATA_TFLAG_WRITE);
+	u8 dmactl;
+
+	/* load PRD table addr. */
+	mb();	/* make sure PRD table writes are visible to controller */
+	iowrite32(ap->prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
+
+	/* specify data direction, triple-check start bit is clear */
+	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+	dmactl &= ~(ATA_DMA_WR | ATA_DMA_START);
+	if (!rw)
+		dmactl |= ATA_DMA_WR;
+	iowrite8(dmactl, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+
+	/* issue r/w command */
+	ap->ops->sff_exec_command(ap, &qc->tf);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_setup);
+
+/**
+ *	ata_bmdma_start - Start a PCI IDE BMDMA transaction
+ *	@qc: Info associated with this ATA transaction.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_bmdma_start(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	u8 dmactl;
+
+	/* start host DMA transaction */
+	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+	iowrite8(dmactl | ATA_DMA_START, ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
+
+	/* Strictly, one may wish to issue an ioread8() here, to
+	 * flush the mmio write.  However, control also passes
+	 * to the hardware at this point, and it will interrupt
+	 * us when we are to resume control.  So, in effect,
+	 * we don't care when the mmio write flushes.
+	 * Further, a read of the DMA status register _immediately_
+	 * following the write may not be what certain flaky hardware
+	 * is expected, so I think it is best to not add a readb()
+	 * without first all the MMIO ATA cards/mobos.
+	 * Or maybe I'm just being paranoid.
+	 *
+	 * FIXME: The posting of this write means I/O starts are
+	 * unneccessarily delayed for MMIO
+	 */
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_start);
+
+/**
+ *	ata_bmdma_stop - Stop PCI IDE BMDMA transfer
+ *	@qc: Command we are ending DMA for
+ *
+ *	Clears the ATA_DMA_START flag in the dma control register
+ *
+ *	May be used as the bmdma_stop() entry in ata_port_operations.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_bmdma_stop(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	void __iomem *mmio = ap->ioaddr.bmdma_addr;
+
+	/* clear start/stop bit */
+	iowrite8(ioread8(mmio + ATA_DMA_CMD) & ~ATA_DMA_START,
+		 mmio + ATA_DMA_CMD);
+
+	/* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+	ata_sff_dma_pause(ap);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_stop);
+
+/**
+ *	ata_bmdma_status - Read PCI IDE BMDMA status
+ *	@ap: Port associated with this ATA transaction.
+ *
+ *	Read and return BMDMA status register.
+ *
+ *	May be used as the bmdma_status() entry in ata_port_operations.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+u8 ata_bmdma_status(struct ata_port *ap)
+{
+	return ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_STATUS);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_status);
+
+#ifdef CONFIG_PCI
+
+/**
+ *	ata_pci_bmdma_clear_simplex -	attempt to kick device out of simplex
+ *	@pdev: PCI device
+ *
+ *	Some PCI ATA devices report simplex mode but in fact can be told to
+ *	enter non simplex mode. This implements the necessary logic to
+ *	perform the task on such devices. Calling it on other devices will
+ *	have -undefined- behaviour.
+ */
+int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev)
+{
+	unsigned long bmdma = pci_resource_start(pdev, 4);
+	u8 simplex;
+
+	if (bmdma == 0)
+		return -ENOENT;
+
+	simplex = inb(bmdma + 0x02);
+	outb(simplex & 0x60, bmdma + 0x02);
+	simplex = inb(bmdma + 0x02);
+	if (simplex & 0x80)
+		return -EOPNOTSUPP;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
+
+/**
+ *	ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host
+ *	@host: target ATA host
+ *
+ *	Acquire PCI BMDMA resources and initialize @host accordingly.
+ *
+ *	LOCKING:
+ *	Inherited from calling layer (may sleep).
+ *
+ *	RETURNS:
+ *	0 on success, -errno otherwise.
+ */
+int ata_pci_bmdma_init(struct ata_host *host)
+{
+	struct device *gdev = host->dev;
+	struct pci_dev *pdev = to_pci_dev(gdev);
+	int i, rc;
+
+	/* No BAR4 allocation: No DMA */
+	if (pci_resource_start(pdev, 4) == 0)
+		return 0;
+
+	/* TODO: If we get no DMA mask we should fall back to PIO */
+	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
+	if (rc)
+		return rc;
+	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+	if (rc)
+		return rc;
+
+	/* request and iomap DMA region */
+	rc = pcim_iomap_regions(pdev, 1 << 4, dev_driver_string(gdev));
+	if (rc) {
+		dev_printk(KERN_ERR, gdev, "failed to request/iomap BAR4\n");
+		return -ENOMEM;
+	}
+	host->iomap = pcim_iomap_table(pdev);
+
+	for (i = 0; i < 2; i++) {
+		struct ata_port *ap = host->ports[i];
+		void __iomem *bmdma = host->iomap[4] + 8 * i;
+
+		if (ata_port_is_dummy(ap))
+			continue;
+
+		ap->ioaddr.bmdma_addr = bmdma;
+		if ((!(ap->flags & ATA_FLAG_IGN_SIMPLEX)) &&
+		    (ioread8(bmdma + 2) & 0x80))
+			host->flags |= ATA_HOST_SIMPLEX;
+
+		ata_port_desc(ap, "bmdma 0x%llx",
+		    (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
+
+#endif /* CONFIG_PCI */
diff --git a/include/linux/libata.h b/include/linux/libata.h
index bd1b2b49e121..d735728873b5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1619,16 +1619,7 @@ extern void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc);
 extern int ata_sff_port_start(struct ata_port *ap);
 extern int ata_sff_port_start32(struct ata_port *ap);
 extern void ata_sff_std_ports(struct ata_ioports *ioaddr);
-extern unsigned long ata_bmdma_mode_filter(struct ata_device *dev,
-					   unsigned long xfer_mask);
-extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
-extern void ata_bmdma_start(struct ata_queued_cmd *qc);
-extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
-extern u8 ata_bmdma_status(struct ata_port *ap);
-
 #ifdef CONFIG_PCI
-extern int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev);
-extern int ata_pci_bmdma_init(struct ata_host *host);
 extern int ata_pci_sff_init_host(struct ata_host *host);
 extern int ata_pci_sff_prepare_host(struct pci_dev *pdev,
 				    const struct ata_port_info * const * ppi,
@@ -1641,6 +1632,18 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 		struct scsi_host_template *sht, void *host_priv, int hflags);
 #endif /* CONFIG_PCI */
 
+extern unsigned long ata_bmdma_mode_filter(struct ata_device *dev,
+					   unsigned long xfer_mask);
+extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
+extern void ata_bmdma_start(struct ata_queued_cmd *qc);
+extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
+extern u8 ata_bmdma_status(struct ata_port *ap);
+
+#ifdef CONFIG_PCI
+extern int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev);
+extern int ata_pci_bmdma_init(struct ata_host *host);
+#endif /* CONFIG_PCI */
+
 /**
  *	ata_sff_busy_wait - Wait for a port status register
  *	@ap: Port to wait for.
-- 
cgit v1.2.3


From 9701dc94a14e54a33c3c99744ec3a761f6385fc6 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Mon, 14 Sep 2009 09:42:41 -0300
Subject: V4L/DVB (12770): Add tm6000 driver to staging tree

Adds a driver for Trident TV Master tm5600/tm6000 chips.

Those USB devices are usually found with a Xceive xc2028/xc3028
tuner, although the firmware seems to be modified to work with
those chips on some older devices.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/staging/tm6000/Kconfig           |   14 +
 drivers/staging/tm6000/Makefile          |    8 +
 drivers/staging/tm6000/tm6000-cards.c    |  409 ++++++++
 drivers/staging/tm6000/tm6000-core.c     |  633 ++++++++++++
 drivers/staging/tm6000/tm6000-i2c.c      |  460 +++++++++
 drivers/staging/tm6000/tm6000-regs.h     |   76 ++
 drivers/staging/tm6000/tm6000-usb-isoc.h |   41 +
 drivers/staging/tm6000/tm6000-video.c    | 1559 ++++++++++++++++++++++++++++++
 drivers/staging/tm6000/tm6000.h          |  230 +++++
 include/linux/videodev2.h                |    1 +
 10 files changed, 3431 insertions(+)
 create mode 100644 drivers/staging/tm6000/Kconfig
 create mode 100644 drivers/staging/tm6000/Makefile
 create mode 100644 drivers/staging/tm6000/tm6000-cards.c
 create mode 100644 drivers/staging/tm6000/tm6000-core.c
 create mode 100644 drivers/staging/tm6000/tm6000-i2c.c
 create mode 100644 drivers/staging/tm6000/tm6000-regs.h
 create mode 100644 drivers/staging/tm6000/tm6000-usb-isoc.h
 create mode 100644 drivers/staging/tm6000/tm6000-video.c
 create mode 100644 drivers/staging/tm6000/tm6000.h

(limited to 'include/linux')

diff --git a/drivers/staging/tm6000/Kconfig b/drivers/staging/tm6000/Kconfig
new file mode 100644
index 000000000000..841e026ecef0
--- /dev/null
+++ b/drivers/staging/tm6000/Kconfig
@@ -0,0 +1,14 @@
+config VIDEO_TM6000
+	tristate "TV Master TM5600/6000 driver"
+	select VIDEO_V4L2
+	select TUNER_XC2028
+	select VIDEO_USB_ISOC
+	select VIDEOBUF_VMALLOC
+	help
+	  Support for TM5600/TM6000 USB Device
+
+	  Since these cards have no MPEG decoder onboard, they transmit
+	  only compressed MPEG data over the usb bus, so you need
+	  an external software decoder to watch TV on your computer.
+
+	  Say Y if you own such a device and want to use it.
diff --git a/drivers/staging/tm6000/Makefile b/drivers/staging/tm6000/Makefile
new file mode 100644
index 000000000000..1efc583c10a4
--- /dev/null
+++ b/drivers/staging/tm6000/Makefile
@@ -0,0 +1,8 @@
+tm6000-objs := tm6000-cards.o \
+		   tm6000-core.o  \
+		   tm6000-i2c.o   \
+		   tm6000-video.o
+
+obj-$(CONFIG_VIDEO_TM6000) += tm6000.o
+
+EXTRA_CFLAGS = -Idrivers/media/video
diff --git a/drivers/staging/tm6000/tm6000-cards.c b/drivers/staging/tm6000/tm6000-cards.c
new file mode 100644
index 000000000000..b0719a52ef42
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-cards.c
@@ -0,0 +1,409 @@
+/*
+   tm6000-cards.c - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/i2c.h>
+#include <linux/usb.h>
+#include <linux/version.h>
+#include <media/v4l2-common.h>
+#include <media/tuner.h>
+
+#include "tm6000.h"
+
+#define TM6000_BOARD_UNKNOWN		0
+#define TM5600_BOARD_GENERIC		1
+#define TM6000_BOARD_GENERIC		2
+#define TM5600_BOARD_10MOONS_UT821	3
+#define TM6000_BOARD_10MOONS_UT330	4
+#define TM6000_BOARD_ADSTECH_DUAL_TV	5
+
+#define TM6000_MAXBOARDS        16
+static unsigned int card[]     = {[0 ... (TM6000_MAXBOARDS - 1)] = UNSET };
+
+module_param_array(card,  int, NULL, 0444);
+
+struct tm6000_board {
+	char            *name;
+
+	struct tm6000_capabilities caps;
+
+	int             tuner_type;     /* type of the tuner */
+	int             tuner_addr;     /* tuner address */
+};
+
+
+struct tm6000_board tm6000_boards[] = {
+	[TM6000_BOARD_UNKNOWN] = {
+		.name         = "Unknown tm6000 video grabber",
+		.caps = {
+			.has_tuner    = 1,
+		},
+	},
+	[TM5600_BOARD_GENERIC] = {
+		.name         = "Generic tm5600 board",
+		.tuner_type   = TUNER_XC2028,
+		.tuner_addr   = 0xc2,
+		.caps = {
+			.has_tuner	= 1,
+		},
+	},
+	[TM6000_BOARD_GENERIC] = {
+		.name         = "Generic tm6000 board",
+		.tuner_type   = TUNER_XC2028,
+		.tuner_addr   = 0xc2,
+		.caps = {
+			.has_tuner	= 1,
+			.has_dvb	= 1,
+		},
+	},
+	[TM5600_BOARD_10MOONS_UT821] = {
+		.name         = "10Moons UT 821",
+		.tuner_type   = TUNER_XC2028,
+		.tuner_addr   = 0xc2,
+		.caps = {
+			.has_tuner    = 1,
+			.has_eeprom   = 1,
+		},
+	},
+	[TM6000_BOARD_10MOONS_UT330] = {
+		.name         = "10Moons UT 330",
+		.tuner_type   = TUNER_XC2028,
+		.tuner_addr   = 0xc8,
+		.caps = {
+			.has_tuner    = 1,
+			.has_dvb      = 1,
+			.has_zl10353  = 1,
+			.has_eeprom   = 1,
+		},
+	},
+	[TM6000_BOARD_ADSTECH_DUAL_TV] = {
+		.name         = "ADSTECH Dual TV USB",
+		.tuner_type   = TUNER_XC2028,
+		.tuner_addr   = 0xc8,
+		.caps = {
+			.has_tuner    = 1,
+			.has_tda9874  = 1,
+			.has_dvb      = 1,
+			.has_zl10353  = 1,
+			.has_eeprom   = 1,
+		},
+	},
+};
+
+/* table of devices that work with this driver */
+struct usb_device_id tm6000_id_table [] = {
+	{ USB_DEVICE(0x6000, 0x0001), .driver_info = TM5600_BOARD_10MOONS_UT821 },
+	{ USB_DEVICE(0x06e1, 0xf332), .driver_info = TM6000_BOARD_ADSTECH_DUAL_TV },
+	{ },
+};
+
+static int tm6000_init_dev(struct tm6000_core *dev)
+{
+	struct v4l2_frequency f;
+	int rc = 0;
+
+	mutex_init(&dev->lock);
+
+	mutex_lock(&dev->lock);
+
+	/* Initialize board-specific data */
+	dev->tuner_type = tm6000_boards[dev->model].tuner_type;
+	dev->tuner_addr = tm6000_boards[dev->model].tuner_addr;
+
+	dev->caps = tm6000_boards[dev->model].caps;
+
+	/* initialize hardware */
+	rc=tm6000_init (dev);
+	if (rc<0)
+		goto err;
+
+	/* register i2c bus */
+	rc=tm6000_i2c_register(dev);
+	if (rc<0)
+		goto err;
+
+	/* register and initialize V4L2 */
+	rc=tm6000_v4l2_register(dev);
+	if (rc<0)
+		goto err;
+
+	/* Request tuner */
+	request_module ("tuner");
+//	norm=V4L2_STD_NTSC_M;
+	dev->norm=V4L2_STD_PAL_M;
+	tm6000_i2c_call_clients(dev, VIDIOC_S_STD, &dev->norm);
+
+	/* configure tuner */
+	f.tuner = 0;
+	f.type = V4L2_TUNER_ANALOG_TV;
+	f.frequency = 3092;	/* 193.25 MHz */
+	dev->freq = f.frequency;
+
+	tm6000_i2c_call_clients(dev, VIDIOC_S_FREQUENCY, &f);
+
+err:
+	mutex_unlock(&dev->lock);
+	return rc;
+}
+
+/* high bandwidth multiplier, as encoded in highspeed endpoint descriptors */
+#define hb_mult(wMaxPacketSize) (1 + (((wMaxPacketSize) >> 11) & 0x03))
+
+static void get_max_endpoint (  struct usb_device *usbdev,
+				char *msgtype,
+				struct usb_host_endpoint *curr_e,
+				unsigned int *maxsize,
+				struct usb_host_endpoint **ep  )
+{
+	u16 tmp = le16_to_cpu(curr_e->desc.wMaxPacketSize);
+	unsigned int size = tmp & 0x7ff;
+
+	if (usbdev->speed == USB_SPEED_HIGH)
+		size = size * hb_mult (tmp);
+
+	if (size>*maxsize) {
+		*ep = curr_e;
+		*maxsize = size;
+		printk("tm6000: %s endpoint: 0x%02x (max size=%u bytes)\n",
+					msgtype, curr_e->desc.bEndpointAddress,
+					size);
+	}
+}
+
+/*
+ * tm6000_usb_probe() - bind a supported device: claim a free board slot,
+ * scan its endpoints and initialize it. Returns 0 or a negative errno.
+ */
+static int tm6000_usb_probe(struct usb_interface *interface,
+			    const struct usb_device_id *id)
+{
+	struct usb_device *usbdev;
+	struct tm6000_core *dev = NULL;
+	int i,rc=0;
+	int nr=0;
+	char *speed;
+
+
+	usbdev=usb_get_dev(interface_to_usbdev(interface));
+
+	/* Selects the proper interface */
+	rc=usb_set_interface(usbdev,0,1);
+	if (rc<0)
+		goto err;
+
+	/* Check to see next free device and mark as used */
+	nr=find_first_zero_bit(&tm6000_devused,TM6000_MAXBOARDS);
+	if (nr >= TM6000_MAXBOARDS) {
+		printk ("tm6000: Supports only %i tm6000 boards.\n",TM6000_MAXBOARDS);
+		usb_put_dev(usbdev);
+		return -ENOMEM;
+	}
+
+	/* Create and initialize dev struct */
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL) {
+		printk ("tm6000" ": out of memory!\n");
+		usb_put_dev(usbdev);
+		return -ENOMEM;
+	}
+	spin_lock_init(&dev->slock);
+
+	/* Increment usage count */
+	tm6000_devused|=1<<nr;
+
+	dev->udev= usbdev;
+	dev->model=id->driver_info;
+	snprintf(dev->name, 29, "tm6000 #%d", nr);
+	dev->devno=nr;
+
+	switch (usbdev->speed) {
+	case USB_SPEED_LOW:
+		speed = "1.5";
+		break;
+	case USB_SPEED_UNKNOWN:
+	case USB_SPEED_FULL:
+		speed = "12";
+		break;
+	case USB_SPEED_HIGH:
+		speed = "480";
+		break;
+	default:
+		speed = "unknown";
+	}
+
+
+	/* Get endpoints */
+	for (i = 0; i < interface->num_altsetting; i++) {
+		int ep;
+
+		for (ep = 0; ep < interface->altsetting[i].desc.bNumEndpoints; ep++) {
+			struct usb_host_endpoint	*e;
+			int dir_out;
+
+			e = &interface->altsetting[i].endpoint[ep];
+
+			dir_out = ((e->desc.bEndpointAddress &
+					USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT);
+
+			printk("tm6000: alt %d, interface %i, class %i\n",
+			       i,
+			       interface->altsetting[i].desc.bInterfaceNumber,
+			       interface->altsetting[i].desc.bInterfaceClass);
+
+			switch (e->desc.bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) {
+			case USB_ENDPOINT_XFER_BULK:
+				if (!dir_out) {
+					get_max_endpoint (usbdev, "Bulk IN", e,
+							&dev->max_bulk_in,
+							&dev->bulk_in);
+				} else {
+					get_max_endpoint (usbdev, "Bulk OUT", e,
+							&dev->max_bulk_out,
+							&dev->bulk_out);
+				}
+				break;
+			case USB_ENDPOINT_XFER_ISOC:
+				if (!dir_out) {
+					get_max_endpoint (usbdev, "ISOC IN", e,
+							&dev->max_isoc_in,
+							&dev->isoc_in);
+				} else {
+					get_max_endpoint (usbdev, "ISOC OUT", e,
+							&dev->max_isoc_out,
+							&dev->isoc_out);
+				}
+				break;
+			}
+		}
+	}
+
+	if (interface->altsetting->desc.bAlternateSetting) {
+		printk("selecting alt setting %d\n",
+		       interface->altsetting->desc.bAlternateSetting);
+		rc = usb_set_interface (usbdev,
+				interface->altsetting->desc.bInterfaceNumber,
+				interface->altsetting->desc.bAlternateSetting);
+		if (rc<0)
+			goto err;
+	}
+
+	printk("tm6000: New video device @ %s Mbps (%04x:%04x, ifnum %d)\n",
+		speed,
+		le16_to_cpu(dev->udev->descriptor.idVendor),
+		le16_to_cpu(dev->udev->descriptor.idProduct),
+		interface->altsetting->desc.bInterfaceNumber);
+
+/* check if the device has the iso in endpoint at the correct place */
+	if (!dev->isoc_in) {
+		printk("tm6000: probing error: no IN ISOC endpoint!\n");
+		rc= -ENODEV;
+
+		goto err;
+	}
+
+	/* save our data pointer in this interface device */
+	usb_set_intfdata(interface, dev);
+
+	printk("tm6000: Found %s\n", tm6000_boards[dev->model].name);
+
+	rc=tm6000_init_dev(dev);
+
+	if (rc<0)
+		goto err;
+
+	return 0;
+
+err:
+	/* slot 'nr' is only claimed after dev was allocated; never free a slot we do not own */
+	if (dev)
+		tm6000_devused&=~(1<<nr);
+	usb_put_dev(usbdev);
+	kfree(dev);
+	return rc;
+}
+
+/*
+ * tm6000_usb_disconnect()
+ * called when the device gets disconnected
+ * video device will be unregistered on v4l2_close in case it is still open
+ */
+static void tm6000_usb_disconnect(struct usb_interface *interface)
+{
+	struct tm6000_core *dev = usb_get_intfdata(interface);
+	usb_set_intfdata(interface, NULL);
+
+	if (!dev)
+		return;
+
+	tm6000_i2c_unregister(dev);
+
+	printk("tm6000: disconnecting %s\n", dev->name);
+
+	mutex_lock(&dev->lock);
+
+	tm6000_i2c_unregister(dev);
+
+	tm6000_v4l2_unregister(dev);
+
+//	wake_up_interruptible_all(&dev->open);
+
+	dev->state |= DEV_DISCONNECTED;
+
+	mutex_unlock(&dev->lock);
+}
+
+static struct usb_driver tm6000_usb_driver = {
+		.name = "tm6000",
+		.probe = tm6000_usb_probe,
+		.disconnect = tm6000_usb_disconnect,
+		.id_table = tm6000_id_table,
+};
+
+static int __init tm6000_module_init(void)
+{
+	int result;
+
+	printk(KERN_INFO "tm6000" " v4l2 driver version %d.%d.%d loaded\n",
+	       (TM6000_VERSION  >> 16) & 0xff,
+	       (TM6000_VERSION  >> 8) & 0xff, TM6000_VERSION  & 0xff);
+
+	/* register this driver with the USB subsystem */
+	result = usb_register(&tm6000_usb_driver);
+	if (result)
+		printk("tm6000"
+			   " usb_register failed. Error number %d.\n", result);
+
+	return result;
+}
+
+static void __exit tm6000_module_exit(void)
+{
+	/* deregister at USB subsystem */
+	usb_deregister(&tm6000_usb_driver);
+}
+
+module_init(tm6000_module_init);
+module_exit(tm6000_module_exit);
+
+MODULE_DESCRIPTION("Trident TVMaster TM5600/TM6000 USB2 adapter");
+MODULE_AUTHOR("Mauro Carvalho Chehab");
+MODULE_LICENSE("GPL");
diff --git a/drivers/staging/tm6000/tm6000-core.c b/drivers/staging/tm6000/tm6000-core.c
new file mode 100644
index 000000000000..ea26f0daa82f
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-core.c
@@ -0,0 +1,633 @@
+/*
+   tm6000-core.c - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/usb.h>
+#include <linux/i2c.h>
+#include <linux/video_decoder.h>
+#include "tm6000.h"
+#include "tm6000-regs.h"
+#include <media/v4l2-common.h>
+#include <media/tuner.h>
+
+#ifdef HACK /* HACK */
+#include "tm6000-hack.c"
+#endif
+
+#define USB_TIMEOUT	(5*HZ) /* usb_control_msg() timeout; NOTE(review): that API takes ms, this value scales with HZ */
+
+/* Issue a control transfer on endpoint 0 through a kzalloc'ed bounce buffer.
+ * Returns usb_control_msg()'s result, or -ENOMEM if the buffer can't be had. */
+int tm6000_read_write_usb (struct tm6000_core *dev, u8 req_type, u8 req,
+			   u16 value, u16 index, u8 *buf, u16 len)
+{
+	int          ret, i;
+	unsigned int pipe;
+	static int   ini=0, last=0, n=0;
+	u8	     *data=NULL;
+
+	if (len) {
+		data = kzalloc(len, GFP_KERNEL);
+		if (!data)
+			return -ENOMEM;
+	}
+
+	if (req_type & USB_DIR_IN)
+		pipe=usb_rcvctrlpipe(dev->udev, 0);
+	else {
+		pipe=usb_sndctrlpipe(dev->udev, 0);
+		if (len)
+			memcpy(data, buf, len);
+	}
+
+	if (tm6000_debug & V4L2_DEBUG_I2C) {
+		if (!ini)
+			last=ini=jiffies;
+
+		printk("%06i (dev %p, pipe %08x): ", n, dev->udev, pipe);
+		printk( "%s: %06u ms %06u ms %02x %02x %02x %02x %02x %02x %02x %02x ",
+			(req_type & USB_DIR_IN)?" IN":"OUT",
+			jiffies_to_msecs(jiffies-last),
+			jiffies_to_msecs(jiffies-ini),
+			req_type, req,value&0xff,value>>8, index&0xff, index>>8,
+			len&0xff, len>>8);
+		last=jiffies;
+		n++;
+
+		if ( !(req_type & USB_DIR_IN) ) {
+			printk(">>> ");
+			for (i=0;i<len;i++)
+				printk(" %02x",buf[i]);
+			printk("\n");
+		}
+	}
+
+	ret = usb_control_msg(dev->udev, pipe, req, req_type, value, index, data,
+			      len, USB_TIMEOUT);
+
+	if ((req_type & USB_DIR_IN) && len)
+		memcpy(buf, data, len);
+
+	if (tm6000_debug & V4L2_DEBUG_I2C) {
+		if (ret<0) {
+			if (req_type &  USB_DIR_IN)
+				printk("<<< (len=%d)\n",len);
+			printk("%s: Error #%d\n", __FUNCTION__, ret);
+		} else if (req_type &  USB_DIR_IN) {
+			printk("<<< ");
+			for (i=0;i<len;i++)
+				printk(" %02x",buf[i]);
+			printk("\n");
+		}
+	}
+
+	kfree(data);
+	return ret;
+}
+
+int tm6000_set_reg (struct tm6000_core *dev, u8 req, u16 value, u16 index)
+{
+	return
+		tm6000_read_write_usb (dev, USB_DIR_OUT | USB_TYPE_VENDOR,
+				       req, value, index, NULL, 0);
+}
+
+int tm6000_get_reg (struct tm6000_core *dev, u8 req, u16 value, u16 index)
+{
+	int rc;
+	u8 buf[1];
+
+	rc=tm6000_read_write_usb (dev, USB_DIR_IN | USB_TYPE_VENDOR, req,
+				       value, index, buf, 1);
+
+	if (rc<0)
+		return rc;
+
+	return *buf;
+}
+
+int tm6000_get_reg16 (struct tm6000_core *dev, u8 req, u16 value, u16 index)
+{
+	int rc;
+	u8 buf[2];
+
+	rc=tm6000_read_write_usb (dev, USB_DIR_IN | USB_TYPE_VENDOR, req,
+				       value, index, buf, 2);
+
+	if (rc<0)
+		return rc;
+
+	return buf[1]|buf[0]<<8;
+}
+
+void tm6000_set_fourcc_format(struct tm6000_core *dev)
+{
+	if (dev->fourcc==V4L2_PIX_FMT_UYVY) {
+		/* Sets driver to UYVY */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xc1, 0xd0);
+	} else {
+		/* Any other fourcc: sets driver to YUY2 (presumably YUYV - TODO confirm) */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xc1, 0x90);
+	}
+}
+
+int tm6000_init_analog_mode (struct tm6000_core *dev)
+{
+
+	/* Enables soft reset */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x01);
+
+	if (dev->scaler) {
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xc0, 0x20);
+	} else {
+		/* Enable Hfilter and disable TS Drop err */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xc0, 0x80);
+	}
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xc3, 0x88);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xda, 0x23);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd1, 0xc0);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd2, 0xd8);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd6, 0x06);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xdf, 0x1f);
+
+	/* AP Software reset */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xff, 0x08);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xff, 0x00);
+
+	tm6000_set_fourcc_format(dev);
+
+	/* Disables soft reset */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x00);
+
+	/* E3: Select input 0 - TV tuner */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe3, 0x00);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x60);
+
+	/* Tuner firmware can now be loaded */
+
+	tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN, TM6000_GPIO_1, 0x00);
+	msleep(11);
+
+	/* This controls input */
+	tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN, TM6000_GPIO_2, 0x0);
+	tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN, TM6000_GPIO_3, 0x01);
+	msleep(20);
+
+	/*FIXME: Hack!!! */
+	struct v4l2_frequency f;
+	mutex_lock(&dev->lock);
+	f.frequency=dev->freq;
+	tm6000_i2c_call_clients(dev,VIDIOC_S_FREQUENCY,&f);
+	mutex_unlock(&dev->lock);
+
+	msleep(100);
+	tm6000_set_standard (dev, &dev->norm);
+	tm6000_set_audio_bitrate (dev,48000);
+
+
+	return 0;
+}
+
+
+/* The meaning of these initializations is unknown */
+u8 init_tab[][2] = {
+	/* REG  VALUE */
+	{ 0xdf, 0x1f },
+	{ 0xff, 0x08 },
+	{ 0xff, 0x00 },
+	{ 0xd5, 0x4f },
+	{ 0xda, 0x23 },
+	{ 0xdb, 0x08 },
+	{ 0xe2, 0x00 },
+	{ 0xe3, 0x10 },
+	{ 0xe5, 0x00 },
+	{ 0xe8, 0x00 },
+	{ 0xeb, 0x64 },		/* 48000 bits/sample, external input */
+	{ 0xee, 0xc2 },
+	{ 0x3f, 0x01 },		/* Start of soft reset */
+	{ 0x00, 0x00 },
+	{ 0x01, 0x07 },
+	{ 0x02, 0x5f },
+	{ 0x03, 0x00 },
+	{ 0x05, 0x64 },
+	{ 0x07, 0x01 },
+	{ 0x08, 0x82 },
+	{ 0x09, 0x36 },
+	{ 0x0a, 0x50 },
+	{ 0x0c, 0x6a },
+	{ 0x11, 0xc9 },
+	{ 0x12, 0x07 },
+	{ 0x13, 0x3b },
+	{ 0x14, 0x47 },
+	{ 0x15, 0x6f },
+	{ 0x17, 0xcd },
+	{ 0x18, 0x1e },
+	{ 0x19, 0x8b },
+	{ 0x1a, 0xa2 },
+	{ 0x1b, 0xe9 },
+	{ 0x1c, 0x1c },
+	{ 0x1d, 0xcc },
+	{ 0x1e, 0xcc },
+	{ 0x1f, 0xcd },
+	{ 0x20, 0x3c },
+	{ 0x21, 0x3c },
+	{ 0x2d, 0x48 },
+	{ 0x2e, 0x88 },
+	{ 0x30, 0x22 },
+	{ 0x31, 0x61 },
+	{ 0x32, 0x74 },
+	{ 0x33, 0x1c },
+	{ 0x34, 0x74 },
+	{ 0x35, 0x1c },
+	{ 0x36, 0x7a },
+	{ 0x37, 0x26 },
+	{ 0x38, 0x40 },
+	{ 0x39, 0x0a },
+	{ 0x42, 0x55 },
+	{ 0x51, 0x11 },
+	{ 0x55, 0x01 },
+	{ 0x57, 0x02 },
+	{ 0x58, 0x35 },
+	{ 0x59, 0xa0 },
+	{ 0x80, 0x15 },
+	{ 0x82, 0x42 },
+	{ 0xc1, 0xd0 },
+	{ 0xc3, 0x88 },
+	{ 0x3f, 0x00 },		/* End of the soft reset */
+};
+
+int tm6000_init (struct tm6000_core *dev)
+{
+	int board, rc=0, i;
+
+#ifdef HACK /* HACK */
+	init_tm6000(dev);
+	return 0;
+#else
+
+	/* Load board's initialization table */
+	for (i=0; i< ARRAY_SIZE(init_tab); i++) {
+		rc= tm6000_set_reg (dev, REQ_07_SET_GET_AVREG,
+			init_tab[i][0],init_tab[i][1]);
+		if (rc<0) {
+			printk (KERN_ERR "Error %i while setting reg %d to value %d\n",
+			rc, init_tab[i][0],init_tab[i][1]);
+			return rc;
+		}
+	}
+
+	/* Check board version - maybe 10Moons specific */
+	board=tm6000_get_reg16 (dev, 0x40, 0, 0);
+	if (board >=0) {
+		printk (KERN_INFO "Board version = 0x%04x\n",board);
+	} else {
+		printk (KERN_ERR "Error %i while retrieving board version\n",board);
+	}
+
+	tm6000_set_reg (dev, REQ_05_SET_GET_USBREG, 0x18, 0x00);
+	msleep(5); /* Just to be conservative */
+
+	/* Reset GPIO1. Maybe, this is 10 Moons specific */
+	for (i=0; i< 3; i++) {
+		rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN, TM6000_GPIO_1, 0);
+		if (rc<0) {
+			printk (KERN_ERR "Error %i doing GPIO1 reset\n",rc);
+			return rc;
+		}
+
+		msleep(10); /* Just to be conservative */
+		rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN, TM6000_GPIO_1, 1);
+		if (rc<0) {
+			printk (KERN_ERR "Error %i doing GPIO1 reset\n",rc);
+			return rc;
+		}
+
+		if (!i)
+			rc=tm6000_get_reg16(dev, 0x40,0,0);
+	}
+	return 0;
+
+#endif /* HACK */
+}
+
+#define tm6000_wrt(dev,req,reg,val, data...)				\
+	{ const static u8 _val[] = data;				\
+	tm6000_read_write_usb(dev,USB_DIR_OUT | USB_TYPE_VENDOR,	\
+	req,reg, val, (u8 *) _val, ARRAY_SIZE(_val));			\
+	}
+
+/*
+TM5600/6000 register values to set video standards.
+	There's an adjust, common to all, for composite video
+	Additional adjustments are required for S-Video, based on std.
+
+      Standards values for TV             S-Video Changes
+REG   PAL   PAL_M PAL_N SECAM NTSC  Comp. PAL  PAL_M PAL_N SECAM NTSC
+0xdf  0x1f  0x1f  0x1f  0x1f  0x1f
+0xe2  0x00  0x00  0x00  0x00  0x00
+0xe8  0x0f  0x0f  0x0f  0x0f  0x0f        0x00 0x00  0x00  0x00  0x00
+0xeb  0x60  0x60  0x60  0x60  0x60  0x64  0x64 0x64  0x64  0x64  0x64
+0xd5  0x5f  0x5f  0x5f  0x4f  0x4f        0x4f 0x4f  0x4f  0x4f  0x4f
+0xe3  0x00  0x00  0x00  0x00  0x00  0x10  0x10 0x10  0x10  0x10  0x10
+0xe5  0x00  0x00  0x00  0x00  0x00        0x10 0x10  0x10  0x10  0x10
+0x3f  0x01  0x01  0x01  0x01  0x01
+0x00  0x32  0x04  0x36  0x38  0x00        0x33 0x05  0x37  0x39  0x01
+0x01  0x0e  0x0e  0x0e  0x0e  0x0f
+0x02  0x5f  0x5f  0x5f  0x5f  0x5f
+0x03  0x02  0x00  0x02  0x02  0x00        0x04 0x04  0x04  0x03  0x03
+0x07  0x01  0x01  0x01  0x01  0x01        0x00                   0x00
+0x17  0xcd  0xcd  0xcd  0xcd  0xcd                               0x8b
+0x18  0x25  0x1e  0x1e  0x24  0x1e
+0x19  0xd5  0x83  0x91  0x92  0x8b
+0x1a  0x63  0x0a  0x1f  0xe8  0xa2
+0x1b  0x50  0xe0  0x0c  0xed  0xe9
+0x1c  0x1c  0x1c  0x1c  0x1c  0x1c
+0x1d  0xcc  0xcc  0xcc  0xcc  0xcc
+0x1e  0xcc  0xcc  0xcc  0xcc  0xcc
+0x1f  0xcd  0xcd  0xcd  0xcd  0xcd
+0x2e  0x8c  0x88  0x8c  0x8c  0x88                   0x88
+0x30  0x2c  0x20  0x2c  0x2c  0x22        0x2a 0x22  0x22  0x2a
+0x31  0xc1  0x61  0xc1  0xc1  0x61
+0x33  0x0c  0x0c  0x0c  0x2c  0x1c
+0x35  0x1c  0x1c  0x1c  0x18  0x1c
+0x82  0x52  0x52  0x52  0x42  0x42
+0x04  0xdc  0xdc  0xdc        0xdd
+0x0d  0x07  0x07  0x07  0x87  0x07
+0x3f  0x00  0x00  0x00  0x00  0x00
+*/
+
+int tm6000_set_standard (struct tm6000_core *dev, v4l2_std_id *norm)
+{
+	dev->norm=*norm;
+
+	/* HACK: Should use, instead, the common code!!! */
+	if (*norm & V4L2_STD_PAL_M) {
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xdf, 0x1f);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe2, 0x00);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe8, 0x0f);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x60);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd5, 0x5f);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe3, 0x00);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe5, 0x00);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x01);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x04);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x01, 0x0e);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x02, 0x5f);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x00);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x07, 0x01);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x1e);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0x83);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0x0a);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0xe0);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1c, 0x1c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1d, 0xcc);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1e, 0xcc);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1f, 0xcd);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x88);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x20);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x31, 0x61);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x33, 0x0c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x35, 0x1c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x82, 0x52);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x04, 0xdc);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x0d, 0x07);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x00);
+		return 0;
+	}
+
+	/* Every standard other than PAL-M is programmed by the generic path below */
+//	tm6000_set_reg (dev, REQ_04_EN_DISABLE_MCU_INT, 0x02, 0x01);
+//	tm6000_set_reg (dev, REQ_04_EN_DISABLE_MCU_INT, 0x02, 0x00);
+
+	/* Set registers common to all standards */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xdf, 0x1f);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe2, 0x00);
+
+	switch (dev->input) {
+	case TM6000_INPUT_TV:
+		/* Seems to disable ADC2 - needed for TV and RCA */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe8, 0x0f);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x60);
+
+		if (*norm & V4L2_STD_PAL) {
+			/* Enable UV_FLT_EN */
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd5, 0x5f);
+		} else {
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd5, 0x4f);
+		}
+
+		/* E3: Select input 0 */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe3, 0x00);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe5, 0x10);
+
+		break;
+	case TM6000_INPUT_COMPOSITE:
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x64);
+		/* E3: Select input 1 */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe3, 0x10);
+		break;
+	case TM6000_INPUT_SVIDEO:
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe8, 0x00);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x64);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xd5, 0x4f);
+		/* E3: Select input 1 */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe3, 0x10);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xe5, 0x10);
+
+		break;
+	}
+
+	/* Software reset */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x01);
+
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x02, 0x5f);
+
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x07, 0x01);
+//	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x17, 0xcd);
+
+	/* Horizontal Sync DTO = 0x1ccccccd */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1c, 0x1c);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1d, 0xcc);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1e, 0xcc);
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1f, 0xcd);
+
+	/* Vertical Height */
+	if (*norm & V4L2_STD_525_60) {
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x31, 0x61);
+	} else {
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x31, 0xc1);
+	}
+
+	/* Horizontal Length */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2f, 640/8);
+
+	if (*norm & V4L2_STD_PAL) {
+		/* Common to All PAL Standards */
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x01, 0x0e);
+
+		/* Vsync Hsync Lockout End */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x33, 0x0c);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x35, 0x1c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x82, 0x52);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x04, 0xdc);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x0d, 0x07);
+		if (*norm & V4L2_STD_PAL_M) {
+
+			/* Chroma DTO */
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x1e);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0x83);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0x0a);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0xe0);
+
+			/* Active Video Horiz Start Time */
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x88);
+
+			if (dev->input==TM6000_INPUT_SVIDEO) {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x05);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x04);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x22);
+			} else {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x04);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x00);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x20);
+			}
+		} else if (*norm & V4L2_STD_PAL_N) {
+			/* Chroma DTO */
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x1e);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0x91);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0x1f);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0x0c);
+
+			if (dev->input==TM6000_INPUT_SVIDEO) {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x37);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x04);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x88);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x22);
+			} else {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x36);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x02);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x8c);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2c);
+			}
+		} else {	// Other PAL standards
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x25);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0xd5);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0x63);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0x50);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x8c);
+
+			if (dev->input==TM6000_INPUT_SVIDEO) {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x33);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x04);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2a);
+
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2c);
+			} else {
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x32);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x02);
+				tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2c);
+			}
+		}
+	} else if (*norm & V4L2_STD_SECAM) {	/* else: PAL must not fall into NTSC below */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x01, 0x0e);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x24);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0x92);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0xe8);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0xed);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x8c);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x33, 0x2c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x35, 0x18);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x82, 0x42);
+		// Register 0x04 is not initialized on SECAM
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x0d, 0x87);
+
+		if (dev->input==TM6000_INPUT_SVIDEO) {
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x39);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x03);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2a);
+		} else {
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x38);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x02);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x2c);
+		}
+	} else {	/* NTSC */
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x01, 0x0f);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x18, 0x1e);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x19, 0x8b);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1a, 0xa2);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x1b, 0xe9);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x2e, 0x88);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x30, 0x22);
+
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x33, 0x1c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x35, 0x1c);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x82, 0x42);
+		tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x0d, 0x07);
+		if (dev->input==TM6000_INPUT_SVIDEO) {
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x01);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x03);
+
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x07, 0x00);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x17, 0x8b);
+		} else {
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x00, 0x00);
+			tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x03, 0x00);
+		}
+	}
+
+
+	/* End of software reset */
+	tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0x3f, 0x00);
+
+	msleep(40);
+
+	return 0;
+}
+
+int tm6000_set_audio_bitrate (struct tm6000_core *dev, int bitrate)
+{
+	int val;
+
+	val=tm6000_get_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, 0x0);
+printk("Original value=%d\n",val);
+	if (val<0)
+		return val;
+
+	val &= 0x0f;		/* Preserve the audio input control bits */
+	switch (bitrate) {
+	case 44100:
+		val|=0xd0;
+		break;
+	case 48000:
+		val|=0x60;
+		break;
+	}
+	val=tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, 0xeb, val);
+
+	return val;
+}
diff --git a/drivers/staging/tm6000/tm6000-i2c.c b/drivers/staging/tm6000/tm6000-i2c.c
new file mode 100644
index 000000000000..5e165ed25eee
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-i2c.c
@@ -0,0 +1,460 @@
+/*
+   tm6000-i2c.c - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/usb.h>
+#include <linux/i2c.h>
+
+#include "tm6000.h"
+#include "tm6000-regs.h"
+#include <media/v4l2-common.h>
+#include <media/tuner.h>
+#include "tuner-xc2028.h"
+
+
+/*FIXME: Hack to avoid needing to patch i2c-id.h */
+#define I2C_HW_B_TM6000 I2C_HW_B_EM28XX
+/* ----------------------------------------------------------- */
+
+static unsigned int i2c_scan = 0;	/* non-zero: probe every address at register time */
+module_param(i2c_scan, uint, 0444);
+MODULE_PARM_DESC(i2c_scan, "scan i2c bus at insmod time");
+
+static unsigned int i2c_debug = 0;	/* compared against the level given to i2c_dprintk() */
+module_param(i2c_debug, uint, 0644);
+MODULE_PARM_DESC(i2c_debug, "enable debug messages [i2c]");
+
+#define i2c_dprintk(lvl,fmt, args...) do { if (i2c_debug>=lvl) \
+			printk(KERN_DEBUG "%s at %s: " fmt, \
+			dev->name, __FUNCTION__ , ##args); } while (0) /* one statement: safe before an else */
+
+
+/* Returns a non-negative value if the address is found, a negative errno otherwise */
+static int tm6000_i2c_scan(struct i2c_adapter *i2c_adap, int addr)
+{
+	struct tm6000_core *dev = i2c_adap->algo_data;
+
+#if 1
+	/* HACK: i2c scan is not working yet */
+	if (
+		(dev->caps.has_tuner   && (addr==dev->tuner_addr)) ||
+		(dev->caps.has_tda9874 && (addr==0xb0)) ||
+		(dev->caps.has_zl10353 && (addr==0x1e)) ||
+		(dev->caps.has_eeprom  && (addr==0xa0))
+	   ) {
+		printk("Hack: enabling device at addr 0x%02x\n",addr);
+		return (1);
+	} else {
+		return -ENODEV;
+	}
+#else
+	int rc=-ENODEV;
+	char buf[1];
+
+	/* This sends addr + 1 byte with 0 */
+	rc = tm6000_read_write_usb (dev,
+		USB_DIR_IN | USB_TYPE_VENDOR,
+		REQ_16_SET_GET_I2CSEQ,
+		addr, 0,
+		buf, 0);
+	msleep(10);
+
+	if (rc<0) {
+		if (i2c_debug>=2)
+			printk("no device at addr 0x%02x\n",addr);
+	}
+
+	printk("Hack: check on addr 0x%02x returned %d\n",addr,rc);
+
+	return rc;
+#endif
+}
+
+/* i2c master_xfer: zero-length messages probe the address, anything else is
+ * sent as a REQ_16 USB control request.  Returns num or a negative error. */
+static int tm6000_i2c_xfer(struct i2c_adapter *i2c_adap,
+			   struct i2c_msg msgs[], int num)
+{
+	struct tm6000_core *dev = i2c_adap->algo_data;
+	int addr, rc, i, byte;
+
+	if (num <= 0)
+		return 0;
+	for (i = 0; i < num; i++) {
+		addr = (msgs[i].addr << 1) &0xff;
+		i2c_dprintk(2,"%s %s addr=0x%x len=%d:",
+			 (msgs[i].flags & I2C_M_RD) ? "read" : "write",
+			 i == num - 1 ? "stop" : "nonstop", addr, msgs[i].len);
+
+		if (!msgs[i].len) {
+			/* Do I2C scan */
+			rc=tm6000_i2c_scan(i2c_adap, addr);
+		} else if (msgs[i].flags & I2C_M_RD) {
+			/*
+			 * Read bytes.
+			 * I2C is assumed to have always a subaddr at the
+			 * first byte of the message buffer; it goes out in
+			 * the high byte of the USB value field.  Also, the
+			 * first i2c value of the answer is returned out of
+			 * message data.  The answer is stored straight into
+			 * msgs[i].buf, so no on-stack copy is kept.
+			 */
+			rc = tm6000_read_write_usb (dev,
+				USB_DIR_IN | USB_TYPE_VENDOR,
+				REQ_16_SET_GET_I2CSEQ,
+				addr|(*msgs[i].buf)<<8, 0,
+				msgs[i].buf, msgs[i].len);
+			if (i2c_debug>=2) {
+				for (byte = 0; byte < msgs[i].len; byte++)
+					printk(" %02x", msgs[i].buf[byte]);
+			}
+		} else {
+			/* write bytes: buf[0] is the subaddr, the rest is payload */
+			if (i2c_debug>=2) {
+				for (byte = 0; byte < msgs[i].len; byte++)
+					printk(" %02x", msgs[i].buf[byte]);
+			}
+			rc = tm6000_read_write_usb (dev,
+				USB_DIR_OUT | USB_TYPE_VENDOR | USB_RECIP_DEVICE,
+				REQ_16_SET_GET_I2CSEQ,
+				addr|(*msgs[i].buf)<<8, 0,
+				msgs[i].buf+1, msgs[i].len-1);
+		}
+		if (i2c_debug>=2)
+			printk("\n");
+		if (rc < 0)
+			goto err;
+	}
+
+	return num;
+err:
+	i2c_dprintk(2," ERROR: %i\n", rc);
+	return rc;
+}
+
+
+/* Read len bytes, one per transfer, from the eeprom at i2c address 0xa0 into
+ * eedata, logging a hex/ASCII dump.  Returns 0, or -1 on a failed read. */
+static int tm6000_i2c_eeprom( struct tm6000_core *dev,
+			      unsigned char *eedata, int len )
+{
+	int i, rc;
+	unsigned char *p = eedata;
+	unsigned char bytes[17];	/* 16 dump characters + NUL */
+
+	dev->i2c_client.addr = 0xa0 >> 1;
+
+//006779:  OUT: 000006 ms 089867 ms c0 0e a0 00 00 00 01 00 <<<  00
+//006780:  OUT: 000005 ms 089873 ms c0 10 a0 00 00 00 01 00 <<<  00
+//006781:  OUT: 000108 ms 089878 ms 40 0e a0 00 00 00 01 00 >>>  99
+//006782:  OUT: 000015 ms 089986 ms c0 0e a0 00 01 00 01 00 <<<  99
+//006783:  OUT: 000004 ms 090001 ms c0 0e a0 00 10 00 01 00 <<<  99
+//006784:  OUT: 000005 ms 090005 ms 40 10 a0 00 00 00 01 00 >>>  00
+//006785:  OUT: 000308 ms 090010 ms 40 0e a0 00 00 00 01 00 >>>  00
+
+	for (i = 0; i < len; i++) {
+		rc = i2c_master_recv(&dev->i2c_client, p, 1);
+		if (rc<1) {
+			if (p==eedata) {
+				printk (KERN_WARNING "%s doesn't have eeprom\n",
+					dev->name);
+			} else {
+				printk(KERN_WARNING
+				"%s: i2c eeprom read error (err=%d)\n",
+				dev->name, rc);
+			}
+			return -1;
+		}
+		p++;
+		if (0 == (i % 16))
+			printk(KERN_INFO "%s: i2c eeprom %02x:", dev->name, i);
+		printk(" %02x", eedata[i]);
+		if ((eedata[i]>=' ')&&(eedata[i]<='z')) {
+			bytes[i%16]=eedata[i];
+		} else {
+			bytes[i%16]='.';
+		}
+		if (15 == (i % 16)) {
+			bytes[16]='\0';	/* terminate after the 16th character */
+			printk("  %s\n", bytes);
+		}
+	}
+	/* flush a partially filled last line, if any */
+	if (i%16) {
+		bytes[i%16]='\0';
+		printk("  %s\n", bytes);
+	}
+	return 0;
+}
+
+/* ----------------------------------------------------------- */
+
+/*
+ * algo_control()
+ */
+static int algo_control(struct i2c_adapter *adapter,
+			unsigned int cmd, unsigned long arg)
+{
+	return 0;
+}
+
+/*
+ * functionality()
+ */
+static u32 functionality(struct i2c_adapter *adap)
+{
+	return I2C_FUNC_SMBUS_EMUL;
+}
+
+#ifndef I2C_PEC
+static void inc_use(struct i2c_adapter *adap)
+{
+	MOD_INC_USE_COUNT;
+}
+
+static void dec_use(struct i2c_adapter *adap)
+{
+	MOD_DEC_USE_COUNT;
+}
+#endif
+
+#define mass_write(addr, reg, data...)					\
+	{ const static u8 _val[] = data;				\
+	rc=tm6000_read_write_usb(dev,USB_DIR_OUT | USB_TYPE_VENDOR,	\
+	REQ_16_SET_GET_I2CSEQ,(reg<<8)+addr, 0x00, (u8 *) _val,		\
+	ARRAY_SIZE(_val));						\
+	if (rc<0) {							\
+		printk(KERN_ERR "Error on line %d: %d\n",__LINE__,rc);	\
+		return rc;						\
+	}								\
+	msleep (10);							\
+	}
+
+int static init_zl10353 (struct tm6000_core *dev, u8 addr)
+{
+	int rc=0;
+
+	mass_write (addr, 0x89, { 0x38 });
+	mass_write (addr, 0x8a, { 0x2d });
+	mass_write (addr, 0x50, { 0xff });
+	mass_write (addr, 0x51, { 0x00 , 0x00 , 0x50 });
+	mass_write (addr, 0x54, { 0x72 , 0x49 });
+	mass_write (addr, 0x87, { 0x0e , 0x0e });
+	mass_write (addr, 0x7b, { 0x04 });
+	mass_write (addr, 0x57, { 0xb8 , 0xc2 });
+	mass_write (addr, 0x59, { 0x00 , 0x02 , 0x00 , 0x00 , 0x01 });
+	mass_write (addr, 0x59, { 0x00 , 0x00 , 0xb3 , 0xd0 , 0x01 });
+	mass_write (addr, 0x58, { 0xc0 , 0x11 , 0xc5 , 0xc2 , 0xa4 , 0x01 });
+	mass_write (addr, 0x5e, { 0x01 });
+	mass_write (addr, 0x67, { 0x1c , 0x20 });
+	mass_write (addr, 0x75, { 0x33 });
+	mass_write (addr, 0x85, { 0x10 , 0x40 });
+	mass_write (addr, 0x8c, { 0x0b , 0x00 , 0x40 , 0x00 });
+
+	return 0;
+}
+
+/* Tuner callback to provide the proper gpio changes needed for xc2028 */
+
+static int tm6000_tuner_callback(void *ptr, int command, int arg)
+{
+	int rc=0;
+	struct tm6000_core *dev = ptr;
+
+	if (dev->tuner_type!=TUNER_XC2028)
+		return 0;
+
+	switch (command) {
+	case XC2028_RESET_CLK:
+		tm6000_set_reg (dev, REQ_04_EN_DISABLE_MCU_INT,
+					0x02, arg);
+		msleep(10);
+		rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+					TM6000_GPIO_CLK, 0);
+		if (rc<0)
+			return rc;
+		msleep(10);
+		rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+					TM6000_GPIO_CLK, 1);
+		break;
+	case XC2028_TUNER_RESET:
+		/* Reset codes during load firmware */
+		switch (arg) {
+		case 0:
+			tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+						TM6000_GPIO_1, 0x00);
+			msleep(10);
+			tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+						TM6000_GPIO_1, 0x01);
+			break;
+		case 1:
+			tm6000_set_reg (dev, REQ_04_EN_DISABLE_MCU_INT,
+						0x02, 0x01);
+			msleep(10);
+			break;
+
+		case 2:
+			rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+						TM6000_GPIO_CLK, 0);
+			if (rc<0)
+				return rc;
+			msleep(10);
+			rc=tm6000_set_reg (dev, REQ_03_SET_GET_MCU_PIN,
+						TM6000_GPIO_CLK, 1);
+			break;
+		}
+	}
+	return (rc);
+}
+
+/* client_register hook: per-address setup for devices attached to this adapter */
+static int attach_inform(struct i2c_client *client)
+{
+	struct tm6000_core *dev = client->adapter->algo_data;
+	struct tuner_setup tun_setup;
+	unsigned char eedata[11] = { 0 };	/* 10 data bytes + NUL */
+
+	i2c_dprintk(1, "%s i2c attach [addr=0x%x,client=%s]\n",
+		client->driver->driver.name, client->addr, client->name);
+
+	switch (client->addr<<1) {
+	case 0x1e:
+		init_zl10353 (dev, client->addr);
+		return 0;
+	case 0xa0:
+		tm6000_i2c_eeprom(dev, eedata, sizeof(eedata)-1);
+		/* last valid index; sizeof(eedata) itself is past the array */
+		eedata[sizeof(eedata)-1]='\0';
+		printk("Board string ID = %s\n",eedata);
+		return 0;
+	case 0xb0:
+		request_module("tvaudio");
+		return 0;
+	}
+
+	/* If tuner, initialize the tuner part */
+	if (dev->tuner_addr != client->addr<<1)
+		return 0;
+
+	memset (&tun_setup, 0, sizeof(tun_setup));
+
+	tun_setup.mode_mask = T_ANALOG_TV | T_RADIO;
+	tun_setup.type = dev->tuner_type;
+	tun_setup.addr = dev->tuner_addr>>1;
+	tun_setup.tuner_callback = tm6000_tuner_callback;
+
+	client->driver->command (client,TUNER_SET_TYPE_ADDR, &tun_setup);
+
+	return 0;
+}
+
+static struct i2c_algorithm tm6000_algo = {
+	.master_xfer   = tm6000_i2c_xfer,
+	.algo_control  = algo_control,
+	.functionality = functionality,
+};
+
+static struct i2c_adapter tm6000_adap_template = {
+#ifdef I2C_PEC
+	.owner = THIS_MODULE,
+#else
+	.inc_use = inc_use,
+	.dec_use = dec_use,
+#endif
+	.class = I2C_CLASS_TV_ANALOG,
+	.name = "tm6000",
+	.id = I2C_HW_B_TM6000,
+	.algo = &tm6000_algo,
+	.client_register = attach_inform,
+};
+
+static struct i2c_client tm6000_client_template = {
+	.name = "tm6000 internal",
+};
+
+/* ----------------------------------------------------------- */
+
+/*
+ * i2c_devs
+ * incomplete list of known devices
+ */
+static char *i2c_devs[128] = {
+	[0xc2 >> 1] = "tuner (analog)",
+};
+
+/*
+ * do_i2c_scan()
+ * check i2c address range for devices
+ */
+static void do_i2c_scan(char *name, struct i2c_client *c)
+{
+	unsigned char buf;
+	int i, rc;
+
+	for (i = 0; i < 128; i++) {
+		c->addr = i;
+		rc = i2c_master_recv(c, &buf, 0);
+		if (rc < 0)
+			continue;
+		printk(KERN_INFO "%s: found i2c device @ 0x%x [%s]\n", name,
+		       i << 1, i2c_devs[i] ? i2c_devs[i] : "???");
+	}
+}
+
+/*
+ * tm6000_i2c_call_clients()
+ * send commands to all attached i2c devices
+ */
+void tm6000_i2c_call_clients(struct tm6000_core *dev, unsigned int cmd, void *arg)
+{
+	BUG_ON(NULL == dev->i2c_adap.algo_data);
+	i2c_clients_command(&dev->i2c_adap, cmd, arg);
+}
+
+/* tm6000_i2c_register() - register the i2c bus.
+ * Returns 0, or the error code reported by i2c_add_adapter(). */
+int tm6000_i2c_register(struct tm6000_core *dev)
+{
+	int rc;
+
+	dev->i2c_adap = tm6000_adap_template;
+	dev->i2c_adap.dev.parent = &dev->udev->dev;
+	snprintf(dev->i2c_adap.name, sizeof(dev->i2c_adap.name), "%s", dev->name);
+	dev->i2c_adap.algo_data = dev;
+	rc = i2c_add_adapter(&dev->i2c_adap);
+	if (rc)
+		return rc;
+
+	dev->i2c_client = tm6000_client_template;
+	dev->i2c_client.adapter = &dev->i2c_adap;
+	if (i2c_scan)
+		do_i2c_scan(dev->name, &dev->i2c_client);
+	return 0;
+}
+
+/*
+ * tm6000_i2c_unregister()
+ * unregister i2c_bus
+ */
+int tm6000_i2c_unregister(struct tm6000_core *dev)
+{
+	i2c_del_adapter(&dev->i2c_adap);
+	return 0;
+}
diff --git a/drivers/staging/tm6000/tm6000-regs.h b/drivers/staging/tm6000/tm6000-regs.h
new file mode 100644
index 000000000000..414852e69fce
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-regs.h
@@ -0,0 +1,76 @@
+/*
+   tm6000-regs.h - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+/*
+ * Define TV Master TM5600/TM6000 Request codes
+ */
+#define REQ_00_SET_IR_VALUE		0
+#define REQ_01_SET_WAKEUP_IRCODE	1
+#define REQ_02_GET_IR_CODE		2
+#define REQ_03_SET_GET_MCU_PIN		3
+#define REQ_04_EN_DISABLE_MCU_INT	4
+#define REQ_05_SET_GET_USBREG		5
+	/* Write: RegNum, Value, 0 */
+	/* Read : RegNum, Value, 1, RegStatus */
+#define REQ_06_SET_GET_USBREG_BIT	6
+#define REQ_07_SET_GET_AVREG		7
+	/* Write: RegNum, Value, 0 */
+	/* Read : RegNum, Value, 1, RegStatus */
+#define REQ_08_SET_GET_AVREG_BIT	8
+#define REQ_09_SET_GET_TUNER_FQ		9
+#define REQ_10_SET_TUNER_SYSTEM		10
+#define REQ_11_SET_EEPROM_ADDR		11
+#define REQ_12_SET_GET_EEPROMBYTE	12
+#define REQ_13_GET_EEPROM_SEQREAD	13
+#define REQ_14_SET_GET_EEPROM_PAGE	14
+#define REQ_15_SET_GET_I2CBYTE		15
+	/* Write: Subaddr, Slave Addr, value, 0 */
+	/* Read : Subaddr, Slave Addr, value, 1 */
+#define REQ_16_SET_GET_I2CSEQ		16
+	/* Subaddr, Slave Addr, 0, length */
+#define REQ_17_SET_GET_I2CFP		17
+	/* Write: Slave Addr, register, value */
+	/* Read : Slave Addr, register, 2, data */
+
+/*
+ * Define TV Master TM5600/TM6000 GPIO lines
+ */
+
+#define TM6000_GPIO_CLK		0x101
+#define TM6000_GPIO_DATA	0x100
+#define TM6000_GPIO_1		0x102
+#define TM6000_GPIO_2		0x103
+#define TM6000_GPIO_3		0x104
+#define TM6000_GPIO_4		0x300
+#define TM6000_GPIO_5		0x301
+#define TM6000_GPIO_6		0x304
+#define TM6000_GPIO_7		0x305
+
+/*
+ * Define TV Master TM5600/TM6000 URB message codes and length
+ */
+
+#define TM6000_URB_MSG_LEN 180
+enum {
+	TM6000_URB_MSG_VIDEO=1,
+	TM6000_URB_MSG_AUDIO,
+	TM6000_URB_MSG_VBI,
+	TM6000_URB_MSG_PTS,
+	TM6000_URB_MSG_ERR,
+};
diff --git a/drivers/staging/tm6000/tm6000-usb-isoc.h b/drivers/staging/tm6000/tm6000-usb-isoc.h
new file mode 100644
index 000000000000..27b103f939d9
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-usb-isoc.h
@@ -0,0 +1,41 @@
+/*
+   tm6000-usb-isoc.h - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/videodev2.h>
+
+struct usb_isoc_ctl {
+		/* max packet size of isoc transaction */
+	int				max_pkt_size;
+
+		/* number of allocated urbs */
+	int				num_bufs;
+
+		/* urb for isoc transfers */
+	struct urb			**urb;
+
+		/* transfer buffers for isoc transfer */
+	char				**transfer_buffer;
+
+		/* Last buffer command and region */
+	u8				cmd;
+	int				pos, size, pktsize;
+
+		/* Last field: ODD or EVEN? */
+	int				field;
+};
diff --git a/drivers/staging/tm6000/tm6000-video.c b/drivers/staging/tm6000/tm6000-video.c
new file mode 100644
index 000000000000..970f3a11308d
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000-video.c
@@ -0,0 +1,1559 @@
+/*
+   tm6000-video.c - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/ioport.h>
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <linux/random.h>
+#include <linux/version.h>
+#include <linux/usb.h>
+#include <linux/videodev2.h>
+#ifdef CONFIG_VIDEO_V4L1_COMPAT
+#include <linux/videodev.h>
+#endif
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/highmem.h>
+#include <linux/freezer.h>
+
+#include "tm6000-regs.h"
+#include "tm6000.h"
+
+#define BUFFER_TIMEOUT     msecs_to_jiffies(2000)  /* 2 seconds */
+
+/* Declare static vars that will be used as parameters */
+static unsigned int vid_limit = 16;	/* Video memory limit, in Mb */
+static int video_nr = -1;		/* /dev/videoN, -1 for autodetect */
+
+unsigned long tm6000_devused;
+
+/* Debug level */
+int tm6000_debug;
+
+/* supported controls */
+static struct v4l2_queryctrl tm6000_qctrl[] = {
+	{
+		.id            = V4L2_CID_BRIGHTNESS,
+		.type          = V4L2_CTRL_TYPE_INTEGER,
+		.name          = "Brightness",
+		.minimum       = 0,
+		.maximum       = 255,
+		.step          = 1,
+		.default_value = 54,
+		.flags         = 0,
+	}, {
+		.id            = V4L2_CID_CONTRAST,
+		.type          = V4L2_CTRL_TYPE_INTEGER,
+		.name          = "Contrast",
+		.minimum       = 0,
+		.maximum       = 255,
+		.step          = 0x1,
+		.default_value = 119,
+		.flags         = 0,
+	}, {
+		.id            = V4L2_CID_SATURATION,
+		.type          = V4L2_CTRL_TYPE_INTEGER,
+		.name          = "Saturation",
+		.minimum       = 0,
+		.maximum       = 255,
+		.step          = 0x1,
+		.default_value = 112,
+		.flags         = 0,
+	}, {
+		.id            = V4L2_CID_HUE,
+		.type          = V4L2_CTRL_TYPE_INTEGER,
+		.name          = "Hue",
+		.minimum       = -128,
+		.maximum       = 127,
+		.step          = 0x1,
+		.default_value = 0,		//4 ?
+		.flags         = 0,
+	}
+};
+
+static int qctl_regs[ARRAY_SIZE(tm6000_qctrl)];
+
+static struct tm6000_fmt format[] = {
+	{
+		.name     = "4:2:2, packed, YVY2",
+		.fourcc   = V4L2_PIX_FMT_YUYV,
+		.depth    = 16,
+	},{
+		.name     = "4:2:2, packed, UYVY",
+		.fourcc   = V4L2_PIX_FMT_UYVY,
+		.depth    = 16,
+	},{
+		.name     = "A/V + VBI mux packet",
+		.fourcc   = V4L2_PIX_FMT_TM6000,
+		.depth    = 16,
+	}
+};
+
+static LIST_HEAD(tm6000_corelist);
+
+/* ------------------------------------------------------------------
+	DMA and thread functions
+   ------------------------------------------------------------------*/
+
+#define norm_maxw(a) 720
+#define norm_maxh(a) 480
+
+//#define norm_minw(a) norm_maxw(a)
+#define norm_minw(a) norm_maxw(a)
+#define norm_minh(a) norm_maxh(a)
+
+/*
+ * Fetch the first buffer of the active list into *buf. Returns 0 if the
+ * list is empty, -1 if nobody waits on the buffer, 1 if it can be filled */
+static int inline get_next_buf (struct tm6000_dmaqueue *dma_q,
+					  struct tm6000_buffer    **buf)
+{
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+
+	if (list_empty(&dma_q->active)) {
+		dprintk(dev, V4L2_DEBUG_QUEUE,"No active queue to serve\n");
+		return 0;
+	}
+
+	*buf = list_entry(dma_q->active.next,
+			struct tm6000_buffer, vb.queue);
+
+	/* Nobody is waiting for this buffer: re-arm the timeout and return */
+	if (!waitqueue_active(&(*buf)->vb.done)) {
+		mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
+		return -1;
+	}
+
+	return 1;
+}
+
+/*
+ * Mark a buffer as done, unlink it from its queue and wake up its waiters
+ */
+static void inline buffer_filled (struct tm6000_core *dev,
+				  struct tm6000_buffer *buf)
+{
+	/* Log the wakeup, then flag the buffer as filled and timestamp it */
+	dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] wakeup\n",buf,buf->vb.i);
+	buf->vb.state = STATE_DONE;
+	buf->vb.field_count++;
+	do_gettimeofday(&buf->vb.ts);
+
+	list_del(&buf->vb.queue);
+	wake_up(&buf->vb.done);
+}
+
+/*
+ * Copy size bytes from in_ptr to out_ptr, logging an error on failure.
+ * The buf argument is unused. The body is wrapped in do { } while (0) so
+ * that the macro expands to exactly one statement wherever it is used. */
+#define bufcpy(buf,out_ptr,in_ptr,size) 				\
+	do {								\
+		if (__copy_to_user(out_ptr,in_ptr,size)!=0)		\
+			tm6000_err("copy_to_user failed.\n");		\
+	} while (0)
+
+/*
+ * Parse the tm5600/6000 message headers of one packet and copy video data
+ */
+static int copy_streams(u8 *data, u8 *out_p, unsigned long len,
+			struct urb *urb, struct tm6000_buffer **buf)
+{
+	struct tm6000_dmaqueue  *dma_q = urb->context;
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	u8 *ptr=data, *endp=data+len;
+	unsigned int cmd, cpysize, pktsize, size, field, block, line, pos=0;
+	unsigned long header;
+	int rc=0;
+
+	/* FIXME: this is the hardcoded window size */
+	unsigned int linesize=720*2;
+
+//static int last_line=-2;
+
+	for (ptr=data; ptr<endp;) {
+		if (!dev->isoc_ctl.cmd) {
+			/* Seek for sync */
+			for (ptr+=3;ptr<endp;ptr++) {
+				if (*ptr==0x47) {
+					ptr-=3;
+					break;
+				}
+			}
+			if (ptr>=endp)
+				return rc;
+
+			/* Get the 32-bit message header. It is assembled
+			 * bytewise, LSB first, so that exactly four bytes are
+			 * read and no alignment is assumed */
+			header = ptr[0] | (ptr[1] << 8) | (ptr[2] << 16) |
+				 ((unsigned long)ptr[3] << 24);
+			ptr+=4;
+
+			/* split the header fields */
+			size  = (((header & 0x7e)<<1) -1) *4;
+			block = (header>>7) & 0xf;
+			field = (header>>11) & 0x1;
+			line  = (header>>12) & 0x1ff;
+			cmd   = (header>>21) & 0x7;
+
+			/* FIXME: Maximum possible line is 511.
+			 * This doesn't seem to be enough for PAL standards
+			 */
+
+			/* Validates header fields */
+			if(size>TM6000_URB_MSG_LEN)
+				size=TM6000_URB_MSG_LEN;
+			if(block>=8)
+				cmd = TM6000_URB_MSG_ERR;
+
+			/* FIXME: Mounts the image as field0+field1
+			 * It should, instead, check if the user selected
+			 * interlaced or non-interlaced mode
+			 */
+			pos=((line<<1)+field)*linesize+
+						block*TM6000_URB_MSG_LEN;
+
+
+
+			/* Don't allow to write out of the buffer */
+			if (pos+TM6000_URB_MSG_LEN > (*buf)->vb.size)
+				cmd = TM6000_URB_MSG_ERR;
+
+			/* Prints debug info */
+			dprintk(dev, V4L2_DEBUG_ISOC, "size=%d, num=%d, "
+					" line=%d, field=%d\n",
+					size, block, line, field);
+
+			dev->isoc_ctl.cmd  = cmd;
+			dev->isoc_ctl.size = size;
+			dev->isoc_ctl.pos  = pos;
+			dev->isoc_ctl.pktsize = pktsize = TM6000_URB_MSG_LEN;
+		} else {
+			cmd = dev->isoc_ctl.cmd;
+			size= dev->isoc_ctl.size;
+			pos = dev->isoc_ctl.pos;
+			pktsize = dev->isoc_ctl.pktsize;
+		}
+		cpysize=(endp-ptr>size)?size:endp-ptr;
+
+		if (cpysize) {
+			/* handles each different URB message */
+			switch(cmd) {
+			case TM6000_URB_MSG_VIDEO:
+				/* Fills video buffer */
+				bufcpy(*buf,&out_p[pos],ptr,cpysize);
+			break;
+			}
+		}
+		if (cpysize<size) {
+			/* End of URB packet, but cmd processing is not
+			 * complete. Preserve the state for a next packet
+			 */
+			dev->isoc_ctl.pos = pos+cpysize;
+			dev->isoc_ctl.size= size-cpysize;
+			dev->isoc_ctl.cmd = cmd;
+			dev->isoc_ctl.pktsize = pktsize-cpysize;
+			ptr+=cpysize;
+		} else {
+			dev->isoc_ctl.cmd = 0;
+			ptr+=pktsize;
+		}
+	}
+
+	return rc;
+}
+/*
+ * Copy a raw (multiplexed) packet into the buffer, moving on when it fills
+ */
+static int copy_multiplexed(u8 *ptr, u8 *out_p, unsigned long len,
+			struct urb *urb, struct tm6000_buffer **buf)
+{
+	struct tm6000_dmaqueue  *dma_q = urb->context;
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	unsigned int pos=dev->isoc_ctl.pos,cpysize;
+	int rc=1;
+
+	while (len>0) {
+		out_p=videobuf_to_vmalloc(&((*buf)->vb)); /* *buf may change */
+		cpysize=min(len,(*buf)->vb.size-pos);
+		bufcpy(*buf,&out_p[pos],ptr,cpysize);
+		pos+=cpysize;
+		ptr+=cpysize;
+		len-=cpysize;
+		if (pos >= (*buf)->vb.size) {
+			pos=0;
+			/* Announce that a new buffer was filled */
+			buffer_filled (dev, *buf);
+			dprintk(dev, V4L2_DEBUG_QUEUE, "new buffer filled\n");
+
+			rc=get_next_buf (dma_q, buf);
+			if (rc<=0) {
+				*buf=NULL;
+				printk(KERN_ERR "tm6000: buffer underrun\n");
+				break;
+			}
+		}
+	}
+
+	dev->isoc_ctl.pos=pos;
+	return rc;
+}
+
+/*
+ * Controls the isoc copy of each urb packet
+ */
+static inline int tm6000_isoc_copy(struct urb *urb, struct tm6000_buffer **buf)
+{
+	struct tm6000_dmaqueue  *dma_q = urb->context;
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	void *outp=videobuf_to_vmalloc (&((*buf)->vb));
+	int i, len=0, rc=1;
+	int size=(*buf)->vb.size;
+	char *p;
+	unsigned long copied;
+
+	copied=0;
+
+
+	for (i = 0; i < urb->number_of_packets; i++) {
+		int status = urb->iso_frame_desc[i].status;
+		char *errmsg = "Unknown";
+
+		switch(status) {
+		case -ENOENT:
+			errmsg = "unlinked synchronuously";
+			break;
+		case -ECONNRESET:
+			errmsg = "unlinked asynchronuously";
+			break;
+		case -ENOSR:
+			errmsg = "Buffer error (overrun)";
+			break;
+		case -EPIPE:
+			errmsg = "Stalled (device not responding)";
+			break;
+		case -EOVERFLOW:
+			errmsg = "Babble (bad cable?)";
+			break;
+		case -EPROTO:
+			errmsg = "Bit-stuff error (bad cable?)";
+			break;
+		case -EILSEQ:
+			errmsg = "CRC/Timeout (could be anything)";
+			break;
+		case -ETIME:
+			errmsg = "Device does not respond";
+			break;
+		}
+		dprintk(dev, V4L2_DEBUG_QUEUE, "URB status %d [%s].\n",
+			status, errmsg);
+
+		if (status<0)
+			continue;
+
+		len=urb->iso_frame_desc[i].actual_length;
+
+		if (len>=TM6000_URB_MSG_LEN) {
+			p=urb->transfer_buffer + urb->iso_frame_desc[i].offset;
+			if (!urb->iso_frame_desc[i].status) {
+				if (((*buf)->fmt->fourcc)==V4L2_PIX_FMT_TM6000) {
+					rc=copy_multiplexed(p,outp,len,urb,buf);
+					if (rc<=0)
+						return rc;
+				} else {
+					rc=copy_streams(p,outp,len,urb,buf);
+				}
+			}
+			copied += len;
+			if (copied>=size)
+				break;
+		}
+	}
+
+	if (((*buf)->fmt->fourcc)!=V4L2_PIX_FMT_TM6000) {
+		buffer_filled (dev, *buf);
+		dprintk(dev, V4L2_DEBUG_QUEUE, "new buffer filled\n");
+	}
+
+	return rc;
+}
+
+/* ------------------------------------------------------------------
+	URB control
+   ------------------------------------------------------------------*/
+
+/*
+ * IRQ callback, called by URB callback
+ */
+static void tm6000_irq_callback(struct urb *urb)
+{
+	struct tm6000_buffer    *buf;
+	struct tm6000_dmaqueue  *dma_q = urb->context;
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	int rc,i;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->slock,flags);
+
+	rc=get_next_buf (dma_q, &buf);
+	if (rc<=0)
+		goto ret;
+
+	/* Copy data from URB */
+	rc=tm6000_isoc_copy(urb, &buf);
+
+ret:
+	/* Reset urb buffers */
+	for (i = 0; i < urb->number_of_packets; i++) {
+		urb->iso_frame_desc[i].status = 0;
+		urb->iso_frame_desc[i].actual_length = 0;
+	}
+	urb->status = 0;
+
+	if ((urb->status = usb_submit_urb(urb, GFP_ATOMIC))) {
+		tm6000_err("urb resubmit failed (error=%i)\n",
+			urb->status);
+	}
+
+	if (rc>=0) {
+		if (!rc) {
+			dprintk(dev, V4L2_DEBUG_QUEUE, "No active queue to serve\n");
+			del_timer(&dma_q->timeout);
+		} else {
+			/* Data filled, reset watchdog */
+			mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
+		}
+	}
+	spin_unlock_irqrestore(&dev->slock,flags);
+}
+
+/*
+ * Stop and Deallocate URBs
+ */
+static void tm6000_uninit_isoc(struct tm6000_core *dev)
+{
+	struct urb *urb;
+	int i;
+
+	for (i = 0; i < dev->isoc_ctl.num_bufs; i++) {
+		urb=dev->isoc_ctl.urb[i];
+		if (urb) {
+			usb_kill_urb(urb);
+			usb_unlink_urb(urb);
+			if (dev->isoc_ctl.transfer_buffer[i]) {
+				usb_buffer_free(dev->udev,
+						urb->transfer_buffer_length,
+						dev->isoc_ctl.transfer_buffer[i],
+						urb->transfer_dma);
+			}
+			usb_free_urb(urb);
+			dev->isoc_ctl.urb[i] = NULL;
+		}
+		dev->isoc_ctl.transfer_buffer[i] = NULL;
+	}
+
+	kfree (dev->isoc_ctl.urb);
+	kfree (dev->isoc_ctl.transfer_buffer);
+	dev->isoc_ctl.urb=NULL;
+	dev->isoc_ctl.transfer_buffer=NULL;
+
+	dev->isoc_ctl.num_bufs=0;
+}
+
+/*
+ * Stop video thread - FIXME: Can be easily removed
+ */
+static void tm6000_stop_thread(struct tm6000_dmaqueue  *dma_q)
+{
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+
+	tm6000_uninit_isoc(dev);
+}
+
+
+/*
+ * Allocate the isoc URBs and their transfer buffers (nothing is submitted)
+ */
+static int tm6000_prepare_isoc(struct tm6000_core *dev,
+		     int max_packets, int num_bufs)
+{
+	struct tm6000_dmaqueue *dma_q = &dev->vidq;
+	int i;
+	int sb_size, pipe;
+	struct urb *urb;
+	int j, k;
+
+	/* De-allocates all pending stuff */
+	tm6000_uninit_isoc(dev);
+
+	/* Zeroed arrays: cleanup may then safely skip the unfilled slots */
+	dev->isoc_ctl.urb=kcalloc(num_bufs, sizeof(void *), GFP_KERNEL);
+	if (!dev->isoc_ctl.urb) {
+		tm6000_err("cannot alloc memory for usb buffers\n");
+		return -ENOMEM;
+	}
+
+	dev->isoc_ctl.transfer_buffer=kcalloc(num_bufs, sizeof(void *),
+				   GFP_KERNEL);
+	if (!dev->isoc_ctl.transfer_buffer) {
+		tm6000_err("cannot allocate memory for usbtransfer\n");
+		kfree(dev->isoc_ctl.urb);
+		dev->isoc_ctl.urb=NULL;
+		return -ENOMEM;
+	}
+
+	/* Both arrays exist: only now publish the buffer count */
+	dev->isoc_ctl.num_bufs=num_bufs;
+	dev->isoc_ctl.max_pkt_size=dev->max_isoc_in;
+
+	sb_size = max_packets * dev->isoc_ctl.max_pkt_size;
+
+	/* allocate urbs and transfer buffers */
+	for (i = 0; i < dev->isoc_ctl.num_bufs; i++) {
+		urb = usb_alloc_urb(max_packets, GFP_KERNEL);
+		if (!urb) {
+			tm6000_err("cannot alloc isoc_ctl.urb %i\n", i);
+			tm6000_uninit_isoc(dev);
+			return -ENOMEM;
+		}
+		dev->isoc_ctl.urb[i] = urb;
+
+		dev->isoc_ctl.transfer_buffer[i] = usb_buffer_alloc(dev->udev,
+			sb_size, GFP_KERNEL,
+			&dev->isoc_ctl.urb[i]->transfer_dma);
+		if (!dev->isoc_ctl.transfer_buffer[i]) {
+			tm6000_err ("unable to allocate %i bytes for transfer"
+					" buffer %i\n", sb_size, i);
+			tm6000_uninit_isoc(dev);
+			return -ENOMEM;
+		}
+		memset(dev->isoc_ctl.transfer_buffer[i], 0, sb_size);
+
+		pipe=usb_rcvisocpipe(dev->udev,
+					dev->isoc_in->desc.bEndpointAddress &
+					USB_ENDPOINT_NUMBER_MASK);
+		usb_fill_int_urb(urb, dev->udev, pipe,
+					dev->isoc_ctl.transfer_buffer[i],sb_size,
+					tm6000_irq_callback, dma_q,
+					dev->isoc_in->desc.bInterval);
+
+		urb->number_of_packets = max_packets;
+		urb->transfer_flags = URB_ISO_ASAP;
+
+		k = 0;
+		for (j = 0; j < max_packets; j++) {
+			urb->iso_frame_desc[j].offset = k;
+			urb->iso_frame_desc[j].length =
+						dev->isoc_ctl.max_pkt_size;
+			k += dev->isoc_ctl.max_pkt_size;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * Submit all prepared isoc URBs; returns 0 or the usb_submit_urb() error
+ */
+static int tm6000_start_thread( struct tm6000_dmaqueue  *dma_q,
+				struct tm6000_buffer *buf)
+{
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	int i,rc;
+
+	dma_q->frame=0;
+	dma_q->ini_jiffies=jiffies;
+
+	init_waitqueue_head(&dma_q->wq);
+
+	/* submit urbs and enables IRQ */
+	for (i = 0; i < dev->isoc_ctl.num_bufs; i++) {
+		rc = usb_submit_urb(dev->isoc_ctl.urb[i], GFP_KERNEL);
+		if (rc) {
+			tm6000_err("submit of urb %i failed (error=%i)\n", i,
+				   rc);
+			tm6000_uninit_isoc(dev);
+			return rc;
+		}
+	}
+
+	return 0;
+}
+
+/* Restart the capture DMA; fail the active buffers or promote queued ones */
+static int restart_video_queue(struct tm6000_dmaqueue *dma_q)
+{
+	struct tm6000_core *dev= container_of(dma_q,struct tm6000_core,vidq);
+	struct tm6000_buffer *buf, *prev;
+	struct list_head *item, *next;
+
+	dprintk(dev, V4L2_DEBUG_QUEUE, "%s dma_q=0x%08lx\n",
+					 __FUNCTION__,(unsigned long)dma_q);
+
+	if (!list_empty(&dma_q->active)) {
+		buf = list_entry(dma_q->active.next, struct tm6000_buffer, vb.queue);
+		dprintk(dev, V4L2_DEBUG_QUEUE,
+			"restart_queue [%p/%d]: restart dma\n", buf, buf->vb.i);
+
+		dprintk(dev, V4L2_DEBUG_QUEUE, "Restarting video dma\n");
+		tm6000_stop_thread(dma_q);
+		tm6000_start_thread(dma_q, buf);
+
+		/* cancel all outstanding requests; entries go away as we walk */
+		list_for_each_safe(item, next, &dma_q->active) {
+			buf = list_entry(item, struct tm6000_buffer, vb.queue);
+
+			list_del(&buf->vb.queue);
+			buf->vb.state = STATE_ERROR;
+			wake_up(&buf->vb.done);
+		}
+		mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
+
+		return 0;
+	}
+
+	prev = NULL;
+	for (;;) {
+		if (list_empty(&dma_q->queued))
+			return 0;
+		buf = list_entry(dma_q->queued.next, struct tm6000_buffer, vb.queue);
+		if (NULL == prev) {
+			list_del(&buf->vb.queue);
+			list_add_tail(&buf->vb.queue,&dma_q->active);
+
+			dprintk(dev, V4L2_DEBUG_QUEUE, "Restarting video dma\n");
+			tm6000_stop_thread(dma_q);
+			tm6000_start_thread(dma_q, buf);
+
+			buf->vb.state = STATE_ACTIVE;
+			mod_timer(&dma_q->timeout, jiffies+BUFFER_TIMEOUT);
+			dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] restart_queue -"
+					" first active\n", buf, buf->vb.i);
+
+		} else if (prev->vb.width  == buf->vb.width  &&
+			   prev->vb.height == buf->vb.height &&
+			   prev->fmt       == buf->fmt) {
+			list_del(&buf->vb.queue);
+			list_add_tail(&buf->vb.queue,&dma_q->active);
+			buf->vb.state = STATE_ACTIVE;
+			dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] restart_queue -"
+					" move to active\n",buf,buf->vb.i);
+		} else {
+			return 0;
+		}
+		prev = buf;
+	}
+}
+
+static void tm6000_vid_timeout(unsigned long data)
+{
+	struct tm6000_core      *dev  = (struct tm6000_core*)data;
+	struct tm6000_dmaqueue *vidq = &dev->vidq;
+	struct tm6000_buffer   *buf;
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev->slock,flags);
+	while (!list_empty(&vidq->active)) {
+		buf = list_entry(vidq->active.next, struct tm6000_buffer,
+								 vb.queue);
+		list_del(&buf->vb.queue);
+		buf->vb.state = STATE_ERROR;
+		wake_up(&buf->vb.done);
+		dprintk(dev, V4L2_DEBUG_QUEUE, "tm6000/0: [%p/%d] timeout\n",
+							 buf, buf->vb.i);
+	}
+
+	restart_video_queue(vidq);
+	spin_unlock_irqrestore(&dev->slock,flags);
+}
+
+/* ------------------------------------------------------------------
+	Videobuf operations
+   ------------------------------------------------------------------*/
+static int
+buffer_setup(struct videobuf_queue *vq, unsigned int *count, unsigned int *size)
+{
+	struct tm6000_fh *fh = vq->priv_data;
+
+	*size = fh->fmt->depth * fh->width * fh->height >> 3;
+	if (0 == *count)
+		*count = 32;
+	while (*size * *count > vid_limit * 1024 * 1024)
+		(*count)--;
+	return 0;
+}
+
+static void free_buffer(struct videobuf_queue *vq, struct tm6000_buffer *buf)
+{
+	if (in_interrupt())
+		BUG();
+
+	videobuf_waiton(&buf->vb,0,0);
+	videobuf_vmalloc_free(&buf->vb);
+	buf->vb.state = STATE_NEEDS_INIT;
+}
+
+static int
+buffer_prepare(struct videobuf_queue *vq, struct videobuf_buffer *vb,
+						enum v4l2_field field)
+{
+	struct tm6000_fh     *fh  = vq->priv_data;
+	struct tm6000_buffer *buf = container_of(vb,struct tm6000_buffer,vb);
+	struct tm6000_core   *dev = fh->dev;
+	int rc=0, urbsize, urb_init=0;
+
+	BUG_ON(NULL == fh->fmt);
+
+	if (fh->width  < norm_minw(core) || fh->width  > norm_maxw(core) ||
+	    fh->height < norm_minh(core) || fh->height > norm_maxh(core)) {
+		dprintk(dev, V4L2_DEBUG_QUEUE, "Window size (%dx%d) is out of "
+				"supported range\n", fh->width, fh->height);
+		dprintk(dev, V4L2_DEBUG_QUEUE, "Valid range is from (%dx%d) to "
+				"(%dx%d)\n", norm_minw(core), norm_minh(core),
+				norm_maxw(core),norm_maxh(core));
+		return -EINVAL;
+	}
+
+	/* FIXME: It assumes depth=2 */
+	/* The only currently supported format is 16 bits/pixel */
+	buf->vb.size = fh->fmt->depth*fh->width*fh->height >> 3;
+	if (0 != buf->vb.baddr  &&  buf->vb.bsize < buf->vb.size)
+		return -EINVAL;
+
+	if (buf->fmt       != fh->fmt    ||
+	    buf->vb.width  != fh->width  ||
+	    buf->vb.height != fh->height ||
+	    buf->vb.field  != field) {
+		buf->fmt       = fh->fmt;
+		buf->vb.width  = fh->width;
+		buf->vb.height = fh->height;
+		buf->vb.field  = field;
+		buf->vb.state = STATE_NEEDS_INIT;
+	}
+
+	if (STATE_NEEDS_INIT == buf->vb.state) {
+		if (0 != (rc = videobuf_iolock(vq,&buf->vb,NULL)))
+			goto fail;
+		urb_init=1;
+	}
+
+
+	if (!dev->isoc_ctl.num_bufs)
+		urb_init=1;
+
+	if (urb_init) {
+		/* Should allocate/request at least h
+		   res x v res x 2 bytes/pixel */
+		urbsize=(buf->vb.size+dev->max_isoc_in-1)/dev->max_isoc_in;
+
+		 /* Hack to allocate memory for Video + Audio */
+		/* FIXME: should also consider header ovehead of
+		   4 bytes/180 bytes */
+		urbsize+=((48000*4+24)/25+dev->max_isoc_in-1)/dev->max_isoc_in;
+
+		dprintk(dev, V4L2_DEBUG_QUEUE, "Allocating %d packets to handle "
+					"%lu size\n", urbsize,buf->vb.size);
+		rc = tm6000_prepare_isoc(dev, urbsize, 2);
+
+		if (rc<0)
+			goto fail;
+	}
+
+	buf->vb.state = STATE_PREPARED;
+	return 0;
+
+fail:
+	free_buffer(vq,buf);
+	return rc;
+}
+
+static void
+buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb)
+{
+	struct tm6000_buffer    *buf     = container_of(vb,struct tm6000_buffer,vb);
+	struct tm6000_fh        *fh      = vq->priv_data;
+	struct tm6000_core      *dev     = fh->dev;
+	struct tm6000_dmaqueue  *vidq    = &dev->vidq;
+	struct tm6000_buffer    *prev;
+
+	if (!list_empty(&vidq->queued)) {
+		list_add_tail(&buf->vb.queue,&vidq->queued);
+		buf->vb.state = STATE_QUEUED;
+		dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] buffer_queue - "
+					"append to queued\n", buf, buf->vb.i);
+	} else if (list_empty(&vidq->active)) {
+		list_add_tail(&buf->vb.queue,&vidq->active);
+		buf->vb.state = STATE_ACTIVE;
+		mod_timer(&vidq->timeout, jiffies+BUFFER_TIMEOUT);
+		dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] buffer_queue - "
+					"first active\n", buf, buf->vb.i);
+		tm6000_start_thread(vidq, buf);
+	} else {
+		prev = list_entry(vidq->active.prev, struct tm6000_buffer, vb.queue);
+		if (prev->vb.width  == buf->vb.width  &&
+		    prev->vb.height == buf->vb.height &&
+		    prev->fmt       == buf->fmt) {
+			list_add_tail(&buf->vb.queue,&vidq->active);
+			buf->vb.state = STATE_ACTIVE;
+			dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] buffer_queue -"
+					" append to active\n", buf, buf->vb.i);
+		} else {
+			list_add_tail(&buf->vb.queue,&vidq->queued);
+			buf->vb.state = STATE_QUEUED;
+			dprintk(dev, V4L2_DEBUG_QUEUE, "[%p/%d] buffer_queue -"
+					" first queued\n", buf, buf->vb.i);
+		}
+	}
+}
+
+static void buffer_release(struct videobuf_queue *vq, struct videobuf_buffer *vb)
+{
+	struct tm6000_buffer   *buf  = container_of(vb,struct tm6000_buffer,vb);
+	struct tm6000_fh       *fh   = vq->priv_data;
+	struct tm6000_core      *dev  = (struct tm6000_core*)fh->dev;
+	struct tm6000_dmaqueue *vidq = &dev->vidq;
+
+	tm6000_stop_thread(vidq);
+
+	free_buffer(vq,buf);
+}
+
+static struct videobuf_queue_ops tm6000_video_qops = {
+	.buf_setup      = buffer_setup,
+	.buf_prepare    = buffer_prepare,
+	.buf_queue      = buffer_queue,
+	.buf_release    = buffer_release,
+};
+
+/* ------------------------------------------------------------------
+	IOCTL handling
+   ------------------------------------------------------------------*/
+
+static int res_get(struct tm6000_core *dev, struct tm6000_fh *fh)
+{
+	/* is it free? */
+	mutex_lock(&dev->lock);
+	if (dev->resources) {
+		/* no, someone else uses it */
+		mutex_unlock(&dev->lock);
+		return 0;
+	}
+	/* it's free, grab it */
+	dev->resources =1;
+	dprintk(dev, V4L2_DEBUG_RES_LOCK, "res: get\n");
+	mutex_unlock(&dev->lock);
+	return 1;
+}
+
+static int res_locked(struct tm6000_core *dev)
+{
+	return (dev->resources);
+}
+
+static void res_free(struct tm6000_core *dev, struct tm6000_fh *fh)
+{
+	mutex_lock(&dev->lock);
+	dev->resources = 0;
+	dprintk(dev, V4L2_DEBUG_RES_LOCK, "res: put\n");
+	mutex_unlock(&dev->lock);
+}
+
+/* ------------------------------------------------------------------
+	IOCTL vidioc handling
+   ------------------------------------------------------------------*/
+static int vidioc_querycap (struct file *file, void  *priv,
+					struct v4l2_capability *cap)
+{
+	//	struct tm6000_core *dev = ((struct tm6000_fh *)priv)->dev;
+
+	strlcpy(cap->driver, "tm6000", sizeof(cap->driver));
+	strlcpy(cap->card,"Trident TVMaster TM5600/6000", sizeof(cap->card));
+	//	strlcpy(cap->bus_info, dev->udev->dev.bus_id, sizeof(cap->bus_info));
+	cap->version = TM6000_VERSION;
+	cap->capabilities =	V4L2_CAP_VIDEO_CAPTURE |
+				V4L2_CAP_STREAMING     |
+				V4L2_CAP_TUNER	       |
+				V4L2_CAP_READWRITE;
+	return 0;
+}
+
+static int vidioc_enum_fmt_cap (struct file *file, void  *priv,
+					struct v4l2_fmtdesc *f)
+{
+	if (unlikely(f->index >= ARRAY_SIZE(format)))
+		return -EINVAL;
+
+	strlcpy(f->description,format[f->index].name,sizeof(f->description));
+	f->pixelformat = format[f->index].fourcc;
+	return 0;
+}
+
+static int vidioc_g_fmt_cap (struct file *file, void *priv,
+					struct v4l2_format *f)
+{
+	struct tm6000_fh  *fh=priv;
+
+	f->fmt.pix.width        = fh->width;
+	f->fmt.pix.height       = fh->height;
+	f->fmt.pix.field        = fh->vb_vidq.field;
+	f->fmt.pix.pixelformat  = fh->fmt->fourcc;
+	f->fmt.pix.bytesperline =
+		(f->fmt.pix.width * fh->fmt->depth) >> 3;
+	f->fmt.pix.sizeimage =
+		f->fmt.pix.height * f->fmt.pix.bytesperline;
+
+	return (0);
+}
+
+static struct tm6000_fmt* format_by_fourcc(unsigned int fourcc)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(format); i++)
+		if (format[i].fourcc == fourcc)
+			return format+i;
+	return NULL;
+}
+
+static int vidioc_try_fmt_cap (struct file *file, void *priv,
+			struct v4l2_format *f)
+{
+	struct tm6000_core *dev = ((struct tm6000_fh *)priv)->dev;
+	struct tm6000_fmt *fmt;
+	enum v4l2_field field;
+
+	fmt = format_by_fourcc(f->fmt.pix.pixelformat);
+	if (NULL == fmt) {
+		dprintk(dev, V4L2_DEBUG_IOCTL_ARG, "Fourcc format (0x%08x)"
+				" invalid.\n", f->fmt.pix.pixelformat);
+		return -EINVAL;
+	}
+
+	field = f->fmt.pix.field;
+
+	if (field == V4L2_FIELD_ANY) {
+//		field=V4L2_FIELD_INTERLACED;
+		field=V4L2_FIELD_SEQ_TB;
+	} else if (V4L2_FIELD_INTERLACED != field) {
+		dprintk(dev, V4L2_DEBUG_IOCTL_ARG, "Field type invalid.\n");
+		return -EINVAL;
+	}
+
+	if (f->fmt.pix.width  < norm_minw(core))
+		f->fmt.pix.width = norm_minw(core);
+
+	if (f->fmt.pix.width  > norm_maxw(core))
+		f->fmt.pix.width = norm_maxw(core);
+
+	if (f->fmt.pix.height < norm_minh(core))
+		f->fmt.pix.height = norm_minh(core);
+
+	if (f->fmt.pix.height > norm_maxh(core))
+		f->fmt.pix.height = norm_maxh(core);
+
+	f->fmt.pix.width &= ~0x01;
+
+	f->fmt.pix.field = field;
+
+	f->fmt.pix.bytesperline =
+		(f->fmt.pix.width * fmt->depth) >> 3;
+	f->fmt.pix.sizeimage =
+		f->fmt.pix.height * f->fmt.pix.bytesperline;
+
+	return 0;
+}
+
+/*FIXME: This seems to be generic enough to be at videodev2 */
+static int vidioc_s_fmt_cap (struct file *file, void *priv,
+					struct v4l2_format *f)
+{
+	struct tm6000_fh  *fh=priv;
+	struct tm6000_core *dev = fh->dev;
+	int ret = vidioc_try_fmt_cap(file,fh,f);
+	if (ret < 0)
+		return (ret);
+
+	fh->fmt           = format_by_fourcc(f->fmt.pix.pixelformat);
+	fh->width         = f->fmt.pix.width;
+	fh->height        = f->fmt.pix.height;
+	fh->vb_vidq.field = f->fmt.pix.field;
+	fh->type          = f->type;
+
+	dev->fourcc       = f->fmt.pix.pixelformat;
+
+	tm6000_set_fourcc_format(dev);
+
+	return (0);
+}
+
+static int vidioc_reqbufs (struct file *file, void *priv,
+			   struct v4l2_requestbuffers *p)
+{
+	struct tm6000_fh  *fh=priv;
+
+	return (videobuf_reqbufs(&fh->vb_vidq, p));
+}
+
+static int vidioc_querybuf (struct file *file, void *priv,
+			    struct v4l2_buffer *p)
+{
+	struct tm6000_fh  *fh=priv;
+
+	return (videobuf_querybuf(&fh->vb_vidq, p));
+}
+
+/* qbuf handler: forwards *p to videobuf_qbuf() on this handle's queue */
+static int vidioc_qbuf (struct file *file, void *priv, struct v4l2_buffer *p)
+{
+	struct tm6000_fh *handle = priv;
+	return videobuf_qbuf(&handle->vb_vidq, p);
+}
+
+/* dqbuf handler: forwards *p to videobuf_dqbuf() on this handle's queue */
+static int vidioc_dqbuf (struct file *file, void *priv, struct v4l2_buffer *p)
+{
+	struct tm6000_fh *handle = priv;
+	int nonblocking = file->f_flags & O_NONBLOCK;	/* non-zero iff O_NONBLOCK is set */
+	return videobuf_dqbuf(&handle->vb_vidq, p, nonblocking);
+}
+
+#ifdef CONFIG_VIDEO_V4L1_COMPAT
+/* Hands *mbuf to videobuf_cgmbuf() with a fixed third argument of 8 */
+static int vidiocgmbuf (struct file *file, void *priv, struct video_mbuf *mbuf)
+{
+	struct tm6000_fh *handle = priv;
+	return videobuf_cgmbuf(&handle->vb_vidq, mbuf, 8);
+}
+#endif
+
+/* streamon handler: -EINVAL on a type mismatch, -EBUSY if res_get() fails */
+static int vidioc_streamon(struct file *file, void *priv, enum v4l2_buf_type i)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+
+	/* The handle must be a video-capture one and the caller must ask for that type */
+	if (handle->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || i != handle->type)
+		return -EINVAL;
+	/* A zero result from res_get() is reported to the caller as busy */
+	if (!res_get(core, handle))
+		return -EBUSY;
+	return videobuf_streamon(&handle->vb_vidq);
+}
+
+/* streamoff handler: stops the handle's queue, then calls res_free(); 0 or -EINVAL */
+static int vidioc_streamoff(struct file *file, void *priv, enum v4l2_buf_type i)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+
+	/* The handle must be a video-capture one and the caller must ask for that type */
+	if (handle->type != V4L2_BUF_TYPE_VIDEO_CAPTURE || i != handle->type)
+		return -EINVAL;
+
+	/* The result of videobuf_streamoff() is not checked */
+	videobuf_streamoff(&handle->vb_vidq);
+	res_free(core, handle);
+	return 0;
+}
+
+/* s_std handler: applies *norm via tm6000_set_standard(), then notifies the i2c clients */
+static int vidioc_s_std (struct file *file, void *priv, v4l2_std_id *norm)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+	int err = tm6000_set_standard(core, norm);
+
+	if (err < 0)
+		return err;
+
+	/* The clients are handed core->norm rather than the caller's *norm */
+	tm6000_i2c_call_clients(core, VIDIOC_S_STD, &core->norm);
+	return 0;
+}
+
+/* enum_input handler: fills in type, name and std for input inp->index.
+ * Returns 0, or -EINVAL when the index is none of the TM6000_INPUT_* values. */
+static int vidioc_enum_input (struct file *file, void *priv,
+				struct v4l2_input *inp)
+{
+	/* The tuner input and the two camera-type inputs differ only in type and name */
+	if (inp->index == TM6000_INPUT_TV) {
+		inp->type = V4L2_INPUT_TYPE_TUNER;
+		strcpy(inp->name, "Television");
+	} else if (inp->index == TM6000_INPUT_COMPOSITE) {
+		inp->type = V4L2_INPUT_TYPE_CAMERA;
+		strcpy(inp->name, "Composite");
+	} else if (inp->index == TM6000_INPUT_SVIDEO) {
+		inp->type = V4L2_INPUT_TYPE_CAMERA;
+		strcpy(inp->name, "S-Video");
+	} else {
+		return -EINVAL;	/* unknown index: *inp is left unmodified */
+	}
+
+	/* Every input advertises the same set of standards */
+	inp->std = TM6000_STD;
+	return 0;
+}
+
+/* g_input handler: reports the input number stored in the device; always returns 0 */
+static int vidioc_g_input (struct file *file, void *priv, unsigned int *i)
+{
+	struct tm6000_fh *handle = priv;
+	const struct tm6000_core *core = handle->dev;
+
+	*i = core->input;
+	return 0;
+}
+/* s_input handler: selects input i on the hardware, then re-applies the current norm.
+ * Returns -EINVAL for an unknown input, else the result of the USB write or of s_std. */
+static int vidioc_s_input (struct file *file, void *priv, unsigned int i)
+{
+	struct tm6000_fh   *fh=priv;
+	struct tm6000_core *dev = fh->dev;
+	int rc;
+	u8 buf[1];	/* u8 to match the buffer parameter of tm6000_read_write_usb() */
+
+	switch (i) {
+	case TM6000_INPUT_TV:
+		buf[0] = 0;
+		break;
+	/* Composite and S-Video send the same byte */
+	case TM6000_INPUT_COMPOSITE:
+	case TM6000_INPUT_SVIDEO:
+		buf[0] = 1;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	rc=tm6000_read_write_usb (dev, USB_DIR_OUT | USB_TYPE_VENDOR,
+			       REQ_03_SET_GET_MCU_PIN, 0x03, 1, buf, 1);
+	if (rc)
+		return rc;	/* write failed: dev->input keeps its previous value */
+
+	dev->input=i;		/* cache the input only once the hardware accepted it */
+	return vidioc_s_std (file, priv, &dev->vfd.current_norm);
+}
+
+	/* --- controls ---------------------------------------------- */
+/* queryctrl handler: copies the tm6000_qctrl[] entry whose id equals qc->id into *qc */
+static int vidioc_queryctrl (struct file *file, void *priv,
+				struct v4l2_queryctrl *qc)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(tm6000_qctrl); idx++) {
+		if (!qc->id || qc->id != tm6000_qctrl[idx].id)
+			continue;	/* an id of 0 never matches any entry */
+		memcpy(qc, &tm6000_qctrl[idx], sizeof(*qc));
+		return 0;
+	}
+	return -EINVAL;
+}
+
+/* g_ctrl handler: reads the AV register backing ctrl->id into ctrl->value.
+ * Returns 0, -EINVAL for an unsupported id, or the negative tm6000_get_reg() result. */
+static int vidioc_g_ctrl (struct file *file, void *priv,
+				struct v4l2_control *ctrl)
+{
+	struct tm6000_fh  *fh=priv;
+	struct tm6000_core *dev    = fh->dev;
+	int  val;
+
+	/* FIXME: Probably, those won't work! Maybe we need shadow regs */
+	switch (ctrl->id) {	/* every case must break so that val reaches ctrl->value */
+	case V4L2_CID_CONTRAST:
+		val=tm6000_get_reg (dev, REQ_07_SET_GET_AVREG, 0x08, 0);
+		break;
+	case V4L2_CID_BRIGHTNESS:
+		val=tm6000_get_reg (dev, REQ_07_SET_GET_AVREG, 0x09, 0);
+		break;
+	case V4L2_CID_SATURATION:
+		val=tm6000_get_reg (dev, REQ_07_SET_GET_AVREG, 0x0a, 0);
+		break;
+	case V4L2_CID_HUE:
+		val=tm6000_get_reg (dev, REQ_07_SET_GET_AVREG, 0x0b, 0);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (val<0)	/* negative means the register read failed */
+		return val;
+	ctrl->value=val;
+	return 0;
+}
+/* s_ctrl handler: writes ctrl->value, truncated to 8 bits, to the AV register of ctrl->id.
+ * Returns 0, -EINVAL for an unsupported id, or the negative tm6000_set_reg() result. */
+static int vidioc_s_ctrl (struct file *file, void *priv,
+				struct v4l2_control *ctrl)
+{
+	struct tm6000_fh   *fh  =priv;
+	struct tm6000_core *dev = fh->dev;
+	u8  val=ctrl->value;
+	u16 reg;	/* AV register number selected by ctrl->id */
+	int rc;
+
+	switch (ctrl->id) {
+	case V4L2_CID_CONTRAST:   reg = 0x08; break;
+	case V4L2_CID_BRIGHTNESS: reg = 0x09; break;
+	case V4L2_CID_SATURATION: reg = 0x0a; break;
+	case V4L2_CID_HUE:        reg = 0x0b; break;
+	default:
+		return -EINVAL;
+	}
+
+	rc = tm6000_set_reg (dev, REQ_07_SET_GET_AVREG, reg, val);
+	return rc < 0 ? rc : 0;	/* surface a failed write instead of reporting success */
+}
+
+/* g_tuner handler: describes tuner 0; -EINVAL if no tuner type is set or t->index != 0 */
+static int vidioc_g_tuner (struct file *file, void *priv,
+				struct v4l2_tuner *t)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+
+	if (unlikely(core->tuner_type == UNSET) || t->index != 0)
+		return -EINVAL;
+
+	/* Only the fields below are written; the rest of *t stays as passed in */
+	strcpy(t->name, "Television");
+	t->type       = V4L2_TUNER_ANALOG_TV;
+	t->capability = V4L2_TUNER_CAP_NORM;
+	t->rangehigh  = 0xffffffffUL;
+	t->rxsubchans = V4L2_TUNER_SUB_MONO;
+
+	return 0;
+}
+
+/* s_tuner handler: validates the request only; nothing is stored or sent to the device */
+static int vidioc_s_tuner (struct file *file, void *priv,
+				struct v4l2_tuner *t)
+{
+	struct tm6000_fh *handle = priv;
+	const struct tm6000_core *core = handle->dev;
+
+	/* Accept only tuner index 0, and only once a tuner type has been set */
+	if (core->tuner_type == UNSET || t->index != 0)
+		return -EINVAL;
+
+	return 0;
+}
+
+/* g_frequency handler: reports the cached frequency, then passes *f to the i2c clients */
+static int vidioc_g_frequency (struct file *file, void *priv,
+				struct v4l2_frequency *f)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+
+	if (unlikely(core->tuner_type == UNSET))
+		return -EINVAL;
+
+	/* Seed *f from the core before the clients get to see it */
+	f->type = V4L2_TUNER_ANALOG_TV;
+	f->frequency = core->freq;
+	tm6000_i2c_call_clients(core, VIDIOC_G_FREQUENCY, f);
+	return 0;
+}
+
+/* s_frequency handler: caches f->frequency in the core and passes *f to the i2c clients.
+ * Returns 0, or -EINVAL when the request is not for analog TV on tuner 0. */
+static int vidioc_s_frequency (struct file *file, void *priv,
+				struct v4l2_frequency *f)
+{
+	struct tm6000_fh *handle = priv;
+	struct tm6000_core *core = handle->dev;
+
+	/* All three rejections are side-effect free, so they can share one test */
+	if (unlikely(f->type != V4L2_TUNER_ANALOG_TV) ||
+	    unlikely(core->tuner_type == UNSET) ||
+	    unlikely(f->tuner != 0))
+		return -EINVAL;
+
+	/* NOTE(review): core->lock is not taken around this update; confirm
+	 * whether concurrent callers need serialising here. */
+	core->freq = f->frequency;
+	tm6000_i2c_call_clients(core, VIDIOC_S_FREQUENCY, f);
+
+	return 0;
+}
+
+/* ------------------------------------------------------------------
+	File operations for the device
+   ------------------------------------------------------------------*/
+
+/* open(): finds the device for this minor, allocates the per-file handle and puts the
+ * hardware in analog mode if needed. Returns 0, -ENODEV, -ENOMEM or the init error. */
+static int tm6000_open(struct inode *inode, struct file *file)
+{
+	int minor = iminor(inode);
+	struct tm6000_core *h,*dev = NULL;
+	struct tm6000_fh *fh;
+	struct list_head *list;
+	enum v4l2_buf_type type = 0;
+	int i,rc;
+
+	list_for_each(list,&tm6000_corelist) {
+		h = list_entry(list, struct tm6000_core, tm6000_corelist);
+		if (h->vfd.minor == minor) {
+			dev  = h;
+			type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+		}
+	}
+	if (NULL == dev)
+		return -ENODEV;
+
+	/* dprintk() reads dev->name, so only log once the lookup has succeeded */
+	dprintk(dev, V4L2_DEBUG_OPEN, "tm6000: open called "
+						"(minor=%d)\n",minor);
+
+	/* If more than one user, mutex should be added */
+	dev->users++;
+	dprintk(dev, V4L2_DEBUG_OPEN, "open minor=%d type=%s users=%d\n",
+				minor,v4l2_type_names[type],dev->users);
+
+	/* allocate + initialize per filehandle data */
+	fh = kzalloc(sizeof(*fh),GFP_KERNEL);
+	if (NULL == fh) {
+		dev->users--;
+		return -ENOMEM;
+	}
+
+	file->private_data = fh;
+	fh->dev      = dev;
+	fh->type     = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+	dev->fourcc  = format[0].fourcc;
+	fh->fmt      = format_by_fourcc(dev->fourcc);
+	fh->width    = norm_maxw();
+	fh->height   = norm_maxh();
+
+	dprintk(dev, V4L2_DEBUG_OPEN, "Open: fh=0x%08lx, dev=0x%08lx, "
+						"dev->vidq=0x%08lx\n",
+		(unsigned long)fh,(unsigned long)dev,(unsigned long)&dev->vidq);
+	dprintk(dev, V4L2_DEBUG_OPEN, "Open: list_empty "
+				"queued=%d\n",list_empty(&dev->vidq.queued));
+	dprintk(dev, V4L2_DEBUG_OPEN, "Open: list_empty "
+				"active=%d\n",list_empty(&dev->vidq.active));
+
+	/* initialize hardware on analog mode */
+	if (dev->mode!=TM6000_MODE_ANALOG) {
+		rc=tm6000_init_analog_mode (dev);
+		if (rc<0) {	/* a failed open must not leak fh or the user count */
+			file->private_data = NULL;
+			kfree(fh);
+			dev->users--;
+			return rc;
+		}
+		/* Put all controls at a sane state */
+		for (i = 0; i < ARRAY_SIZE(tm6000_qctrl); i++)
+			qctl_regs[i] =tm6000_qctrl[i].default_value;
+		dev->mode=TM6000_MODE_ANALOG;
+	}
+
+	videobuf_queue_vmalloc_init(&fh->vb_vidq, &tm6000_video_qops,
+			NULL, &dev->slock,
+			fh->type,
+			V4L2_FIELD_INTERLACED,
+			sizeof(struct tm6000_buffer),fh);
+	return 0;
+}
+
+/* read(): 0 for a non-capture handle, -EBUSY if res_locked(), else the videobuf result */
+static ssize_t
+tm6000_read(struct file *file, char __user *data, size_t count, loff_t *pos)
+{
+	struct tm6000_fh *handle = file->private_data;
+
+	if (handle->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return 0;
+	if (res_locked(handle->dev))
+		return -EBUSY;
+
+	return videobuf_read_stream(&handle->vb_vidq, data, count, pos, 0,
+				    file->f_flags & O_NONBLOCK);
+}
+
+/* poll(): readable once the current buffer is done or failed; POLLERR if there is none */
+static unsigned int
+tm6000_poll(struct file *file, struct poll_table_struct *wait)
+{
+	struct tm6000_fh *handle = file->private_data;
+	struct videobuf_queue *q = &handle->vb_vidq;
+	struct tm6000_buffer *cur;
+
+	if (handle->type != V4L2_BUF_TYPE_VIDEO_CAPTURE)
+		return POLLERR;
+	if (!res_get(handle->dev, handle)) {
+		/* read() capture */
+		cur = (struct tm6000_buffer *)q->read_buf;
+		if (!cur)
+			return POLLERR;
+	} else if (list_empty(&q->stream)) {
+		return POLLERR;
+	} else {
+		/* streaming capture: wait on the first buffer of the stream list */
+		cur = list_entry(q->stream.next, struct tm6000_buffer, vb.stream);
+	}
+	poll_wait(file, &cur->vb.done, wait);
+	if (cur->vb.state != STATE_DONE && cur->vb.state != STATE_ERROR)
+		return 0;
+	return POLLIN|POLLRDNORM;
+}
+
+/* release(): tears down the per-file handle created by tm6000_open(); always returns 0 */
+static int tm6000_release(struct inode *inode, struct file *file)
+{
+	struct tm6000_fh         *fh = file->private_data;
+	struct tm6000_core      *dev = fh->dev;
+	struct tm6000_dmaqueue *vidq = &dev->vidq;
+	int minor = iminor(inode);
+
+	tm6000_stop_thread(vidq);
+	videobuf_mmap_free(&fh->vb_vidq);
+	file->private_data = NULL;	/* fh is freed on the next line */
+	kfree (fh);
+	dev->users--;			/* balances the increment in tm6000_open() */
+	dprintk(dev, V4L2_DEBUG_OPEN, "tm6000: close called (minor=%d, users=%d)\n",minor,dev->users);
+	return 0;
+}
+
+/* mmap(): hands the VMA to videobuf_mmap_mapper() for this handle's queue
+ * and returns its result unchanged. */
+static int tm6000_mmap(struct file *file, struct vm_area_struct * vma)
+{
+	struct tm6000_fh *handle = file->private_data;
+
+	/* No driver-specific checks are made before delegating */
+	return videobuf_mmap_mapper(&handle->vb_vidq, vma);
+}
+
+static struct file_operations tm6000_fops = {	/* installed on the video node through tm6000_template.fops */
+	.owner		= THIS_MODULE,
+	.open           = tm6000_open,
+	.release        = tm6000_release,
+	.ioctl          = video_ioctl2, /* V4L2 ioctl handler */
+	.read           = tm6000_read,
+	.poll		= tm6000_poll,
+	.mmap		= tm6000_mmap,
+	.llseek         = no_llseek,	/* NOTE(review): presumably rejects seeking; confirm */
+};
+
+static struct video_device tm6000_template = {	/* copied into each device's vfd by tm6000_v4l2_register() */
+	.name		= "tm6000",
+	.type		= VID_TYPE_CAPTURE,
+	.fops           = &tm6000_fops,
+	.minor		= -1,	/* NOTE(review): presumably "not assigned yet"; confirm */
+	.release	= video_device_release,
+
+	.vidioc_querycap      = vidioc_querycap,
+	.vidioc_enum_fmt_cap  = vidioc_enum_fmt_cap,
+	.vidioc_g_fmt_cap     = vidioc_g_fmt_cap,
+	.vidioc_try_fmt_cap   = vidioc_try_fmt_cap,
+	.vidioc_s_fmt_cap     = vidioc_s_fmt_cap,
+	.vidioc_s_std         = vidioc_s_std,
+	.vidioc_enum_input    = vidioc_enum_input,
+	.vidioc_g_input       = vidioc_g_input,
+	.vidioc_s_input       = vidioc_s_input,
+	.vidioc_queryctrl     = vidioc_queryctrl,
+	.vidioc_g_ctrl        = vidioc_g_ctrl,
+	.vidioc_s_ctrl        = vidioc_s_ctrl,
+	.vidioc_g_tuner       = vidioc_g_tuner,
+	.vidioc_s_tuner       = vidioc_s_tuner,
+	.vidioc_g_frequency   = vidioc_g_frequency,
+	.vidioc_s_frequency   = vidioc_s_frequency,
+	.vidioc_streamon      = vidioc_streamon,
+	.vidioc_streamoff     = vidioc_streamoff,
+	.vidioc_reqbufs       = vidioc_reqbufs,
+	.vidioc_querybuf      = vidioc_querybuf,
+	.vidioc_qbuf          = vidioc_qbuf,
+	.vidioc_dqbuf         = vidioc_dqbuf,
+#ifdef CONFIG_VIDEO_V4L1_COMPAT
+	.vidiocgmbuf          = vidiocgmbuf,
+#endif
+	.tvnorms              = TM6000_STD,	/* same mask that vidioc_enum_input() reports */
+	.current_norm         = V4L2_STD_NTSC_M,	/* initial norm; vidioc_s_input() re-applies current_norm */
+};
+/* -----------------------------------------------------------------
+	Initialization and module stuff
+   ------------------------------------------------------------------*/
+
+int tm6000_v4l2_register(struct tm6000_core *dev)	/* returns the video_register_device() result */
+{
+	int ret;
+
+	list_add_tail(&dev->tm6000_corelist,&tm6000_corelist);	/* tm6000_open() looks devices up on this list */
+
+	/* init video dma queues */
+	INIT_LIST_HEAD(&dev->vidq.active);
+	INIT_LIST_HEAD(&dev->vidq.queued);
+
+	dev->vidq.timeout.function = tm6000_vid_timeout;
+	dev->vidq.timeout.data     = (unsigned long)dev;	/* the timeout handler receives dev as its argument */
+	init_timer(&dev->vidq.timeout);
+
+	memcpy (&dev->vfd, &tm6000_template, sizeof(dev->vfd));	/* each device gets its own copy of the template */
+	dev->vfd.debug=tm6000_debug;
+
+	ret = video_register_device(&dev->vfd, VFL_TYPE_GRABBER, video_nr);	/* NOTE(review): dev stays on tm6000_corelist if this fails; confirm the caller unlinks it */
+	printk(KERN_INFO "Trident TVMaster TM5600/TM6000 USB2 board (Load status: %d)\n", ret);
+	return ret;
+}
+
+/* Unregisters dev's video device, unlinks it from tm6000_corelist and frees it; returns 0 */
+int tm6000_v4l2_unregister(struct tm6000_core *dev)
+{
+	struct tm6000_core *h;
+	struct list_head *list;
+
+	list_for_each(list, &tm6000_corelist) {	/* one pass; a dev not on the list is left untouched */
+		h = list_entry(list, struct tm6000_core, tm6000_corelist);
+		if (h != dev)
+			continue;
+		video_unregister_device(&dev->vfd);
+		list_del(list);
+		kfree (h);
+		break;	/* the node is gone: do not keep iterating from it */
+	}
+	return 0;
+}
+
+int tm6000_v4l2_exit(void)	/* currently does nothing */
+{
+	return 0;	/* always succeeds */
+}
+
+module_param(video_nr, int, 0);	/* passed to video_register_device() in tm6000_v4l2_register() */
+MODULE_PARM_DESC(video_nr,"Allow changing video device number");
+
+module_param_named (debug, tm6000_debug, int, 0444);	/* bit mask tested by dprintk() */
+MODULE_PARM_DESC(debug,"activates debug info");
+
+module_param(vid_limit,int,0644);	/* NOTE(review): not referenced in the code visible here; confirm where it is enforced */
+MODULE_PARM_DESC(vid_limit,"capture memory limit in megabytes");
+
diff --git a/drivers/staging/tm6000/tm6000.h b/drivers/staging/tm6000/tm6000.h
new file mode 100644
index 000000000000..623e85c351e1
--- /dev/null
+++ b/drivers/staging/tm6000/tm6000.h
@@ -0,0 +1,230 @@
+/*
+   tm6000.h - driver for TM5600/TM6000 USB video capture devices
+
+   Copyright (C) 2006-2007 Mauro Carvalho Chehab <mchehab@infradead.org>
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation version 2
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+// Use the tm6000-hack, instead of the proper initialization code
+//#define HACK 1
+
+#include <linux/videodev2.h>
+#include <media/v4l2-common.h>
+#include <media/videobuf-vmalloc.h>
+#include "tm6000-usb-isoc.h"
+#include <linux/i2c.h>
+#include <linux/mutex.h>
+
+#define TM6000_VERSION KERNEL_VERSION(0, 0, 1)
+
+/* Inputs */
+#define TM6000_INPUT_TV		0
+#define TM6000_INPUT_COMPOSITE	1
+#define TM6000_INPUT_SVIDEO	2
+
+/* ------------------------------------------------------------------
+	Basic structures
+   ------------------------------------------------------------------*/
+
+struct tm6000_fmt {	/* one pixel format entry, looked up with format_by_fourcc() */
+	char  *name;	/* NOTE(review): presumably a human-readable format name; confirm */
+	u32   fourcc;          /* v4l2 format id */
+	int   depth;	/* bits per pixel: bytesperline is computed as (width * depth) >> 3 */
+};
+
+/* buffer for one video frame */
+struct tm6000_buffer {	/* sizeof() of this is given to videobuf_queue_vmalloc_init() in tm6000_open() */
+	/* common v4l buffer stuff -- must be first */
+	struct videobuf_buffer vb;
+
+	struct tm6000_fmt      *fmt;	/* NOTE(review): presumably the format this buffer was set up for; confirm */
+};
+
+struct tm6000_dmaqueue {	/* per-device queue state; embedded in tm6000_core as vidq */
+	struct list_head       active;	/* list head, initialised in tm6000_v4l2_register() */
+	struct list_head       queued;	/* list head, initialised in tm6000_v4l2_register() */
+	struct timer_list      timeout;	/* handler is set to tm6000_vid_timeout at registration */
+
+	/* thread for generating video stream*/
+	struct task_struct         *kthread;
+	wait_queue_head_t          wq;
+	/* Counters to control fps rate */
+	int                        frame;
+	int                        ini_jiffies;
+};
+
+/* device states */
+enum tm6000_core_state {	/* type of tm6000_core.state */
+	DEV_INITIALIZED   = 0x01,	/* NOTE(review): values are distinct bits, so they can presumably be OR-ed; confirm */
+	DEV_DISCONNECTED  = 0x02,
+	DEV_MISCONFIGURED = 0x04,
+};
+
+/* io methods */
+enum tm6000_io_method {	/* NOTE(review): not referenced in the code visible here; confirm its users */
+	IO_NONE,
+	IO_READ,
+	IO_MMAP,
+};
+
+enum tm6000_mode {	/* type of tm6000_core.mode */
+	TM6000_MODE_UNKNOWN=0,
+	TM6000_MODE_ANALOG,	/* set by tm6000_open() after tm6000_init_analog_mode() succeeds */
+	TM6000_MODE_DIGITAL,
+};
+
+struct tm6000_capabilities {	/* one-bit feature flags; embedded in tm6000_core as caps */
+	unsigned int    has_tuner:1;
+	unsigned int    has_tda9874:1;	/* NOTE(review): presumably names an on-board chip; confirm */
+	unsigned int    has_dvb:1;
+	unsigned int    has_zl10353:1;	/* NOTE(review): presumably names an on-board chip; confirm */
+	unsigned int    has_eeprom:1;
+};
+
+struct tm6000_core {	/* per-device state; freed by tm6000_v4l2_unregister() */
+	/* generic device properties */
+	char				name[30];	/* name (including minor) of the device */
+	int				model;		/* index in the device_data struct */
+	int				devno;		/* marks the number of this device */
+	v4l2_std_id			norm;		/* Current norm */
+
+	enum tm6000_core_state		state;
+
+	/* Device Capabilities*/
+	struct tm6000_capabilities	caps;
+
+	/* Tuner configuration */
+	int				tuner_type;	/* type of the tuner */
+	int				tuner_addr;	/* tuner address */
+
+	/* i2c i/o */
+	struct i2c_adapter		i2c_adap;
+	struct i2c_client		i2c_client;
+
+	/* video for linux */
+	struct list_head		tm6000_corelist;	/* node on the global tm6000_corelist */
+	int				users;	/* incremented by tm6000_open() */
+
+	/* various device info */
+	unsigned int			resources;
+	struct video_device		vfd;	/* copy of tm6000_template, registered with V4L2 */
+	struct tm6000_dmaqueue		vidq;
+
+	int				input;	/* one of TM6000_INPUT_*, written by vidioc_s_input() */
+	int				freq;	/* last value stored by vidioc_s_frequency() */
+	unsigned int			fourcc;	/* pixel format, written by tm6000_open() and vidioc_s_fmt_cap() */
+
+	enum tm6000_mode		mode;
+
+	/* locks */
+	struct mutex			lock;
+
+	/* usb transfer */
+	struct usb_device		*udev;		/* the usb device */
+
+	struct usb_host_endpoint	*bulk_in, *bulk_out, *isoc_in, *isoc_out;
+	unsigned int			max_bulk_in, max_bulk_out;
+	unsigned int			max_isoc_in, max_isoc_out;
+
+	/* scaler!=0 if scaler is active*/
+	int				scaler;
+
+		/* Isoc control struct */
+	struct usb_isoc_ctl          isoc_ctl;
+
+	spinlock_t                   slock;	/* handed to videobuf_queue_vmalloc_init() in tm6000_open() */
+};
+
+struct tm6000_fh {	/* per-open state, stored in file->private_data by tm6000_open() */
+	struct tm6000_core           *dev;	/* device this handle was opened on */
+
+	/* video capture */
+	struct tm6000_fmt            *fmt;	/* result of format_by_fourcc() for the selected format */
+	unsigned int                 width,height;
+	struct videobuf_queue        vb_vidq;	/* queue that the buffer ioctls, read, poll and mmap operate on */
+
+	enum v4l2_buf_type           type;	/* tm6000_open() sets V4L2_BUF_TYPE_VIDEO_CAPTURE */
+};
+
+#define TM6000_STD	(V4L2_STD_PAL|V4L2_STD_PAL_N|V4L2_STD_PAL_Nc|    \
+			V4L2_STD_PAL_M|V4L2_STD_PAL_60|V4L2_STD_NTSC_M| \
+			V4L2_STD_NTSC_M_JP|V4L2_STD_SECAM)	/* parenthesised so it is safe inside larger expressions */
+
+/* In tm6000-core.c */
+extern unsigned long tm6000_devused;
+
+int tm6000_read_write_usb (struct tm6000_core *dev, u8 reqtype, u8 req,
+			   u16 value, u16 index, u8 *buf, u16 len);
+int tm6000_get_reg (struct tm6000_core *dev, u8 req, u16 value, u16 index);
+int tm6000_set_reg (struct tm6000_core *dev, u8 req, u16 value, u16 index);
+int tm6000_init (struct tm6000_core *dev);
+int tm6000_init_after_firmware (struct tm6000_core *dev);
+
+int tm6000_init_analog_mode (struct tm6000_core *dev);
+int tm6000_set_standard (struct tm6000_core *dev, v4l2_std_id *norm);
+int tm6000_set_audio_bitrate (struct tm6000_core *dev, int bitrate);
+
+int tm6000_v4l2_register(struct tm6000_core *dev);
+int tm6000_v4l2_unregister(struct tm6000_core *dev);
+int tm6000_v4l2_exit(void);
+void tm6000_set_fourcc_format(struct tm6000_core *dev);
+
+/* In tm6000-i2c.c */
+int tm6000_i2c_register(struct tm6000_core *dev);
+int tm6000_i2c_unregister(struct tm6000_core *dev);
+void tm6000_i2c_call_clients(struct tm6000_core *dev, unsigned int cmd,
+			     void *arg);
+
+/* In tm6000-queue.c */
+
+int tm6000_v4l2_mmap(struct file *filp, struct vm_area_struct *vma);
+
+int tm6000_vidioc_streamon(struct file *file, void *priv,
+			   enum v4l2_buf_type i);
+int tm6000_vidioc_streamoff(struct file *file, void *priv,
+			    enum v4l2_buf_type i);
+int tm6000_vidioc_reqbufs (struct file *file, void *priv,
+			   struct v4l2_requestbuffers *rb);
+int tm6000_vidioc_querybuf (struct file *file, void *priv,
+			    struct v4l2_buffer *b);
+int tm6000_vidioc_qbuf (struct file *file, void *priv, struct v4l2_buffer *b);
+int tm6000_vidioc_dqbuf (struct file *file, void *priv, struct v4l2_buffer *b);
+ssize_t tm6000_v4l2_read(struct file *filp, char __user * buf, size_t count,
+			 loff_t * f_pos);
+unsigned int tm6000_v4l2_poll(struct file *file,
+			      struct poll_table_struct *wait);
+int tm6000_queue_init(struct tm6000_core *dev);
+
+/* Debug stuff */
+
+extern int tm6000_debug;
+
+#define dprintk(dev, level, fmt, arg...) do {\
+	if (tm6000_debug & (level)) \
+		printk(KERN_INFO "(%lu) %s %s :"fmt, jiffies, 		\
+			 (dev)->name, __FUNCTION__ , ##arg); } while (0)	/* dev must be non-NULL; dev and level may be any expression */
+
+#define V4L2_DEBUG_REG		0x0004
+#define V4L2_DEBUG_I2C		0x0008
+#define V4L2_DEBUG_QUEUE	0x0010
+#define V4L2_DEBUG_ISOC		0x0020
+#define V4L2_DEBUG_RES_LOCK	0x0040	/* Resource locking */
+#define V4L2_DEBUG_OPEN		0x0080	/* video open/close debug */
+
+#define tm6000_err(fmt, arg...) do {\
+	printk(KERN_ERR "tm6000 %s :"fmt, \
+		__FUNCTION__ , ##arg); } while (0)	/* KERN_ERR message prefixed with "tm6000 <function> :" */
+
+
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 3793d168b44d..3c265603d735 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -369,6 +369,7 @@ struct v4l2_pix_format {
 #define V4L2_PIX_FMT_OV511    v4l2_fourcc('O', '5', '1', '1') /* ov511 JPEG */
 #define V4L2_PIX_FMT_OV518    v4l2_fourcc('O', '5', '1', '8') /* ov518 JPEG */
 #define V4L2_PIX_FMT_STV0680  v4l2_fourcc('S', '6', '8', '0') /* stv0680 bayer */
+#define V4L2_PIX_FMT_TM6000   v4l2_fourcc('T', 'M', '6', '0') /* tm5600/tm60x0 */
 
 /*
  *	F O R M A T   E N U M E R A T I O N
-- 
cgit v1.2.3


From 7a01f6dbc7abb85a6ec048dd45db92ef1b91fe78 Mon Sep 17 00:00:00 2001
From: Devin Heitmueller <dheitmueller@kernellabs.com>
Date: Thu, 11 Mar 2010 21:27:59 -0300
Subject: V4L/DVB: videodev2: introduce a common control for chroma gain

Introduce a new control for modifying the chroma gain.  This allows for user
intervention under abnormal signal conditions, where the decoder's chroma
AGC cannot compensate and the value needs to be adjusted manually.

This work was sponsored by EyeMagnet Limited.

Signed-off-by: Devin Heitmueller <dheitmueller@kernellabs.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/controls.xml | 6 ++++++
 include/linux/videodev2.h              | 5 ++++-
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/controls.xml b/Documentation/DocBook/v4l/controls.xml
index f46450610412..e1bdbb6eff84 100644
--- a/Documentation/DocBook/v4l/controls.xml
+++ b/Documentation/DocBook/v4l/controls.xml
@@ -266,6 +266,12 @@ minimum value disables backlight compensation.</entry>
 	    <entry>boolean</entry>
 	    <entry>Chroma automatic gain control.</entry>
 	  </row>
+	  <row>
+	    <entry><constant>V4L2_CID_CHROMA_GAIN</constant></entry>
+	    <entry>integer</entry>
+	    <entry>Adjusts the Chroma gain control (for use when chroma AGC
+	    is disabled).</entry>
+	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_COLOR_KILLER</constant></entry>
 	    <entry>boolean</entry>
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 3c265603d735..418dacf5261d 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1030,8 +1030,11 @@ enum v4l2_colorfx {
 
 #define V4L2_CID_ROTATE				(V4L2_CID_BASE+34)
 #define V4L2_CID_BG_COLOR			(V4L2_CID_BASE+35)
+
+#define V4L2_CID_CHROMA_GAIN                    (V4L2_CID_BASE+36)
+
 /* last CID + 1 */
-#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+36)
+#define V4L2_CID_LASTP1                         (V4L2_CID_BASE+37)
 
 /*  MPEG-class control IDs defined by V4L2 */
 #define V4L2_CID_MPEG_BASE 			(V4L2_CTRL_CLASS_MPEG | 0x900)
-- 
cgit v1.2.3


From b3e212dcbddb10b44d472a5f4c23d5aa0b5876ca Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 22 Mar 2010 04:52:21 -0300
Subject: V4L/DVB: v4l: add V4L2_PIX_FMT_Y4 and V4L2_PIX_FMT_Y6 pixelformats

Old 4 and 6 bit greyscale pixel formats for the old bw-qcam webcam.
This is needed to convert it to V4L2.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/pixfmt.xml | 12 ++++++++++++
 include/linux/videodev2.h            |  2 ++
 2 files changed, 14 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/pixfmt.xml b/Documentation/DocBook/v4l/pixfmt.xml
index 885968d6a2fc..c4ad0a8e42dc 100644
--- a/Documentation/DocBook/v4l/pixfmt.xml
+++ b/Documentation/DocBook/v4l/pixfmt.xml
@@ -792,6 +792,18 @@ http://www.thedirks.org/winnov/</ulink></para></entry>
 	    <entry>'YYUV'</entry>
 	    <entry>unknown</entry>
 	  </row>
+	  <row id="V4L2-PIX-FMT-Y4">
+	    <entry><constant>V4L2_PIX_FMT_Y4</constant></entry>
+	    <entry>'Y04 '</entry>
+	    <entry>Old 4-bit greyscale format. Only the least significant 4 bits of each byte are used,
+the other bits are set to 0.</entry>
+	  </row>
+	  <row id="V4L2-PIX-FMT-Y6">
+	    <entry><constant>V4L2_PIX_FMT_Y6</constant></entry>
+	    <entry>'Y06 '</entry>
+	    <entry>Old 6-bit greyscale format. Only the least significant 6 bits of each byte are used,
+the other bits are set to 0.</entry>
+	  </row>
 	</tbody>
       </tgroup>
     </table>
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 418dacf5261d..6fb0b2daa5b9 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -294,6 +294,8 @@ struct v4l2_pix_format {
 
 /* Grey formats */
 #define V4L2_PIX_FMT_GREY    v4l2_fourcc('G', 'R', 'E', 'Y') /*  8  Greyscale     */
+#define V4L2_PIX_FMT_Y4      v4l2_fourcc('Y', '0', '4', ' ') /*  4  Greyscale     */
+#define V4L2_PIX_FMT_Y6      v4l2_fourcc('Y', '0', '6', ' ') /*  6  Greyscale     */
 #define V4L2_PIX_FMT_Y10     v4l2_fourcc('Y', '1', '0', ' ') /* 10  Greyscale     */
 #define V4L2_PIX_FMT_Y16     v4l2_fourcc('Y', '1', '6', ' ') /* 16  Greyscale     */
 
-- 
cgit v1.2.3


From 51270617a52793c423ef68ddd3f18745e9abd15b Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Mon, 22 Mar 2010 05:22:34 -0300
Subject: V4L/DVB: meye: remove last V4L1 remnants from the code and add
 v4l2_device

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/Kconfig |  2 +-
 drivers/media/video/meye.c  | 78 ++++++++++++++++++++++++---------------------
 drivers/media/video/meye.h  | 10 ++++--
 include/linux/meye.h        | 12 +++----
 4 files changed, 55 insertions(+), 47 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig
index 9644cf760aaa..2ed76f97bf6c 100644
--- a/drivers/media/video/Kconfig
+++ b/drivers/media/video/Kconfig
@@ -740,7 +740,7 @@ source "drivers/media/video/zoran/Kconfig"
 
 config VIDEO_MEYE
 	tristate "Sony Vaio Picturebook Motion Eye Video For Linux"
-	depends on PCI && SONY_LAPTOP && VIDEO_V4L1
+	depends on PCI && SONY_LAPTOP && VIDEO_V4L2
 	---help---
 	  This is the video4linux driver for the Motion Eye camera found
 	  in the Vaio Picturebook laptops. Please read the material in
diff --git a/drivers/media/video/meye.c b/drivers/media/video/meye.c
index 4404e5ef818f..2be23bccd3c8 100644
--- a/drivers/media/video/meye.c
+++ b/drivers/media/video/meye.c
@@ -30,9 +30,10 @@
 #include <linux/pci.h>
 #include <linux/sched.h>
 #include <linux/init.h>
-#include <linux/videodev.h>
 #include <linux/gfp.h>
+#include <linux/videodev2.h>
 #include <media/v4l2-common.h>
+#include <media/v4l2-device.h>
 #include <media/v4l2-ioctl.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
@@ -1168,22 +1169,22 @@ static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *c)
 	case V4L2_CID_BRIGHTNESS:
 		sony_pic_camera_command(
 			SONY_PIC_COMMAND_SETCAMERABRIGHTNESS, c->value);
-		meye.picture.brightness = c->value << 10;
+		meye.brightness = c->value << 10;
 		break;
 	case V4L2_CID_HUE:
 		sony_pic_camera_command(
 			SONY_PIC_COMMAND_SETCAMERAHUE, c->value);
-		meye.picture.hue = c->value << 10;
+		meye.hue = c->value << 10;
 		break;
 	case V4L2_CID_CONTRAST:
 		sony_pic_camera_command(
 			SONY_PIC_COMMAND_SETCAMERACONTRAST, c->value);
-		meye.picture.contrast = c->value << 10;
+		meye.contrast = c->value << 10;
 		break;
 	case V4L2_CID_SATURATION:
 		sony_pic_camera_command(
 			SONY_PIC_COMMAND_SETCAMERACOLOR, c->value);
-		meye.picture.colour = c->value << 10;
+		meye.colour = c->value << 10;
 		break;
 	case V4L2_CID_AGC:
 		sony_pic_camera_command(
@@ -1221,16 +1222,16 @@ static int vidioc_g_ctrl(struct file *file, void *fh, struct v4l2_control *c)
 	mutex_lock(&meye.lock);
 	switch (c->id) {
 	case V4L2_CID_BRIGHTNESS:
-		c->value = meye.picture.brightness >> 10;
+		c->value = meye.brightness >> 10;
 		break;
 	case V4L2_CID_HUE:
-		c->value = meye.picture.hue >> 10;
+		c->value = meye.hue >> 10;
 		break;
 	case V4L2_CID_CONTRAST:
-		c->value = meye.picture.contrast >> 10;
+		c->value = meye.contrast >> 10;
 		break;
 	case V4L2_CID_SATURATION:
-		c->value = meye.picture.colour >> 10;
+		c->value = meye.colour >> 10;
 		break;
 	case V4L2_CID_AGC:
 		c->value = meye.params.agc;
@@ -1729,6 +1730,7 @@ static int meye_resume(struct pci_dev *pdev)
 static int __devinit meye_probe(struct pci_dev *pcidev,
 				const struct pci_device_id *ent)
 {
+	struct v4l2_device *v4l2_dev = &meye.v4l2_dev;
 	int ret = -EBUSY;
 	unsigned long mchip_adr;
 
@@ -1737,70 +1739,75 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 		goto outnotdev;
 	}
 
+	ret = v4l2_device_register(&pcidev->dev, v4l2_dev);
+	if (ret < 0) {
+		v4l2_err(v4l2_dev, "Could not register v4l2_device\n");
+		return ret;
+	}
 	ret = -ENOMEM;
 	meye.mchip_dev = pcidev;
-	meye.video_dev = video_device_alloc();
-	if (!meye.video_dev) {
-		printk(KERN_ERR "meye: video_device_alloc() failed!\n");
+	meye.vdev = video_device_alloc();
+	if (!meye.vdev) {
+		v4l2_err(v4l2_dev, "video_device_alloc() failed!\n");
 		goto outnotdev;
 	}
 
 	meye.grab_temp = vmalloc(MCHIP_NB_PAGES_MJPEG * PAGE_SIZE);
 	if (!meye.grab_temp) {
-		printk(KERN_ERR "meye: grab buffer allocation failed\n");
+		v4l2_err(v4l2_dev, "grab buffer allocation failed\n");
 		goto outvmalloc;
 	}
 
 	spin_lock_init(&meye.grabq_lock);
 	if (kfifo_alloc(&meye.grabq, sizeof(int) * MEYE_MAX_BUFNBRS,
 				GFP_KERNEL)) {
-		printk(KERN_ERR "meye: fifo allocation failed\n");
+		v4l2_err(v4l2_dev, "fifo allocation failed\n");
 		goto outkfifoalloc1;
 	}
 	spin_lock_init(&meye.doneq_lock);
 	if (kfifo_alloc(&meye.doneq, sizeof(int) * MEYE_MAX_BUFNBRS,
 				GFP_KERNEL)) {
-		printk(KERN_ERR "meye: fifo allocation failed\n");
+		v4l2_err(v4l2_dev, "fifo allocation failed\n");
 		goto outkfifoalloc2;
 	}
 
-	memcpy(meye.video_dev, &meye_template, sizeof(meye_template));
-	meye.video_dev->parent = &meye.mchip_dev->dev;
+	memcpy(meye.vdev, &meye_template, sizeof(meye_template));
+	meye.vdev->v4l2_dev = &meye.v4l2_dev;
 
 	ret = -EIO;
 	if ((ret = sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERA, 1))) {
-		printk(KERN_ERR "meye: unable to power on the camera\n");
-		printk(KERN_ERR "meye: did you enable the camera in "
+		v4l2_err(v4l2_dev, "meye: unable to power on the camera\n");
+		v4l2_err(v4l2_dev, "meye: did you enable the camera in "
 				"sonypi using the module options ?\n");
 		goto outsonypienable;
 	}
 
 	if ((ret = pci_enable_device(meye.mchip_dev))) {
-		printk(KERN_ERR "meye: pci_enable_device failed\n");
+		v4l2_err(v4l2_dev, "meye: pci_enable_device failed\n");
 		goto outenabledev;
 	}
 
 	mchip_adr = pci_resource_start(meye.mchip_dev,0);
 	if (!mchip_adr) {
-		printk(KERN_ERR "meye: mchip has no device base address\n");
+		v4l2_err(v4l2_dev, "meye: mchip has no device base address\n");
 		goto outregions;
 	}
 	if (!request_mem_region(pci_resource_start(meye.mchip_dev, 0),
 				pci_resource_len(meye.mchip_dev, 0),
 				"meye")) {
-		printk(KERN_ERR "meye: request_mem_region failed\n");
+		v4l2_err(v4l2_dev, "meye: request_mem_region failed\n");
 		goto outregions;
 	}
 	meye.mchip_mmregs = ioremap(mchip_adr, MCHIP_MM_REGS);
 	if (!meye.mchip_mmregs) {
-		printk(KERN_ERR "meye: ioremap failed\n");
+		v4l2_err(v4l2_dev, "meye: ioremap failed\n");
 		goto outremap;
 	}
 
 	meye.mchip_irq = pcidev->irq;
 	if (request_irq(meye.mchip_irq, meye_irq,
 			IRQF_DISABLED | IRQF_SHARED, "meye", meye_irq)) {
-		printk(KERN_ERR "meye: request_irq failed\n");
+		v4l2_err(v4l2_dev, "request_irq failed\n");
 		goto outreqirq;
 	}
 
@@ -1824,21 +1831,18 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	msleep(1);
 	mchip_set(MCHIP_MM_INTA, MCHIP_MM_INTA_HIC_1_MASK);
 
-	if (video_register_device(meye.video_dev, VFL_TYPE_GRABBER,
+	if (video_register_device(meye.vdev, VFL_TYPE_GRABBER,
 				  video_nr) < 0) {
-		printk(KERN_ERR "meye: video_register_device failed\n");
+		v4l2_err(v4l2_dev, "video_register_device failed\n");
 		goto outvideoreg;
 	}
 
 	mutex_init(&meye.lock);
 	init_waitqueue_head(&meye.proc_list);
-	meye.picture.depth = 16;
-	meye.picture.palette = VIDEO_PALETTE_YUV422;
-	meye.picture.brightness = 32 << 10;
-	meye.picture.hue = 32 << 10;
-	meye.picture.colour = 32 << 10;
-	meye.picture.contrast = 32 << 10;
-	meye.picture.whiteness = 0;
+	meye.brightness = 32 << 10;
+	meye.hue = 32 << 10;
+	meye.colour = 32 << 10;
+	meye.contrast = 32 << 10;
 	meye.params.subsample = 0;
 	meye.params.quality = 8;
 	meye.params.sharpness = 32;
@@ -1854,9 +1858,9 @@ static int __devinit meye_probe(struct pci_dev *pcidev,
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAPICTURE, 0);
 	sony_pic_camera_command(SONY_PIC_COMMAND_SETCAMERAAGC, 48);
 
-	printk(KERN_INFO "meye: Motion Eye Camera Driver v%s.\n",
+	v4l2_info(v4l2_dev, "Motion Eye Camera Driver v%s.\n",
 	       MEYE_DRIVER_VERSION);
-	printk(KERN_INFO "meye: mchip KL5A72002 rev. %d, base %lx, irq %d\n",
+	v4l2_info(v4l2_dev, "mchip KL5A72002 rev. %d, base %lx, irq %d\n",
 	       meye.mchip_dev->revision, mchip_adr, meye.mchip_irq);
 
 	return 0;
@@ -1879,14 +1883,14 @@ outkfifoalloc2:
 outkfifoalloc1:
 	vfree(meye.grab_temp);
 outvmalloc:
-	video_device_release(meye.video_dev);
+	video_device_release(meye.vdev);
 outnotdev:
 	return ret;
 }
 
 static void __devexit meye_remove(struct pci_dev *pcidev)
 {
-	video_unregister_device(meye.video_dev);
+	video_unregister_device(meye.vdev);
 
 	mchip_hic_stop();
 
diff --git a/drivers/media/video/meye.h b/drivers/media/video/meye.h
index 1321ad5d6597..4bdeb03f1644 100644
--- a/drivers/media/video/meye.h
+++ b/drivers/media/video/meye.h
@@ -31,7 +31,7 @@
 #define _MEYE_PRIV_H_
 
 #define MEYE_DRIVER_MAJORVERSION	 1
-#define MEYE_DRIVER_MINORVERSION	13
+#define MEYE_DRIVER_MINORVERSION	14
 
 #define MEYE_DRIVER_VERSION __stringify(MEYE_DRIVER_MAJORVERSION) "." \
 			    __stringify(MEYE_DRIVER_MINORVERSION)
@@ -289,6 +289,7 @@ struct meye_grab_buffer {
 
 /* Motion Eye device structure */
 struct meye {
+	struct v4l2_device v4l2_dev;	/* Main v4l2_device struct */
 	struct pci_dev *mchip_dev;	/* pci device */
 	u8 mchip_irq;			/* irq */
 	u8 mchip_mode;			/* actual mchip mode: HIC_MODE... */
@@ -308,8 +309,11 @@ struct meye {
 	struct kfifo doneq;		/* queue for grabbed buffers */
 	spinlock_t doneq_lock;		/* lock protecting the queue */
 	wait_queue_head_t proc_list;	/* wait queue */
-	struct video_device *video_dev;	/* video device parameters */
-	struct video_picture picture;	/* video picture parameters */
+	struct video_device *vdev;	/* video device parameters */
+	u16 brightness;
+	u16 hue;
+	u16 contrast;
+	u16 colour;
 	struct meye_params params;	/* additional parameters */
 	unsigned long in_use;		/* set to 1 if the device is in use */
 #ifdef CONFIG_PM
diff --git a/include/linux/meye.h b/include/linux/meye.h
index 12010ace1f04..0dd49954f746 100644
--- a/include/linux/meye.h
+++ b/include/linux/meye.h
@@ -44,17 +44,17 @@ struct meye_params {
 };
 
 /* query the extended parameters */
-#define MEYEIOC_G_PARAMS	_IOR ('v', BASE_VIDIOCPRIVATE+0, struct meye_params)
+#define MEYEIOC_G_PARAMS	_IOR ('v', BASE_VIDIOC_PRIVATE+0, struct meye_params)
 /* set the extended parameters */
-#define MEYEIOC_S_PARAMS	_IOW ('v', BASE_VIDIOCPRIVATE+1, struct meye_params)
+#define MEYEIOC_S_PARAMS	_IOW ('v', BASE_VIDIOC_PRIVATE+1, struct meye_params)
 /* queue a buffer for mjpeg capture */
-#define MEYEIOC_QBUF_CAPT	_IOW ('v', BASE_VIDIOCPRIVATE+2, int)
+#define MEYEIOC_QBUF_CAPT	_IOW ('v', BASE_VIDIOC_PRIVATE+2, int)
 /* sync a previously queued mjpeg buffer */
-#define MEYEIOC_SYNC		_IOWR('v', BASE_VIDIOCPRIVATE+3, int)
+#define MEYEIOC_SYNC		_IOWR('v', BASE_VIDIOC_PRIVATE+3, int)
 /* get a still uncompressed snapshot */
-#define MEYEIOC_STILLCAPT	_IO  ('v', BASE_VIDIOCPRIVATE+4)
+#define MEYEIOC_STILLCAPT	_IO  ('v', BASE_VIDIOC_PRIVATE+4)
 /* get a jpeg compressed snapshot */
-#define MEYEIOC_STILLJCAPT	_IOR ('v', BASE_VIDIOCPRIVATE+5, int)
+#define MEYEIOC_STILLJCAPT	_IOR ('v', BASE_VIDIOC_PRIVATE+5, int)
 
 /* V4L2 private controls */
 #define V4L2_CID_AGC		V4L2_CID_PRIVATE_BASE
-- 
cgit v1.2.3


From 48213fe3f981d2cbccd926d6858fa9cf8617beed Mon Sep 17 00:00:00 2001
From: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Date: Wed, 20 Jan 2010 12:12:57 -0300
Subject: V4L/DVB: v4l: Add V4L2_CID_IRIS_ABSOLUTE and V4L2_CID_IRIS_RELATIVE
 controls

Those controls, as their names imply, control the camera aperture
settings.

Signed-off-by: Laurent Pinchart <laurent.pinchart@ideasonboard.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/compat.xml      | 11 +++++++++++
 Documentation/DocBook/v4l/controls.xml    | 19 +++++++++++++++++++
 Documentation/DocBook/v4l/videodev2.h.xml |  3 +++
 drivers/media/video/v4l2-common.c         |  3 +++
 include/linux/videodev2.h                 |  3 +++
 5 files changed, 39 insertions(+)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/compat.xml b/Documentation/DocBook/v4l/compat.xml
index b9dbdf9e6d29..854235b5208e 100644
--- a/Documentation/DocBook/v4l/compat.xml
+++ b/Documentation/DocBook/v4l/compat.xml
@@ -2332,6 +2332,17 @@ more information.</para>
 	</listitem>
       </orderedlist>
     </section>
+    <section>
+      <title>V4L2 in Linux 2.6.34</title>
+      <orderedlist>
+	<listitem>
+	  <para>Added
+<constant>V4L2_CID_IRIS_ABSOLUTE</constant> and
+<constant>V4L2_CID_IRIS_RELATIVE</constant> controls to the
+	    <link linkend="camera-controls">Camera controls class</link>.
+	  </para>
+	</listitem>
+      </orderedlist>
    </section>
 
    <section id="other">
diff --git a/Documentation/DocBook/v4l/controls.xml b/Documentation/DocBook/v4l/controls.xml
index e1bdbb6eff84..7e0c68747134 100644
--- a/Documentation/DocBook/v4l/controls.xml
+++ b/Documentation/DocBook/v4l/controls.xml
@@ -1830,6 +1830,25 @@ wide-angle direction. The zoom speed unit is driver-specific.</entry>
 	  </row>
 	  <row><entry></entry></row>
 
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_IRIS_ABSOLUTE</constant>&nbsp;</entry>
+	    <entry>integer</entry>
+	  </row><row><entry spanname="descr">This control sets the
+camera's aperture to the specified value. The unit is undefined.
+Larger values open the iris wider, smaller values close it.</entry>
+	  </row>
+	  <row><entry></entry></row>
+
+	  <row>
+	    <entry spanname="id"><constant>V4L2_CID_IRIS_RELATIVE</constant>&nbsp;</entry>
+	    <entry>integer</entry>
+	  </row><row><entry spanname="descr">This control modifies the
+camera's aperture by the specified amount. The unit is undefined.
+Positive values open the iris one step further, negative values close
+it one step further. This is a write-only control.</entry>
+	  </row>
+	  <row><entry></entry></row>
+
 	  <row>
 	    <entry spanname="id"><constant>V4L2_CID_PRIVACY</constant>&nbsp;</entry>
 	    <entry>boolean</entry>
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml
index 068325940658..c18dfebedeff 100644
--- a/Documentation/DocBook/v4l/videodev2.h.xml
+++ b/Documentation/DocBook/v4l/videodev2.h.xml
@@ -1271,6 +1271,9 @@ enum  <link linkend="v4l2-exposure-auto-type">v4l2_exposure_auto_type</link> {
 
 #define V4L2_CID_PRIVACY                        (V4L2_CID_CAMERA_CLASS_BASE+16)
 
+#define V4L2_CID_IRIS_ABSOLUTE                  (V4L2_CID_CAMERA_CLASS_BASE+17)
+#define V4L2_CID_IRIS_RELATIVE                  (V4L2_CID_CAMERA_CLASS_BASE+18)
+
 /* FM Modulator class control IDs */
 #define V4L2_CID_FM_TX_CLASS_BASE               (V4L2_CTRL_CLASS_FM_TX | 0x900)
 #define V4L2_CID_FM_TX_CLASS                    (V4L2_CTRL_CLASS_FM_TX | 1)
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index cd1f21d9b079..67944f53a79a 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -489,6 +489,8 @@ const char *v4l2_ctrl_get_name(u32 id)
 	case V4L2_CID_FOCUS_ABSOLUTE:		return "Focus, Absolute";
 	case V4L2_CID_FOCUS_RELATIVE:		return "Focus, Relative";
 	case V4L2_CID_FOCUS_AUTO:		return "Focus, Automatic";
+	case V4L2_CID_IRIS_ABSOLUTE:		return "Iris, Absolute";
+	case V4L2_CID_IRIS_RELATIVE:		return "Iris, Relative";
 	case V4L2_CID_ZOOM_ABSOLUTE:		return "Zoom, Absolute";
 	case V4L2_CID_ZOOM_RELATIVE:		return "Zoom, Relative";
 	case V4L2_CID_ZOOM_CONTINUOUS:		return "Zoom, Continuous";
@@ -643,6 +645,7 @@ int v4l2_ctrl_query_fill(struct v4l2_queryctrl *qctrl, s32 min, s32 max, s32 ste
 	case V4L2_CID_PAN_RELATIVE:
 	case V4L2_CID_TILT_RELATIVE:
 	case V4L2_CID_FOCUS_RELATIVE:
+	case V4L2_CID_IRIS_RELATIVE:
 	case V4L2_CID_ZOOM_RELATIVE:
 		qctrl->flags |= V4L2_CTRL_FLAG_WRITE_ONLY;
 		break;
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 6fb0b2daa5b9..2559b182b8c4 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1282,6 +1282,9 @@ enum  v4l2_exposure_auto_type {
 
 #define V4L2_CID_PRIVACY			(V4L2_CID_CAMERA_CLASS_BASE+16)
 
+#define V4L2_CID_IRIS_ABSOLUTE			(V4L2_CID_CAMERA_CLASS_BASE+17)
+#define V4L2_CID_IRIS_RELATIVE			(V4L2_CID_CAMERA_CLASS_BASE+18)
+
 /* FM Modulator class control IDs */
 #define V4L2_CID_FM_TX_CLASS_BASE		(V4L2_CTRL_CLASS_FM_TX | 0x900)
 #define V4L2_CID_FM_TX_CLASS			(V4L2_CTRL_CLASS_FM_TX | 1)
-- 
cgit v1.2.3


From 56e6943b902562e09d3e74126d8d8256b5ea17fb Mon Sep 17 00:00:00 2001
From: Wolfgang Grandegger <wg@grandegger.com>
Date: Mon, 17 May 2010 22:39:48 -0700
Subject: can: sja1000 platform data fixes

The member "clock" of struct "sja1000_platform_data" is documented as
"CAN bus oscillator frequency in Hz" but it's actually used as the CAN
clock frequency, which is half of it. To avoid further confusion, this
patch fixes it by renaming the member to "osc_freq". That way, also
non mainline users will notice the change. The platform code for the
relevant boards is updated accordingly. Furthermore, pre-defined
values are now used for the members "ocr" and "cdr".

Signed-off-by: Wolfgang Grandegger <wg@grandegger.com>
Acked-by: Marc Kleine-Budde <mkl@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/mach-mx2/pcm970-baseboard.c       | 6 +++---
 arch/arm/mach-mx3/mach-pcm037.c            | 6 +++---
 drivers/net/can/sja1000/sja1000_platform.c | 3 ++-
 include/linux/can/platform/sja1000.h       | 2 +-
 4 files changed, 9 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/mach-mx2/pcm970-baseboard.c b/arch/arm/mach-mx2/pcm970-baseboard.c
index 4aafd5b8b85b..f490a406d57e 100644
--- a/arch/arm/mach-mx2/pcm970-baseboard.c
+++ b/arch/arm/mach-mx2/pcm970-baseboard.c
@@ -201,9 +201,9 @@ static struct resource pcm970_sja1000_resources[] = {
 };
 
 struct sja1000_platform_data pcm970_sja1000_platform_data = {
-	.clock		= 16000000 / 2,
-	.ocr		= 0x40 | 0x18,
-	.cdr		= 0x40,
+	.osc_freq	= 16000000,
+	.ocr		= OCR_TX1_PULLDOWN | OCR_TX0_PUSHPULL,
+	.cdr		= CDR_CBP,
 };
 
 static struct platform_device pcm970_sja1000 = {
diff --git a/arch/arm/mach-mx3/mach-pcm037.c b/arch/arm/mach-mx3/mach-pcm037.c
index 2df1ec55a97e..78ecd751549b 100644
--- a/arch/arm/mach-mx3/mach-pcm037.c
+++ b/arch/arm/mach-mx3/mach-pcm037.c
@@ -530,9 +530,9 @@ static struct resource pcm970_sja1000_resources[] = {
 };
 
 struct sja1000_platform_data pcm970_sja1000_platform_data = {
-	.clock		= 16000000 / 2,
-	.ocr		= 0x40 | 0x18,
-	.cdr		= 0x40,
+	.osc_freq	= 16000000,
+	.ocr		= OCR_TX1_PULLDOWN | OCR_TX0_PUSHPULL,
+	.cdr		= CDR_CBP,
 };
 
 static struct platform_device pcm970_sja1000 = {
diff --git a/drivers/net/can/sja1000/sja1000_platform.c b/drivers/net/can/sja1000/sja1000_platform.c
index b65cabb361ab..d9fadc489b32 100644
--- a/drivers/net/can/sja1000/sja1000_platform.c
+++ b/drivers/net/can/sja1000/sja1000_platform.c
@@ -111,7 +111,8 @@ static int sp_probe(struct platform_device *pdev)
 	dev->irq = res_irq->start;
 	priv->irq_flags = res_irq->flags & (IRQF_TRIGGER_MASK | IRQF_SHARED);
 	priv->reg_base = addr;
-	priv->can.clock.freq = pdata->clock;
+	/* The CAN clock frequency is half the oscillator clock frequency */
+	priv->can.clock.freq = pdata->osc_freq / 2;
 	priv->ocr = pdata->ocr;
 	priv->cdr = pdata->cdr;
 
diff --git a/include/linux/can/platform/sja1000.h b/include/linux/can/platform/sja1000.h
index 01ee2aeb048d..96f8fcc78d78 100644
--- a/include/linux/can/platform/sja1000.h
+++ b/include/linux/can/platform/sja1000.h
@@ -26,7 +26,7 @@
 #define OCR_TX_SHIFT      2
 
 struct sja1000_platform_data {
-	u32 clock;	/* CAN bus oscillator frequency in Hz */
+	u32 osc_freq;	/* CAN bus oscillator frequency in Hz */
 
 	u8 ocr;		/* output control register */
 	u8 cdr;		/* clock divider register */
-- 
cgit v1.2.3


From 57b610805ce92dbd79fc97509f80fa5391b99623 Mon Sep 17 00:00:00 2001
From: Scott Feldman <scofeldm@cisco.com>
Date: Mon, 17 May 2010 22:49:55 -0700
Subject: net: Add netlink support for virtual port management (was iovnl)

Add new netdev ops ndo_{set|get}_vf_port to allow setting of
port-profile on a netdev interface.  Extends netlink socket RTM_SETLINK/
RTM_GETLINK with two new sub msgs called IFLA_VF_PORTS and IFLA_PORT_SELF
(added to end of IFLA_cmd list).  These are both nested attributes
using this layout:

              [IFLA_NUM_VF]
              [IFLA_VF_PORTS]
                      [IFLA_VF_PORT]
                              [IFLA_PORT_*], ...
                      [IFLA_VF_PORT]
                              [IFLA_PORT_*], ...
                      ...
              [IFLA_PORT_SELF]
                      [IFLA_PORT_*], ...

These attributes are designed to be set and get symmetrically.  VF_PORTS
is a list of VF_PORTs, one for each VF, when dealing with an SR-IOV
device.  PORT_SELF is for the PF of the SR-IOV device, in case it wants
to also have a port-profile, or for the case where the VF==PF, like in
enic patch 2/2 of this patch set.

A port-profile is used to configure/enable the external switch virtual port
backing the netdev interface, not to configure the host-facing side of the
netdev.  A port-profile is an identifier known to the switch.  How port-
profiles are installed on the switch or how available port-profiles are
made known to the host is outside the scope of this patch.

There are two types of port-profiles specs in the netlink msg.  The first spec
is for 802.1Qbg (pre-)standard, VDP protocol.  The second spec is for devices
that run a similar protocol as VDP but in firmware, thus hiding the protocol
details.  In either case, the specs have much in common and it makes sense to
define the netlink msg as the union of the two specs.  For example, both specs
have a notion of associating/disassociating a port-profile.  And both specs
require some information from the hypervisor manager, such as client port
instance ID.

The general flow is the port-profile is applied to a host netdev interface
using RTM_SETLINK, the receiver of the RTM_SETLINK msg communicates with the
switch, and the switch virtual port backing the host netdev interface is
configured/enabled based on the settings defined by the port-profile.  What
those settings comprise, and how those settings are managed is again
outside the scope of this patch, since this patch only deals with the
first step in the flow.

Signed-off-by: Scott Feldman <scofeldm@cisco.com>
Signed-off-by: Roopa Prabhu <roprabhu@cisco.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_link.h   |  75 ++++++++++++++++++++
 include/linux/netdevice.h |   8 +++
 net/core/rtnetlink.c      | 169 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 251 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/if_link.h b/include/linux/if_link.h
index c3af67fce3f2..85c812db5a3f 100644
--- a/include/linux/if_link.h
+++ b/include/linux/if_link.h
@@ -113,6 +113,8 @@ enum {
 	IFLA_NUM_VF,		/* Number of VFs if device is SR-IOV PF */
 	IFLA_VFINFO_LIST,
 	IFLA_STATS64,
+	IFLA_VF_PORTS,
+	IFLA_PORT_SELF,
 	__IFLA_MAX
 };
 
@@ -274,4 +276,77 @@ struct ifla_vf_info {
 	__u32 qos;
 	__u32 tx_rate;
 };
+
+/* VF ports management section
+ *
+ *	Nested layout of set/get msg is:
+ *
+ *		[IFLA_NUM_VF]
+ *		[IFLA_VF_PORTS]
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			[IFLA_VF_PORT]
+ *				[IFLA_PORT_*], ...
+ *			...
+ *		[IFLA_PORT_SELF]
+ *			[IFLA_PORT_*], ...
+ */
+
+enum {
+	IFLA_VF_PORT_UNSPEC,
+	IFLA_VF_PORT,			/* nest */
+	__IFLA_VF_PORT_MAX,
+};
+
+#define IFLA_VF_PORT_MAX (__IFLA_VF_PORT_MAX - 1)
+
+enum {
+	IFLA_PORT_UNSPEC,
+	IFLA_PORT_VF,			/* __u32 */
+	IFLA_PORT_PROFILE,		/* string */
+	IFLA_PORT_VSI_TYPE,		/* 802.1Qbg (pre-)standard VDP */
+	IFLA_PORT_INSTANCE_UUID,	/* binary UUID */
+	IFLA_PORT_HOST_UUID,		/* binary UUID */
+	IFLA_PORT_REQUEST,		/* __u8 */
+	IFLA_PORT_RESPONSE,		/* __u16, output only */
+	__IFLA_PORT_MAX,
+};
+
+#define IFLA_PORT_MAX (__IFLA_PORT_MAX - 1)
+
+#define PORT_PROFILE_MAX	40
+#define PORT_UUID_MAX		16
+#define PORT_SELF_VF		-1
+
+enum {
+	PORT_REQUEST_PREASSOCIATE = 0,
+	PORT_REQUEST_PREASSOCIATE_RR,
+	PORT_REQUEST_ASSOCIATE,
+	PORT_REQUEST_DISASSOCIATE,
+};
+
+enum {
+	PORT_VDP_RESPONSE_SUCCESS = 0,
+	PORT_VDP_RESPONSE_INVALID_FORMAT,
+	PORT_VDP_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_VDP_RESPONSE_UNUSED_VTID,
+	PORT_VDP_RESPONSE_VTID_VIOLATION,
+	PORT_VDP_RESPONSE_VTID_VERSION_VIOALTION,
+	PORT_VDP_RESPONSE_OUT_OF_SYNC,
+	/* 0x08-0xFF reserved for future VDP use */
+	PORT_PROFILE_RESPONSE_SUCCESS = 0x100,
+	PORT_PROFILE_RESPONSE_INPROGRESS,
+	PORT_PROFILE_RESPONSE_INVALID,
+	PORT_PROFILE_RESPONSE_BADSTATE,
+	PORT_PROFILE_RESPONSE_INSUFFICIENT_RESOURCES,
+	PORT_PROFILE_RESPONSE_ERROR,
+};
+
+struct ifla_port_vsi {
+	__u8 vsi_mgr_id;
+	__u8 vsi_type_id[3];
+	__u8 vsi_type_version;
+	__u8 pad[3];
+};
+
 #endif /* _LINUX_IF_LINK_H */
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index c1b2341897c2..c3487a6bdf99 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -686,6 +686,9 @@ struct netdev_rx_queue {
  * int (*ndo_set_vf_tx_rate)(struct net_device *dev, int vf, int rate);
  * int (*ndo_get_vf_config)(struct net_device *dev,
  *			    int vf, struct ifla_vf_info *ivf);
+ * int (*ndo_set_vf_port)(struct net_device *dev, int vf,
+ *			  struct nlattr *port[]);
+ * int (*ndo_get_vf_port)(struct net_device *dev, int vf, struct sk_buff *skb);
  */
 #define HAVE_NET_DEVICE_OPS
 struct net_device_ops {
@@ -735,6 +738,11 @@ struct net_device_ops {
 	int			(*ndo_get_vf_config)(struct net_device *dev,
 						     int vf,
 						     struct ifla_vf_info *ivf);
+	int			(*ndo_set_vf_port)(struct net_device *dev,
+						   int vf,
+						   struct nlattr *port[]);
+	int			(*ndo_get_vf_port)(struct net_device *dev,
+						   int vf, struct sk_buff *skb);
 #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE)
 	int			(*ndo_fcoe_enable)(struct net_device *dev);
 	int			(*ndo_fcoe_disable)(struct net_device *dev);
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 66db1201da9b..e4b9870e4706 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -660,6 +660,31 @@ static inline int rtnl_vfinfo_size(const struct net_device *dev)
 		return 0;
 }
 
+static size_t rtnl_port_size(const struct net_device *dev)
+{
+	size_t port_size = nla_total_size(4)		/* PORT_VF */
+		+ nla_total_size(PORT_PROFILE_MAX)	/* PORT_PROFILE */
+		+ nla_total_size(sizeof(struct ifla_port_vsi))
+							/* PORT_VSI_TYPE */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_INSTANCE_UUID */
+		+ nla_total_size(PORT_UUID_MAX)		/* PORT_HOST_UUID */
+		+ nla_total_size(1)			/* PROT_VDP_REQUEST */
+		+ nla_total_size(2);			/* PORT_VDP_RESPONSE */
+	size_t vf_ports_size = nla_total_size(sizeof(struct nlattr));
+	size_t vf_port_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+	size_t port_self_size = nla_total_size(sizeof(struct nlattr))
+		+ port_size;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+	if (dev_num_vf(dev->dev.parent))
+		return port_self_size + vf_ports_size +
+			vf_port_size * dev_num_vf(dev->dev.parent);
+	else
+		return port_self_size;
+}
+
 static inline size_t if_nlmsg_size(const struct net_device *dev)
 {
 	return NLMSG_ALIGN(sizeof(struct ifinfomsg))
@@ -680,9 +705,82 @@ static inline size_t if_nlmsg_size(const struct net_device *dev)
 	       + nla_total_size(1) /* IFLA_LINKMODE */
 	       + nla_total_size(4) /* IFLA_NUM_VF */
 	       + rtnl_vfinfo_size(dev) /* IFLA_VFINFO_LIST */
+	       + rtnl_port_size(dev) /* IFLA_VF_PORTS + IFLA_PORT_SELF */
 	       + rtnl_link_get_size(dev); /* IFLA_LINKINFO */
 }
 
+static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *vf_ports;
+	struct nlattr *vf_port;
+	int vf;
+	int err;
+
+	vf_ports = nla_nest_start(skb, IFLA_VF_PORTS);
+	if (!vf_ports)
+		return -EMSGSIZE;
+
+	for (vf = 0; vf < dev_num_vf(dev->dev.parent); vf++) {
+		vf_port = nla_nest_start(skb, IFLA_VF_PORT);
+		if (!vf_port) {
+			nla_nest_cancel(skb, vf_ports);
+			return -EMSGSIZE;
+		}
+		NLA_PUT_U32(skb, IFLA_PORT_VF, vf);
+		err = dev->netdev_ops->ndo_get_vf_port(dev, vf, skb);
+		if (err) {
+nla_put_failure:
+			nla_nest_cancel(skb, vf_port);
+			continue;
+		}
+		nla_nest_end(skb, vf_port);
+	}
+
+	nla_nest_end(skb, vf_ports);
+
+	return 0;
+}
+
+static int rtnl_port_self_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	struct nlattr *port_self;
+	int err;
+
+	port_self = nla_nest_start(skb, IFLA_PORT_SELF);
+	if (!port_self)
+		return -EMSGSIZE;
+
+	err = dev->netdev_ops->ndo_get_vf_port(dev, PORT_SELF_VF, skb);
+	if (err) {
+		nla_nest_cancel(skb, port_self);
+		return err;
+	}
+
+	nla_nest_end(skb, port_self);
+
+	return 0;
+}
+
+static int rtnl_port_fill(struct sk_buff *skb, struct net_device *dev)
+{
+	int err;
+
+	if (!dev->netdev_ops->ndo_get_vf_port || !dev->dev.parent)
+		return 0;
+
+	err = rtnl_port_self_fill(skb, dev);
+	if (err)
+		return err;
+
+	if (dev_num_vf(dev->dev.parent)) {
+		err = rtnl_vf_ports_fill(skb, dev);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 			    int type, u32 pid, u32 seq, u32 change,
 			    unsigned int flags)
@@ -754,13 +852,15 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		goto nla_put_failure;
 	copy_rtnl_link_stats64(nla_data(attr), stats);
 
+	if (dev->dev.parent)
+		NLA_PUT_U32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent));
+
 	if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent) {
 		int i;
 
 		struct nlattr *vfinfo, *vf;
 		int num_vfs = dev_num_vf(dev->dev.parent);
 
-		NLA_PUT_U32(skb, IFLA_NUM_VF, num_vfs);
 		vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST);
 		if (!vfinfo)
 			goto nla_put_failure;
@@ -788,6 +888,10 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev,
 		}
 		nla_nest_end(skb, vfinfo);
 	}
+
+	if (rtnl_port_fill(skb, dev))
+		goto nla_put_failure;
+
 	if (dev->rtnl_link_ops) {
 		if (rtnl_link_fill(skb, dev) < 0)
 			goto nla_put_failure;
@@ -849,6 +953,8 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
 	[IFLA_NET_NS_PID]	= { .type = NLA_U32 },
 	[IFLA_IFALIAS]	        = { .type = NLA_STRING, .len = IFALIASZ-1 },
 	[IFLA_VFINFO_LIST]	= {. type = NLA_NESTED },
+	[IFLA_VF_PORTS]		= { .type = NLA_NESTED },
+	[IFLA_PORT_SELF]	= { .type = NLA_NESTED },
 };
 EXPORT_SYMBOL(ifla_policy);
 
@@ -870,6 +976,20 @@ static const struct nla_policy ifla_vf_policy[IFLA_VF_MAX+1] = {
 				    .len = sizeof(struct ifla_vf_tx_rate) },
 };
 
+static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = {
+	[IFLA_PORT_VF]		= { .type = NLA_U32 },
+	[IFLA_PORT_PROFILE]	= { .type = NLA_STRING,
+				    .len = PORT_PROFILE_MAX },
+	[IFLA_PORT_VSI_TYPE]	= { .type = NLA_BINARY,
+				    .len = sizeof(struct ifla_port_vsi)},
+	[IFLA_PORT_INSTANCE_UUID] = { .type = NLA_BINARY,
+				      .len = PORT_UUID_MAX },
+	[IFLA_PORT_HOST_UUID]	= { .type = NLA_STRING,
+				    .len = PORT_UUID_MAX },
+	[IFLA_PORT_REQUEST]	= { .type = NLA_U8, },
+	[IFLA_PORT_RESPONSE]	= { .type = NLA_U16, },
+};
+
 struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
 {
 	struct net *net;
@@ -1089,6 +1209,53 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
 	}
 	err = 0;
 
+	if (tb[IFLA_VF_PORTS]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+		struct nlattr *attr;
+		int vf;
+		int rem;
+
+		err = -EOPNOTSUPP;
+		if (!ops->ndo_set_vf_port)
+			goto errout;
+
+		nla_for_each_nested(attr, tb[IFLA_VF_PORTS], rem) {
+			if (nla_type(attr) != IFLA_VF_PORT)
+				continue;
+			err = nla_parse_nested(port, IFLA_PORT_MAX,
+				attr, ifla_port_policy);
+			if (err < 0)
+				goto errout;
+			if (!port[IFLA_PORT_VF]) {
+				err = -EOPNOTSUPP;
+				goto errout;
+			}
+			vf = nla_get_u32(port[IFLA_PORT_VF]);
+			err = ops->ndo_set_vf_port(dev, vf, port);
+			if (err < 0)
+				goto errout;
+			modified = 1;
+		}
+	}
+	err = 0;
+
+	if (tb[IFLA_PORT_SELF]) {
+		struct nlattr *port[IFLA_PORT_MAX+1];
+
+		err = nla_parse_nested(port, IFLA_PORT_MAX,
+			tb[IFLA_PORT_SELF], ifla_port_policy);
+		if (err < 0)
+			goto errout;
+
+		err = -EOPNOTSUPP;
+		if (ops->ndo_set_vf_port)
+			err = ops->ndo_set_vf_port(dev, PORT_SELF_VF, port);
+		if (err < 0)
+			goto errout;
+		modified = 1;
+	}
+	err = 0;
+
 errout:
 	if (err < 0 && modified && net_ratelimit())
 		printk(KERN_WARNING "A link change request failed with "
-- 
cgit v1.2.3


From 1471ca9aa71cd37b6a7476bb6f06a3a8622ea1bd Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Sun, 16 May 2010 17:27:03 +0200
Subject: fbdev: allow passing more than one aperture for handoff

It removes a hack from nouveau code which had to detect which
region to pass to kick vesafb/efifb.

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Eric Anholt <eric@anholt.net>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Thomas Hellstrom <thellstrom@vmware.com>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Peter Jones <pjones@redhat.com>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/i915/intel_fb.c         | 11 +++--
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 84 ++++++++++-----------------------
 drivers/gpu/drm/radeon/radeon_fb.c      |  9 +++-
 drivers/gpu/drm/vmwgfx/vmwgfx_fb.c      | 10 +++-
 drivers/video/efifb.c                   | 11 +++--
 drivers/video/fbmem.c                   | 29 ++++++++++--
 drivers/video/fbsysfs.c                 |  1 +
 drivers/video/offb.c                    | 28 ++++++-----
 drivers/video/vesafb.c                  | 11 +++--
 include/linux/fb.h                      | 17 ++++++-
 10 files changed, 123 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
index b04e0a86bf9a..7f1eabbaa2bb 100644
--- a/drivers/gpu/drm/i915/intel_fb.c
+++ b/drivers/gpu/drm/i915/intel_fb.c
@@ -128,11 +128,16 @@ static int intelfb_create(struct intel_fbdev *ifbdev,
 	info->fbops = &intelfb_ops;
 
 	/* setup aperture base/size for vesafb takeover */
-	info->aperture_base = dev->mode_config.fb_base;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto out_unpin;
+	}
+	info->apertures->ranges[0].base = dev->mode_config.fb_base;
 	if (IS_I9XX(dev))
-		info->aperture_size = pci_resource_len(dev->pdev, 2);
+		info->apertures->ranges[0].size = pci_resource_len(dev->pdev, 2);
 	else
-		info->aperture_size = pci_resource_len(dev->pdev, 0);
+		info->apertures->ranges[0].size = pci_resource_len(dev->pdev, 0);
 
 	info->fix.smem_start = dev->mode_config.fb_base + obj_priv->gtt_offset;
 	info->fix.smem_len = size;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index f29fa8c117ce..292c7ff95105 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -152,44 +152,6 @@ static void nouveau_fbcon_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 	*blue = nv_crtc->lut.b[regno];
 }
 
-#if defined(__i386__) || defined(__x86_64__)
-static bool
-nouveau_fbcon_has_vesafb_or_efifb(struct drm_device *dev)
-{
-	struct pci_dev *pdev = dev->pdev;
-	int ramin;
-
-	if (screen_info.orig_video_isVGA != VIDEO_TYPE_VLFB &&
-	    screen_info.orig_video_isVGA != VIDEO_TYPE_EFI)
-		return false;
-
-	if (screen_info.lfb_base < pci_resource_start(pdev, 1))
-		goto not_fb;
-
-	if (screen_info.lfb_base + screen_info.lfb_size >=
-	    pci_resource_start(pdev, 1) + pci_resource_len(pdev, 1))
-		goto not_fb;
-
-	return true;
-not_fb:
-	ramin = 2;
-	if (pci_resource_len(pdev, ramin) == 0) {
-		ramin = 3;
-		if (pci_resource_len(pdev, ramin) == 0)
-			return false;
-	}
-
-	if (screen_info.lfb_base < pci_resource_start(pdev, ramin))
-		return false;
-
-	if (screen_info.lfb_base + screen_info.lfb_size >=
-	    pci_resource_start(pdev, ramin) + pci_resource_len(pdev, ramin))
-		return false;
-
-	return true;
-}
-#endif
-
 static void
 nouveau_fbcon_zfill(struct drm_device *dev, struct nouveau_fbdev *nfbdev)
 {
@@ -219,7 +181,9 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	struct nouveau_framebuffer *nouveau_fb;
 	struct nouveau_bo *nvbo;
 	struct drm_mode_fb_cmd mode_cmd;
-	struct device *device = &dev->pdev->dev;
+	struct pci_dev *pdev = dev->pdev;
+	struct device *device = &pdev->dev;
+	struct apertures_struct *aper;
 	int size, ret;
 
 	mode_cmd.width = sizes->surface_width;
@@ -299,28 +263,30 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	drm_fb_helper_fill_var(info, &nfbdev->helper, sizes->fb_width, sizes->fb_height);
 
 	/* FIXME: we really shouldn't expose mmio space at all */
-	info->fix.mmio_start = pci_resource_start(dev->pdev, 1);
-	info->fix.mmio_len = pci_resource_len(dev->pdev, 1);
+	info->fix.mmio_start = pci_resource_start(pdev, 1);
+	info->fix.mmio_len = pci_resource_len(pdev, 1);
 
 	/* Set aperture base/size for vesafb takeover */
-#if defined(__i386__) || defined(__x86_64__)
-	if (nouveau_fbcon_has_vesafb_or_efifb(dev)) {
-		/* Some NVIDIA VBIOS' are stupid and decide to put the
-		 * framebuffer in the middle of the PRAMIN BAR for
-		 * whatever reason.  We need to know the exact lfb_base
-		 * to get vesafb kicked off, and the only reliable way
-		 * we have left is to find out lfb_base the same way
-		 * vesafb did.
-		 */
-		info->aperture_base = screen_info.lfb_base;
-		info->aperture_size = screen_info.lfb_size;
-		if (screen_info.orig_video_isVGA == VIDEO_TYPE_VLFB)
-			info->aperture_size *= 65536;
-	} else
-#endif
-	{
-		info->aperture_base = info->fix.mmio_start;
-		info->aperture_size = info->fix.mmio_len;
+	aper = info->apertures = alloc_apertures(3);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+
+	aper->ranges[0].base = pci_resource_start(pdev, 1);
+	aper->ranges[0].size = pci_resource_len(pdev, 1);
+	aper->count = 1;
+
+	if (pci_resource_len(pdev, 2)) {
+		aper->ranges[aper->count].base = pci_resource_start(pdev, 2);
+		aper->ranges[aper->count].size = pci_resource_len(pdev, 2);
+		aper->count++;
+	}
+
+	if (pci_resource_len(pdev, 3)) {
+		aper->ranges[aper->count].base = pci_resource_start(pdev, 3);
+		aper->ranges[aper->count].size = pci_resource_len(pdev, 3);
+		aper->count++;
 	}
 
 	info->pixmap.size = 64*1024;
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index fcb5b52727b0..b4948021e345 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -236,8 +236,13 @@ static int radeonfb_create(struct radeon_fbdev *rfbdev,
 	drm_fb_helper_fill_var(info, &rfbdev->helper, sizes->fb_width, sizes->fb_height);
 
 	/* setup aperture base/size for vesafb takeover */
-	info->aperture_base = rdev->ddev->mode_config.fb_base;
-	info->aperture_size = rdev->mc.real_vram_size;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto out_unref;
+	}
+	info->apertures->ranges[0].base = rdev->ddev->mode_config.fb_base;
+	info->apertures->ranges[0].size = rdev->mc.real_vram_size;
 
 	info->fix.mmio_start = 0;
 	info->fix.mmio_len = 0;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 80125ffc4e28..7421aaad8d09 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -559,8 +559,13 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 	info->pixmap.scan_align = 1;
 #endif
 
-	info->aperture_base = vmw_priv->vram_start;
-	info->aperture_size = vmw_priv->vram_size;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto err_aper;
+	}
+	info->apertures->ranges[0].base = vmw_priv->vram_start;
+	info->apertures->ranges[0].size = vmw_priv->vram_size;
 
 	/*
 	 * Dirty & Deferred IO
@@ -580,6 +585,7 @@ int vmw_fb_init(struct vmw_private *vmw_priv)
 
 err_defio:
 	fb_deferred_io_cleanup(info);
+err_aper:
 	ttm_bo_kunmap(&par->map);
 err_unref:
 	ttm_bo_unref((struct ttm_buffer_object **)&par->vmw_bo);
diff --git a/drivers/video/efifb.c b/drivers/video/efifb.c
index 581d2dbf675a..3b986567111a 100644
--- a/drivers/video/efifb.c
+++ b/drivers/video/efifb.c
@@ -165,7 +165,7 @@ static void efifb_destroy(struct fb_info *info)
 {
 	if (info->screen_base)
 		iounmap(info->screen_base);
-	release_mem_region(info->aperture_base, info->aperture_size);
+	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
 	framebuffer_release(info);
 }
 
@@ -289,8 +289,13 @@ static int __devinit efifb_probe(struct platform_device *dev)
 	info->pseudo_palette = info->par;
 	info->par = NULL;
 
-	info->aperture_base = efifb_fix.smem_start;
-	info->aperture_size = size_remap;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		err = -ENOMEM;
+		goto err_release_fb;
+	}
+	info->apertures->ranges[0].base = efifb_fix.smem_start;
+	info->apertures->ranges[0].size = size_remap;
 
 	info->screen_base = ioremap(efifb_fix.smem_start, efifb_fix.smem_len);
 	if (!info->screen_base) {
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index a15b44e9c003..03f2dc2470b5 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1468,16 +1468,39 @@ static int fb_check_foreignness(struct fb_info *fi)
 	return 0;
 }
 
-static bool fb_do_apertures_overlap(struct fb_info *gen, struct fb_info *hw)
+static bool apertures_overlap(struct aperture *gen, struct aperture *hw)
 {
 	/* is the generic aperture base the same as the HW one */
-	if (gen->aperture_base == hw->aperture_base)
+	if (gen->base == hw->base)
 		return true;
 	/* is the generic aperture base inside the hw base->hw base+size */
-	if (gen->aperture_base > hw->aperture_base && gen->aperture_base <= hw->aperture_base + hw->aperture_size)
+	if (gen->base > hw->base && gen->base <= hw->base + hw->size)
 		return true;
 	return false;
 }
+
+static bool fb_do_apertures_overlap(struct fb_info *gen, struct fb_info *hw)
+{
+	int i, j;
+	struct apertures_struct *hwa = hw->apertures;
+	struct apertures_struct *gena = gen->apertures;
+	if (!hwa || !gena)
+		return false;
+
+	for (i = 0; i < hwa->count; ++i) {
+		struct aperture *h = &hwa->ranges[i];
+		for (j = 0; j < gena->count; ++j) {
+			struct aperture *g = &gena->ranges[j];
+			printk(KERN_DEBUG "checking generic (%llx %llx) vs hw (%llx %llx)\n",
+				g->base, g->size, h->base, h->size);
+			if (apertures_overlap(g, h))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 /**
  *	register_framebuffer - registers a frame buffer device
  *	@fb_info: frame buffer info structure
diff --git a/drivers/video/fbsysfs.c b/drivers/video/fbsysfs.c
index 81aa3129c17d..0a08f1341227 100644
--- a/drivers/video/fbsysfs.c
+++ b/drivers/video/fbsysfs.c
@@ -80,6 +80,7 @@ EXPORT_SYMBOL(framebuffer_alloc);
  */
 void framebuffer_release(struct fb_info *info)
 {
+	kfree(info->apertures);
 	kfree(info);
 }
 EXPORT_SYMBOL(framebuffer_release);
diff --git a/drivers/video/offb.c b/drivers/video/offb.c
index 61f8b8f919b0..46dda7d8aaee 100644
--- a/drivers/video/offb.c
+++ b/drivers/video/offb.c
@@ -285,7 +285,7 @@ static void offb_destroy(struct fb_info *info)
 {
 	if (info->screen_base)
 		iounmap(info->screen_base);
-	release_mem_region(info->aperture_base, info->aperture_size);
+	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
 	framebuffer_release(info);
 }
 
@@ -491,8 +491,11 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 	var->vmode = FB_VMODE_NONINTERLACED;
 
 	/* set offb aperture size for generic probing */
-	info->aperture_base = address;
-	info->aperture_size = fix->smem_len;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures)
+		goto out_aper;
+	info->apertures->ranges[0].base = address;
+	info->apertures->ranges[0].size = fix->smem_len;
 
 	info->fbops = &offb_ops;
 	info->screen_base = ioremap(address, fix->smem_len);
@@ -501,17 +504,20 @@ static void __init offb_init_fb(const char *name, const char *full_name,
 
 	fb_alloc_cmap(&info->cmap, 256, 0);
 
-	if (register_framebuffer(info) < 0) {
-		iounmap(par->cmap_adr);
-		par->cmap_adr = NULL;
-		iounmap(info->screen_base);
-		framebuffer_release(info);
-		release_mem_region(res_start, res_size);
-		return;
-	}
+	if (register_framebuffer(info) < 0)
+		goto out_err;
 
 	printk(KERN_INFO "fb%d: Open Firmware frame buffer device on %s\n",
 	       info->node, full_name);
+	return;
+
+out_err:
+	iounmap(info->screen_base);
+out_aper:
+	iounmap(par->cmap_adr);
+	par->cmap_adr = NULL;
+	framebuffer_release(info);
+	release_mem_region(res_start, res_size);
 }
 
 
diff --git a/drivers/video/vesafb.c b/drivers/video/vesafb.c
index 0cadf7aee27e..090aa1a9be6e 100644
--- a/drivers/video/vesafb.c
+++ b/drivers/video/vesafb.c
@@ -177,7 +177,7 @@ static void vesafb_destroy(struct fb_info *info)
 {
 	if (info->screen_base)
 		iounmap(info->screen_base);
-	release_mem_region(info->aperture_base, info->aperture_size);
+	release_mem_region(info->apertures->ranges[0].base, info->apertures->ranges[0].size);
 	framebuffer_release(info);
 }
 
@@ -295,8 +295,13 @@ static int __init vesafb_probe(struct platform_device *dev)
 	info->par = NULL;
 
 	/* set vesafb aperture size for generic probing */
-	info->aperture_base = screen_info.lfb_base;
-	info->aperture_size = size_total;
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		err = -ENOMEM;
+		goto err;
+	}
+	info->apertures->ranges[0].base = screen_info.lfb_base;
+	info->apertures->ranges[0].size = size_total;
 
 	info->screen_base = ioremap(vesafb_fix.smem_start, vesafb_fix.smem_len);
 	if (!info->screen_base) {
diff --git a/include/linux/fb.h b/include/linux/fb.h
index c10163b4c40e..de5ff5fa8380 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -403,6 +403,7 @@ struct fb_cursor {
 #include <linux/notifier.h>
 #include <linux/list.h>
 #include <linux/backlight.h>
+#include <linux/slab.h>
 #include <asm/io.h>
 
 struct vm_area_struct;
@@ -862,10 +863,22 @@ struct fb_info {
 	/* we need the PCI or similiar aperture base/size not
 	   smem_start/size as smem_start may just be an object
 	   allocated inside the aperture so may not actually overlap */
-	resource_size_t aperture_base;
-	resource_size_t aperture_size;
+	struct apertures_struct {
+		unsigned int count;
+		struct aperture {
+			resource_size_t base;
+			resource_size_t size;
+		} ranges[0];
+	} *apertures;
 };
 
+static inline struct apertures_struct *alloc_apertures(unsigned int max_num) {
+	struct apertures_struct *a = kzalloc(sizeof(struct apertures_struct)
+			+ max_num * sizeof(struct aperture), GFP_KERNEL);
+	a->count = max_num;
+	return a;
+}
+
 #ifdef MODULE
 #define FBINFO_DEFAULT	FBINFO_MODULE
 #else
-- 
cgit v1.2.3


From 06415c564fb98562a4d6b6215615deb2d1cc0dae Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Sun, 16 May 2010 17:29:56 +0200
Subject: fbmem, drm/nouveau: kick firmware framebuffers as soon as possible

Currently vesafb/efifb/... is kicked when the hardware driver registers its
framebuffer. To register it, the hardware must be fully functional, so there
is a short window between the start of initialisation and framebuffer
registration during which two drivers touch the hardware. Unfortunately, this
sometimes breaks nouveau initialisation.

Fix it by kicking firmware driver(s) before we start touching the hardware.

Reported-by: Didier Spaier <didier.spaier@epsm.fr>
Tested-by: Didier Spaier <didier.spaier@epsm.fr>
Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: Peter Jones <pjones@redhat.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drv.h   |  1 +
 drivers/gpu/drm/nouveau/nouveau_fbcon.c | 19 +--------------
 drivers/gpu/drm/nouveau/nouveau_state.c | 43 +++++++++++++++++++++++++++++++++
 drivers/video/fbmem.c                   | 43 +++++++++++++++++++--------------
 include/linux/fb.h                      |  1 +
 5 files changed, 71 insertions(+), 36 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 5b47b79f45e8..94d8dd27bde8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -624,6 +624,7 @@ struct drm_nouveau_private {
 	} debugfs;
 
 	struct nouveau_fbdev *nfbdev;
+	struct apertures_struct *apertures;
 };
 
 static inline struct drm_nouveau_private *
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 292c7ff95105..2c2199329cc1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -183,7 +183,6 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	struct drm_mode_fb_cmd mode_cmd;
 	struct pci_dev *pdev = dev->pdev;
 	struct device *device = &pdev->dev;
-	struct apertures_struct *aper;
 	int size, ret;
 
 	mode_cmd.width = sizes->surface_width;
@@ -267,28 +266,12 @@ nouveau_fbcon_create(struct nouveau_fbdev *nfbdev,
 	info->fix.mmio_len = pci_resource_len(pdev, 1);
 
 	/* Set aperture base/size for vesafb takeover */
-	aper = info->apertures = alloc_apertures(3);
+	info->apertures = dev_priv->apertures;
 	if (!info->apertures) {
 		ret = -ENOMEM;
 		goto out_unref;
 	}
 
-	aper->ranges[0].base = pci_resource_start(pdev, 1);
-	aper->ranges[0].size = pci_resource_len(pdev, 1);
-	aper->count = 1;
-
-	if (pci_resource_len(pdev, 2)) {
-		aper->ranges[aper->count].base = pci_resource_start(pdev, 2);
-		aper->ranges[aper->count].size = pci_resource_len(pdev, 2);
-		aper->count++;
-	}
-
-	if (pci_resource_len(pdev, 3)) {
-		aper->ranges[aper->count].base = pci_resource_start(pdev, 3);
-		aper->ranges[aper->count].size = pci_resource_len(pdev, 3);
-		aper->count++;
-	}
-
 	info->pixmap.size = 64*1024;
 	info->pixmap.buf_align = 8;
 	info->pixmap.access_align = 32;
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 92100a9678ba..75c5c465e08e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -639,6 +639,43 @@ static void nouveau_OF_copy_vbios_to_ramin(struct drm_device *dev)
 #endif
 }
 
+static struct apertures_struct *nouveau_get_apertures(struct drm_device *dev)
+{
+	struct pci_dev *pdev = dev->pdev;
+	struct apertures_struct *aper = alloc_apertures(3);
+	if (!aper)
+		return NULL;
+
+	aper->ranges[0].base = pci_resource_start(pdev, 1);
+	aper->ranges[0].size = pci_resource_len(pdev, 1);
+	aper->count = 1;
+
+	if (pci_resource_len(pdev, 2)) {
+		aper->ranges[aper->count].base = pci_resource_start(pdev, 2);
+		aper->ranges[aper->count].size = pci_resource_len(pdev, 2);
+		aper->count++;
+	}
+
+	if (pci_resource_len(pdev, 3)) {
+		aper->ranges[aper->count].base = pci_resource_start(pdev, 3);
+		aper->ranges[aper->count].size = pci_resource_len(pdev, 3);
+		aper->count++;
+	}
+
+	return aper;
+}
+
+static int nouveau_remove_conflicting_drivers(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	dev_priv->apertures = nouveau_get_apertures(dev);
+	if (!dev_priv->apertures)
+		return -ENOMEM;
+
+	remove_conflicting_framebuffers(dev_priv->apertures, "nouveaufb");
+	return 0;
+}
+
 int nouveau_load(struct drm_device *dev, unsigned long flags)
 {
 	struct drm_nouveau_private *dev_priv;
@@ -726,6 +763,12 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	NV_INFO(dev, "Detected an NV%2x generation card (0x%08x)\n",
 		dev_priv->card_type, reg0);
 
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		int ret = nouveau_remove_conflicting_drivers(dev);
+		if (ret)
+			return ret;
+	}
+
 	/* map larger RAMIN aperture on NV40 cards */
 	dev_priv->ramin  = NULL;
 	if (dev_priv->card_type >= NV_40) {
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 03f2dc2470b5..7cfcd716fd5f 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1479,11 +1479,10 @@ static bool apertures_overlap(struct aperture *gen, struct aperture *hw)
 	return false;
 }
 
-static bool fb_do_apertures_overlap(struct fb_info *gen, struct fb_info *hw)
+static bool fb_do_apertures_overlap(struct apertures_struct *gena,
+				    struct apertures_struct *hwa)
 {
 	int i, j;
-	struct apertures_struct *hwa = hw->apertures;
-	struct apertures_struct *gena = gen->apertures;
 	if (!hwa || !gena)
 		return false;
 
@@ -1501,6 +1500,28 @@ static bool fb_do_apertures_overlap(struct fb_info *gen, struct fb_info *hw)
 	return false;
 }
 
+void remove_conflicting_framebuffers(struct apertures_struct *a, const char *name)
+{
+	int i;
+
+	/* check all firmware fbs and kick off if the base addr overlaps */
+	for (i = 0 ; i < FB_MAX; i++) {
+		if (!registered_fb[i])
+			continue;
+
+		if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
+			continue;
+
+		if (fb_do_apertures_overlap(registered_fb[i]->apertures, a)) {
+			printk(KERN_ERR "fb: conflicting fb hw usage "
+			       "%s vs %s - removing generic driver\n",
+			       name, registered_fb[i]->fix.id);
+			unregister_framebuffer(registered_fb[i]);
+		}
+	}
+}
+EXPORT_SYMBOL(remove_conflicting_framebuffers);
+
 /**
  *	register_framebuffer - registers a frame buffer device
  *	@fb_info: frame buffer info structure
@@ -1524,21 +1545,7 @@ register_framebuffer(struct fb_info *fb_info)
 	if (fb_check_foreignness(fb_info))
 		return -ENOSYS;
 
-	/* check all firmware fbs and kick off if the base addr overlaps */
-	for (i = 0 ; i < FB_MAX; i++) {
-		if (!registered_fb[i])
-			continue;
-
-		if (registered_fb[i]->flags & FBINFO_MISC_FIRMWARE) {
-			if (fb_do_apertures_overlap(registered_fb[i], fb_info)) {
-				printk(KERN_ERR "fb: conflicting fb hw usage "
-				       "%s vs %s - removing generic driver\n",
-				       fb_info->fix.id,
-				       registered_fb[i]->fix.id);
-				unregister_framebuffer(registered_fb[i]);
-			}
-		}
-	}
+	remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id);
 
 	num_registered_fb++;
 	for (i = 0 ; i < FB_MAX; i++)
diff --git a/include/linux/fb.h b/include/linux/fb.h
index de5ff5fa8380..f88e2549123d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -971,6 +971,7 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
 /* drivers/video/fbmem.c */
 extern int register_framebuffer(struct fb_info *fb_info);
 extern int unregister_framebuffer(struct fb_info *fb_info);
+extern void remove_conflicting_framebuffers(struct apertures_struct *a, const char *name);
 extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
 extern int fb_show_logo(struct fb_info *fb_info, int rotate);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-- 
cgit v1.2.3


From 3b9676e7ac6eff4f50f1b48b6c36664f55b79507 Mon Sep 17 00:00:00 2001
From: Marcin Slusarz <marcin.slusarz@gmail.com>
Date: Sun, 16 May 2010 17:33:09 +0200
Subject: vga16fb, drm: vga16fb->drm handoff

Let vga16fb claim the 0xA0000+0x10000 region as its aperture. DRM drivers
don't use that region, so we have to detect that aperture explicitly and
kick vga16fb manually - but only if DRM is driving the primary card.

Signed-off-by: Marcin Slusarz <marcin.slusarz@gmail.com>
Cc: James Simmons <jsimmons@infradead.org>
Cc: Dave Airlie <airlied@redhat.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_state.c |  7 ++++++-
 drivers/video/fbmem.c                   | 14 +++++++++++---
 drivers/video/vga16fb.c                 | 26 +++++++++++++++++++-------
 include/linux/fb.h                      |  3 ++-
 4 files changed, 38 insertions(+), 12 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 75c5c465e08e..c667a1138c33 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -668,11 +668,16 @@ static struct apertures_struct *nouveau_get_apertures(struct drm_device *dev)
 static int nouveau_remove_conflicting_drivers(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	bool primary = false;
 	dev_priv->apertures = nouveau_get_apertures(dev);
 	if (!dev_priv->apertures)
 		return -ENOMEM;
 
-	remove_conflicting_framebuffers(dev_priv->apertures, "nouveaufb");
+#ifdef CONFIG_X86
+	primary = dev->pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
+#endif
+	
+	remove_conflicting_framebuffers(dev_priv->apertures, "nouveaufb", primary);
 	return 0;
 }
 
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 7cfcd716fd5f..e08b7b5cb326 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1500,19 +1500,26 @@ static bool fb_do_apertures_overlap(struct apertures_struct *gena,
 	return false;
 }
 
-void remove_conflicting_framebuffers(struct apertures_struct *a, const char *name)
+#define VGA_FB_PHYS 0xA0000
+void remove_conflicting_framebuffers(struct apertures_struct *a,
+				     const char *name, bool primary)
 {
 	int i;
 
 	/* check all firmware fbs and kick off if the base addr overlaps */
 	for (i = 0 ; i < FB_MAX; i++) {
+		struct apertures_struct *gen_aper;
 		if (!registered_fb[i])
 			continue;
 
 		if (!(registered_fb[i]->flags & FBINFO_MISC_FIRMWARE))
 			continue;
 
-		if (fb_do_apertures_overlap(registered_fb[i]->apertures, a)) {
+		gen_aper = registered_fb[i]->apertures;
+		if (fb_do_apertures_overlap(gen_aper, a) ||
+			(primary && gen_aper && gen_aper->count &&
+			 gen_aper->ranges[0].base == VGA_FB_PHYS)) {
+
 			printk(KERN_ERR "fb: conflicting fb hw usage "
 			       "%s vs %s - removing generic driver\n",
 			       name, registered_fb[i]->fix.id);
@@ -1545,7 +1552,8 @@ register_framebuffer(struct fb_info *fb_info)
 	if (fb_check_foreignness(fb_info))
 		return -ENOSYS;
 
-	remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id);
+	remove_conflicting_framebuffers(fb_info->apertures, fb_info->fix.id,
+					 fb_is_primary_device(fb_info));
 
 	num_registered_fb++;
 	for (i = 0 ; i < FB_MAX; i++)
diff --git a/drivers/video/vga16fb.c b/drivers/video/vga16fb.c
index bf638a47a5b3..149c47ac7e93 100644
--- a/drivers/video/vga16fb.c
+++ b/drivers/video/vga16fb.c
@@ -1263,10 +1263,19 @@ static void vga16fb_imageblit(struct fb_info *info, const struct fb_image *image
 		vga_imageblit_color(info, image);
 }
 
+static void vga16fb_destroy(struct fb_info *info)
+{
+	iounmap(info->screen_base);
+	fb_dealloc_cmap(&info->cmap);
+	/* XXX unshare VGA regions */
+	framebuffer_release(info);
+}
+
 static struct fb_ops vga16fb_ops = {
 	.owner		= THIS_MODULE,
 	.fb_open        = vga16fb_open,
 	.fb_release     = vga16fb_release,
+	.fb_destroy	= vga16fb_destroy,
 	.fb_check_var	= vga16fb_check_var,
 	.fb_set_par	= vga16fb_set_par,
 	.fb_setcolreg 	= vga16fb_setcolreg,
@@ -1306,6 +1315,11 @@ static int __devinit vga16fb_probe(struct platform_device *dev)
 		ret = -ENOMEM;
 		goto err_fb_alloc;
 	}
+	info->apertures = alloc_apertures(1);
+	if (!info->apertures) {
+		ret = -ENOMEM;
+		goto err_ioremap;
+	}
 
 	/* XXX share VGA_FB_PHYS and I/O region with vgacon and others */
 	info->screen_base = (void __iomem *)VGA_MAP_MEM(VGA_FB_PHYS, 0);
@@ -1335,7 +1349,7 @@ static int __devinit vga16fb_probe(struct platform_device *dev)
 	info->fix = vga16fb_fix;
 	/* supports rectangles with widths of multiples of 8 */
 	info->pixmap.blit_x = 1 << 7 | 1 << 15 | 1 << 23 | 1 << 31;
-	info->flags = FBINFO_FLAG_DEFAULT |
+	info->flags = FBINFO_FLAG_DEFAULT | FBINFO_MISC_FIRMWARE |
 		FBINFO_HWACCEL_YPAN;
 
 	i = (info->var.bits_per_pixel == 8) ? 256 : 16;
@@ -1354,6 +1368,9 @@ static int __devinit vga16fb_probe(struct platform_device *dev)
 
 	vga16fb_update_fix(info);
 
+	info->apertures->ranges[0].base = VGA_FB_PHYS;
+	info->apertures->ranges[0].size = VGA_FB_PHYS_LEN;
+
 	if (register_framebuffer(info) < 0) {
 		printk(KERN_ERR "vga16fb: unable to register framebuffer\n");
 		ret = -EINVAL;
@@ -1380,13 +1397,8 @@ static int vga16fb_remove(struct platform_device *dev)
 {
 	struct fb_info *info = platform_get_drvdata(dev);
 
-	if (info) {
+	if (info)
 		unregister_framebuffer(info);
-		iounmap(info->screen_base);
-		fb_dealloc_cmap(&info->cmap);
-	/* XXX unshare VGA regions */
-		framebuffer_release(info);
-	}
 
 	return 0;
 }
diff --git a/include/linux/fb.h b/include/linux/fb.h
index f88e2549123d..1296af45169d 100644
--- a/include/linux/fb.h
+++ b/include/linux/fb.h
@@ -971,7 +971,8 @@ extern ssize_t fb_sys_write(struct fb_info *info, const char __user *buf,
 /* drivers/video/fbmem.c */
 extern int register_framebuffer(struct fb_info *fb_info);
 extern int unregister_framebuffer(struct fb_info *fb_info);
-extern void remove_conflicting_framebuffers(struct apertures_struct *a, const char *name);
+extern void remove_conflicting_framebuffers(struct apertures_struct *a,
+				const char *name, bool primary);
 extern int fb_prepare_logo(struct fb_info *fb_info, int rotate);
 extern int fb_show_logo(struct fb_info *fb_info, int rotate);
 extern char* fb_get_buffer_offset(struct fb_info *info, struct fb_pixmap *buf, u32 size);
-- 
cgit v1.2.3


From ea64f8c2204cd8b9a95072c7ef85cc9a120a2693 Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Tue, 6 Apr 2010 06:53:57 -0300
Subject: V4L/DVB: videodev2.h: move definition of enum v4l2_ctrl_type to the
 other control types

For some reason the definition of enum v4l2_ctrl_type is far from the
place where it is actually needed. This makes it hard to work with this
header.

Move it to just before struct v4l2_queryctrl, which is the one that
actually uses it.

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 2559b182b8c4..65d13ec13c1f 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -160,16 +160,6 @@ enum v4l2_buf_type {
 	V4L2_BUF_TYPE_PRIVATE              = 0x80,
 };
 
-enum v4l2_ctrl_type {
-	V4L2_CTRL_TYPE_INTEGER	     = 1,
-	V4L2_CTRL_TYPE_BOOLEAN	     = 2,
-	V4L2_CTRL_TYPE_MENU	     = 3,
-	V4L2_CTRL_TYPE_BUTTON	     = 4,
-	V4L2_CTRL_TYPE_INTEGER64     = 5,
-	V4L2_CTRL_TYPE_CTRL_CLASS    = 6,
-	V4L2_CTRL_TYPE_STRING        = 7,
-};
-
 enum v4l2_tuner_type {
 	V4L2_TUNER_RADIO	     = 1,
 	V4L2_TUNER_ANALOG_TV	     = 2,
@@ -942,6 +932,16 @@ struct v4l2_ext_controls {
 #define V4L2_CTRL_ID2CLASS(id)    ((id) & 0x0fff0000UL)
 #define V4L2_CTRL_DRIVER_PRIV(id) (((id) & 0xffff) >= 0x1000)
 
+enum v4l2_ctrl_type {
+	V4L2_CTRL_TYPE_INTEGER	     = 1,
+	V4L2_CTRL_TYPE_BOOLEAN	     = 2,
+	V4L2_CTRL_TYPE_MENU	     = 3,
+	V4L2_CTRL_TYPE_BUTTON	     = 4,
+	V4L2_CTRL_TYPE_INTEGER64     = 5,
+	V4L2_CTRL_TYPE_CTRL_CLASS    = 6,
+	V4L2_CTRL_TYPE_STRING        = 7,
+};
+
 /*  Used in the VIDIOC_QUERYCTRL ioctl for querying controls */
 struct v4l2_queryctrl {
 	__u32		     id;
-- 
cgit v1.2.3


From b187913c77adbd865194c6b8072764e3474d56ea Mon Sep 17 00:00:00 2001
From: Klaus Schmidinger <Klaus.Schmidinger@tvdr.de>
Date: Sun, 11 Apr 2010 06:12:52 -0300
Subject: V4L/DVB: Add FE_CAN_PSK_8 to allow apps to identify PSK_8 capable DVB
 devices

The enum fe_caps provides flags that allow an application to detect
whether a device is capable of handling various modulation types etc.
A flag for detecting PSK_8, however, is missing.
This patch adds the flag FE_CAN_PSK_8 to frontend.h and implements
it for the gp8psk-fe.c and cx24116.c driver (apparently the only ones
with PSK_8). Only the gp8psk-fe.c has been explicitly tested, though.

Signed-off-by: Klaus Schmidinger <Klaus.Schmidinger@tvdr.de>
Tested-by: Derek Kelly <user.vdr@gmail.com>
Acked-by: Manu Abraham <manu@linuxtv.org>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-usb/gp8psk-fe.c | 2 +-
 drivers/media/dvb/frontends/cx24116.c | 2 +-
 include/linux/dvb/frontend.h          | 1 +
 3 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-usb/gp8psk-fe.c b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
index 7a7f1b2b681c..39143805f3da 100644
--- a/drivers/media/dvb/dvb-usb/gp8psk-fe.c
+++ b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
@@ -349,7 +349,7 @@ static struct dvb_frontend_ops gp8psk_fe_ops = {
 			 * FE_CAN_QAM_16 is for compatibility
 			 * (Myth incorrectly detects Turbo-QPSK as plain QAM-16)
 			 */
-			FE_CAN_QPSK | FE_CAN_QAM_16
+                       FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_PSK_8
 	},
 
 	.release = gp8psk_fe_release,
diff --git a/drivers/media/dvb/frontends/cx24116.c b/drivers/media/dvb/frontends/cx24116.c
index 2410d8b59b6b..13ae2c58d79f 100644
--- a/drivers/media/dvb/frontends/cx24116.c
+++ b/drivers/media/dvb/frontends/cx24116.c
@@ -1479,7 +1479,7 @@ static struct dvb_frontend_ops cx24116_ops = {
 			FE_CAN_FEC_4_5 | FE_CAN_FEC_5_6 | FE_CAN_FEC_6_7 |
 			FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_2G_MODULATION |
-			FE_CAN_QPSK | FE_CAN_RECOVER
+                       FE_CAN_QPSK | FE_CAN_RECOVER | FE_CAN_PSK_8
 	},
 
 	.release = cx24116_release,
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index b6cb5425cde3..7c73f0b09e9c 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,6 +62,7 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
+       FE_CAN_PSK_8                    = 0x8000000,  /* frontend supports "8psk modulation" */
 	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
 	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
 	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
-- 
cgit v1.2.3


From c683357236969c48f7b945fd718f141bf36e8ba2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab@redhat.com>
Date: Wed, 21 Apr 2010 11:36:57 -0300
Subject: Revert "V4L/DVB: Add FE_CAN_PSK_8 to allow apps to identify PSK_8
 capable DVB devices"

Patch reverted at the request of Andreas Oberritter <obi@linuxtv.org>: it is
not yet ready for an upstream merge.

This reverts commit 77b2ad374a82e3d740cb1780ff4caedc3e051b37.

Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/dvb/dvb-usb/gp8psk-fe.c | 2 +-
 drivers/media/dvb/frontends/cx24116.c | 2 +-
 include/linux/dvb/frontend.h          | 1 -
 3 files changed, 2 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/media/dvb/dvb-usb/gp8psk-fe.c b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
index 39143805f3da..7a7f1b2b681c 100644
--- a/drivers/media/dvb/dvb-usb/gp8psk-fe.c
+++ b/drivers/media/dvb/dvb-usb/gp8psk-fe.c
@@ -349,7 +349,7 @@ static struct dvb_frontend_ops gp8psk_fe_ops = {
 			 * FE_CAN_QAM_16 is for compatibility
 			 * (Myth incorrectly detects Turbo-QPSK as plain QAM-16)
 			 */
-                       FE_CAN_QPSK | FE_CAN_QAM_16 | FE_CAN_PSK_8
+			FE_CAN_QPSK | FE_CAN_QAM_16
 	},
 
 	.release = gp8psk_fe_release,
diff --git a/drivers/media/dvb/frontends/cx24116.c b/drivers/media/dvb/frontends/cx24116.c
index 13ae2c58d79f..2410d8b59b6b 100644
--- a/drivers/media/dvb/frontends/cx24116.c
+++ b/drivers/media/dvb/frontends/cx24116.c
@@ -1479,7 +1479,7 @@ static struct dvb_frontend_ops cx24116_ops = {
 			FE_CAN_FEC_4_5 | FE_CAN_FEC_5_6 | FE_CAN_FEC_6_7 |
 			FE_CAN_FEC_7_8 | FE_CAN_FEC_AUTO |
 			FE_CAN_2G_MODULATION |
-                       FE_CAN_QPSK | FE_CAN_RECOVER | FE_CAN_PSK_8
+			FE_CAN_QPSK | FE_CAN_RECOVER
 	},
 
 	.release = cx24116_release,
diff --git a/include/linux/dvb/frontend.h b/include/linux/dvb/frontend.h
index 7c73f0b09e9c..b6cb5425cde3 100644
--- a/include/linux/dvb/frontend.h
+++ b/include/linux/dvb/frontend.h
@@ -62,7 +62,6 @@ typedef enum fe_caps {
 	FE_CAN_8VSB			= 0x200000,
 	FE_CAN_16VSB			= 0x400000,
 	FE_HAS_EXTENDED_CAPS		= 0x800000,   /* We need more bitspace for newer APIs, indicate this. */
-       FE_CAN_PSK_8                    = 0x8000000,  /* frontend supports "8psk modulation" */
 	FE_CAN_2G_MODULATION		= 0x10000000, /* frontend supports "2nd generation modulation" (DVB-S2) */
 	FE_NEEDS_BENDING		= 0x20000000, /* not supported anymore, don't use (frontend requires frequency bending) */
 	FE_CAN_RECOVER			= 0x40000000, /* frontend can recover from a cable unplug automatically */
-- 
cgit v1.2.3


From 35e6aa9fc38fb7f47f39711e52cb0f58fbf4441c Mon Sep 17 00:00:00 2001
From: Xiaolin Zhang <xiaolin.zhang@intel.com>
Date: Sun, 18 Apr 2010 23:06:50 -0300
Subject: V4L/DVB: core: add support for more color effects

Add support for more color effects (negative, sketch, emboss, etc.) by
extending the v4l2_colorfx enum items.

Signed-off-by: Xiaolin Zhang <xiaolin.zhang@intel.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 Documentation/DocBook/v4l/controls.xml    | 11 +++++++++--
 Documentation/DocBook/v4l/videodev2.h.xml |  7 +++++++
 drivers/media/video/v4l2-common.c         |  7 +++++++
 include/linux/videodev2.h                 |  7 +++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/v4l/controls.xml b/Documentation/DocBook/v4l/controls.xml
index 7e0c68747134..8408caaee276 100644
--- a/Documentation/DocBook/v4l/controls.xml
+++ b/Documentation/DocBook/v4l/controls.xml
@@ -283,8 +283,15 @@ minimum value disables backlight compensation.</entry>
 	    <entry>Selects a color effect. Possible values for
 <constant>enum v4l2_colorfx</constant> are:
 <constant>V4L2_COLORFX_NONE</constant> (0),
-<constant>V4L2_COLORFX_BW</constant> (1) and
-<constant>V4L2_COLORFX_SEPIA</constant> (2).</entry>
+<constant>V4L2_COLORFX_BW</constant> (1),
+<constant>V4L2_COLORFX_SEPIA</constant> (2),
+<constant>V4L2_COLORFX_NEGATIVE</constant> (3),
+<constant>V4L2_COLORFX_EMBOSS</constant> (4),
+<constant>V4L2_COLORFX_SKETCH</constant> (5),
+<constant>V4L2_COLORFX_SKY_BLUE</constant> (6),
+<constant>V4L2_COLORFX_GRASS_GREEN</constant> (7),
+<constant>V4L2_COLORFX_SKIN_WHITEN</constant> (8) and
+<constant>V4L2_COLORFX_VIVID</constant> (9).</entry>
 	  </row>
 	  <row>
 	    <entry><constant>V4L2_CID_ROTATE</constant></entry>
diff --git a/Documentation/DocBook/v4l/videodev2.h.xml b/Documentation/DocBook/v4l/videodev2.h.xml
index c18dfebedeff..865b06d9e679 100644
--- a/Documentation/DocBook/v4l/videodev2.h.xml
+++ b/Documentation/DocBook/v4l/videodev2.h.xml
@@ -1018,6 +1018,13 @@ enum <link linkend="v4l2-colorfx">v4l2_colorfx</link> {
         V4L2_COLORFX_NONE       = 0,
         V4L2_COLORFX_BW         = 1,
         V4L2_COLORFX_SEPIA      = 2,
+        V4L2_COLORFX_NEGATIVE   = 3,
+        V4L2_COLORFX_EMBOSS     = 4,
+        V4L2_COLORFX_SKETCH     = 5,
+        V4L2_COLORFX_SKY_BLUE   = 6,
+        V4L2_COLORFX_GRASS_GREEN = 7,
+        V4L2_COLORFX_SKIN_WHITEN = 8,
+        V4L2_COLORFX_VIVID      = 9,
 };
 #define V4L2_CID_AUTOBRIGHTNESS                 (V4L2_CID_BASE+32)
 #define V4L2_CID_BAND_STOP_FILTER               (V4L2_CID_BASE+33)
diff --git a/drivers/media/video/v4l2-common.c b/drivers/media/video/v4l2-common.c
index 67944f53a79a..035c414507a1 100644
--- a/drivers/media/video/v4l2-common.c
+++ b/drivers/media/video/v4l2-common.c
@@ -343,6 +343,13 @@ const char **v4l2_ctrl_get_menu(u32 id)
 		"None",
 		"Black & White",
 		"Sepia",
+		"Negative",
+		"Emboss",
+		"Sketch",
+		"Sky blue",
+		"Grass green",
+		"Skin whiten",
+		"Vivid",
 		NULL
 	};
 	static const char *tune_preemphasis[] = {
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 65d13ec13c1f..e878056952c7 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1026,6 +1026,13 @@ enum v4l2_colorfx {
 	V4L2_COLORFX_NONE	= 0,
 	V4L2_COLORFX_BW		= 1,
 	V4L2_COLORFX_SEPIA	= 2,
+	V4L2_COLORFX_NEGATIVE = 3,
+	V4L2_COLORFX_EMBOSS = 4,
+	V4L2_COLORFX_SKETCH = 5,
+	V4L2_COLORFX_SKY_BLUE = 6,
+	V4L2_COLORFX_GRASS_GREEN = 7,
+	V4L2_COLORFX_SKIN_WHITEN = 8,
+	V4L2_COLORFX_VIVID = 9,
 };
 #define V4L2_CID_AUTOBRIGHTNESS			(V4L2_CID_BASE+32)
 #define V4L2_CID_BAND_STOP_FILTER		(V4L2_CID_BASE+33)
-- 
cgit v1.2.3


From fda1021477b390506ebed0225eaa6d31a903e2b7 Mon Sep 17 00:00:00 2001
From: Sakari Ailus <sakari.ailus@maxwell.research.nokia.com>
Date: Wed, 24 Feb 2010 19:19:05 -0300
Subject: V4L/DVB: V4L: Events: Add new ioctls for events

This patch adds a set of new ioctls to the V4L2 API. The ioctls conform to
V4L2 Events RFC version 2.3:

<URL:http://www.spinics.net/lists/linux-media/msg12033.html>

Signed-off-by: Sakari Ailus <sakari.ailus@maxwell.research.nokia.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 drivers/media/video/v4l2-compat-ioctl32.c |  3 +++
 drivers/media/video/v4l2-ioctl.c          |  3 +++
 include/linux/videodev2.h                 | 26 ++++++++++++++++++++++++++
 3 files changed, 32 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/media/video/v4l2-compat-ioctl32.c b/drivers/media/video/v4l2-compat-ioctl32.c
index f77f84bfe714..9004a5fe7643 100644
--- a/drivers/media/video/v4l2-compat-ioctl32.c
+++ b/drivers/media/video/v4l2-compat-ioctl32.c
@@ -1086,6 +1086,9 @@ long v4l2_compat_ioctl32(struct file *file, unsigned int cmd, unsigned long arg)
 	case VIDIOC_QUERY_DV_PRESET:
 	case VIDIOC_S_DV_TIMINGS:
 	case VIDIOC_G_DV_TIMINGS:
+	case VIDIOC_DQEVENT:
+	case VIDIOC_SUBSCRIBE_EVENT:
+	case VIDIOC_UNSUBSCRIBE_EVENT:
 		ret = do_video_ioctl(file, cmd, arg);
 		break;
 
diff --git a/drivers/media/video/v4l2-ioctl.c b/drivers/media/video/v4l2-ioctl.c
index 3da8d8f041d8..2ceaa152a450 100644
--- a/drivers/media/video/v4l2-ioctl.c
+++ b/drivers/media/video/v4l2-ioctl.c
@@ -291,6 +291,9 @@ static const char *v4l2_ioctls[] = {
 	[_IOC_NR(VIDIOC_QUERY_DV_PRESET)]  = "VIDIOC_QUERY_DV_PRESET",
 	[_IOC_NR(VIDIOC_S_DV_TIMINGS)]     = "VIDIOC_S_DV_TIMINGS",
 	[_IOC_NR(VIDIOC_G_DV_TIMINGS)]     = "VIDIOC_G_DV_TIMINGS",
+	[_IOC_NR(VIDIOC_DQEVENT)]	   = "VIDIOC_DQEVENT",
+	[_IOC_NR(VIDIOC_SUBSCRIBE_EVENT)]  = "VIDIOC_SUBSCRIBE_EVENT",
+	[_IOC_NR(VIDIOC_UNSUBSCRIBE_EVENT)] = "VIDIOC_UNSUBSCRIBE_EVENT",
 };
 #define V4L2_IOCTLS ARRAY_SIZE(v4l2_ioctls)
 
diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index e878056952c7..0f627c5b986b 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1636,6 +1636,29 @@ struct v4l2_streamparm {
 	} parm;
 };
 
+/*
+ *	E V E N T S
+ */
+
+struct v4l2_event {
+	__u32				type;
+	union {
+		__u8			data[64];
+	} u;
+	__u32				pending;
+	__u32				sequence;
+	struct timespec			timestamp;
+	__u32				reserved[9];
+};
+
+struct v4l2_event_subscription {
+	__u32				type;
+	__u32				reserved[7];
+};
+
+#define V4L2_EVENT_ALL				0
+#define V4L2_EVENT_PRIVATE_START		0x08000000
+
 /*
  *	A D V A N C E D   D E B U G G I N G
  *
@@ -1758,6 +1781,9 @@ struct v4l2_dbg_chip_ident {
 #define	VIDIOC_QUERY_DV_PRESET	_IOR('V',  86, struct v4l2_dv_preset)
 #define	VIDIOC_S_DV_TIMINGS	_IOWR('V', 87, struct v4l2_dv_timings)
 #define	VIDIOC_G_DV_TIMINGS	_IOWR('V', 88, struct v4l2_dv_timings)
+#define	VIDIOC_DQEVENT		 _IOR('V', 89, struct v4l2_event)
+#define	VIDIOC_SUBSCRIBE_EVENT	 _IOW('V', 90, struct v4l2_event_subscription)
+#define	VIDIOC_UNSUBSCRIBE_EVENT _IOW('V', 91, struct v4l2_event_subscription)
 
 /* Reminder: when adding new ioctls please add support for them to
    drivers/media/video/v4l2-compat-ioctl32.c as well! */
-- 
cgit v1.2.3


From 1bcaf4bd53872e70c4fceec6bbb76044325f337f Mon Sep 17 00:00:00 2001
From: Hans Verkuil <hverkuil@xs4all.nl>
Date: Sat, 27 Mar 2010 14:09:52 -0300
Subject: V4L/DVB: v4l: add VSYNC and EOS events for use with ivtv

Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index 0f627c5b986b..b5121bec1bd9 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -1640,9 +1640,21 @@ struct v4l2_streamparm {
  *	E V E N T S
  */
 
+#define V4L2_EVENT_ALL				0
+#define V4L2_EVENT_VSYNC			1
+#define V4L2_EVENT_EOS				2
+#define V4L2_EVENT_PRIVATE_START		0x08000000
+
+/* Payload for V4L2_EVENT_VSYNC */
+struct v4l2_event_vsync {
+	/* Can be V4L2_FIELD_ANY, _NONE, _TOP or _BOTTOM */
+	__u8 field;
+} __attribute__ ((packed));
+
 struct v4l2_event {
 	__u32				type;
 	union {
+		struct v4l2_event_vsync vsync;
 		__u8			data[64];
 	} u;
 	__u32				pending;
@@ -1656,9 +1668,6 @@ struct v4l2_event_subscription {
 	__u32				reserved[7];
 };
 
-#define V4L2_EVENT_ALL				0
-#define V4L2_EVENT_PRIVATE_START		0x08000000
-
 /*
  *	A D V A N C E D   D E B U G G I N G
  *
-- 
cgit v1.2.3


From bc0f7f19ab9ba5c7ec648b400d1360b525fa1a3a Mon Sep 17 00:00:00 2001
From: Pawel Osciak <p.osciak@samsung.com>
Date: Wed, 28 Apr 2010 04:05:21 -0300
Subject: V4L/DVB: Add a new ERROR flag for DQBUF after recoverable streaming
 errors

This flag is intended to indicate streaming errors, which might have
resulted in corrupted video data in the buffer, but the buffer can still
be reused and streaming may continue.

Setting this flag and returning 0 is different from returning EIO. The
latter should now indicate more serious (unrecoverable) errors.

This patch also solves a problem with the ioctl handling code in
v4l2-ioctl.c, which does not copy buffer identification data back to the
userspace when EIO is returned, so there is no way for applications
to discover on which buffer the operation failed in such cases.

Signed-off-by: Pawel Osciak <p.osciak@samsung.com>
Signed-off-by: Kyungmin Park <kyungmin.park@samsung.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
---
 include/linux/videodev2.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h
index b5121bec1bd9..047f7e6edb86 100644
--- a/include/linux/videodev2.h
+++ b/include/linux/videodev2.h
@@ -542,6 +542,8 @@ struct v4l2_buffer {
 #define V4L2_BUF_FLAG_KEYFRAME	0x0008	/* Image is a keyframe (I-frame) */
 #define V4L2_BUF_FLAG_PFRAME	0x0010	/* Image is a P-frame */
 #define V4L2_BUF_FLAG_BFRAME	0x0020	/* Image is a B-frame */
+/* Buffer is ready, but the data contained within is corrupted. */
+#define V4L2_BUF_FLAG_ERROR	0x0040
 #define V4L2_BUF_FLAG_TIMECODE	0x0100	/* timecode field is valid */
 #define V4L2_BUF_FLAG_INPUT     0x0200  /* input field is valid */
 
-- 
cgit v1.2.3


From c7087652e1890a3feef35b30ee1d4be68e1932cd Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:34 +0200
Subject: libata-sff: clean up BMDMA initialization

When BMDMA initialization failed or BMDMA was not available for
whatever reason, bmdma_addr was left at zero and used as an indication
that BMDMA shouldn't be used.  This leads to the following problems.

p1. For BMDMA drivers which don't use traditional BMDMA register,
    ata_bmdma_mode_filter() incorrectly inhibits DMA modes.  Those
    drivers either have to inherit from ata_sff_port_ops or clear
    ->mode_filter explicitly.

p2. non-BMDMA drivers call into BMDMA PRD table allocation.  It
    doesn't actually allocate PRD table if bmdma_addr is not
    initialized but is still confusing.

p3. For BMDMA drivers which don't use traditional BMDMA register, some
    methods might not be invoked as expected (e.g. bmdma_stop from
    ata_sff_post_internal_cmd()).

p4. SFF drivers w/ custom DMA interface implement noop BMDMA ops
    worrying libata core might call into one of them.

These problems are caused by the muddy line between SFF and BMDMA and
the assumption that all BMDMA controllers initialize bmdma_addr.

This patch fixes p1 and p2 by removing the bmdma_addr assumption and
moving prd allocation to BMDMA port start.  Later patches will fix the
remaining issues.

This patch improves BMDMA initialization such that

* When BMDMA register initialization fails, falls back to PIO instead
  of failing.  ata_pci_bmdma_init() never fails now.

* When ata_pci_bmdma_init() falls back to PIO, it clears
  ap->mwdma_mask and udma_mask instead of depending on
  ata_bmdma_mode_filter().  This makes ata_bmdma_mode_filter()
  unnecessary thus resolving p1.

* ata_port_start() which actually is BMDMA specific is moved to
  ata_bmdma_port_start().  ata_port_start() and ata_sff_port_start()
  are killed.

* ata_sff_port_start32() is moved and renamed to
  ata_bmdma_port_start32().

Drivers which no longer call into PRD table allocation are...

  pdc_adma, sata_inic162x, sata_qstor, sata_sx4, pata_cmd640 and all
  drivers which inherit from ata_sff_port_ops.

pata_icside sets ->port_start to ATA_OP_NULL as it doesn't need PRD
but is a BMDMA controller and doesn't have custom port_start like
other such controllers.

Note that with the previous patch which makes all and only BMDMA
drivers inherit from ata_bmdma_port_ops, this change doesn't break
drivers which need PRD table.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c       |  25 ------
 drivers/ata/libata-sff.c        | 170 ++++++++++++++++++++--------------------
 drivers/ata/pata_acpi.c         |   4 +-
 drivers/ata/pata_ali.c          |   2 +-
 drivers/ata/pata_at91.c         |   1 -
 drivers/ata/pata_bf54x.c        |   2 -
 drivers/ata/pata_cmd640.c       |   6 +-
 drivers/ata/pata_hpt366.c       |   2 +-
 drivers/ata/pata_hpt37x.c       |   4 +-
 drivers/ata/pata_icside.c       |   2 +-
 drivers/ata/pata_it821x.c       |   2 +-
 drivers/ata/pata_macio.c        |   2 +
 drivers/ata/pata_pdc2027x.c     |   4 +-
 drivers/ata/pata_pdc202xx_old.c |   2 +-
 drivers/ata/pata_platform.c     |   1 -
 drivers/ata/pata_scc.c          |   6 +-
 drivers/ata/pata_serverworks.c  |   6 +-
 drivers/ata/pata_via.c          |   4 +-
 drivers/ata/pdc_adma.c          |   4 -
 drivers/ata/sata_inic162x.c     |   5 --
 drivers/ata/sata_mv.c           |   2 -
 drivers/ata/sata_nv.c           |   6 +-
 drivers/ata/sata_promise.c      |   3 +-
 drivers/ata/sata_qstor.c        |   4 -
 drivers/ata/sata_sx4.c          |   5 --
 drivers/ata/sata_uli.c          |   4 +-
 include/linux/libata.h          |   9 +--
 27 files changed, 116 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 3d8b62f7441c..cc49a0d3089f 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5505,30 +5505,6 @@ void ata_host_resume(struct ata_host *host)
 }
 #endif
 
-/**
- *	ata_port_start - Set port up for dma.
- *	@ap: Port to initialize
- *
- *	Called just after data structures for each port are
- *	initialized.  Allocates space for PRD table.
- *
- *	May be used as the port_start() entry in ata_port_operations.
- *
- *	LOCKING:
- *	Inherited from caller.
- */
-int ata_port_start(struct ata_port *ap)
-{
-	struct device *dev = ap->dev;
-
-	ap->prd = dmam_alloc_coherent(dev, ATA_PRD_TBL_SZ, &ap->prd_dma,
-				      GFP_KERNEL);
-	if (!ap->prd)
-		return -ENOMEM;
-
-	return 0;
-}
-
 /**
  *	ata_dev_init - Initialize an ata_device structure
  *	@dev: Device structure to initialize
@@ -6757,7 +6733,6 @@ EXPORT_SYMBOL_GPL(ata_xfer_mode2mask);
 EXPORT_SYMBOL_GPL(ata_xfer_mode2shift);
 EXPORT_SYMBOL_GPL(ata_mode_string);
 EXPORT_SYMBOL_GPL(ata_id_xfermask);
-EXPORT_SYMBOL_GPL(ata_port_start);
 EXPORT_SYMBOL_GPL(ata_do_set_mode);
 EXPORT_SYMBOL_GPL(ata_std_qc_defer);
 EXPORT_SYMBOL_GPL(ata_noop_qc_prep);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 6400e8751391..f1c99a3e8b2c 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -66,8 +66,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 	.sff_irq_clear		= ata_sff_irq_clear,
 
 	.lost_interrupt		= ata_sff_lost_interrupt,
-
-	.port_start		= ata_sff_port_start,
 };
 EXPORT_SYMBOL_GPL(ata_sff_port_ops);
 
@@ -2443,50 +2441,6 @@ void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
 }
 EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
 
-/**
- *	ata_sff_port_start - Set port up for dma.
- *	@ap: Port to initialize
- *
- *	Called just after data structures for each port are
- *	initialized.  Allocates space for PRD table if the device
- *	is DMA capable SFF.
- *
- *	May be used as the port_start() entry in ata_port_operations.
- *
- *	LOCKING:
- *	Inherited from caller.
- */
-int ata_sff_port_start(struct ata_port *ap)
-{
-	if (ap->ioaddr.bmdma_addr)
-		return ata_port_start(ap);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sff_port_start);
-
-/**
- *	ata_sff_port_start32 - Set port up for dma.
- *	@ap: Port to initialize
- *
- *	Called just after data structures for each port are
- *	initialized.  Allocates space for PRD table if the device
- *	is DMA capable SFF.
- *
- *	May be used as the port_start() entry in ata_port_operations for
- *	devices that are capable of 32bit PIO.
- *
- *	LOCKING:
- *	Inherited from caller.
- */
-int ata_sff_port_start32(struct ata_port *ap)
-{
-	ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
-	if (ap->ioaddr.bmdma_addr)
-		return ata_port_start(ap);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(ata_sff_port_start32);
-
 /**
  *	ata_sff_std_ports - initialize ioaddr with standard port offsets.
  *	@ioaddr: IO address structure to be initialized
@@ -2646,21 +2600,12 @@ int ata_pci_sff_prepare_host(struct pci_dev *pdev,
 		goto err_out;
 
 	/* init DMA related stuff */
-	rc = ata_pci_bmdma_init(host);
-	if (rc)
-		goto err_bmdma;
+	ata_pci_bmdma_init(host);
 
 	devres_remove_group(&pdev->dev, NULL);
 	*r_host = host;
 	return 0;
 
-err_bmdma:
-	/* This is necessary because PCI and iomap resources are
-	 * merged and releasing the top group won't release the
-	 * acquired resources if some of those have been acquired
-	 * before entering this function.
-	 */
-	pcim_iounmap_regions(pdev, 0xf);
 err_out:
 	devres_release_group(&pdev->dev, NULL);
 	return rc;
@@ -2843,12 +2788,12 @@ EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
 const struct ata_port_operations ata_bmdma_port_ops = {
 	.inherits		= &ata_sff_port_ops,
 
-	.mode_filter		= ata_bmdma_mode_filter,
-
 	.bmdma_setup		= ata_bmdma_setup,
 	.bmdma_start		= ata_bmdma_start,
 	.bmdma_stop		= ata_bmdma_stop,
 	.bmdma_status		= ata_bmdma_status,
+
+	.port_start		= ata_bmdma_port_start,
 };
 EXPORT_SYMBOL_GPL(ata_bmdma_port_ops);
 
@@ -2856,22 +2801,10 @@ const struct ata_port_operations ata_bmdma32_port_ops = {
 	.inherits		= &ata_bmdma_port_ops,
 
 	.sff_data_xfer		= ata_sff_data_xfer32,
-	.port_start		= ata_sff_port_start32,
+	.port_start		= ata_bmdma_port_start32,
 };
 EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
 
-unsigned long ata_bmdma_mode_filter(struct ata_device *adev,
-				    unsigned long xfer_mask)
-{
-	/* Filter out DMA modes if the device has been configured by
-	   the BIOS as PIO only */
-
-	if (adev->link->ap->ioaddr.bmdma_addr == NULL)
-		xfer_mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
-	return xfer_mask;
-}
-EXPORT_SYMBOL_GPL(ata_bmdma_mode_filter);
-
 /**
  *	ata_bmdma_setup - Set up PCI IDE BMDMA transaction
  *	@qc: Info associated with this ATA transaction.
@@ -2976,6 +2909,53 @@ u8 ata_bmdma_status(struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ata_bmdma_status);
 
+
+/**
+ *	ata_bmdma_port_start - Set port up for bmdma.
+ *	@ap: Port to initialize
+ *
+ *	Called just after data structures for each port are
+ *	initialized.  Allocates space for PRD table.
+ *
+ *	May be used as the port_start() entry in ata_port_operations.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+int ata_bmdma_port_start(struct ata_port *ap)
+{
+	if (ap->mwdma_mask || ap->udma_mask) {
+		ap->prd = dmam_alloc_coherent(ap->host->dev, ATA_PRD_TBL_SZ,
+					      &ap->prd_dma, GFP_KERNEL);
+		if (!ap->prd)
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_port_start);
+
+/**
+ *	ata_bmdma_port_start32 - Set port up for dma.
+ *	@ap: Port to initialize
+ *
+ *	Called just after data structures for each port are
+ *	initialized.  Enables 32bit PIO and allocates space for PRD
+ *	table.
+ *
+ *	May be used as the port_start() entry in ata_port_operations for
+ *	devices that are capable of 32bit PIO.
+ *
+ *	LOCKING:
+ *	Inherited from caller.
+ */
+int ata_bmdma_port_start32(struct ata_port *ap)
+{
+	ap->pflags |= ATA_PFLAG_PIO32 | ATA_PFLAG_PIO32CHANGE;
+	return ata_bmdma_port_start(ap);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_port_start32);
+
 #ifdef CONFIG_PCI
 
 /**
@@ -3004,6 +2984,19 @@ int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
 
+static void ata_bmdma_nodma(struct ata_host *host, const char *reason)
+{
+	int i;
+
+	dev_printk(KERN_ERR, host->dev, "BMDMA: %s, falling back to PIO\n",
+		   reason);
+
+	for (i = 0; i < 2; i++) {
+		host->ports[i]->mwdma_mask = 0;
+		host->ports[i]->udma_mask = 0;
+	}
+}
+
 /**
  *	ata_pci_bmdma_init - acquire PCI BMDMA resources and init ATA host
  *	@host: target ATA host
@@ -3012,33 +3005,40 @@ EXPORT_SYMBOL_GPL(ata_pci_bmdma_clear_simplex);
  *
  *	LOCKING:
  *	Inherited from calling layer (may sleep).
- *
- *	RETURNS:
- *	0 on success, -errno otherwise.
  */
-int ata_pci_bmdma_init(struct ata_host *host)
+void ata_pci_bmdma_init(struct ata_host *host)
 {
 	struct device *gdev = host->dev;
 	struct pci_dev *pdev = to_pci_dev(gdev);
 	int i, rc;
 
 	/* No BAR4 allocation: No DMA */
-	if (pci_resource_start(pdev, 4) == 0)
-		return 0;
+	if (pci_resource_start(pdev, 4) == 0) {
+		ata_bmdma_nodma(host, "BAR4 is zero");
+		return;
+	}
 
-	/* TODO: If we get no DMA mask we should fall back to PIO */
+	/*
+	 * Some controllers require BMDMA region to be initialized
+	 * even if DMA is not in use to clear IRQ status via
+	 * ->sff_irq_clear method.  Try to initialize bmdma_addr
+	 * regardless of dma masks.
+	 */
 	rc = pci_set_dma_mask(pdev, ATA_DMA_MASK);
 	if (rc)
-		return rc;
-	rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
-	if (rc)
-		return rc;
+		ata_bmdma_nodma(host, "failed to set dma mask");
+	if (!rc) {
+		rc = pci_set_consistent_dma_mask(pdev, ATA_DMA_MASK);
+		if (rc)
+			ata_bmdma_nodma(host,
+					"failed to set consistent dma mask");
+	}
 
 	/* request and iomap DMA region */
 	rc = pcim_iomap_regions(pdev, 1 << 4, dev_driver_string(gdev));
 	if (rc) {
-		dev_printk(KERN_ERR, gdev, "failed to request/iomap BAR4\n");
-		return -ENOMEM;
+		ata_bmdma_nodma(host, "failed to request/iomap BAR4");
+		return;
 	}
 	host->iomap = pcim_iomap_table(pdev);
 
@@ -3057,8 +3057,6 @@ int ata_pci_bmdma_init(struct ata_host *host)
 		ata_port_desc(ap, "bmdma 0x%llx",
 		    (unsigned long long)pci_resource_start(pdev, 4) + 8 * i);
 	}
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
 
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index 1ea2be0f4b94..1a5a309abccd 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -101,7 +101,7 @@ static unsigned long pacpi_discover_modes(struct ata_port *ap, struct ata_device
 static unsigned long pacpi_mode_filter(struct ata_device *adev, unsigned long mask)
 {
 	struct pata_acpi *acpi = adev->link->ap->private_data;
-	return ata_bmdma_mode_filter(adev, mask & acpi->mask[adev->devno]);
+	return mask & acpi->mask[adev->devno];
 }
 
 /**
@@ -205,7 +205,7 @@ static int pacpi_port_start(struct ata_port *ap)
 		return -ENOMEM;
 	acpi->mask[0] = pacpi_discover_modes(ap, &ap->link.device[0]);
 	acpi->mask[1] = pacpi_discover_modes(ap, &ap->link.device[1]);
-	ret = ata_sff_port_start(ap);
+	ret = ata_bmdma_port_start(ap);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/ata/pata_ali.c b/drivers/ata/pata_ali.c
index dc61b72f751c..f306e10c748d 100644
--- a/drivers/ata/pata_ali.c
+++ b/drivers/ata/pata_ali.c
@@ -124,7 +124,7 @@ static unsigned long ali_20_filter(struct ata_device *adev, unsigned long mask)
 	ata_id_c_string(adev->id, model_num, ATA_ID_PROD, sizeof(model_num));
 	if (strstr(model_num, "WDC"))
 		return mask &= ~ATA_MASK_UDMA;
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
diff --git a/drivers/ata/pata_at91.c b/drivers/ata/pata_at91.c
index c6a946aa252c..0da0dcc7dd08 100644
--- a/drivers/ata/pata_at91.c
+++ b/drivers/ata/pata_at91.c
@@ -202,7 +202,6 @@ static struct ata_port_operations pata_at91_port_ops = {
 	.sff_data_xfer	= pata_at91_data_xfer_noirq,
 	.set_piomode	= pata_at91_set_piomode,
 	.cable_detect	= ata_cable_40wire,
-	.port_start	= ATA_OP_NULL,
 };
 
 static int __devinit pata_at91_probe(struct platform_device *pdev)
diff --git a/drivers/ata/pata_bf54x.c b/drivers/ata/pata_bf54x.c
index 3d94c800e9d1..6422cfd13d0d 100644
--- a/drivers/ata/pata_bf54x.c
+++ b/drivers/ata/pata_bf54x.c
@@ -1450,8 +1450,6 @@ static struct ata_port_operations bfin_pata_ops = {
 
 	.port_start		= bfin_port_start,
 	.port_stop		= bfin_port_stop,
-
-	.mode_filter		= ATA_OP_NULL,	/* will be removed soon */
 };
 
 static struct ata_port_info bfin_port_info[] = {
diff --git a/drivers/ata/pata_cmd640.c b/drivers/ata/pata_cmd640.c
index 60ab2d2b1ccd..e5f289f59ca3 100644
--- a/drivers/ata/pata_cmd640.c
+++ b/drivers/ata/pata_cmd640.c
@@ -153,16 +153,12 @@ static int cmd640_port_start(struct ata_port *ap)
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 	struct cmd640_reg *timing;
 
-	int ret = ata_sff_port_start(ap);
-	if (ret < 0)
-		return ret;
-
 	timing = devm_kzalloc(&pdev->dev, sizeof(struct cmd640_reg), GFP_KERNEL);
 	if (timing == NULL)
 		return -ENOMEM;
 	timing->last = -1;	/* Force a load */
 	ap->private_data = timing;
-	return ret;
+	return 0;
 }
 
 static struct scsi_host_template cmd640_sht = {
diff --git a/drivers/ata/pata_hpt366.c b/drivers/ata/pata_hpt366.c
index af49bfb57247..8580eb3cd54d 100644
--- a/drivers/ata/pata_hpt366.c
+++ b/drivers/ata/pata_hpt366.c
@@ -182,7 +182,7 @@ static unsigned long hpt366_filter(struct ata_device *adev, unsigned long mask)
 	} else if (adev->class == ATA_DEV_ATAPI)
 		mask &= ~(ATA_MASK_MWDMA | ATA_MASK_UDMA);
 
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 static int hpt36x_cable_detect(struct ata_port *ap)
diff --git a/drivers/ata/pata_hpt37x.c b/drivers/ata/pata_hpt37x.c
index 8839307a64cf..98b498b6907c 100644
--- a/drivers/ata/pata_hpt37x.c
+++ b/drivers/ata/pata_hpt37x.c
@@ -282,7 +282,7 @@ static unsigned long hpt370_filter(struct ata_device *adev, unsigned long mask)
 		if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
 			mask &= ~(0xE0 << ATA_SHIFT_UDMA);
 	}
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
@@ -298,7 +298,7 @@ static unsigned long hpt370a_filter(struct ata_device *adev, unsigned long mask)
 		if (hpt_dma_blacklisted(adev, "UDMA100", bad_ata100_5))
 			mask &= ~(0xE0 << ATA_SHIFT_UDMA);
 	}
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index 832c8ccd0e80..ee85a9ce0a11 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -335,7 +335,7 @@ static struct ata_port_operations pata_icside_port_ops = {
 	.postreset		= pata_icside_postreset,
 	.post_internal_cmd	= pata_icside_bmdma_stop,
 
-	.mode_filter		= ATA_OP_NULL,	/* will be removed soon */
+	.port_start		= ATA_OP_NULL,	/* don't need PRD table */
 };
 
 static void __devinit
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 5cb286fd839e..8d73438f292c 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -739,7 +739,7 @@ static int it821x_port_start(struct ata_port *ap)
 	struct it821x_dev *itdev;
 	u8 conf;
 
-	int ret = ata_sff_port_start(ap);
+	int ret = ata_bmdma_port_start(ap);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/ata/pata_macio.c b/drivers/ata/pata_macio.c
index 17e4e5d19696..25df50f51c04 100644
--- a/drivers/ata/pata_macio.c
+++ b/drivers/ata/pata_macio.c
@@ -720,6 +720,8 @@ static int pata_macio_port_start(struct ata_port *ap)
 	if (priv->dma_table_cpu == NULL) {
 		dev_err(priv->dev, "Unable to allocate DMA command list\n");
 		ap->ioaddr.bmdma_addr = NULL;
+		ap->mwdma_mask = 0;
+		ap->udma_mask = 0;
 	}
 	return 0;
 }
diff --git a/drivers/ata/pata_pdc2027x.c b/drivers/ata/pata_pdc2027x.c
index ca5cad0fd80b..09f1f22c0307 100644
--- a/drivers/ata/pata_pdc2027x.c
+++ b/drivers/ata/pata_pdc2027x.c
@@ -265,7 +265,7 @@ static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long
 	struct ata_device *pair = ata_dev_pair(adev);
 
 	if (adev->class != ATA_DEV_ATA || adev->devno == 0 || pair == NULL)
-		return ata_bmdma_mode_filter(adev, mask);
+		return mask;
 
 	/* Check for slave of a Maxtor at UDMA6 */
 	ata_id_c_string(pair->id, model_num, ATA_ID_PROD,
@@ -274,7 +274,7 @@ static unsigned long pdc2027x_mode_filter(struct ata_device *adev, unsigned long
 	if (strstr(model_num, "Maxtor") == NULL && pair->dma_mode == XFER_UDMA_6)
 		mask &= ~ (1 << (6 + ATA_SHIFT_UDMA));
 
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
diff --git a/drivers/ata/pata_pdc202xx_old.c b/drivers/ata/pata_pdc202xx_old.c
index 9ac0897cf8b0..fa1e2f3bc0fd 100644
--- a/drivers/ata/pata_pdc202xx_old.c
+++ b/drivers/ata/pata_pdc202xx_old.c
@@ -249,7 +249,7 @@ static int pdc2026x_port_start(struct ata_port *ap)
 		u8 burst = ioread8(bmdma + 0x1f);
 		iowrite8(burst | 0x01, bmdma + 0x1f);
 	}
-	return ata_sff_port_start(ap);
+	return ata_bmdma_port_start(ap);
 }
 
 /**
diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c
index 3f6ebc6c665a..50400fa120fe 100644
--- a/drivers/ata/pata_platform.c
+++ b/drivers/ata/pata_platform.c
@@ -53,7 +53,6 @@ static struct ata_port_operations pata_platform_port_ops = {
 	.sff_data_xfer		= ata_sff_data_xfer_noirq,
 	.cable_detect		= ata_cable_unknown,
 	.set_mode		= pata_platform_set_mode,
-	.port_start		= ATA_OP_NULL,
 };
 
 static void pata_platform_setup_port(struct ata_ioports *ioaddr,
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index d3988991ca68..70d549e28f0f 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -265,7 +265,7 @@ unsigned long scc_mode_filter(struct ata_device *adev, unsigned long mask)
 		printk(KERN_INFO "%s: limit ATAPI UDMA to UDMA4\n", DRV_NAME);
 		mask &= ~(0xE0 << ATA_SHIFT_UDMA);
 	}
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
@@ -892,7 +892,7 @@ static void scc_irq_clear (struct ata_port *ap)
  *	scc_port_start - Set port up for dma.
  *	@ap: Port to initialize
  *
- *	Allocate space for PRD table using ata_port_start().
+ *	Allocate space for PRD table using ata_bmdma_port_start().
  *	Set PRD table address for PTERADD. (PRD Transfer End Read)
  */
 
@@ -901,7 +901,7 @@ static int scc_port_start (struct ata_port *ap)
 	void __iomem *mmio = ap->ioaddr.bmdma_addr;
 	int rc;
 
-	rc = ata_port_start(ap);
+	rc = ata_bmdma_port_start(ap);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/pata_serverworks.c b/drivers/ata/pata_serverworks.c
index 9524d54035f7..43ea389df2b3 100644
--- a/drivers/ata/pata_serverworks.c
+++ b/drivers/ata/pata_serverworks.c
@@ -198,7 +198,7 @@ static unsigned long serverworks_osb4_filter(struct ata_device *adev, unsigned l
 {
 	if (adev->class == ATA_DEV_ATA)
 		mask &= ~ATA_MASK_UDMA;
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 
@@ -218,7 +218,7 @@ static unsigned long serverworks_csb_filter(struct ata_device *adev, unsigned lo
 
 	/* Disk, UDMA */
 	if (adev->class != ATA_DEV_ATA)
-		return ata_bmdma_mode_filter(adev, mask);
+		return mask;
 
 	/* Actually do need to check */
 	ata_id_c_string(adev->id, model_num, ATA_ID_PROD, sizeof(model_num));
@@ -227,7 +227,7 @@ static unsigned long serverworks_csb_filter(struct ata_device *adev, unsigned lo
 		if (!strcmp(p, model_num))
 			mask &= ~(0xE0 << ATA_SHIFT_UDMA);
 	}
-	return ata_bmdma_mode_filter(adev, mask);
+	return mask;
 }
 
 /**
diff --git a/drivers/ata/pata_via.c b/drivers/ata/pata_via.c
index 87fb89e407f7..7e3e0a5598b7 100644
--- a/drivers/ata/pata_via.c
+++ b/drivers/ata/pata_via.c
@@ -355,7 +355,7 @@ static unsigned long via_mode_filter(struct ata_device *dev, unsigned long mask)
 			mask &= ~ ATA_MASK_UDMA;
 		}
 	}
-	return ata_bmdma_mode_filter(dev, mask);
+	return mask;
 }
 
 /**
@@ -424,7 +424,7 @@ static int via_port_start(struct ata_port *ap)
 	struct via_port *vp;
 	struct pci_dev *pdev = to_pci_dev(ap->host->dev);
 
-	int ret = ata_sff_port_start(ap);
+	int ret = ata_bmdma_port_start(ap);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index ed18d8b42c52..bb4f838655b6 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -556,11 +556,7 @@ static int adma_port_start(struct ata_port *ap)
 {
 	struct device *dev = ap->host->dev;
 	struct adma_port_priv *pp;
-	int rc;
 
-	rc = ata_port_start(ap);
-	if (rc)
-		return rc;
 	adma_enter_reg_mode(ap);
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
index d3e1bab2a7ad..a36149ebf4a2 100644
--- a/drivers/ata/sata_inic162x.c
+++ b/drivers/ata/sata_inic162x.c
@@ -682,7 +682,6 @@ static int inic_port_start(struct ata_port *ap)
 {
 	struct device *dev = ap->host->dev;
 	struct inic_port_priv *pp;
-	int rc;
 
 	/* alloc and initialize private data */
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
@@ -691,10 +690,6 @@ static int inic_port_start(struct ata_port *ap)
 	ap->private_data = pp;
 
 	/* Alloc resources */
-	rc = ata_port_start(ap);
-	if (rc)
-		return rc;
-
 	pp->pkt = dmam_alloc_coherent(dev, sizeof(struct inic_pkt),
 				      &pp->pkt_dma, GFP_KERNEL);
 	if (!pp->pkt)
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index a03371075990..83491861d110 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -717,8 +717,6 @@ static struct ata_port_operations mv6_ops = {
 
 	.port_start		= mv_port_start,
 	.port_stop		= mv_port_stop,
-
-	.mode_filter            = ATA_OP_NULL,  /* will be removed soon */
 };
 
 static struct ata_port_operations mv_iie_ops = {
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 20cc510595db..a007b20c1991 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1156,7 +1156,8 @@ static int nv_adma_port_start(struct ata_port *ap)
 	if (rc)
 		return rc;
 
-	rc = ata_port_start(ap);
+	/* we might fallback to bmdma, allocate bmdma resources */
+	rc = ata_bmdma_port_start(ap);
 	if (rc)
 		return rc;
 
@@ -1985,7 +1986,8 @@ static int nv_swncq_port_start(struct ata_port *ap)
 	struct nv_swncq_port_priv *pp;
 	int rc;
 
-	rc = ata_port_start(ap);
+	/* we might fallback to bmdma, allocate bmdma resources */
+	rc = ata_bmdma_port_start(ap);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 2c029ea12c54..e80628a77669 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -333,7 +333,8 @@ static int pdc_common_port_start(struct ata_port *ap)
 	struct pdc_port_priv *pp;
 	int rc;
 
-	rc = ata_port_start(ap);
+	/* we use the same prd table as bmdma, allocate it */
+	rc = ata_bmdma_port_start(ap);
 	if (rc)
 		return rc;
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index febc6e748420..da84ea9e4fb0 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -504,11 +504,7 @@ static int qs_port_start(struct ata_port *ap)
 	void __iomem *mmio_base = qs_mmio_base(ap->host);
 	void __iomem *chan = mmio_base + (ap->port_no * 0x4000);
 	u64 addr;
-	int rc;
 
-	rc = ata_port_start(ap);
-	if (rc)
-		return rc;
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
 		return -ENOMEM;
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index 7e3a25ec93b1..a4e552a325b0 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -302,11 +302,6 @@ static int pdc_port_start(struct ata_port *ap)
 {
 	struct device *dev = ap->host->dev;
 	struct pdc_port_priv *pp;
-	int rc;
-
-	rc = ata_port_start(ap);
-	if (rc)
-		return rc;
 
 	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
 	if (!pp)
diff --git a/drivers/ata/sata_uli.c b/drivers/ata/sata_uli.c
index 011e098590d1..d8dac17dc2c8 100644
--- a/drivers/ata/sata_uli.c
+++ b/drivers/ata/sata_uli.c
@@ -181,9 +181,7 @@ static int uli_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
-	rc = ata_pci_bmdma_init(host);
-	if (rc)
-		return rc;
+	ata_pci_bmdma_init(host);
 
 	iomap = host->iomap;
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d735728873b5..45a547e42d47 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1000,7 +1000,6 @@ extern unsigned long ata_xfer_mode2mask(u8 xfer_mode);
 extern int ata_xfer_mode2shift(unsigned long xfer_mode);
 extern const char *ata_mode_string(unsigned long xfer_mask);
 extern unsigned long ata_id_xfermask(const u16 *id);
-extern int ata_port_start(struct ata_port *ap);
 extern int ata_std_qc_defer(struct ata_queued_cmd *qc);
 extern void ata_noop_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_sg_init(struct ata_queued_cmd *qc, struct scatterlist *sg,
@@ -1616,8 +1615,6 @@ extern void ata_sff_postreset(struct ata_link *link, unsigned int *classes);
 extern void ata_sff_drain_fifo(struct ata_queued_cmd *qc);
 extern void ata_sff_error_handler(struct ata_port *ap);
 extern void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc);
-extern int ata_sff_port_start(struct ata_port *ap);
-extern int ata_sff_port_start32(struct ata_port *ap);
 extern void ata_sff_std_ports(struct ata_ioports *ioaddr);
 #ifdef CONFIG_PCI
 extern int ata_pci_sff_init_host(struct ata_host *host);
@@ -1632,16 +1629,16 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 		struct scsi_host_template *sht, void *host_priv, int hflags);
 #endif /* CONFIG_PCI */
 
-extern unsigned long ata_bmdma_mode_filter(struct ata_device *dev,
-					   unsigned long xfer_mask);
 extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
 extern void ata_bmdma_start(struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
 extern u8 ata_bmdma_status(struct ata_port *ap);
+extern int ata_bmdma_port_start(struct ata_port *ap);
+extern int ata_bmdma_port_start32(struct ata_port *ap);
 
 #ifdef CONFIG_PCI
 extern int ata_pci_bmdma_clear_simplex(struct pci_dev *pdev);
-extern int ata_pci_bmdma_init(struct ata_host *host);
+extern void ata_pci_bmdma_init(struct ata_host *host);
 #endif /* CONFIG_PCI */
 
 /**
-- 
cgit v1.2.3


From 8244cd05979ef924787aa70fd80304f1773976a1 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:36 +0200
Subject: libata-sff: rename ap->ops->drain_fifo() to sff_drain_fifo()

->drain_fifo() is SFF specific.  Rename and relocate it.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c  | 6 +++---
 drivers/ata/pata_pcmcia.c | 2 +-
 include/linux/libata.h    | 3 +--
 3 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 4a3d1f214457..6a103b310cac 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -53,7 +53,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 	.softreset		= ata_sff_softreset,
 	.hardreset		= sata_sff_hardreset,
 	.postreset		= ata_sff_postreset,
-	.drain_fifo		= ata_sff_drain_fifo,
 	.error_handler		= ata_sff_error_handler,
 	.post_internal_cmd	= ata_sff_post_internal_cmd,
 
@@ -64,6 +63,7 @@ const struct ata_port_operations ata_sff_port_ops = {
 	.sff_exec_command	= ata_sff_exec_command,
 	.sff_data_xfer		= ata_sff_data_xfer,
 	.sff_irq_clear		= ata_sff_irq_clear,
+	.sff_drain_fifo		= ata_sff_drain_fifo,
 
 	.lost_interrupt		= ata_sff_lost_interrupt,
 };
@@ -2398,8 +2398,8 @@ void ata_sff_error_handler(struct ata_port *ap)
 	 * if we touch the data port post reset. Pass qc in case anyone wants
 	 *  to do different PIO/DMA recovery or has per command fixups
 	 */
-	if (ap->ops->drain_fifo)
-		ap->ops->drain_fifo(qc);
+	if (ap->ops->sff_drain_fifo)
+		ap->ops->sff_drain_fifo(qc);
 
 	spin_unlock_irqrestore(ap->lock, flags);
 
diff --git a/drivers/ata/pata_pcmcia.c b/drivers/ata/pata_pcmcia.c
index d94b8f0bd743..ef374388c0f6 100644
--- a/drivers/ata/pata_pcmcia.c
+++ b/drivers/ata/pata_pcmcia.c
@@ -175,7 +175,7 @@ static struct ata_port_operations pcmcia_8bit_port_ops = {
 	.sff_data_xfer	= ata_data_xfer_8bit,
 	.cable_detect	= ata_cable_40wire,
 	.set_mode	= pcmcia_set_mode_8bit,
-	.drain_fifo	= pcmcia_8bit_drain_fifo,
+	.sff_drain_fifo	= pcmcia_8bit_drain_fifo,
 };
 
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 45a547e42d47..19f716edd3be 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -856,13 +856,12 @@ struct ata_port_operations {
 	void (*sff_irq_on)(struct ata_port *);
 	bool (*sff_irq_check)(struct ata_port *);
 	void (*sff_irq_clear)(struct ata_port *);
+	void (*sff_drain_fifo)(struct ata_queued_cmd *qc);
 
 	void (*bmdma_setup)(struct ata_queued_cmd *qc);
 	void (*bmdma_start)(struct ata_queued_cmd *qc);
 	void (*bmdma_stop)(struct ata_queued_cmd *qc);
 	u8   (*bmdma_status)(struct ata_port *ap);
-
-	void (*drain_fifo)(struct ata_queued_cmd *qc);
 #endif /* CONFIG_ATA_SFF */
 
 	ssize_t (*em_show)(struct ata_port *ap, char *buf);
-- 
cgit v1.2.3


From 5fe7454aa9c6ef5fcf506b0f2dfc20f696891f1a Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:37 +0200
Subject: libata-sff: ap->[last_]ctl are SFF specific

ap->[last_]ctl are specific to SFF controllers.  Put them inside
CONFIG_ATA_SFF and move initialization into ata_sff_port_init().

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 2 --
 drivers/ata/libata-sff.c  | 2 ++
 include/linux/libata.h    | 8 ++++++--
 3 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index 200f49d09228..bf6ffc1273a0 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -5634,10 +5634,8 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 	ap->pflags |= ATA_PFLAG_INITIALIZING;
 	ap->lock = &host->lock;
 	ap->print_id = -1;
-	ap->ctl = ATA_DEVCTL_OBS;
 	ap->host = host;
 	ap->dev = host->dev;
-	ap->last_ctl = 0xFF;
 
 #if defined(ATA_VERBOSE_DEBUG)
 	/* turn on all debugging levels */
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 6a103b310cac..8a1396f52a3a 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -3074,6 +3074,8 @@ EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
  */
 void ata_sff_port_init(struct ata_port *ap)
 {
+	ap->ctl = ATA_DEVCTL_OBS;
+	ap->last_ctl = 0xFF;
 }
 
 int __init ata_sff_init(void)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 19f716edd3be..85b99b83f990 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -721,10 +721,10 @@ struct ata_port {
 
 #ifdef CONFIG_ATA_SFF
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
-#endif /* CONFIG_ATA_SFF */
-
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
+#endif /* CONFIG_ATA_SFF */
+
 	unsigned int		pio_mask;
 	unsigned int		mwdma_mask;
 	unsigned int		udma_mask;
@@ -1435,7 +1435,11 @@ static inline void ata_tf_init(struct ata_device *dev, struct ata_taskfile *tf)
 {
 	memset(tf, 0, sizeof(*tf));
 
+#ifdef CONFIG_ATA_SFF
 	tf->ctl = dev->link->ap->ctl;
+#else
+	tf->ctl = ATA_DEVCTL_OBS;
+#endif
 	if (dev->devno == 0)
 		tf->device = ATA_DEVICE_OBS;
 	else
-- 
cgit v1.2.3


From c429137a67b82788d24682153bb9c96501a9ef34 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:38 +0200
Subject: libata-sff: port_task is SFF specific

port_task is tightly bound to the standard SFF PIO HSM implementation.
Using it for any other purpose would be error-prone, and there is no
such user.  If some driver needs such a feature, it would be much
better off implementing its own.  Move it inside CONFIG_ATA_SFF and
rename it to sff_pio_task.

The only function which is exposed to the core layer is
ata_sff_flush_pio_task() which is renamed from ata_port_flush_task()
and now also takes care of resetting hsm_task_state to HSM_ST_IDLE,
which is possible as it's now specific to PIO HSM.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-core.c | 69 +--------------------------------------------
 drivers/ata/libata-eh.c   |  4 +--
 drivers/ata/libata-sff.c  | 72 +++++++++++++++++++++++++++++++++++------------
 drivers/ata/libata.h      |  5 ++--
 drivers/ata/sata_mv.c     |  2 +-
 include/linux/libata.h    |  7 ++---
 6 files changed, 63 insertions(+), 96 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
index bf6ffc1273a0..c47373f01f89 100644
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@@ -97,7 +97,6 @@ static void ata_dev_xfermask(struct ata_device *dev);
 static unsigned long ata_dev_blacklisted(const struct ata_device *dev);
 
 unsigned int ata_print_id = 1;
-static struct workqueue_struct *ata_wq;
 
 struct workqueue_struct *ata_aux_wq;
 
@@ -1686,52 +1685,6 @@ unsigned long ata_id_xfermask(const u16 *id)
 	return ata_pack_xfermask(pio_mask, mwdma_mask, udma_mask);
 }
 
-/**
- *	ata_pio_queue_task - Queue port_task
- *	@ap: The ata_port to queue port_task for
- *	@data: data for @fn to use
- *	@delay: delay time in msecs for workqueue function
- *
- *	Schedule @fn(@data) for execution after @delay jiffies using
- *	port_task.  There is one port_task per port and it's the
- *	user(low level driver)'s responsibility to make sure that only
- *	one task is active at any given time.
- *
- *	libata core layer takes care of synchronization between
- *	port_task and EH.  ata_pio_queue_task() may be ignored for EH
- *	synchronization.
- *
- *	LOCKING:
- *	Inherited from caller.
- */
-void ata_pio_queue_task(struct ata_port *ap, void *data, unsigned long delay)
-{
-	ap->port_task_data = data;
-
-	/* may fail if ata_port_flush_task() in progress */
-	queue_delayed_work(ata_wq, &ap->port_task, msecs_to_jiffies(delay));
-}
-
-/**
- *	ata_port_flush_task - Flush port_task
- *	@ap: The ata_port to flush port_task for
- *
- *	After this function completes, port_task is guranteed not to
- *	be running or scheduled.
- *
- *	LOCKING:
- *	Kernel thread context (may sleep)
- */
-void ata_port_flush_task(struct ata_port *ap)
-{
-	DPRINTK("ENTER\n");
-
-	cancel_rearming_delayed_work(&ap->port_task);
-
-	if (ata_msg_ctl(ap))
-		ata_port_printk(ap, KERN_DEBUG, "%s: EXIT\n", __func__);
-}
-
 static void ata_qc_complete_internal(struct ata_queued_cmd *qc)
 {
 	struct completion *waiting = qc->private_data;
@@ -1853,7 +1806,7 @@ unsigned ata_exec_internal_sg(struct ata_device *dev,
 
 	rc = wait_for_completion_timeout(&wait, msecs_to_jiffies(timeout));
 
-	ata_port_flush_task(ap);
+	ata_sff_flush_pio_task(ap);
 
 	if (!rc) {
 		spin_lock_irqsave(ap->lock, flags);
@@ -5646,11 +5599,6 @@ struct ata_port *ata_port_alloc(struct ata_host *host)
 	ap->msg_enable = ATA_MSG_DRV | ATA_MSG_ERR | ATA_MSG_WARN;
 #endif
 
-#ifdef CONFIG_ATA_SFF
-	INIT_DELAYED_WORK(&ap->port_task, ata_pio_task);
-#else
-	INIT_DELAYED_WORK(&ap->port_task, NULL);
-#endif
 	INIT_DELAYED_WORK(&ap->hotplug_task, ata_scsi_hotplug);
 	INIT_WORK(&ap->scsi_rescan_task, ata_scsi_dev_rescan);
 	INIT_LIST_HEAD(&ap->eh_done_q);
@@ -6588,17 +6536,6 @@ static int __init ata_init(void)
 
 	ata_parse_force_param();
 
-	/*
-	 * FIXME: In UP case, there is only one workqueue thread and if you
-	 * have more than one PIO device, latency is bloody awful, with
-	 * occasional multi-second "hiccups" as one PIO device waits for
-	 * another.  It's an ugly wart that users DO occasionally complain
-	 * about; luckily most users have at most one PIO polled device.
-	 */
-	ata_wq = create_workqueue("ata");
-	if (!ata_wq)
-		goto fail;
-
 	ata_aux_wq = create_singlethread_workqueue("ata_aux");
 	if (!ata_aux_wq)
 		goto fail;
@@ -6612,8 +6549,6 @@ static int __init ata_init(void)
 
 fail:
 	kfree(ata_force_tbl);
-	if (ata_wq)
-		destroy_workqueue(ata_wq);
 	if (ata_aux_wq)
 		destroy_workqueue(ata_aux_wq);
 	return rc;
@@ -6623,7 +6558,6 @@ static void __exit ata_exit(void)
 {
 	ata_sff_exit();
 	kfree(ata_force_tbl);
-	destroy_workqueue(ata_wq);
 	destroy_workqueue(ata_aux_wq);
 }
 
@@ -6777,7 +6711,6 @@ EXPORT_SYMBOL_GPL(ata_id_c_string);
 EXPORT_SYMBOL_GPL(ata_do_dev_read_id);
 EXPORT_SYMBOL_GPL(ata_scsi_simulate);
 
-EXPORT_SYMBOL_GPL(ata_pio_queue_task);
 EXPORT_SYMBOL_GPL(ata_pio_need_iordy);
 EXPORT_SYMBOL_GPL(ata_timing_find_mode);
 EXPORT_SYMBOL_GPL(ata_timing_compute);
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index 228740f356c9..d6e67488174c 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -550,8 +550,8 @@ void ata_scsi_error(struct Scsi_Host *host)
 
 	DPRINTK("ENTER\n");
 
-	/* synchronize with port task */
-	ata_port_flush_task(ap);
+	/* make sure sff pio task is not running */
+	ata_sff_flush_pio_task(ap);
 
 	/* synchronize with host lock and sort out timeouts */
 
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 8a1396f52a3a..e78ad76861f4 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -40,6 +40,8 @@
 
 #include "libata.h"
 
+static struct workqueue_struct *ata_sff_wq;
+
 const struct ata_port_operations ata_sff_port_ops = {
 	.inherits		= &ata_base_port_ops,
 
@@ -1293,7 +1295,7 @@ fsm_start:
 		if (in_wq)
 			spin_unlock_irqrestore(ap->lock, flags);
 
-		/* if polling, ata_pio_task() handles the rest.
+		/* if polling, ata_sff_pio_task() handles the rest.
 		 * otherwise, interrupt handler takes over from here.
 		 */
 		break;
@@ -1458,14 +1460,38 @@ fsm_start:
 }
 EXPORT_SYMBOL_GPL(ata_sff_hsm_move);
 
-void ata_pio_task(struct work_struct *work)
+void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay)
+{
+	/* may fail if ata_sff_flush_pio_task() in progress */
+	queue_delayed_work(ata_sff_wq, &ap->sff_pio_task,
+			   msecs_to_jiffies(delay));
+}
+EXPORT_SYMBOL_GPL(ata_sff_queue_pio_task);
+
+void ata_sff_flush_pio_task(struct ata_port *ap)
+{
+	DPRINTK("ENTER\n");
+
+	cancel_rearming_delayed_work(&ap->sff_pio_task);
+	ap->hsm_task_state = HSM_ST_IDLE;
+
+	if (ata_msg_ctl(ap))
+		ata_port_printk(ap, KERN_DEBUG, "%s: EXIT\n", __func__);
+}
+
+static void ata_sff_pio_task(struct work_struct *work)
 {
 	struct ata_port *ap =
-		container_of(work, struct ata_port, port_task.work);
-	struct ata_queued_cmd *qc = ap->port_task_data;
+		container_of(work, struct ata_port, sff_pio_task.work);
+	struct ata_queued_cmd *qc;
 	u8 status;
 	int poll_next;
 
+	/* qc can be NULL if timeout occurred */
+	qc = ata_qc_from_tag(ap, ap->link.active_tag);
+	if (!qc)
+		return;
+
 fsm_start:
 	WARN_ON_ONCE(ap->hsm_task_state == HSM_ST_IDLE);
 
@@ -1481,7 +1507,7 @@ fsm_start:
 		msleep(2);
 		status = ata_sff_busy_wait(ap, ATA_BUSY, 10);
 		if (status & ATA_BUSY) {
-			ata_pio_queue_task(ap, qc, ATA_SHORT_PAUSE);
+			ata_sff_queue_pio_task(ap, ATA_SHORT_PAUSE);
 			return;
 		}
 	}
@@ -1551,7 +1577,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		ap->hsm_task_state = HSM_ST_LAST;
 
 		if (qc->tf.flags & ATA_TFLAG_POLLING)
-			ata_pio_queue_task(ap, qc, 0);
+			ata_sff_queue_pio_task(ap, 0);
 
 		break;
 
@@ -1573,20 +1599,21 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		if (qc->tf.flags & ATA_TFLAG_WRITE) {
 			/* PIO data out protocol */
 			ap->hsm_task_state = HSM_ST_FIRST;
-			ata_pio_queue_task(ap, qc, 0);
+			ata_sff_queue_pio_task(ap, 0);
 
-			/* always send first data block using
-			 * the ata_pio_task() codepath.
+			/* always send first data block using the
+			 * ata_sff_pio_task() codepath.
 			 */
 		} else {
 			/* PIO data in protocol */
 			ap->hsm_task_state = HSM_ST;
 
 			if (qc->tf.flags & ATA_TFLAG_POLLING)
-				ata_pio_queue_task(ap, qc, 0);
+				ata_sff_queue_pio_task(ap, 0);
 
-			/* if polling, ata_pio_task() handles the rest.
-			 * otherwise, interrupt handler takes over from here.
+			/* if polling, ata_sff_pio_task() handles the
+			 * rest.  otherwise, interrupt handler takes
+			 * over from here.
 			 */
 		}
 
@@ -1604,7 +1631,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 		/* send cdb by polling if no cdb interrupt */
 		if ((!(qc->dev->flags & ATA_DFLAG_CDB_INTR)) ||
 		    (qc->tf.flags & ATA_TFLAG_POLLING))
-			ata_pio_queue_task(ap, qc, 0);
+			ata_sff_queue_pio_task(ap, 0);
 		break;
 
 	case ATAPI_PROT_DMA:
@@ -1616,7 +1643,7 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 
 		/* send cdb by polling if no cdb interrupt */
 		if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
-			ata_pio_queue_task(ap, qc, 0);
+			ata_sff_queue_pio_task(ap, 0);
 		break;
 
 	default:
@@ -2360,8 +2387,6 @@ void ata_sff_error_handler(struct ata_port *ap)
 	/* reset PIO HSM and stop DMA engine */
 	spin_lock_irqsave(ap->lock, flags);
 
-	ap->hsm_task_state = HSM_ST_IDLE;
-
 	if (ap->ioaddr.bmdma_addr &&
 	    qc && (qc->tf.protocol == ATA_PROT_DMA ||
 		   qc->tf.protocol == ATAPI_PROT_DMA)) {
@@ -2432,8 +2457,6 @@ void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
 
 	spin_lock_irqsave(ap->lock, flags);
 
-	ap->hsm_task_state = HSM_ST_IDLE;
-
 	if (ap->ioaddr.bmdma_addr)
 		ap->ops->bmdma_stop(qc);
 
@@ -3074,15 +3097,28 @@ EXPORT_SYMBOL_GPL(ata_pci_bmdma_init);
  */
 void ata_sff_port_init(struct ata_port *ap)
 {
+	INIT_DELAYED_WORK(&ap->sff_pio_task, ata_sff_pio_task);
 	ap->ctl = ATA_DEVCTL_OBS;
 	ap->last_ctl = 0xFF;
 }
 
 int __init ata_sff_init(void)
 {
+	/*
+	 * FIXME: In UP case, there is only one workqueue thread and if you
+	 * have more than one PIO device, latency is bloody awful, with
+	 * occasional multi-second "hiccups" as one PIO device waits for
+	 * another.  It's an ugly wart that users DO occasionally complain
+	 * about; luckily most users have at most one PIO polled device.
+	 */
+	ata_sff_wq = create_workqueue("ata_sff");
+	if (!ata_sff_wq)
+		return -ENOMEM;
+
 	return 0;
 }
 
 void __exit ata_sff_exit(void)
 {
+	destroy_workqueue(ata_sff_wq);
 }
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index d89502f3123a..002390cdc041 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -79,7 +79,6 @@ extern int ata_build_rw_tf(struct ata_taskfile *tf, struct ata_device *dev,
 			   u64 block, u32 n_block, unsigned int tf_flags,
 			   unsigned int tag);
 extern u64 ata_tf_read_block(struct ata_taskfile *tf, struct ata_device *dev);
-extern void ata_port_flush_task(struct ata_port *ap);
 extern unsigned ata_exec_internal(struct ata_device *dev,
 				  struct ata_taskfile *tf, const u8 *cdb,
 				  int dma_dir, void *buf, unsigned int buflen,
@@ -202,11 +201,13 @@ static inline int sata_pmp_attach(struct ata_device *dev)
 
 /* libata-sff.c */
 #ifdef CONFIG_ATA_SFF
-extern void ata_pio_task(struct work_struct *work);
+extern void ata_sff_flush_pio_task(struct ata_port *ap);
 extern void ata_sff_port_init(struct ata_port *ap);
 extern int ata_sff_init(void);
 extern void ata_sff_exit(void);
 #else /* CONFIG_ATA_SFF */
+static inline void ata_sff_flush_pio_task(struct ata_port *ap)
+{ }
 static inline void ata_sff_port_init(struct ata_port *ap)
 { }
 static inline int ata_sff_init(void)
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index 83491861d110..fb0d2c1d8417 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2262,7 +2262,7 @@ static unsigned int mv_qc_issue_fis(struct ata_queued_cmd *qc)
 	}
 
 	if (qc->tf.flags & ATA_TFLAG_POLLING)
-		ata_pio_queue_task(ap, qc, 0);
+		ata_sff_queue_pio_task(ap, 0);
 	return 0;
 }
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 85b99b83f990..6888b5c36e0f 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -723,6 +723,7 @@ struct ata_port {
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
+	struct delayed_work	sff_pio_task;
 #endif /* CONFIG_ATA_SFF */
 
 	unsigned int		pio_mask;
@@ -746,8 +747,6 @@ struct ata_port {
 	struct ata_host		*host;
 	struct device 		*dev;
 
-	void			*port_task_data;
-	struct delayed_work	port_task;
 	struct delayed_work	hotplug_task;
 	struct work_struct	scsi_rescan_task;
 
@@ -1031,9 +1030,6 @@ extern int ata_cable_sata(struct ata_port *ap);
 extern int ata_cable_ignore(struct ata_port *ap);
 extern int ata_cable_unknown(struct ata_port *ap);
 
-extern void ata_pio_queue_task(struct ata_port *ap, void *data,
-			       unsigned long delay);
-
 /* Timing helpers */
 extern unsigned int ata_pio_need_iordy(const struct ata_device *);
 extern const struct ata_timing *ata_timing_find_mode(u8 xfer_mode);
@@ -1597,6 +1593,7 @@ extern void ata_sff_irq_on(struct ata_port *ap);
 extern void ata_sff_irq_clear(struct ata_port *ap);
 extern int ata_sff_hsm_move(struct ata_port *ap, struct ata_queued_cmd *qc,
 			    u8 status, int in_wq);
+extern void ata_sff_queue_pio_task(struct ata_port *ap, unsigned long delay);
 extern unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc);
 extern bool ata_sff_qc_fill_rtf(struct ata_queued_cmd *qc);
 extern unsigned int ata_sff_host_intr(struct ata_port *ap,
-- 
cgit v1.2.3


From fe06e5f9b7c61dc567edace3f4909672067f7d7e Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:39 +0200
Subject: libata-sff: separate out BMDMA EH

Some of the error handling logic in ata_sff_error_handler() and all of
ata_sff_post_internal_cmd() are for BMDMA.  Create
ata_bmdma_error_handler() and ata_bmdma_post_internal_cmd() and move
the BMDMA parts into those.

While at it, change DMA protocol check to ata_is_dma(), fix
post_internal_cmd to call ap->ops->bmdma_stop instead of directly
calling ata_bmdma_stop() and open code hardreset selection so that
ata_std_error_handler() doesn't have to know about sff hardreset.

As these two functions are BMDMA specific, there's no reason to check
for bmdma_addr before calling bmdma methods if the protocol of the
failed command is DMA.  sata_mv and pata_mpc52xx now don't need to set
.post_internal_cmd to ATA_OP_NULL and pata_icside and sata_qstor don't
need to set it to their bmdma_stop routines.

ata_sff_post_internal_cmd() becomes a noop and is removed.

This fixes p3 described in clean-up-BMDMA-initialization patch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-eh.c    |   2 +-
 drivers/ata/libata-sff.c   | 159 ++++++++++++++++++++++++++-------------------
 drivers/ata/libata.h       |  11 ----
 drivers/ata/pata_icside.c  |   1 -
 drivers/ata/pata_scc.c     |   1 -
 drivers/ata/sata_nv.c      |   6 +-
 drivers/ata/sata_promise.c |   2 +-
 drivers/ata/sata_qstor.c   |   3 +-
 drivers/ata/sata_sx4.c     |   2 +-
 include/linux/libata.h     |   3 +-
 10 files changed, 102 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
index d6e67488174c..f77a67303f8b 100644
--- a/drivers/ata/libata-eh.c
+++ b/drivers/ata/libata-eh.c
@@ -3684,7 +3684,7 @@ void ata_std_error_handler(struct ata_port *ap)
 	ata_reset_fn_t hardreset = ops->hardreset;
 
 	/* ignore built-in hardreset if SCR access is not available */
-	if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link))
+	if (hardreset == sata_std_hardreset && !sata_scr_valid(&ap->link))
 		hardreset = NULL;
 
 	ata_do_eh(ap, ops->prereset, ops->softreset, hardreset, ops->postreset);
diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index e78ad76861f4..aa378c04ed87 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -56,7 +56,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 	.hardreset		= sata_sff_hardreset,
 	.postreset		= ata_sff_postreset,
 	.error_handler		= ata_sff_error_handler,
-	.post_internal_cmd	= ata_sff_post_internal_cmd,
 
 	.sff_dev_select		= ata_sff_dev_select,
 	.sff_check_status	= ata_sff_check_status,
@@ -2361,7 +2360,7 @@ void ata_sff_drain_fifo(struct ata_queued_cmd *qc)
 EXPORT_SYMBOL_GPL(ata_sff_drain_fifo);
 
 /**
- *	ata_sff_error_handler - Stock error handler for BMDMA controller
+ *	ata_sff_error_handler - Stock error handler for SFF controller
  *	@ap: port to handle error for
  *
  *	Stock error handler for SFF controller.  It can handle both
@@ -2378,64 +2377,32 @@ void ata_sff_error_handler(struct ata_port *ap)
 	ata_reset_fn_t hardreset = ap->ops->hardreset;
 	struct ata_queued_cmd *qc;
 	unsigned long flags;
-	bool thaw = false;
 
 	qc = __ata_qc_from_tag(ap, ap->link.active_tag);
 	if (qc && !(qc->flags & ATA_QCFLAG_FAILED))
 		qc = NULL;
 
-	/* reset PIO HSM and stop DMA engine */
 	spin_lock_irqsave(ap->lock, flags);
 
-	if (ap->ioaddr.bmdma_addr &&
-	    qc && (qc->tf.protocol == ATA_PROT_DMA ||
-		   qc->tf.protocol == ATAPI_PROT_DMA)) {
-		u8 host_stat;
-
-		host_stat = ap->ops->bmdma_status(ap);
-
-		/* BMDMA controllers indicate host bus error by
-		 * setting DMA_ERR bit and timing out.  As it wasn't
-		 * really a timeout event, adjust error mask and
-		 * cancel frozen state.
-		 */
-		if (qc->err_mask == AC_ERR_TIMEOUT
-						&& (host_stat & ATA_DMA_ERR)) {
-			qc->err_mask = AC_ERR_HOST_BUS;
-			thaw = true;
-		}
-
-		ap->ops->bmdma_stop(qc);
-
-		/* if we're gonna thaw, make sure IRQ is clear */
-		if (thaw) {
-			ap->ops->sff_check_status(ap);
-			ap->ops->sff_irq_clear(ap);
-
-			spin_unlock_irqrestore(ap->lock, flags);
-			ata_eh_thaw_port(ap);
-			spin_lock_irqsave(ap->lock, flags);
-		}
-	}
-
-	/* We *MUST* do FIFO draining before we issue a reset as several
-	 * devices helpfully clear their internal state and will lock solid
-	 * if we touch the data port post reset. Pass qc in case anyone wants
-	 *  to do different PIO/DMA recovery or has per command fixups
+	/*
+	 * We *MUST* do FIFO draining before we issue a reset as
+	 * several devices helpfully clear their internal state and
+	 * will lock solid if we touch the data port post reset. Pass
+	 * qc in case anyone wants to do different PIO/DMA recovery or
+	 * has per command fixups
 	 */
 	if (ap->ops->sff_drain_fifo)
 		ap->ops->sff_drain_fifo(qc);
 
 	spin_unlock_irqrestore(ap->lock, flags);
 
-	/* PIO and DMA engines have been stopped, perform recovery */
-
-	/* Ignore ata_sff_softreset if ctl isn't accessible and
-	 * built-in hardresets if SCR access isn't available.
-	 */
+	/* ignore ata_sff_softreset if ctl isn't accessible */
 	if (softreset == ata_sff_softreset && !ap->ioaddr.ctl_addr)
 		softreset = NULL;
-	if (ata_is_builtin_hardreset(hardreset) && !sata_scr_valid(&ap->link))
+
+	/* ignore built-in hardresets if SCR access is not available */
+	if ((hardreset == sata_std_hardreset ||
+	     hardreset == sata_sff_hardreset) && !sata_scr_valid(&ap->link))
 		hardreset = NULL;
 
 	ata_do_eh(ap, ap->ops->prereset, softreset, hardreset,
@@ -2443,27 +2410,6 @@ void ata_sff_error_handler(struct ata_port *ap)
 }
 EXPORT_SYMBOL_GPL(ata_sff_error_handler);
 
-/**
- *	ata_sff_post_internal_cmd - Stock post_internal_cmd for SFF controller
- *	@qc: internal command to clean up
- *
- *	LOCKING:
- *	Kernel thread context (may sleep)
- */
-void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	unsigned long flags;
-
-	spin_lock_irqsave(ap->lock, flags);
-
-	if (ap->ioaddr.bmdma_addr)
-		ap->ops->bmdma_stop(qc);
-
-	spin_unlock_irqrestore(ap->lock, flags);
-}
-EXPORT_SYMBOL_GPL(ata_sff_post_internal_cmd);
-
 /**
  *	ata_sff_std_ports - initialize ioaddr with standard port offsets.
  *	@ioaddr: IO address structure to be initialized
@@ -2811,6 +2757,9 @@ EXPORT_SYMBOL_GPL(ata_pci_sff_init_one);
 const struct ata_port_operations ata_bmdma_port_ops = {
 	.inherits		= &ata_sff_port_ops,
 
+	.error_handler		= ata_bmdma_error_handler,
+	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
+
 	.bmdma_setup		= ata_bmdma_setup,
 	.bmdma_start		= ata_bmdma_start,
 	.bmdma_stop		= ata_bmdma_stop,
@@ -2828,6 +2777,84 @@ const struct ata_port_operations ata_bmdma32_port_ops = {
 };
 EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
 
+/**
+ *	ata_bmdma_error_handler - Stock error handler for BMDMA controller
+ *	@ap: port to handle error for
+ *
+ *	Stock error handler for BMDMA controller.  It can handle both
+ *	PATA and SATA controllers.  Most BMDMA controllers should be
+ *	able to use this EH as-is or with some added handling before
+ *	and after.
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+void ata_bmdma_error_handler(struct ata_port *ap)
+{
+	struct ata_queued_cmd *qc;
+	unsigned long flags;
+	bool thaw = false;
+
+	qc = __ata_qc_from_tag(ap, ap->link.active_tag);
+	if (qc && !(qc->flags & ATA_QCFLAG_FAILED))
+		qc = NULL;
+
+	/* reset PIO HSM and stop DMA engine */
+	spin_lock_irqsave(ap->lock, flags);
+
+	if (qc && ata_is_dma(qc->tf.protocol)) {
+		u8 host_stat;
+
+		host_stat = ap->ops->bmdma_status(ap);
+
+		/* BMDMA controllers indicate host bus error by
+		 * setting DMA_ERR bit and timing out.  As it wasn't
+		 * really a timeout event, adjust error mask and
+		 * cancel frozen state.
+		 */
+		if (qc->err_mask == AC_ERR_TIMEOUT && (host_stat & ATA_DMA_ERR)) {
+			qc->err_mask = AC_ERR_HOST_BUS;
+			thaw = true;
+		}
+
+		ap->ops->bmdma_stop(qc);
+
+		/* if we're gonna thaw, make sure IRQ is clear */
+		if (thaw) {
+			ap->ops->sff_check_status(ap);
+			ap->ops->sff_irq_clear(ap);
+		}
+	}
+
+	spin_unlock_irqrestore(ap->lock, flags);
+
+	if (thaw)
+		ata_eh_thaw_port(ap);
+
+	ata_sff_error_handler(ap);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_error_handler);
+
+/**
+ *	ata_bmdma_post_internal_cmd - Stock post_internal_cmd for BMDMA
+ *	@qc: internal command to clean up
+ *
+ *	LOCKING:
+ *	Kernel thread context (may sleep)
+ */
+void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	unsigned long flags;
+
+	if (ata_is_dma(qc->tf.protocol)) {
+		spin_lock_irqsave(ap->lock, flags);
+		ap->ops->bmdma_stop(qc);
+		spin_unlock_irqrestore(ap->lock, flags);
+	}
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_post_internal_cmd);
+
 /**
  *	ata_bmdma_setup - Set up PCI IDE BMDMA transaction
  *	@qc: Info associated with this ATA transaction.
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 002390cdc041..4b84ed60324a 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -38,17 +38,6 @@ struct ata_scsi_args {
 	void			(*done)(struct scsi_cmnd *);
 };
 
-static inline int ata_is_builtin_hardreset(ata_reset_fn_t reset)
-{
-	if (reset == sata_std_hardreset)
-		return 1;
-#ifdef CONFIG_ATA_SFF
-	if (reset == sata_sff_hardreset)
-		return 1;
-#endif
-	return 0;
-}
-
 /* libata-core.c */
 enum {
 	/* flags for ata_dev_read_id() */
diff --git a/drivers/ata/pata_icside.c b/drivers/ata/pata_icside.c
index ee85a9ce0a11..b56e8f722d20 100644
--- a/drivers/ata/pata_icside.c
+++ b/drivers/ata/pata_icside.c
@@ -333,7 +333,6 @@ static struct ata_port_operations pata_icside_port_ops = {
 	.cable_detect		= ata_cable_40wire,
 	.set_dmamode		= pata_icside_set_dmamode,
 	.postreset		= pata_icside_postreset,
-	.post_internal_cmd	= pata_icside_bmdma_stop,
 
 	.port_start		= ATA_OP_NULL,	/* don't need PRD table */
 };
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 70d549e28f0f..93f690e51a79 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -951,7 +951,6 @@ static struct ata_port_operations scc_pata_ops = {
 	.prereset		= scc_pata_prereset,
 	.softreset		= scc_softreset,
 	.postreset		= scc_postreset,
-	.post_internal_cmd	= scc_bmdma_stop,
 
 	.sff_irq_clear		= scc_irq_clear,
 
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index a007b20c1991..64e99824d8c1 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1131,7 +1131,7 @@ static void nv_adma_post_internal_cmd(struct ata_queued_cmd *qc)
 	struct nv_adma_port_priv *pp = qc->ap->private_data;
 
 	if (pp->flags & NV_ADMA_PORT_REGISTER_MODE)
-		ata_sff_post_internal_cmd(qc);
+		ata_bmdma_post_internal_cmd(qc);
 }
 
 static int nv_adma_port_start(struct ata_port *ap)
@@ -1739,7 +1739,7 @@ static void nv_adma_error_handler(struct ata_port *ap)
 		readw(mmio + NV_ADMA_CTL);	/* flush posted write */
 	}
 
-	ata_sff_error_handler(ap);
+	ata_bmdma_error_handler(ap);
 }
 
 static void nv_swncq_qc_to_dq(struct ata_port *ap, struct ata_queued_cmd *qc)
@@ -1865,7 +1865,7 @@ static void nv_swncq_error_handler(struct ata_port *ap)
 		ehc->i.action |= ATA_EH_RESET;
 	}
 
-	ata_sff_error_handler(ap);
+	ata_bmdma_error_handler(ap);
 }
 
 #ifdef CONFIG_PM
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index e80628a77669..09a6179f5de3 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -839,7 +839,7 @@ static void pdc_error_handler(struct ata_port *ap)
 	if (!(ap->pflags & ATA_PFLAG_FROZEN))
 		pdc_reset_port(ap);
 
-	ata_std_error_handler(ap);
+	ata_sff_error_handler(ap);
 }
 
 static void pdc_post_internal_cmd(struct ata_queued_cmd *qc)
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index da84ea9e4fb0..d3a22f2ae7b6 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -147,7 +147,6 @@ static struct ata_port_operations qs_ata_ops = {
 	.prereset		= qs_prereset,
 	.softreset		= ATA_OP_NULL,
 	.error_handler		= qs_error_handler,
-	.post_internal_cmd	= ATA_OP_NULL,
 	.lost_interrupt		= ATA_OP_NULL,
 
 	.scr_read		= qs_scr_read,
@@ -255,7 +254,7 @@ static int qs_scr_read(struct ata_link *link, unsigned int sc_reg, u32 *val)
 static void qs_error_handler(struct ata_port *ap)
 {
 	qs_enter_reg_mode(ap);
-	ata_std_error_handler(ap);
+	ata_sff_error_handler(ap);
 }
 
 static int qs_scr_write(struct ata_link *link, unsigned int sc_reg, u32 val)
diff --git a/drivers/ata/sata_sx4.c b/drivers/ata/sata_sx4.c
index a4e552a325b0..bedd5188e5b0 100644
--- a/drivers/ata/sata_sx4.c
+++ b/drivers/ata/sata_sx4.c
@@ -921,7 +921,7 @@ static void pdc_error_handler(struct ata_port *ap)
 	if (!(ap->pflags & ATA_PFLAG_FROZEN))
 		pdc_reset_port(ap);
 
-	ata_std_error_handler(ap);
+	ata_sff_error_handler(ap);
 }
 
 static void pdc_post_internal_cmd(struct ata_queued_cmd *qc)
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 6888b5c36e0f..1d3859016aec 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1614,7 +1614,6 @@ extern int sata_sff_hardreset(struct ata_link *link, unsigned int *class,
 extern void ata_sff_postreset(struct ata_link *link, unsigned int *classes);
 extern void ata_sff_drain_fifo(struct ata_queued_cmd *qc);
 extern void ata_sff_error_handler(struct ata_port *ap);
-extern void ata_sff_post_internal_cmd(struct ata_queued_cmd *qc);
 extern void ata_sff_std_ports(struct ata_ioports *ioaddr);
 #ifdef CONFIG_PCI
 extern int ata_pci_sff_init_host(struct ata_host *host);
@@ -1629,6 +1628,8 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 		struct scsi_host_template *sht, void *host_priv, int hflags);
 #endif /* CONFIG_PCI */
 
+extern void ata_bmdma_error_handler(struct ata_port *ap);
+extern void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc);
 extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
 extern void ata_bmdma_start(struct ata_queued_cmd *qc);
 extern void ata_bmdma_stop(struct ata_queued_cmd *qc);
-- 
cgit v1.2.3


From f47451c45fe0032ef491aaf3e0623fa0154e156d Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:40 +0200
Subject: libata-sff: ata_sff_[dumb_]qc_prep are BMDMA specific

Both qc_prep functions deal only with BMDMA PRD setup and PIO only SFF
drivers don't need them.  Rename to ata_bmdma_[dumb_]qc_prep() and
relocate.

All usages are renamed except for pdc_adma and sata_qstor.  Those two
drivers are not BMDMA drivers and don't need to call BMDMA qc_prep
functions.  Calls to ata_sff_qc_prep() in the two drivers are removed.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c  | 290 +++++++++++++++++++++++-----------------------
 drivers/ata/pata_atiixp.c |   2 +-
 drivers/ata/pata_cs5520.c |   2 +-
 drivers/ata/pata_cs5530.c |   2 +-
 drivers/ata/pata_sc1200.c |   2 +-
 drivers/ata/pdc_adma.c    |   4 +-
 drivers/ata/sata_nv.c     |   4 +-
 drivers/ata/sata_qstor.c  |   4 +-
 include/linux/libata.h    |   4 +-
 9 files changed, 156 insertions(+), 158 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index aa378c04ed87..a58693bdde9d 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -45,7 +45,7 @@ static struct workqueue_struct *ata_sff_wq;
 const struct ata_port_operations ata_sff_port_ops = {
 	.inherits		= &ata_base_port_ops,
 
-	.qc_prep		= ata_sff_qc_prep,
+	.qc_prep		= ata_noop_qc_prep,
 	.qc_issue		= ata_sff_qc_issue,
 	.qc_fill_rtf		= ata_sff_qc_fill_rtf,
 
@@ -70,149 +70,6 @@ const struct ata_port_operations ata_sff_port_ops = {
 };
 EXPORT_SYMBOL_GPL(ata_sff_port_ops);
 
-/**
- *	ata_fill_sg - Fill PCI IDE PRD table
- *	@qc: Metadata associated with taskfile to be transferred
- *
- *	Fill PCI IDE PRD (scatter-gather) table with segments
- *	associated with the current disk command.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- *
- */
-static void ata_fill_sg(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	struct scatterlist *sg;
-	unsigned int si, pi;
-
-	pi = 0;
-	for_each_sg(qc->sg, sg, qc->n_elem, si) {
-		u32 addr, offset;
-		u32 sg_len, len;
-
-		/* determine if physical DMA addr spans 64K boundary.
-		 * Note h/w doesn't support 64-bit, so we unconditionally
-		 * truncate dma_addr_t to u32.
-		 */
-		addr = (u32) sg_dma_address(sg);
-		sg_len = sg_dma_len(sg);
-
-		while (sg_len) {
-			offset = addr & 0xffff;
-			len = sg_len;
-			if ((offset + sg_len) > 0x10000)
-				len = 0x10000 - offset;
-
-			ap->prd[pi].addr = cpu_to_le32(addr);
-			ap->prd[pi].flags_len = cpu_to_le32(len & 0xffff);
-			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
-
-			pi++;
-			sg_len -= len;
-			addr += len;
-		}
-	}
-
-	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
-}
-
-/**
- *	ata_fill_sg_dumb - Fill PCI IDE PRD table
- *	@qc: Metadata associated with taskfile to be transferred
- *
- *	Fill PCI IDE PRD (scatter-gather) table with segments
- *	associated with the current disk command. Perform the fill
- *	so that we avoid writing any length 64K records for
- *	controllers that don't follow the spec.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- *
- */
-static void ata_fill_sg_dumb(struct ata_queued_cmd *qc)
-{
-	struct ata_port *ap = qc->ap;
-	struct scatterlist *sg;
-	unsigned int si, pi;
-
-	pi = 0;
-	for_each_sg(qc->sg, sg, qc->n_elem, si) {
-		u32 addr, offset;
-		u32 sg_len, len, blen;
-
-		/* determine if physical DMA addr spans 64K boundary.
-		 * Note h/w doesn't support 64-bit, so we unconditionally
-		 * truncate dma_addr_t to u32.
-		 */
-		addr = (u32) sg_dma_address(sg);
-		sg_len = sg_dma_len(sg);
-
-		while (sg_len) {
-			offset = addr & 0xffff;
-			len = sg_len;
-			if ((offset + sg_len) > 0x10000)
-				len = 0x10000 - offset;
-
-			blen = len & 0xffff;
-			ap->prd[pi].addr = cpu_to_le32(addr);
-			if (blen == 0) {
-				/* Some PATA chipsets like the CS5530 can't
-				   cope with 0x0000 meaning 64K as the spec
-				   says */
-				ap->prd[pi].flags_len = cpu_to_le32(0x8000);
-				blen = 0x8000;
-				ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000);
-			}
-			ap->prd[pi].flags_len = cpu_to_le32(blen);
-			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
-
-			pi++;
-			sg_len -= len;
-			addr += len;
-		}
-	}
-
-	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
-}
-
-/**
- *	ata_sff_qc_prep - Prepare taskfile for submission
- *	@qc: Metadata associated with taskfile to be prepared
- *
- *	Prepare ATA taskfile for submission.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_sff_qc_prep(struct ata_queued_cmd *qc)
-{
-	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
-
-	ata_fill_sg(qc);
-}
-EXPORT_SYMBOL_GPL(ata_sff_qc_prep);
-
-/**
- *	ata_sff_dumb_qc_prep - Prepare taskfile for submission
- *	@qc: Metadata associated with taskfile to be prepared
- *
- *	Prepare ATA taskfile for submission.
- *
- *	LOCKING:
- *	spin_lock_irqsave(host lock)
- */
-void ata_sff_dumb_qc_prep(struct ata_queued_cmd *qc)
-{
-	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
-		return;
-
-	ata_fill_sg_dumb(qc);
-}
-EXPORT_SYMBOL_GPL(ata_sff_dumb_qc_prep);
-
 /**
  *	ata_sff_check_status - Read device status reg & clear interrupt
  *	@ap: port where the device is
@@ -2760,6 +2617,8 @@ const struct ata_port_operations ata_bmdma_port_ops = {
 	.error_handler		= ata_bmdma_error_handler,
 	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
 
+	.qc_prep		= ata_bmdma_qc_prep,
+
 	.bmdma_setup		= ata_bmdma_setup,
 	.bmdma_start		= ata_bmdma_start,
 	.bmdma_stop		= ata_bmdma_stop,
@@ -2777,6 +2636,149 @@ const struct ata_port_operations ata_bmdma32_port_ops = {
 };
 EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
 
+/**
+ *	ata_bmdma_fill_sg - Fill PCI IDE PRD table
+ *	@qc: Metadata associated with taskfile to be transferred
+ *
+ *	Fill PCI IDE PRD (scatter-gather) table with segments
+ *	associated with the current disk command.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ *
+ */
+static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct scatterlist *sg;
+	unsigned int si, pi;
+
+	pi = 0;
+	for_each_sg(qc->sg, sg, qc->n_elem, si) {
+		u32 addr, offset;
+		u32 sg_len, len;
+
+		/* determine if physical DMA addr spans 64K boundary.
+		 * Note h/w doesn't support 64-bit, so we unconditionally
+		 * truncate dma_addr_t to u32.
+		 */
+		addr = (u32) sg_dma_address(sg);
+		sg_len = sg_dma_len(sg);
+
+		while (sg_len) {
+			offset = addr & 0xffff;
+			len = sg_len;
+			if ((offset + sg_len) > 0x10000)
+				len = 0x10000 - offset;
+
+			ap->prd[pi].addr = cpu_to_le32(addr);
+			ap->prd[pi].flags_len = cpu_to_le32(len & 0xffff);
+			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
+
+			pi++;
+			sg_len -= len;
+			addr += len;
+		}
+	}
+
+	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+}
+
+/**
+ *	ata_bmdma_fill_sg_dumb - Fill PCI IDE PRD table
+ *	@qc: Metadata associated with taskfile to be transferred
+ *
+ *	Fill PCI IDE PRD (scatter-gather) table with segments
+ *	associated with the current disk command. Perform the fill
+ *	so that we avoid writing any length 64K records for
+ *	controllers that don't follow the spec.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ *
+ */
+static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+	struct scatterlist *sg;
+	unsigned int si, pi;
+
+	pi = 0;
+	for_each_sg(qc->sg, sg, qc->n_elem, si) {
+		u32 addr, offset;
+		u32 sg_len, len, blen;
+
+		/* determine if physical DMA addr spans 64K boundary.
+		 * Note h/w doesn't support 64-bit, so we unconditionally
+		 * truncate dma_addr_t to u32.
+		 */
+		addr = (u32) sg_dma_address(sg);
+		sg_len = sg_dma_len(sg);
+
+		while (sg_len) {
+			offset = addr & 0xffff;
+			len = sg_len;
+			if ((offset + sg_len) > 0x10000)
+				len = 0x10000 - offset;
+
+			blen = len & 0xffff;
+			ap->prd[pi].addr = cpu_to_le32(addr);
+			if (blen == 0) {
+				/* Some PATA chipsets like the CS5530 can't
+				   cope with 0x0000 meaning 64K as the spec
+				   says */
+				ap->prd[pi].flags_len = cpu_to_le32(0x8000);
+				blen = 0x8000;
+				ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000);
+			}
+			ap->prd[pi].flags_len = cpu_to_le32(blen);
+			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
+
+			pi++;
+			sg_len -= len;
+			addr += len;
+		}
+	}
+
+	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+}
+
+/**
+ *	ata_bmdma_qc_prep - Prepare taskfile for submission
+ *	@qc: Metadata associated with taskfile to be prepared
+ *
+ *	Prepare ATA taskfile for submission.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_bmdma_qc_prep(struct ata_queued_cmd *qc)
+{
+	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+		return;
+
+	ata_bmdma_fill_sg(qc);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_qc_prep);
+
+/**
+ *	ata_bmdma_dumb_qc_prep - Prepare taskfile for submission
+ *	@qc: Metadata associated with taskfile to be prepared
+ *
+ *	Prepare ATA taskfile for submission.
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ */
+void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
+{
+	if (!(qc->flags & ATA_QCFLAG_DMAMAP))
+		return;
+
+	ata_bmdma_fill_sg_dumb(qc);
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep);
+
 /**
  *	ata_bmdma_error_handler - Stock error handler for BMDMA controller
  *	@ap: port to handle error for
diff --git a/drivers/ata/pata_atiixp.c b/drivers/ata/pata_atiixp.c
index cbaf2eddac6b..44d88b380ddd 100644
--- a/drivers/ata/pata_atiixp.c
+++ b/drivers/ata/pata_atiixp.c
@@ -217,7 +217,7 @@ static struct scsi_host_template atiixp_sht = {
 static struct ata_port_operations atiixp_port_ops = {
 	.inherits	= &ata_bmdma_port_ops,
 
-	.qc_prep 	= ata_sff_dumb_qc_prep,
+	.qc_prep 	= ata_bmdma_dumb_qc_prep,
 	.bmdma_start 	= atiixp_bmdma_start,
 	.bmdma_stop	= atiixp_bmdma_stop,
 
diff --git a/drivers/ata/pata_cs5520.c b/drivers/ata/pata_cs5520.c
index 95ebdac517f2..17c5f346ff01 100644
--- a/drivers/ata/pata_cs5520.c
+++ b/drivers/ata/pata_cs5520.c
@@ -110,7 +110,7 @@ static struct scsi_host_template cs5520_sht = {
 
 static struct ata_port_operations cs5520_port_ops = {
 	.inherits		= &ata_bmdma_port_ops,
-	.qc_prep		= ata_sff_dumb_qc_prep,
+	.qc_prep		= ata_bmdma_dumb_qc_prep,
 	.cable_detect		= ata_cable_40wire,
 	.set_piomode		= cs5520_set_piomode,
 };
diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c
index 738ad2e14a97..4b9a66f18de6 100644
--- a/drivers/ata/pata_cs5530.c
+++ b/drivers/ata/pata_cs5530.c
@@ -167,7 +167,7 @@ static struct scsi_host_template cs5530_sht = {
 static struct ata_port_operations cs5530_port_ops = {
 	.inherits	= &ata_bmdma_port_ops,
 
-	.qc_prep 	= ata_sff_dumb_qc_prep,
+	.qc_prep 	= ata_bmdma_dumb_qc_prep,
 	.qc_issue	= cs5530_qc_issue,
 
 	.cable_detect	= ata_cable_40wire,
diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index dfecc6f964b0..599e648a722f 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c
@@ -209,7 +209,7 @@ static struct scsi_host_template sc1200_sht = {
 
 static struct ata_port_operations sc1200_port_ops = {
 	.inherits	= &ata_bmdma_port_ops,
-	.qc_prep 	= ata_sff_dumb_qc_prep,
+	.qc_prep 	= ata_bmdma_dumb_qc_prep,
 	.qc_issue	= sc1200_qc_issue,
 	.qc_defer	= sc1200_qc_defer,
 	.cable_detect	= ata_cable_40wire,
diff --git a/drivers/ata/pdc_adma.c b/drivers/ata/pdc_adma.c
index bb4f838655b6..adbe0426c8f0 100644
--- a/drivers/ata/pdc_adma.c
+++ b/drivers/ata/pdc_adma.c
@@ -324,10 +324,8 @@ static void adma_qc_prep(struct ata_queued_cmd *qc)
 	VPRINTK("ENTER\n");
 
 	adma_enter_reg_mode(qc->ap);
-	if (qc->tf.protocol != ATA_PROT_DMA) {
-		ata_sff_qc_prep(qc);
+	if (qc->tf.protocol != ATA_PROT_DMA)
 		return;
-	}
 
 	buf[i++] = 0;	/* Response flags */
 	buf[i++] = 0;	/* reserved */
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 64e99824d8c1..7a283d5d68f3 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1409,7 +1409,7 @@ static void nv_adma_qc_prep(struct ata_queued_cmd *qc)
 		BUG_ON(!(pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) &&
 			(qc->flags & ATA_QCFLAG_DMAMAP));
 		nv_adma_register_mode(qc->ap);
-		ata_sff_qc_prep(qc);
+		ata_bmdma_qc_prep(qc);
 		return;
 	}
 
@@ -2012,7 +2012,7 @@ static int nv_swncq_port_start(struct ata_port *ap)
 static void nv_swncq_qc_prep(struct ata_queued_cmd *qc)
 {
 	if (qc->tf.protocol != ATA_PROT_NCQ) {
-		ata_sff_qc_prep(qc);
+		ata_bmdma_qc_prep(qc);
 		return;
 	}
 
diff --git a/drivers/ata/sata_qstor.c b/drivers/ata/sata_qstor.c
index d3a22f2ae7b6..d533b3d20ca1 100644
--- a/drivers/ata/sata_qstor.c
+++ b/drivers/ata/sata_qstor.c
@@ -303,10 +303,8 @@ static void qs_qc_prep(struct ata_queued_cmd *qc)
 	VPRINTK("ENTER\n");
 
 	qs_enter_reg_mode(qc->ap);
-	if (qc->tf.protocol != ATA_PROT_DMA) {
-		ata_sff_qc_prep(qc);
+	if (qc->tf.protocol != ATA_PROT_DMA)
 		return;
-	}
 
 	nelem = qs_fill_sg(qc);
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 1d3859016aec..3675fd29b2e5 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1570,8 +1570,6 @@ extern const struct ata_port_operations ata_bmdma32_port_ops;
 	.sg_tablesize		= LIBATA_MAX_PRD,		\
 	.dma_boundary		= ATA_DMA_BOUNDARY
 
-extern void ata_sff_qc_prep(struct ata_queued_cmd *qc);
-extern void ata_sff_dumb_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_sff_dev_select(struct ata_port *ap, unsigned int device);
 extern u8 ata_sff_check_status(struct ata_port *ap);
 extern void ata_sff_pause(struct ata_port *ap);
@@ -1628,6 +1626,8 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 		struct scsi_host_template *sht, void *host_priv, int hflags);
 #endif /* CONFIG_PCI */
 
+extern void ata_bmdma_qc_prep(struct ata_queued_cmd *qc);
+extern void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_bmdma_error_handler(struct ata_port *ap);
 extern void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc);
 extern void ata_bmdma_setup(struct ata_queued_cmd *qc);
-- 
cgit v1.2.3


From f60d70113fa04e32aee2dedbf304a48469c9c45c Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:41 +0200
Subject: libata-sff: prd is BMDMA specific

struct ata_prd and ap->prd are BMDMA specific.  Add bmdma_ prefix to
them and move them inside CONFIG_ATA_SFF.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c   | 27 +++++++++++++++------------
 drivers/ata/pata_ns87415.c |  2 +-
 drivers/ata/pata_scc.c     |  4 ++--
 drivers/ata/sata_nv.c      |  4 ++--
 drivers/ata/sata_promise.c | 21 +++++++++++----------
 drivers/ata/sata_sil.c     |  6 +++---
 drivers/ata/sata_svw.c     |  2 +-
 include/linux/ata.h        |  2 +-
 include/linux/libata.h     |  5 ++---
 9 files changed, 38 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index a58693bdde9d..6f52b598b8d2 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -2650,6 +2650,7 @@ EXPORT_SYMBOL_GPL(ata_bmdma32_port_ops);
 static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_bmdma_prd *prd = ap->bmdma_prd;
 	struct scatterlist *sg;
 	unsigned int si, pi;
 
@@ -2671,8 +2672,8 @@ static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
 			if ((offset + sg_len) > 0x10000)
 				len = 0x10000 - offset;
 
-			ap->prd[pi].addr = cpu_to_le32(addr);
-			ap->prd[pi].flags_len = cpu_to_le32(len & 0xffff);
+			prd[pi].addr = cpu_to_le32(addr);
+			prd[pi].flags_len = cpu_to_le32(len & 0xffff);
 			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
 
 			pi++;
@@ -2681,7 +2682,7 @@ static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
 		}
 	}
 
-	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+	prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
 }
 
 /**
@@ -2700,6 +2701,7 @@ static void ata_bmdma_fill_sg(struct ata_queued_cmd *qc)
 static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_bmdma_prd *prd = ap->bmdma_prd;
 	struct scatterlist *sg;
 	unsigned int si, pi;
 
@@ -2722,16 +2724,16 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
 				len = 0x10000 - offset;
 
 			blen = len & 0xffff;
-			ap->prd[pi].addr = cpu_to_le32(addr);
+			prd[pi].addr = cpu_to_le32(addr);
 			if (blen == 0) {
 				/* Some PATA chipsets like the CS5530 can't
 				   cope with 0x0000 meaning 64K as the spec
 				   says */
-				ap->prd[pi].flags_len = cpu_to_le32(0x8000);
+				prd[pi].flags_len = cpu_to_le32(0x8000);
 				blen = 0x8000;
-				ap->prd[++pi].addr = cpu_to_le32(addr + 0x8000);
+				prd[++pi].addr = cpu_to_le32(addr + 0x8000);
 			}
-			ap->prd[pi].flags_len = cpu_to_le32(blen);
+			prd[pi].flags_len = cpu_to_le32(blen);
 			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", pi, addr, len);
 
 			pi++;
@@ -2740,7 +2742,7 @@ static void ata_bmdma_fill_sg_dumb(struct ata_queued_cmd *qc)
 		}
 	}
 
-	ap->prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+	prd[pi - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
 }
 
 /**
@@ -2872,7 +2874,7 @@ void ata_bmdma_setup(struct ata_queued_cmd *qc)
 
 	/* load PRD table addr. */
 	mb();	/* make sure PRD table writes are visible to controller */
-	iowrite32(ap->prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
+	iowrite32(ap->bmdma_prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
 
 	/* specify data direction, triple-check start bit is clear */
 	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
@@ -2977,9 +2979,10 @@ EXPORT_SYMBOL_GPL(ata_bmdma_status);
 int ata_bmdma_port_start(struct ata_port *ap)
 {
 	if (ap->mwdma_mask || ap->udma_mask) {
-		ap->prd = dmam_alloc_coherent(ap->host->dev, ATA_PRD_TBL_SZ,
-					      &ap->prd_dma, GFP_KERNEL);
-		if (!ap->prd)
+		ap->bmdma_prd =
+			dmam_alloc_coherent(ap->host->dev, ATA_PRD_TBL_SZ,
+					    &ap->bmdma_prd_dma, GFP_KERNEL);
+		if (!ap->bmdma_prd)
 			return -ENOMEM;
 	}
 
diff --git a/drivers/ata/pata_ns87415.c b/drivers/ata/pata_ns87415.c
index 830431f036a1..fdbba2d76d3e 100644
--- a/drivers/ata/pata_ns87415.c
+++ b/drivers/ata/pata_ns87415.c
@@ -126,7 +126,7 @@ static void ns87415_bmdma_setup(struct ata_queued_cmd *qc)
 
 	/* load PRD table addr. */
 	mb();	/* make sure PRD table writes are visible to controller */
-	iowrite32(ap->prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
+	iowrite32(ap->bmdma_prd_dma, ap->ioaddr.bmdma_addr + ATA_DMA_TABLE_OFS);
 
 	/* specify data direction, triple-check start bit is clear */
 	dmactl = ioread8(ap->ioaddr.bmdma_addr + ATA_DMA_CMD);
diff --git a/drivers/ata/pata_scc.c b/drivers/ata/pata_scc.c
index 93f690e51a79..6f6193b707cb 100644
--- a/drivers/ata/pata_scc.c
+++ b/drivers/ata/pata_scc.c
@@ -441,7 +441,7 @@ static void scc_bmdma_setup (struct ata_queued_cmd *qc)
 	void __iomem *mmio = ap->ioaddr.bmdma_addr;
 
 	/* load PRD table addr */
-	out_be32(mmio + SCC_DMA_TABLE_OFS, ap->prd_dma);
+	out_be32(mmio + SCC_DMA_TABLE_OFS, ap->bmdma_prd_dma);
 
 	/* specify data direction, triple-check start bit is clear */
 	dmactl = in_be32(mmio + SCC_DMA_CMD);
@@ -905,7 +905,7 @@ static int scc_port_start (struct ata_port *ap)
 	if (rc)
 		return rc;
 
-	out_be32(mmio + SCC_DMA_PTERADD, ap->prd_dma);
+	out_be32(mmio + SCC_DMA_PTERADD, ap->bmdma_prd_dma);
 	return 0;
 }
 
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 7a283d5d68f3..4863a62c3d8c 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -272,7 +272,7 @@ enum ncq_saw_flag_list {
 };
 
 struct nv_swncq_port_priv {
-	struct ata_prd	*prd;	 /* our SG list */
+	struct ata_bmdma_prd *prd;	 /* our SG list */
 	dma_addr_t	prd_dma; /* and its DMA mapping */
 	void __iomem	*sactive_block;
 	void __iomem	*irq_block;
@@ -2027,7 +2027,7 @@ static void nv_swncq_fill_sg(struct ata_queued_cmd *qc)
 	struct ata_port *ap = qc->ap;
 	struct scatterlist *sg;
 	struct nv_swncq_port_priv *pp = ap->private_data;
-	struct ata_prd *prd;
+	struct ata_bmdma_prd *prd;
 	unsigned int si, idx;
 
 	prd = pp->prd + ATA_MAX_PRD * qc->tag;
diff --git a/drivers/ata/sata_promise.c b/drivers/ata/sata_promise.c
index 09a6179f5de3..f03ad48273ff 100644
--- a/drivers/ata/sata_promise.c
+++ b/drivers/ata/sata_promise.c
@@ -500,7 +500,7 @@ static int pdc_sata_scr_write(struct ata_link *link,
 static void pdc_atapi_pkt(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
-	dma_addr_t sg_table = ap->prd_dma;
+	dma_addr_t sg_table = ap->bmdma_prd_dma;
 	unsigned int cdb_len = qc->dev->cdb_len;
 	u8 *cdb = qc->cdb;
 	struct pdc_port_priv *pp = ap->private_data;
@@ -588,6 +588,7 @@ static void pdc_atapi_pkt(struct ata_queued_cmd *qc)
 static void pdc_fill_sg(struct ata_queued_cmd *qc)
 {
 	struct ata_port *ap = qc->ap;
+	struct ata_bmdma_prd *prd = ap->bmdma_prd;
 	struct scatterlist *sg;
 	const u32 SG_COUNT_ASIC_BUG = 41*4;
 	unsigned int si, idx;
@@ -614,8 +615,8 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
 			if ((offset + sg_len) > 0x10000)
 				len = 0x10000 - offset;
 
-			ap->prd[idx].addr = cpu_to_le32(addr);
-			ap->prd[idx].flags_len = cpu_to_le32(len & 0xffff);
+			prd[idx].addr = cpu_to_le32(addr);
+			prd[idx].flags_len = cpu_to_le32(len & 0xffff);
 			VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
 
 			idx++;
@@ -624,27 +625,27 @@ static void pdc_fill_sg(struct ata_queued_cmd *qc)
 		}
 	}
 
-	len = le32_to_cpu(ap->prd[idx - 1].flags_len);
+	len = le32_to_cpu(prd[idx - 1].flags_len);
 
 	if (len > SG_COUNT_ASIC_BUG) {
 		u32 addr;
 
 		VPRINTK("Splitting last PRD.\n");
 
-		addr = le32_to_cpu(ap->prd[idx - 1].addr);
-		ap->prd[idx - 1].flags_len = cpu_to_le32(len - SG_COUNT_ASIC_BUG);
+		addr = le32_to_cpu(prd[idx - 1].addr);
+		prd[idx - 1].flags_len = cpu_to_le32(len - SG_COUNT_ASIC_BUG);
 		VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx - 1, addr, SG_COUNT_ASIC_BUG);
 
 		addr = addr + len - SG_COUNT_ASIC_BUG;
 		len = SG_COUNT_ASIC_BUG;
-		ap->prd[idx].addr = cpu_to_le32(addr);
-		ap->prd[idx].flags_len = cpu_to_le32(len);
+		prd[idx].addr = cpu_to_le32(addr);
+		prd[idx].flags_len = cpu_to_le32(len);
 		VPRINTK("PRD[%u] = (0x%X, 0x%X)\n", idx, addr, len);
 
 		idx++;
 	}
 
-	ap->prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
+	prd[idx - 1].flags_len |= cpu_to_le32(ATA_PRD_EOT);
 }
 
 static void pdc_qc_prep(struct ata_queued_cmd *qc)
@@ -659,7 +660,7 @@ static void pdc_qc_prep(struct ata_queued_cmd *qc)
 		pdc_fill_sg(qc);
 		/*FALLTHROUGH*/
 	case ATA_PROT_NODATA:
-		i = pdc_pkt_header(&qc->tf, qc->ap->prd_dma,
+		i = pdc_pkt_header(&qc->tf, qc->ap->bmdma_prd_dma,
 				   qc->dev->devno, pp->pkt);
 		if (qc->tf.flags & ATA_TFLAG_LBA48)
 			i = pdc_prep_lba48(&qc->tf, pp->pkt, i);
diff --git a/drivers/ata/sata_sil.c b/drivers/ata/sata_sil.c
index 9c367f71c627..2dda312b6b9a 100644
--- a/drivers/ata/sata_sil.c
+++ b/drivers/ata/sata_sil.c
@@ -284,7 +284,7 @@ static void sil_bmdma_setup(struct ata_queued_cmd *qc)
 	void __iomem *bmdma = ap->ioaddr.bmdma_addr;
 
 	/* load PRD table addr. */
-	iowrite32(ap->prd_dma, bmdma + ATA_DMA_TABLE_OFS);
+	iowrite32(ap->bmdma_prd_dma, bmdma + ATA_DMA_TABLE_OFS);
 
 	/* issue r/w command */
 	ap->ops->sff_exec_command(ap, &qc->tf);
@@ -311,10 +311,10 @@ static void sil_fill_sg(struct ata_queued_cmd *qc)
 {
 	struct scatterlist *sg;
 	struct ata_port *ap = qc->ap;
-	struct ata_prd *prd, *last_prd = NULL;
+	struct ata_bmdma_prd *prd, *last_prd = NULL;
 	unsigned int si;
 
-	prd = &ap->prd[0];
+	prd = &ap->bmdma_prd[0];
 	for_each_sg(qc->sg, sg, qc->n_elem, si) {
 		/* Note h/w doesn't support 64-bit, so we unconditionally
 		 * truncate dma_addr_t to u32.
diff --git a/drivers/ata/sata_svw.c b/drivers/ata/sata_svw.c
index 7257f2d5c52c..101fd6a19829 100644
--- a/drivers/ata/sata_svw.c
+++ b/drivers/ata/sata_svw.c
@@ -224,7 +224,7 @@ static void k2_bmdma_setup_mmio(struct ata_queued_cmd *qc)
 
 	/* load PRD table addr. */
 	mb();	/* make sure PRD table writes are visible to controller */
-	writel(ap->prd_dma, mmio + ATA_DMA_TABLE_OFS);
+	writel(ap->bmdma_prd_dma, mmio + ATA_DMA_TABLE_OFS);
 
 	/* specify data direction, triple-check start bit is clear */
 	dmactl = readb(mmio + ATA_DMA_CMD);
diff --git a/include/linux/ata.h b/include/linux/ata.h
index 700c5b9b3583..fe6e681a9d74 100644
--- a/include/linux/ata.h
+++ b/include/linux/ata.h
@@ -467,7 +467,7 @@ enum ata_ioctls {
 
 /* core structures */
 
-struct ata_prd {
+struct ata_bmdma_prd {
 	__le32			addr;
 	__le32			flags_len;
 };
diff --git a/include/linux/libata.h b/include/linux/libata.h
index 3675fd29b2e5..d973972f0db2 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -716,14 +716,13 @@ struct ata_port {
 	unsigned int		print_id; /* user visible unique port ID */
 	unsigned int		port_no; /* 0 based port no. inside the host */
 
-	struct ata_prd		*prd;	 /* our SG list */
-	dma_addr_t		prd_dma; /* and its DMA mapping */
-
 #ifdef CONFIG_ATA_SFF
 	struct ata_ioports	ioaddr;	/* ATA cmd/ctl/dma register blocks */
 	u8			ctl;	/* cache of ATA control register */
 	u8			last_ctl;	/* Cache last written value */
 	struct delayed_work	sff_pio_task;
+	struct ata_bmdma_prd	*bmdma_prd;	/* BMDMA SG list */
+	dma_addr_t		bmdma_prd_dma;	/* and its DMA mapping */
 #endif /* CONFIG_ATA_SFF */
 
 	unsigned int		pio_mask;
-- 
cgit v1.2.3


From 360ff7833098e944e5003618b03894251e937802 Mon Sep 17 00:00:00 2001
From: Tejun Heo <tj@kernel.org>
Date: Mon, 10 May 2010 21:41:42 +0200
Subject: libata-sff: separate out BMDMA qc_issue

Separate out ata_bmdma_qc_issue() from ata_sff_qc_issue() such that
ata_sff_qc_issue() only deals with non-BMDMA SFF protocols (PIO and
nodata) while ata_bmdma_qc_issue() deals with the BMDMA protocols and
uses ata_sff_qc_issue() for non-DMA commands.  All the users are
updated accordingly.

Signed-off-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
---
 drivers/ata/libata-sff.c   | 112 +++++++++++++++++++++++++++------------------
 drivers/ata/pata_acpi.c    |   4 +-
 drivers/ata/pata_cs5530.c  |   2 +-
 drivers/ata/pata_hpt3x2n.c |   2 +-
 drivers/ata/pata_it821x.c  |   4 +-
 drivers/ata/pata_oldpiix.c |   2 +-
 drivers/ata/pata_radisys.c |   2 +-
 drivers/ata/pata_sc1200.c  |   2 +-
 drivers/ata/sata_mv.c      |   2 +-
 drivers/ata/sata_nv.c      |   4 +-
 include/linux/libata.h     |   1 +
 11 files changed, 80 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
index 6f52b598b8d2..19ddf924944f 100644
--- a/drivers/ata/libata-sff.c
+++ b/drivers/ata/libata-sff.c
@@ -1379,15 +1379,11 @@ fsm_start:
 }
 
 /**
- *	ata_sff_qc_issue - issue taskfile to device in proto-dependent manner
+ *	ata_sff_qc_issue - issue taskfile to a SFF controller
  *	@qc: command to issue to device
  *
- *	Using various libata functions and hooks, this function
- *	starts an ATA command.  ATA commands are grouped into
- *	classes called "protocols", and issuing each type of protocol
- *	is slightly different.
- *
- *	May be used as the qc_issue() entry in ata_port_operations.
+ *	This function issues a PIO or NODATA command to a SFF
+ *	controller.
  *
  *	LOCKING:
  *	spin_lock_irqsave(host lock)
@@ -1402,23 +1398,8 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 	/* Use polling pio if the LLD doesn't handle
 	 * interrupt driven pio and atapi CDB interrupt.
 	 */
-	if (ap->flags & ATA_FLAG_PIO_POLLING) {
-		switch (qc->tf.protocol) {
-		case ATA_PROT_PIO:
-		case ATA_PROT_NODATA:
-		case ATAPI_PROT_PIO:
-		case ATAPI_PROT_NODATA:
-			qc->tf.flags |= ATA_TFLAG_POLLING;
-			break;
-		case ATAPI_PROT_DMA:
-			if (qc->dev->flags & ATA_DFLAG_CDB_INTR)
-				/* see ata_dma_blacklisted() */
-				BUG();
-			break;
-		default:
-			break;
-		}
-	}
+	if (ap->flags & ATA_FLAG_PIO_POLLING)
+		qc->tf.flags |= ATA_TFLAG_POLLING;
 
 	/* select the device */
 	ata_dev_select(ap, qc->dev->devno, 1, 0);
@@ -1437,15 +1418,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 
 		break;
 
-	case ATA_PROT_DMA:
-		WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
-
-		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
-		ap->ops->bmdma_setup(qc);	    /* set up bmdma */
-		ap->ops->bmdma_start(qc);	    /* initiate bmdma */
-		ap->hsm_task_state = HSM_ST_LAST;
-		break;
-
 	case ATA_PROT_PIO:
 		if (qc->tf.flags & ATA_TFLAG_POLLING)
 			ata_qc_set_polling(qc);
@@ -1490,18 +1462,6 @@ unsigned int ata_sff_qc_issue(struct ata_queued_cmd *qc)
 			ata_sff_queue_pio_task(ap, 0);
 		break;
 
-	case ATAPI_PROT_DMA:
-		WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
-
-		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
-		ap->ops->bmdma_setup(qc);	    /* set up bmdma */
-		ap->hsm_task_state = HSM_ST_FIRST;
-
-		/* send cdb by polling if no cdb interrupt */
-		if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
-			ata_sff_queue_pio_task(ap, 0);
-		break;
-
 	default:
 		WARN_ON_ONCE(1);
 		return AC_ERR_SYSTEM;
@@ -2618,6 +2578,7 @@ const struct ata_port_operations ata_bmdma_port_ops = {
 	.post_internal_cmd	= ata_bmdma_post_internal_cmd,
 
 	.qc_prep		= ata_bmdma_qc_prep,
+	.qc_issue		= ata_bmdma_qc_issue,
 
 	.bmdma_setup		= ata_bmdma_setup,
 	.bmdma_start		= ata_bmdma_start,
@@ -2781,6 +2742,67 @@ void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc)
 }
 EXPORT_SYMBOL_GPL(ata_bmdma_dumb_qc_prep);
 
+/**
+ *	ata_bmdma_qc_issue - issue taskfile to a BMDMA controller
+ *	@qc: command to issue to device
+ *
+ *	This function issues a PIO, NODATA or DMA command to a
+ *	SFF/BMDMA controller.  PIO and NODATA are handled by
+ *	ata_sff_qc_issue().
+ *
+ *	LOCKING:
+ *	spin_lock_irqsave(host lock)
+ *
+ *	RETURNS:
+ *	Zero on success, AC_ERR_* mask on failure
+ */
+unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc)
+{
+	struct ata_port *ap = qc->ap;
+
+	/* see ata_dma_blacklisted() */
+	BUG_ON((ap->flags & ATA_FLAG_PIO_POLLING) &&
+	       qc->tf.protocol == ATAPI_PROT_DMA);
+
+	/* defer PIO handling to sff_qc_issue */
+	if (!ata_is_dma(qc->tf.protocol))
+		return ata_sff_qc_issue(qc);
+
+	/* select the device */
+	ata_dev_select(ap, qc->dev->devno, 1, 0);
+
+	/* start the command */
+	switch (qc->tf.protocol) {
+	case ATA_PROT_DMA:
+		WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+
+		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
+		ap->ops->bmdma_setup(qc);	    /* set up bmdma */
+		ap->ops->bmdma_start(qc);	    /* initiate bmdma */
+		ap->hsm_task_state = HSM_ST_LAST;
+		break;
+
+	case ATAPI_PROT_DMA:
+		WARN_ON_ONCE(qc->tf.flags & ATA_TFLAG_POLLING);
+
+		ap->ops->sff_tf_load(ap, &qc->tf);  /* load tf registers */
+		ap->ops->bmdma_setup(qc);	    /* set up bmdma */
+		ap->hsm_task_state = HSM_ST_FIRST;
+
+		/* send cdb by polling if no cdb interrupt */
+		if (!(qc->dev->flags & ATA_DFLAG_CDB_INTR))
+			ata_sff_queue_pio_task(ap, 0);
+		break;
+
+	default:
+		WARN_ON(1);
+		return AC_ERR_SYSTEM;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(ata_bmdma_qc_issue);
+
 /**
  *	ata_bmdma_error_handler - Stock error handler for BMDMA controller
  *	@ap: port to handle error for
diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
index 1a5a309abccd..066b9f301ed5 100644
--- a/drivers/ata/pata_acpi.c
+++ b/drivers/ata/pata_acpi.c
@@ -172,7 +172,7 @@ static unsigned int pacpi_qc_issue(struct ata_queued_cmd *qc)
 	struct pata_acpi *acpi = ap->private_data;
 
 	if (acpi->gtm.flags & 0x10)
-		return ata_sff_qc_issue(qc);
+		return ata_bmdma_qc_issue(qc);
 
 	if (adev != acpi->last) {
 		pacpi_set_piomode(ap, adev);
@@ -180,7 +180,7 @@ static unsigned int pacpi_qc_issue(struct ata_queued_cmd *qc)
 			pacpi_set_dmamode(ap, adev);
 		acpi->last = adev;
 	}
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 /**
diff --git a/drivers/ata/pata_cs5530.c b/drivers/ata/pata_cs5530.c
index 4b9a66f18de6..e809a4233a81 100644
--- a/drivers/ata/pata_cs5530.c
+++ b/drivers/ata/pata_cs5530.c
@@ -156,7 +156,7 @@ static unsigned int cs5530_qc_issue(struct ata_queued_cmd *qc)
 		    	cs5530_set_dmamode(ap, adev);
 	}
 
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 static struct scsi_host_template cs5530_sht = {
diff --git a/drivers/ata/pata_hpt3x2n.c b/drivers/ata/pata_hpt3x2n.c
index 01457b266f3d..8b95aeba0e74 100644
--- a/drivers/ata/pata_hpt3x2n.c
+++ b/drivers/ata/pata_hpt3x2n.c
@@ -320,7 +320,7 @@ static unsigned int hpt3x2n_qc_issue(struct ata_queued_cmd *qc)
 
 		hpt3x2n_set_clock(ap, dpll ? 0x21 : 0x23);
 	}
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 static struct scsi_host_template hpt3x2n_sht = {
diff --git a/drivers/ata/pata_it821x.c b/drivers/ata/pata_it821x.c
index 8d73438f292c..2bd2b002d14a 100644
--- a/drivers/ata/pata_it821x.c
+++ b/drivers/ata/pata_it821x.c
@@ -430,7 +430,7 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc)
 		case 0xFC:	/* Internal 'report rebuild state' */
 		/* Arguably should just no-op this one */
 		case ATA_CMD_SET_FEATURES:
-			return ata_sff_qc_issue(qc);
+			return ata_bmdma_qc_issue(qc);
 	}
 	printk(KERN_DEBUG "it821x: can't process command 0x%02X\n", qc->tf.command);
 	return AC_ERR_DEV;
@@ -448,7 +448,7 @@ static unsigned int it821x_smart_qc_issue(struct ata_queued_cmd *qc)
 static unsigned int it821x_passthru_qc_issue(struct ata_queued_cmd *qc)
 {
 	it821x_passthru_dev_select(qc->ap, qc->dev->devno);
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 /**
diff --git a/drivers/ata/pata_oldpiix.c b/drivers/ata/pata_oldpiix.c
index 5f6aba7eb0dd..988ef2627be3 100644
--- a/drivers/ata/pata_oldpiix.c
+++ b/drivers/ata/pata_oldpiix.c
@@ -200,7 +200,7 @@ static unsigned int oldpiix_qc_issue(struct ata_queued_cmd *qc)
 		if (ata_dma_enabled(adev))
 			oldpiix_set_dmamode(ap, adev);
 	}
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 
diff --git a/drivers/ata/pata_radisys.c b/drivers/ata/pata_radisys.c
index fc9602229acb..a5fa388e5398 100644
--- a/drivers/ata/pata_radisys.c
+++ b/drivers/ata/pata_radisys.c
@@ -179,7 +179,7 @@ static unsigned int radisys_qc_issue(struct ata_queued_cmd *qc)
 				radisys_set_piomode(ap, adev);
 		}
 	}
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 
diff --git a/drivers/ata/pata_sc1200.c b/drivers/ata/pata_sc1200.c
index 599e648a722f..6b5b63a2fd8e 100644
--- a/drivers/ata/pata_sc1200.c
+++ b/drivers/ata/pata_sc1200.c
@@ -174,7 +174,7 @@ static unsigned int sc1200_qc_issue(struct ata_queued_cmd *qc)
 		    	sc1200_set_dmamode(ap, adev);
 	}
 
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 /**
diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
index fb0d2c1d8417..f3471bc949d3 100644
--- a/drivers/ata/sata_mv.c
+++ b/drivers/ata/sata_mv.c
@@ -2358,7 +2358,7 @@ static unsigned int mv_qc_issue(struct ata_queued_cmd *qc)
 		if (IS_GEN_II(hpriv))
 			return mv_qc_issue_fis(qc);
 	}
-	return ata_sff_qc_issue(qc);
+	return ata_bmdma_qc_issue(qc);
 }
 
 static struct ata_queued_cmd *mv_get_active_qc(struct ata_port *ap)
diff --git a/drivers/ata/sata_nv.c b/drivers/ata/sata_nv.c
index 4863a62c3d8c..baa8f0d2c86f 100644
--- a/drivers/ata/sata_nv.c
+++ b/drivers/ata/sata_nv.c
@@ -1468,7 +1468,7 @@ static unsigned int nv_adma_qc_issue(struct ata_queued_cmd *qc)
 		BUG_ON(!(pp->flags & NV_ADMA_ATAPI_SETUP_COMPLETE) &&
 			(qc->flags & ATA_QCFLAG_DMAMAP));
 		nv_adma_register_mode(qc->ap);
-		return ata_sff_qc_issue(qc);
+		return ata_bmdma_qc_issue(qc);
 	} else
 		nv_adma_mode(qc->ap);
 
@@ -2088,7 +2088,7 @@ static unsigned int nv_swncq_qc_issue(struct ata_queued_cmd *qc)
 	struct nv_swncq_port_priv *pp = ap->private_data;
 
 	if (qc->tf.protocol != ATA_PROT_NCQ)
-		return ata_sff_qc_issue(qc);
+		return ata_bmdma_qc_issue(qc);
 
 	DPRINTK("Enter\n");
 
diff --git a/include/linux/libata.h b/include/linux/libata.h
index d973972f0db2..ee84e7e12039 100644
--- a/include/linux/libata.h
+++ b/include/linux/libata.h
@@ -1626,6 +1626,7 @@ extern int ata_pci_sff_init_one(struct pci_dev *pdev,
 #endif /* CONFIG_PCI */
 
 extern void ata_bmdma_qc_prep(struct ata_queued_cmd *qc);
+extern unsigned int ata_bmdma_qc_issue(struct ata_queued_cmd *qc);
 extern void ata_bmdma_dumb_qc_prep(struct ata_queued_cmd *qc);
 extern void ata_bmdma_error_handler(struct ata_port *ap);
 extern void ata_bmdma_post_internal_cmd(struct ata_queued_cmd *qc);
-- 
cgit v1.2.3


From 20c9d2c4ab8243a1c311248232954b2c1da3ba75 Mon Sep 17 00:00:00 2001
From: Kalle Jokiniemi <kalle.jokiniemi@digia.com>
Date: Tue, 11 May 2010 11:35:08 -0700
Subject: i2c-omap: add mpu wake up latency constraint in i2c

While waiting for completion of the i2c transfer, the
MPU could hit OFF mode and cause several msecs of
delay that made i2c transfers fail more often. The
extra delays and subsequent retries cause i2c clocks
to be active more often. This also has a negative
effect on power consumption.

Created a mechanism for passing the constraint-setting
function to the driver and using it there. The MPU
wake-up latency constraints are now set individually
per bus, and they are calculated based on clock rate
and fifo size.

Thanks to Jarkko Nikula, Moiz Sonasath, Paul Walmsley,
and Nishanth Menon for tuning out the details of
this patch.

Updates by Kevin as requested by Tony:

- Remove omap_set_i2c_constraint_func() in favor of conditionally
  adding the flag in omap_i2c_add_bus() in order to keep all the OMAP
  conditional checking in a single location.
- Update set_mpu_wkup_lat prototypes to match OMAP PM layer so
  OMAP PM function can be used directly in pdata.

Cc: Moiz Sonasath <m-sonasath@ti.com>
Cc: Jarkko Nikula <jhnikula@gmail.com>
Cc: Paul Walmsley <paul@pwsan.com>
Cc: Nishanth Menon <nm@ti.com>
Signed-off-by: Kalle Jokiniemi <kalle.jokiniemi@digia.com>
Signed-off-by: Kevin Hilman <khilman@deeprootsystems.com>
Signed-off-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Ben Dooks <ben-linux@fluff.org>
---
 arch/arm/plat-omap/i2c.c      | 39 +++++++++++++++++++++++++++------------
 drivers/i2c/busses/i2c-omap.c | 24 ++++++++++++++++++++----
 include/linux/i2c-omap.h      |  9 +++++++++
 3 files changed, 56 insertions(+), 16 deletions(-)
 create mode 100644 include/linux/i2c-omap.h

(limited to 'include/linux')

diff --git a/arch/arm/plat-omap/i2c.c b/arch/arm/plat-omap/i2c.c
index 624e26298faa..f044b5927508 100644
--- a/arch/arm/plat-omap/i2c.c
+++ b/arch/arm/plat-omap/i2c.c
@@ -26,9 +26,12 @@
 #include <linux/kernel.h>
 #include <linux/platform_device.h>
 #include <linux/i2c.h>
+#include <linux/i2c-omap.h>
+
 #include <mach/irqs.h>
 #include <plat/mux.h>
 #include <plat/i2c.h>
+#include <plat/omap-pm.h>
 
 #define OMAP_I2C_SIZE		0x3f
 #define OMAP1_I2C_BASE		0xfffb3800
@@ -70,14 +73,14 @@ static struct resource i2c_resources[][2] = {
 		},					\
 	}
 
-static u32 i2c_rate[ARRAY_SIZE(i2c_resources)];
+static struct omap_i2c_bus_platform_data i2c_pdata[ARRAY_SIZE(i2c_resources)];
 static struct platform_device omap_i2c_devices[] = {
-	I2C_DEV_BUILDER(1, i2c_resources[0], &i2c_rate[0]),
+	I2C_DEV_BUILDER(1, i2c_resources[0], &i2c_pdata[0]),
 #if	defined(CONFIG_ARCH_OMAP2) || defined(CONFIG_ARCH_OMAP3)
-	I2C_DEV_BUILDER(2, i2c_resources[1], &i2c_rate[1]),
+	I2C_DEV_BUILDER(2, i2c_resources[1], &i2c_pdata[1]),
 #endif
 #if	defined(CONFIG_ARCH_OMAP3)
-	I2C_DEV_BUILDER(3, i2c_resources[2], &i2c_rate[2]),
+	I2C_DEV_BUILDER(3, i2c_resources[2], &i2c_pdata[2]),
 #endif
 };
 
@@ -100,10 +103,12 @@ static int __init omap_i2c_nr_ports(void)
 static int __init omap_i2c_add_bus(int bus_id)
 {
 	struct platform_device *pdev;
+	struct omap_i2c_bus_platform_data *pd;
 	struct resource *res;
 	resource_size_t base, irq;
 
 	pdev = &omap_i2c_devices[bus_id - 1];
+	pd = pdev->dev.platform_data;
 	if (bus_id == 1) {
 		res = pdev->resource;
 		if (cpu_class_is_omap1()) {
@@ -123,6 +128,15 @@ static int __init omap_i2c_add_bus(int bus_id)
 	if (cpu_class_is_omap2())
 		omap2_i2c_mux_pins(bus_id);
 
+	/*
+	 * When waiting for completion of a i2c transfer, we need to
+	 * set a wake up latency constraint for the MPU. This is to
+	 * ensure quick enough wakeup from idle, when transfer
+	 * completes.
+	 */
+	if (cpu_is_omap34xx())
+		pd->set_mpu_wkup_lat = omap_pm_set_max_mpu_wakeup_lat;
+
 	return platform_device_register(pdev);
 }
 
@@ -146,8 +160,8 @@ static int __init omap_i2c_bus_setup(char *str)
 	get_options(str, 3, ints);
 	if (ints[0] < 2 || ints[1] < 1 || ints[1] > ports)
 		return 0;
-	i2c_rate[ints[1] - 1] = ints[2];
-	i2c_rate[ints[1] - 1] |= OMAP_I2C_CMDLINE_SETUP;
+	i2c_pdata[ints[1] - 1].clkrate = ints[2];
+	i2c_pdata[ints[1] - 1].clkrate |= OMAP_I2C_CMDLINE_SETUP;
 
 	return 1;
 }
@@ -161,9 +175,9 @@ static int __init omap_register_i2c_bus_cmdline(void)
 {
 	int i, err = 0;
 
-	for (i = 0; i < ARRAY_SIZE(i2c_rate); i++)
-		if (i2c_rate[i] & OMAP_I2C_CMDLINE_SETUP) {
-			i2c_rate[i] &= ~OMAP_I2C_CMDLINE_SETUP;
+	for (i = 0; i < ARRAY_SIZE(i2c_pdata); i++)
+		if (i2c_pdata[i].clkrate & OMAP_I2C_CMDLINE_SETUP) {
+			i2c_pdata[i].clkrate &= ~OMAP_I2C_CMDLINE_SETUP;
 			err = omap_i2c_add_bus(i + 1);
 			if (err)
 				goto out;
@@ -197,9 +211,10 @@ int __init omap_register_i2c_bus(int bus_id, u32 clkrate,
 			return err;
 	}
 
-	if (!i2c_rate[bus_id - 1])
-		i2c_rate[bus_id - 1] = clkrate;
-	i2c_rate[bus_id - 1] &= ~OMAP_I2C_CMDLINE_SETUP;
+	if (!i2c_pdata[bus_id - 1].clkrate)
+		i2c_pdata[bus_id - 1].clkrate = clkrate;
+
+	i2c_pdata[bus_id - 1].clkrate &= ~OMAP_I2C_CMDLINE_SETUP;
 
 	return omap_i2c_add_bus(bus_id);
 }
diff --git a/drivers/i2c/busses/i2c-omap.c b/drivers/i2c/busses/i2c-omap.c
index 46111ff18133..42c0b9108c7f 100644
--- a/drivers/i2c/busses/i2c-omap.c
+++ b/drivers/i2c/busses/i2c-omap.c
@@ -38,6 +38,7 @@
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/slab.h>
+#include <linux/i2c-omap.h>
 
 /* I2C controller revisions */
 #define OMAP_I2C_REV_2			0x20
@@ -175,6 +176,9 @@ struct omap_i2c_dev {
 	struct clk		*fclk;		/* Functional clock */
 	struct completion	cmd_complete;
 	struct resource		*ioarea;
+	u32			latency;	/* maximum mpu wkup latency */
+	void			(*set_mpu_wkup_lat)(struct device *dev,
+						    long latency);
 	u32			speed;		/* Speed of bus in Khz */
 	u16			cmd_err;
 	u8			*buf;
@@ -603,8 +607,12 @@ static int omap_i2c_xfer_msg(struct i2c_adapter *adap,
 	 * REVISIT: We should abort the transfer on signals, but the bus goes
 	 * into arbitration and we're currently unable to recover from it.
 	 */
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, dev->latency);
 	r = wait_for_completion_timeout(&dev->cmd_complete,
 					OMAP_I2C_TIMEOUT);
+	if (dev->set_mpu_wkup_lat != NULL)
+		dev->set_mpu_wkup_lat(dev->dev, -1);
 	dev->buf_len = 0;
 	if (r < 0)
 		return r;
@@ -927,6 +935,7 @@ omap_i2c_probe(struct platform_device *pdev)
 	struct omap_i2c_dev	*dev;
 	struct i2c_adapter	*adap;
 	struct resource		*mem, *irq, *ioarea;
+	struct omap_i2c_bus_platform_data *pdata = pdev->dev.platform_data;
 	irq_handler_t isr;
 	int r;
 	u32 speed = 0;
@@ -956,10 +965,13 @@ omap_i2c_probe(struct platform_device *pdev)
 		goto err_release_region;
 	}
 
-	if (pdev->dev.platform_data != NULL)
-		speed = *(u32 *)pdev->dev.platform_data;
-	else
-		speed = 100;	/* Defualt speed */
+	if (pdata != NULL) {
+		speed = pdata->clkrate;
+		dev->set_mpu_wkup_lat = pdata->set_mpu_wkup_lat;
+	} else {
+		speed = 100;	/* Default speed */
+		dev->set_mpu_wkup_lat = NULL;
+	}
 
 	dev->speed = speed;
 	dev->idle = 1;
@@ -1011,6 +1023,10 @@ omap_i2c_probe(struct platform_device *pdev)
 			dev->fifo_size = (dev->fifo_size / 2);
 			dev->b_hw = 1; /* Enable hardware fixes */
 		}
+		/* calculate wakeup latency constraint for MPU */
+		if (dev->set_mpu_wkup_lat != NULL)
+			dev->latency = (1000000 * dev->fifo_size) /
+				       (1000 * speed / 8);
 	}
 
 	/* reset ASAP, clearing any IRQs */
diff --git a/include/linux/i2c-omap.h b/include/linux/i2c-omap.h
new file mode 100644
index 000000000000..78ebf507ce56
--- /dev/null
+++ b/include/linux/i2c-omap.h
@@ -0,0 +1,9 @@
+#ifndef __I2C_OMAP_H__
+#define __I2C_OMAP_H__
+
+struct omap_i2c_bus_platform_data {
+	u32		clkrate;
+	void		(*set_mpu_wkup_lat)(struct device *dev, long set);
+};
+
+#endif
-- 
cgit v1.2.3


From 27729aadd31dafddaaf64c24f8ef6d0ff750f3aa Mon Sep 17 00:00:00 2001
From: Eric Lescouet <Eric.Lescouet@virtuallogix.com>
Date: Sat, 24 Apr 2010 23:21:52 +0200
Subject: USB: make hcd.h public (drivers dependency)

The usbcore headers: hcd.h and hub.h are shared between usbcore,
HCDs and a couple of other drivers (e.g. USBIP modules).
So, it makes sense to move them into a more public location and
to clean up the dependency of those modules on kernel-internal headers.
This patch moves hcd.h from drivers/usb/core into include/linux/usb/

Signed-off-by: Eric Lescouet <eric@lescouet.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/staging/usbip/stub_rx.c |   2 +-
 drivers/staging/usbip/vhci.h    |   2 +-
 drivers/usb/c67x00/c67x00-hcd.h |   2 +-
 drivers/usb/core/buffer.c       |   2 +-
 drivers/usb/core/config.c       |   3 +-
 drivers/usb/core/devices.c      |   2 +-
 drivers/usb/core/devio.c        |   3 +-
 drivers/usb/core/driver.c       |   3 +-
 drivers/usb/core/generic.c      |   2 +-
 drivers/usb/core/hcd-pci.c      |   2 +-
 drivers/usb/core/hcd.c          |   3 +-
 drivers/usb/core/hcd.h          | 578 ----------------------------------------
 drivers/usb/core/hub.c          |   3 +-
 drivers/usb/core/inode.c        |   2 +-
 drivers/usb/core/message.c      |   2 +-
 drivers/usb/core/urb.c          |   2 +-
 drivers/usb/core/usb.c          |   2 +-
 drivers/usb/gadget/dummy_hcd.c  |   4 +-
 drivers/usb/host/ehci-hcd.c     |   3 +-
 drivers/usb/host/fhci-dbg.c     |   2 +-
 drivers/usb/host/fhci-hcd.c     |   2 +-
 drivers/usb/host/fhci-hub.c     |   2 +-
 drivers/usb/host/fhci-mem.c     |   2 +-
 drivers/usb/host/fhci-q.c       |   2 +-
 drivers/usb/host/fhci-sched.c   |   2 +-
 drivers/usb/host/fhci-tds.c     |   2 +-
 drivers/usb/host/fhci.h         |   2 +-
 drivers/usb/host/imx21-hcd.c    |   2 +-
 drivers/usb/host/isp116x-hcd.c  |   2 +-
 drivers/usb/host/isp1362-hcd.c  |   2 +-
 drivers/usb/host/isp1760-hcd.c  |   2 +-
 drivers/usb/host/isp1760-if.c   |   2 +-
 drivers/usb/host/ohci-hcd.c     |   2 +-
 drivers/usb/host/oxu210hp-hcd.c |   3 +-
 drivers/usb/host/r8a66597-hcd.c |   2 +-
 drivers/usb/host/sl811-hcd.c    |   2 +-
 drivers/usb/host/u132-hcd.c     |   2 +-
 drivers/usb/host/uhci-hcd.c     |   2 +-
 drivers/usb/host/xhci.h         |   2 +-
 drivers/usb/misc/ftdi-elan.c    |   2 +-
 drivers/usb/mon/mon_main.c      |   3 +-
 drivers/usb/musb/musb_core.h    |   2 +-
 drivers/usb/wusbcore/wusbhc.h   |   4 +-
 include/linux/usb/hcd.h         | 578 ++++++++++++++++++++++++++++++++++++++++
 44 files changed, 623 insertions(+), 629 deletions(-)
 delete mode 100644 drivers/usb/core/hcd.h
 create mode 100644 include/linux/usb/hcd.h

(limited to 'include/linux')

diff --git a/drivers/staging/usbip/stub_rx.c b/drivers/staging/usbip/stub_rx.c
index bc2674086673..da30658fa048 100644
--- a/drivers/staging/usbip/stub_rx.c
+++ b/drivers/staging/usbip/stub_rx.c
@@ -21,7 +21,7 @@
 
 #include "usbip_common.h"
 #include "stub.h"
-#include "../../usb/core/hcd.h"
+#include <linux/usb/hcd.h>
 
 
 static int is_clear_halt_cmd(struct urb *urb)
diff --git a/drivers/staging/usbip/vhci.h b/drivers/staging/usbip/vhci.h
index 5e375173bbce..41a1fe5138f4 100644
--- a/drivers/staging/usbip/vhci.h
+++ b/drivers/staging/usbip/vhci.h
@@ -18,7 +18,7 @@
  */
 
 #include <linux/platform_device.h>
-#include "../../usb/core/hcd.h"
+#include <linux/usb/hcd.h>
 
 
 struct vhci_device {
diff --git a/drivers/usb/c67x00/c67x00-hcd.h b/drivers/usb/c67x00/c67x00-hcd.h
index e8c6d94b2514..74e44621e313 100644
--- a/drivers/usb/c67x00/c67x00-hcd.h
+++ b/drivers/usb/c67x00/c67x00-hcd.h
@@ -28,7 +28,7 @@
 #include <linux/spinlock.h>
 #include <linux/list.h>
 #include <linux/usb.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "c67x00.h"
 
 /*
diff --git a/drivers/usb/core/buffer.c b/drivers/usb/core/buffer.c
index 3ba2fff71490..2c6965484fe8 100644
--- a/drivers/usb/core/buffer.c
+++ b/drivers/usb/core/buffer.c
@@ -14,7 +14,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
 #include <linux/usb.h>
-#include "hcd.h"
+#include <linux/usb/hcd.h>
 
 
 /*
diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 0d3af6a6ee49..77e0dda3a2fb 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -1,12 +1,13 @@
 #include <linux/usb.h>
 #include <linux/usb/ch9.h>
+#include <linux/usb/hcd.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/device.h>
 #include <asm/byteorder.h>
 #include "usb.h"
-#include "hcd.h"
+
 
 #define USB_MAXALTSETTING		128	/* Hard limit */
 #define USB_MAXENDPOINTS		30	/* Hard limit */
diff --git a/drivers/usb/core/devices.c b/drivers/usb/core/devices.c
index 19bc03a9fecf..75077215a8b8 100644
--- a/drivers/usb/core/devices.c
+++ b/drivers/usb/core/devices.c
@@ -55,11 +55,11 @@
 #include <linux/usb.h>
 #include <linux/smp_lock.h>
 #include <linux/usbdevice_fs.h>
+#include <linux/usb/hcd.h>
 #include <linux/mutex.h>
 #include <asm/uaccess.h>
 
 #include "usb.h"
-#include "hcd.h"
 
 /* Define ALLOW_SERIAL_NUMBER if you want to see the serial number of devices */
 #define ALLOW_SERIAL_NUMBER
diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c
index 3466fdc5bb11..c2f62a3993d2 100644
--- a/drivers/usb/core/devio.c
+++ b/drivers/usb/core/devio.c
@@ -43,6 +43,7 @@
 #include <linux/module.h>
 #include <linux/usb.h>
 #include <linux/usbdevice_fs.h>
+#include <linux/usb/hcd.h>	/* for usbcore internals */
 #include <linux/cdev.h>
 #include <linux/notifier.h>
 #include <linux/security.h>
@@ -50,9 +51,7 @@
 #include <asm/byteorder.h>
 #include <linux/moduleparam.h>
 
-#include "hcd.h"	/* for usbcore internals */
 #include "usb.h"
-#include "hub.h"
 
 #define USB_MAXBUS			64
 #define USB_DEVICE_MAX			USB_MAXBUS * 128
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 2f3dc4cdf79b..edff55a32575 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -26,8 +26,9 @@
 #include <linux/slab.h>
 #include <linux/usb.h>
 #include <linux/usb/quirks.h>
+#include <linux/usb/hcd.h>
 #include <linux/pm_runtime.h>
-#include "hcd.h"
+
 #include "usb.h"
 
 
diff --git a/drivers/usb/core/generic.c b/drivers/usb/core/generic.c
index 2c95153c0f24..9a34ccb0a1c0 100644
--- a/drivers/usb/core/generic.c
+++ b/drivers/usb/core/generic.c
@@ -18,8 +18,8 @@
  */
 
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include "usb.h"
-#include "hcd.h"
 
 static inline const char *plural(int n)
 {
diff --git a/drivers/usb/core/hcd-pci.c b/drivers/usb/core/hcd-pci.c
index 15286533c15a..1cf2d1e79a5c 100644
--- a/drivers/usb/core/hcd-pci.c
+++ b/drivers/usb/core/hcd-pci.c
@@ -21,6 +21,7 @@
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 
 #include <asm/io.h>
 #include <asm/irq.h>
@@ -33,7 +34,6 @@
 #endif
 
 #include "usb.h"
-#include "hcd.h"
 
 
 /* PCI-based HCs are common, but plenty of non-PCI HCs are used too */
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 2f8cedda8007..4a24843cfc61 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -42,10 +42,9 @@
 #include <linux/pm_runtime.h>
 
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 
 #include "usb.h"
-#include "hcd.h"
-#include "hub.h"
 
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/core/hcd.h b/drivers/usb/core/hcd.h
deleted file mode 100644
index a3cdb09734ab..000000000000
--- a/drivers/usb/core/hcd.h
+++ /dev/null
@@ -1,578 +0,0 @@
-/*
- * Copyright (c) 2001-2002 by David Brownell
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software Foundation,
- * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- */
-
-#ifndef __USB_CORE_HCD_H
-#define __USB_CORE_HCD_H
-
-#ifdef __KERNEL__
-
-#include <linux/rwsem.h>
-
-#define MAX_TOPO_LEVEL		6
-
-/* This file contains declarations of usbcore internals that are mostly
- * used or exposed by Host Controller Drivers.
- */
-
-/*
- * USB Packet IDs (PIDs)
- */
-#define USB_PID_EXT			0xf0	/* USB 2.0 LPM ECN */
-#define USB_PID_OUT			0xe1
-#define USB_PID_ACK			0xd2
-#define USB_PID_DATA0			0xc3
-#define USB_PID_PING			0xb4	/* USB 2.0 */
-#define USB_PID_SOF			0xa5
-#define USB_PID_NYET			0x96	/* USB 2.0 */
-#define USB_PID_DATA2			0x87	/* USB 2.0 */
-#define USB_PID_SPLIT			0x78	/* USB 2.0 */
-#define USB_PID_IN			0x69
-#define USB_PID_NAK			0x5a
-#define USB_PID_DATA1			0x4b
-#define USB_PID_PREAMBLE		0x3c	/* Token mode */
-#define USB_PID_ERR			0x3c	/* USB 2.0: handshake mode */
-#define USB_PID_SETUP			0x2d
-#define USB_PID_STALL			0x1e
-#define USB_PID_MDATA			0x0f	/* USB 2.0 */
-
-/*-------------------------------------------------------------------------*/
-
-/*
- * USB Host Controller Driver (usb_hcd) framework
- *
- * Since "struct usb_bus" is so thin, you can't share much code in it.
- * This framework is a layer over that, and should be more sharable.
- *
- * @authorized_default: Specifies if new devices are authorized to
- *                      connect by default or they require explicit
- *                      user space authorization; this bit is settable
- *                      through /sys/class/usb_host/X/authorized_default.
- *                      For the rest is RO, so we don't lock to r/w it.
- */
-
-/*-------------------------------------------------------------------------*/
-
-struct usb_hcd {
-
-	/*
-	 * housekeeping
-	 */
-	struct usb_bus		self;		/* hcd is-a bus */
-	struct kref		kref;		/* reference counter */
-
-	const char		*product_desc;	/* product/vendor string */
-	char			irq_descr[24];	/* driver + bus # */
-
-	struct timer_list	rh_timer;	/* drives root-hub polling */
-	struct urb		*status_urb;	/* the current status urb */
-#ifdef CONFIG_USB_SUSPEND
-	struct work_struct	wakeup_work;	/* for remote wakeup */
-#endif
-
-	/*
-	 * hardware info/state
-	 */
-	const struct hc_driver	*driver;	/* hw-specific hooks */
-
-	/* Flags that need to be manipulated atomically */
-	unsigned long		flags;
-#define HCD_FLAG_HW_ACCESSIBLE	0x00000001
-#define HCD_FLAG_SAW_IRQ	0x00000002
-
-	unsigned		rh_registered:1;/* is root hub registered? */
-
-	/* The next flag is a stopgap, to be removed when all the HCDs
-	 * support the new root-hub polling mechanism. */
-	unsigned		uses_new_polling:1;
-	unsigned		poll_rh:1;	/* poll for rh status? */
-	unsigned		poll_pending:1;	/* status has changed? */
-	unsigned		wireless:1;	/* Wireless USB HCD */
-	unsigned		authorized_default:1;
-	unsigned		has_tt:1;	/* Integrated TT in root hub */
-
-	int			irq;		/* irq allocated */
-	void __iomem		*regs;		/* device memory/io */
-	u64			rsrc_start;	/* memory/io resource start */
-	u64			rsrc_len;	/* memory/io resource length */
-	unsigned		power_budget;	/* in mA, 0 = no limit */
-
-	/* bandwidth_mutex should be taken before adding or removing
-	 * any new bus bandwidth constraints:
-	 *   1. Before adding a configuration for a new device.
-	 *   2. Before removing the configuration to put the device into
-	 *      the addressed state.
-	 *   3. Before selecting a different configuration.
-	 *   4. Before selecting an alternate interface setting.
-	 *
-	 * bandwidth_mutex should be dropped after a successful control message
-	 * to the device, or resetting the bandwidth after a failed attempt.
-	 */
-	struct mutex		bandwidth_mutex;
-
-
-#define HCD_BUFFER_POOLS	4
-	struct dma_pool		*pool [HCD_BUFFER_POOLS];
-
-	int			state;
-#	define	__ACTIVE		0x01
-#	define	__SUSPEND		0x04
-#	define	__TRANSIENT		0x80
-
-#	define	HC_STATE_HALT		0
-#	define	HC_STATE_RUNNING	(__ACTIVE)
-#	define	HC_STATE_QUIESCING	(__SUSPEND|__TRANSIENT|__ACTIVE)
-#	define	HC_STATE_RESUMING	(__SUSPEND|__TRANSIENT)
-#	define	HC_STATE_SUSPENDED	(__SUSPEND)
-
-#define	HC_IS_RUNNING(state) ((state) & __ACTIVE)
-#define	HC_IS_SUSPENDED(state) ((state) & __SUSPEND)
-
-	/* more shared queuing code would be good; it should support
-	 * smarter scheduling, handle transaction translators, etc;
-	 * input size of periodic table to an interrupt scheduler.
-	 * (ohci 32, uhci 1024, ehci 256/512/1024).
-	 */
-
-	/* The HC driver's private data is stored at the end of
-	 * this structure.
-	 */
-	unsigned long hcd_priv[0]
-			__attribute__ ((aligned(sizeof(unsigned long))));
-};
-
-/* 2.4 does this a bit differently ... */
-static inline struct usb_bus *hcd_to_bus(struct usb_hcd *hcd)
-{
-	return &hcd->self;
-}
-
-static inline struct usb_hcd *bus_to_hcd(struct usb_bus *bus)
-{
-	return container_of(bus, struct usb_hcd, self);
-}
-
-struct hcd_timeout {	/* timeouts we allocate */
-	struct list_head	timeout_list;
-	struct timer_list	timer;
-};
-
-/*-------------------------------------------------------------------------*/
-
-
-struct hc_driver {
-	const char	*description;	/* "ehci-hcd" etc */
-	const char	*product_desc;	/* product/vendor string */
-	size_t		hcd_priv_size;	/* size of private data */
-
-	/* irq handler */
-	irqreturn_t	(*irq) (struct usb_hcd *hcd);
-
-	int	flags;
-#define	HCD_MEMORY	0x0001		/* HC regs use memory (else I/O) */
-#define	HCD_LOCAL_MEM	0x0002		/* HC needs local memory */
-#define	HCD_USB11	0x0010		/* USB 1.1 */
-#define	HCD_USB2	0x0020		/* USB 2.0 */
-#define	HCD_USB3	0x0040		/* USB 3.0 */
-#define	HCD_MASK	0x0070
-
-	/* called to init HCD and root hub */
-	int	(*reset) (struct usb_hcd *hcd);
-	int	(*start) (struct usb_hcd *hcd);
-
-	/* NOTE:  these suspend/resume calls relate to the HC as
-	 * a whole, not just the root hub; they're for PCI bus glue.
-	 */
-	/* called after suspending the hub, before entering D3 etc */
-	int	(*pci_suspend)(struct usb_hcd *hcd);
-
-	/* called after entering D0 (etc), before resuming the hub */
-	int	(*pci_resume)(struct usb_hcd *hcd, bool hibernated);
-
-	/* cleanly make HCD stop writing memory and doing I/O */
-	void	(*stop) (struct usb_hcd *hcd);
-
-	/* shutdown HCD */
-	void	(*shutdown) (struct usb_hcd *hcd);
-
-	/* return current frame number */
-	int	(*get_frame_number) (struct usb_hcd *hcd);
-
-	/* manage i/o requests, device state */
-	int	(*urb_enqueue)(struct usb_hcd *hcd,
-				struct urb *urb, gfp_t mem_flags);
-	int	(*urb_dequeue)(struct usb_hcd *hcd,
-				struct urb *urb, int status);
-
-	/* hw synch, freeing endpoint resources that urb_dequeue can't */
-	void 	(*endpoint_disable)(struct usb_hcd *hcd,
-			struct usb_host_endpoint *ep);
-
-	/* (optional) reset any endpoint state such as sequence number
-	   and current window */
-	void 	(*endpoint_reset)(struct usb_hcd *hcd,
-			struct usb_host_endpoint *ep);
-
-	/* root hub support */
-	int	(*hub_status_data) (struct usb_hcd *hcd, char *buf);
-	int	(*hub_control) (struct usb_hcd *hcd,
-				u16 typeReq, u16 wValue, u16 wIndex,
-				char *buf, u16 wLength);
-	int	(*bus_suspend)(struct usb_hcd *);
-	int	(*bus_resume)(struct usb_hcd *);
-	int	(*start_port_reset)(struct usb_hcd *, unsigned port_num);
-
-		/* force handover of high-speed port to full-speed companion */
-	void	(*relinquish_port)(struct usb_hcd *, int);
-		/* has a port been handed over to a companion? */
-	int	(*port_handed_over)(struct usb_hcd *, int);
-
-		/* CLEAR_TT_BUFFER completion callback */
-	void	(*clear_tt_buffer_complete)(struct usb_hcd *,
-				struct usb_host_endpoint *);
-
-	/* xHCI specific functions */
-		/* Called by usb_alloc_dev to alloc HC device structures */
-	int	(*alloc_dev)(struct usb_hcd *, struct usb_device *);
-		/* Called by usb_disconnect to free HC device structures */
-	void	(*free_dev)(struct usb_hcd *, struct usb_device *);
-
-	/* Bandwidth computation functions */
-	/* Note that add_endpoint() can only be called once per endpoint before
-	 * check_bandwidth() or reset_bandwidth() must be called.
-	 * drop_endpoint() can only be called once per endpoint also.
-	 * A call to xhci_drop_endpoint() followed by a call to xhci_add_endpoint() will
-	 * add the endpoint to the schedule with possibly new parameters denoted by a
-	 * different endpoint descriptor in usb_host_endpoint.
-	 * A call to xhci_add_endpoint() followed by a call to xhci_drop_endpoint() is
-	 * not allowed.
-	 */
-		/* Allocate endpoint resources and add them to a new schedule */
-	int 	(*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
-		/* Drop an endpoint from a new schedule */
-	int 	(*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
-		/* Check that a new hardware configuration, set using
-		 * endpoint_enable and endpoint_disable, does not exceed bus
-		 * bandwidth.  This must be called before any set configuration
-		 * or set interface requests are sent to the device.
-		 */
-	int	(*check_bandwidth)(struct usb_hcd *, struct usb_device *);
-		/* Reset the device schedule to the last known good schedule,
-		 * which was set from a previous successful call to
-		 * check_bandwidth().  This reverts any add_endpoint() and
-		 * drop_endpoint() calls since that last successful call.
-		 * Used for when a check_bandwidth() call fails due to resource
-		 * or bandwidth constraints.
-		 */
-	void	(*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
-		/* Returns the hardware-chosen device address */
-	int	(*address_device)(struct usb_hcd *, struct usb_device *udev);
-		/* Notifies the HCD after a hub descriptor is fetched.
-		 * Will block.
-		 */
-	int	(*update_hub_device)(struct usb_hcd *, struct usb_device *hdev,
-			struct usb_tt *tt, gfp_t mem_flags);
-	int	(*reset_device)(struct usb_hcd *, struct usb_device *);
-};
-
-extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
-extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
-		int status);
-extern void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb);
-
-extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags);
-extern int usb_hcd_unlink_urb(struct urb *urb, int status);
-extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb,
-		int status);
-extern void usb_hcd_flush_endpoint(struct usb_device *udev,
-		struct usb_host_endpoint *ep);
-extern void usb_hcd_disable_endpoint(struct usb_device *udev,
-		struct usb_host_endpoint *ep);
-extern void usb_hcd_reset_endpoint(struct usb_device *udev,
-		struct usb_host_endpoint *ep);
-extern void usb_hcd_synchronize_unlinks(struct usb_device *udev);
-extern int usb_hcd_alloc_bandwidth(struct usb_device *udev,
-		struct usb_host_config *new_config,
-		struct usb_host_interface *old_alt,
-		struct usb_host_interface *new_alt);
-extern int usb_hcd_get_frame_number(struct usb_device *udev);
-
-extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver,
-		struct device *dev, const char *bus_name);
-extern struct usb_hcd *usb_get_hcd(struct usb_hcd *hcd);
-extern void usb_put_hcd(struct usb_hcd *hcd);
-extern int usb_add_hcd(struct usb_hcd *hcd,
-		unsigned int irqnum, unsigned long irqflags);
-extern void usb_remove_hcd(struct usb_hcd *hcd);
-
-struct platform_device;
-extern void usb_hcd_platform_shutdown(struct platform_device *dev);
-
-#ifdef CONFIG_PCI
-struct pci_dev;
-struct pci_device_id;
-extern int usb_hcd_pci_probe(struct pci_dev *dev,
-				const struct pci_device_id *id);
-extern void usb_hcd_pci_remove(struct pci_dev *dev);
-extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
-
-#ifdef CONFIG_PM_SLEEP
-extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
-#endif
-#endif /* CONFIG_PCI */
-
-/* pci-ish (pdev null is ok) buffer alloc/mapping support */
-int hcd_buffer_create(struct usb_hcd *hcd);
-void hcd_buffer_destroy(struct usb_hcd *hcd);
-
-void *hcd_buffer_alloc(struct usb_bus *bus, size_t size,
-	gfp_t mem_flags, dma_addr_t *dma);
-void hcd_buffer_free(struct usb_bus *bus, size_t size,
-	void *addr, dma_addr_t dma);
-
-/* generic bus glue, needed for host controllers that don't use PCI */
-extern irqreturn_t usb_hcd_irq(int irq, void *__hcd);
-
-extern void usb_hc_died(struct usb_hcd *hcd);
-extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd);
-
-/* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */
-#define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1)
-#define	usb_dotoggle(dev, ep, out)  ((dev)->toggle[out] ^= (1 << (ep)))
-#define usb_settoggle(dev, ep, out, bit) \
-		((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \
-		 ((bit) << (ep)))
-
-/* -------------------------------------------------------------------------- */
-
-/* Enumeration is only for the hub driver, or HCD virtual root hubs */
-extern struct usb_device *usb_alloc_dev(struct usb_device *parent,
-					struct usb_bus *, unsigned port);
-extern int usb_new_device(struct usb_device *dev);
-extern void usb_disconnect(struct usb_device **);
-
-extern int usb_get_configuration(struct usb_device *dev);
-extern void usb_destroy_configuration(struct usb_device *dev);
-
-/*-------------------------------------------------------------------------*/
-
-/*
- * HCD Root Hub support
- */
-
-#include "hub.h"
-
-/* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */
-#define DeviceRequest \
-	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
-#define DeviceOutRequest \
-	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
-
-#define InterfaceRequest \
-	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
-
-#define EndpointRequest \
-	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
-#define EndpointOutRequest \
-	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
-
-/* class requests from the USB 2.0 hub spec, table 11-15 */
-/* GetBusState and SetHubDescriptor are optional, omitted */
-#define ClearHubFeature		(0x2000 | USB_REQ_CLEAR_FEATURE)
-#define ClearPortFeature	(0x2300 | USB_REQ_CLEAR_FEATURE)
-#define GetHubDescriptor	(0xa000 | USB_REQ_GET_DESCRIPTOR)
-#define GetHubStatus		(0xa000 | USB_REQ_GET_STATUS)
-#define GetPortStatus		(0xa300 | USB_REQ_GET_STATUS)
-#define SetHubFeature		(0x2000 | USB_REQ_SET_FEATURE)
-#define SetPortFeature		(0x2300 | USB_REQ_SET_FEATURE)
-
-
-/*-------------------------------------------------------------------------*/
-
-/*
- * Generic bandwidth allocation constants/support
- */
-#define FRAME_TIME_USECS	1000L
-#define BitTime(bytecount) (7 * 8 * bytecount / 6) /* with integer truncation */
-		/* Trying not to use worst-case bit-stuffing
-		 * of (7/6 * 8 * bytecount) = 9.33 * bytecount */
-		/* bytecount = data payload byte count */
-
-#define NS_TO_US(ns)	((ns + 500L) / 1000L)
-			/* convert & round nanoseconds to microseconds */
-
-
-/*
- * Full/low speed bandwidth allocation constants/support.
- */
-#define BW_HOST_DELAY	1000L		/* nanoseconds */
-#define BW_HUB_LS_SETUP	333L		/* nanoseconds */
-			/* 4 full-speed bit times (est.) */
-
-#define FRAME_TIME_BITS			12000L	/* frame = 1 millisecond */
-#define FRAME_TIME_MAX_BITS_ALLOC	(90L * FRAME_TIME_BITS / 100L)
-#define FRAME_TIME_MAX_USECS_ALLOC	(90L * FRAME_TIME_USECS / 100L)
-
-/*
- * Ceiling [nano/micro]seconds (typical) for that many bytes at high speed
- * ISO is a bit less, no ACK ... from USB 2.0 spec, 5.11.3 (and needed
- * to preallocate bandwidth)
- */
-#define USB2_HOST_DELAY	5	/* nsec, guess */
-#define HS_NSECS(bytes) (((55 * 8 * 2083) \
-	+ (2083UL * (3 + BitTime(bytes))))/1000 \
-	+ USB2_HOST_DELAY)
-#define HS_NSECS_ISO(bytes) (((38 * 8 * 2083) \
-	+ (2083UL * (3 + BitTime(bytes))))/1000 \
-	+ USB2_HOST_DELAY)
-#define HS_USECS(bytes) NS_TO_US (HS_NSECS(bytes))
-#define HS_USECS_ISO(bytes) NS_TO_US (HS_NSECS_ISO(bytes))
-
-extern long usb_calc_bus_time(int speed, int is_input,
-			int isoc, int bytecount);
-
-/*-------------------------------------------------------------------------*/
-
-extern void usb_set_device_state(struct usb_device *udev,
-		enum usb_device_state new_state);
-
-/*-------------------------------------------------------------------------*/
-
-/* exported only within usbcore */
-
-extern struct list_head usb_bus_list;
-extern struct mutex usb_bus_list_lock;
-extern wait_queue_head_t usb_kill_urb_queue;
-
-extern int usb_find_interface_driver(struct usb_device *dev,
-	struct usb_interface *interface);
-
-#define usb_endpoint_out(ep_dir)	(!((ep_dir) & USB_DIR_IN))
-
-#ifdef CONFIG_PM
-extern void usb_root_hub_lost_power(struct usb_device *rhdev);
-extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg);
-extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg);
-#endif /* CONFIG_PM */
-
-#ifdef CONFIG_USB_SUSPEND
-extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd);
-#else
-static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd)
-{
-	return;
-}
-#endif /* CONFIG_USB_SUSPEND */
-
-
-/*
- * USB device fs stuff
- */
-
-#ifdef CONFIG_USB_DEVICEFS
-
-/*
- * these are expected to be called from the USB core/hub thread
- * with the kernel lock held
- */
-extern void usbfs_update_special(void);
-extern int usbfs_init(void);
-extern void usbfs_cleanup(void);
-
-#else /* CONFIG_USB_DEVICEFS */
-
-static inline void usbfs_update_special(void) {}
-static inline int usbfs_init(void) { return 0; }
-static inline void usbfs_cleanup(void) { }
-
-#endif /* CONFIG_USB_DEVICEFS */
-
-/*-------------------------------------------------------------------------*/
-
-#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
-
-struct usb_mon_operations {
-	void (*urb_submit)(struct usb_bus *bus, struct urb *urb);
-	void (*urb_submit_error)(struct usb_bus *bus, struct urb *urb, int err);
-	void (*urb_complete)(struct usb_bus *bus, struct urb *urb, int status);
-	/* void (*urb_unlink)(struct usb_bus *bus, struct urb *urb); */
-};
-
-extern struct usb_mon_operations *mon_ops;
-
-static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb)
-{
-	if (bus->monitored)
-		(*mon_ops->urb_submit)(bus, urb);
-}
-
-static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb,
-    int error)
-{
-	if (bus->monitored)
-		(*mon_ops->urb_submit_error)(bus, urb, error);
-}
-
-static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
-		int status)
-{
-	if (bus->monitored)
-		(*mon_ops->urb_complete)(bus, urb, status);
-}
-
-int usb_mon_register(struct usb_mon_operations *ops);
-void usb_mon_deregister(void);
-
-#else
-
-static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) {}
-static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb,
-    int error) {}
-static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
-		int status) {}
-
-#endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */
-
-/*-------------------------------------------------------------------------*/
-
-/* hub.h ... DeviceRemovable in 2.4.2-ac11, gone in 2.4.10 */
-/* bleech -- resurfaced in 2.4.11 or 2.4.12 */
-#define bitmap 	DeviceRemovable
-
-
-/*-------------------------------------------------------------------------*/
-
-/* random stuff */
-
-#define	RUN_CONTEXT (in_irq() ? "in_irq" \
-		: (in_interrupt() ? "in_interrupt" : "can sleep"))
-
-
-/* This rwsem is for use only by the hub driver and ehci-hcd.
- * Nobody else should touch it.
- */
-extern struct rw_semaphore ehci_cf_port_reset_rwsem;
-
-/* Keep track of which host controller drivers are loaded */
-#define USB_UHCI_LOADED		0
-#define USB_OHCI_LOADED		1
-#define USB_EHCI_LOADED		2
-extern unsigned long usb_hcds_loaded;
-
-#endif /* __KERNEL__ */
-
-#endif /* __USB_CORE_HCD_H */
diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 0940ccd6f4f4..1883c3c7b69b 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -19,6 +19,7 @@
 #include <linux/ioctl.h>
 #include <linux/usb.h>
 #include <linux/usbdevice_fs.h>
+#include <linux/usb/hcd.h>
 #include <linux/kthread.h>
 #include <linux/mutex.h>
 #include <linux/freezer.h>
@@ -28,8 +29,6 @@
 #include <asm/byteorder.h>
 
 #include "usb.h"
-#include "hcd.h"
-#include "hub.h"
 
 /* if we are in debug mode, always announce new devices */
 #ifdef DEBUG
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c
index 111a01a747fc..1a27618b67d6 100644
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -40,9 +40,9 @@
 #include <linux/notifier.h>
 #include <linux/seq_file.h>
 #include <linux/smp_lock.h>
+#include <linux/usb/hcd.h>
 #include <asm/byteorder.h>
 #include "usb.h"
-#include "hcd.h"
 
 #define USBFS_DEFAULT_DEVMODE (S_IWUSR | S_IRUGO)
 #define USBFS_DEFAULT_BUSMODE (S_IXUGO | S_IRUGO)
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index cd220277c6c3..619c44fb8a96 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -14,9 +14,9 @@
 #include <linux/device.h>
 #include <linux/scatterlist.h>
 #include <linux/usb/quirks.h>
+#include <linux/usb/hcd.h>	/* for usbcore internals */
 #include <asm/byteorder.h>
 
-#include "hcd.h"	/* for usbcore internals */
 #include "usb.h"
 
 static void cancel_async_set_config(struct usb_device *udev);
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 45a32dadb406..09301f4b7225 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -6,7 +6,7 @@
 #include <linux/log2.h>
 #include <linux/usb.h>
 #include <linux/wait.h>
-#include "hcd.h"
+#include <linux/usb/hcd.h>
 
 #define to_urb(d) container_of(d, struct urb, kref)
 
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 0561430f2ede..1088abb4416c 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -32,6 +32,7 @@
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 #include <linux/debugfs.h>
@@ -41,7 +42,6 @@
 #include <linux/mm.h>
 #include <linux/dma-mapping.h>
 
-#include "hcd.h"
 #include "usb.h"
 
 
diff --git a/drivers/usb/gadget/dummy_hcd.c b/drivers/usb/gadget/dummy_hcd.c
index 5e0966485188..4f9e578cde9d 100644
--- a/drivers/usb/gadget/dummy_hcd.c
+++ b/drivers/usb/gadget/dummy_hcd.c
@@ -47,6 +47,7 @@
 #include <linux/platform_device.h>
 #include <linux/usb.h>
 #include <linux/usb/gadget.h>
+#include <linux/usb/hcd.h>
 
 #include <asm/byteorder.h>
 #include <asm/io.h>
@@ -55,9 +56,6 @@
 #include <asm/unaligned.h>
 
 
-#include "../core/hcd.h"
-
-
 #define DRIVER_DESC	"USB Host+Gadget Emulator"
 #define DRIVER_VERSION	"02 May 2005"
 
diff --git a/drivers/usb/host/ehci-hcd.c b/drivers/usb/host/ehci-hcd.c
index 13ead00aecd5..ef3e88f0b3c3 100644
--- a/drivers/usb/host/ehci-hcd.c
+++ b/drivers/usb/host/ehci-hcd.c
@@ -31,13 +31,12 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/debugfs.h>
 #include <linux/slab.h>
 
-#include "../core/hcd.h"
-
 #include <asm/byteorder.h>
 #include <asm/io.h>
 #include <asm/irq.h>
diff --git a/drivers/usb/host/fhci-dbg.c b/drivers/usb/host/fhci-dbg.c
index e799f86dab11..6fe550049119 100644
--- a/drivers/usb/host/fhci-dbg.c
+++ b/drivers/usb/host/fhci-dbg.c
@@ -20,7 +20,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 #include <linux/usb.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "fhci.h"
 
 void fhci_dbg_isr(struct fhci_hcd *fhci, int usb_er)
diff --git a/drivers/usb/host/fhci-hcd.c b/drivers/usb/host/fhci-hcd.c
index 15379c636143..90453379a434 100644
--- a/drivers/usb/host/fhci-hcd.c
+++ b/drivers/usb/host/fhci-hcd.c
@@ -25,12 +25,12 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/of_platform.h>
 #include <linux/of_gpio.h>
 #include <linux/slab.h>
 #include <asm/qe.h>
 #include <asm/fsl_gtm.h>
-#include "../core/hcd.h"
 #include "fhci.h"
 
 void fhci_start_sof_timer(struct fhci_hcd *fhci)
diff --git a/drivers/usb/host/fhci-hub.c b/drivers/usb/host/fhci-hub.c
index 0cfaedc3e124..348fe62e94f7 100644
--- a/drivers/usb/host/fhci-hub.c
+++ b/drivers/usb/host/fhci-hub.c
@@ -22,9 +22,9 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/gpio.h>
 #include <asm/qe.h>
-#include "../core/hcd.h"
 #include "fhci.h"
 
 /* virtual root hub specific descriptor */
diff --git a/drivers/usb/host/fhci-mem.c b/drivers/usb/host/fhci-mem.c
index 5591bfb499d1..b0b88f57a5ac 100644
--- a/drivers/usb/host/fhci-mem.c
+++ b/drivers/usb/host/fhci-mem.c
@@ -21,7 +21,7 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/usb.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "fhci.h"
 
 static void init_td(struct td *td)
diff --git a/drivers/usb/host/fhci-q.c b/drivers/usb/host/fhci-q.c
index f73c92359beb..03be7494a476 100644
--- a/drivers/usb/host/fhci-q.c
+++ b/drivers/usb/host/fhci-q.c
@@ -22,7 +22,7 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/usb.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "fhci.h"
 
 /* maps the hardware error code to the USB error code */
diff --git a/drivers/usb/host/fhci-sched.c b/drivers/usb/host/fhci-sched.c
index ff43747a614f..4f2cbdcc0273 100644
--- a/drivers/usb/host/fhci-sched.c
+++ b/drivers/usb/host/fhci-sched.c
@@ -24,9 +24,9 @@
 #include <linux/interrupt.h>
 #include <linux/io.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <asm/qe.h>
 #include <asm/fsl_gtm.h>
-#include "../core/hcd.h"
 #include "fhci.h"
 
 static void recycle_frame(struct fhci_usb *usb, struct packet *pkt)
diff --git a/drivers/usb/host/fhci-tds.c b/drivers/usb/host/fhci-tds.c
index 57013479d7f7..7be548ca2183 100644
--- a/drivers/usb/host/fhci-tds.c
+++ b/drivers/usb/host/fhci-tds.c
@@ -22,7 +22,7 @@
 #include <linux/list.h>
 #include <linux/io.h>
 #include <linux/usb.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "fhci.h"
 
 #define DUMMY_BD_BUFFER  0xdeadbeef
diff --git a/drivers/usb/host/fhci.h b/drivers/usb/host/fhci.h
index 72dae1c5ab38..649ab07308f2 100644
--- a/drivers/usb/host/fhci.h
+++ b/drivers/usb/host/fhci.h
@@ -25,8 +25,8 @@
 #include <linux/kfifo.h>
 #include <linux/io.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <asm/qe.h>
-#include "../core/hcd.h"
 
 #define USB_CLOCK	48000000
 
diff --git a/drivers/usb/host/imx21-hcd.c b/drivers/usb/host/imx21-hcd.c
index 8a12f297645f..ca0e98d8e1f4 100644
--- a/drivers/usb/host/imx21-hcd.c
+++ b/drivers/usb/host/imx21-hcd.c
@@ -56,8 +56,8 @@
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 
-#include "../core/hcd.h"
 #include "imx21-hcd.h"
 
 #ifdef DEBUG
diff --git a/drivers/usb/host/isp116x-hcd.c b/drivers/usb/host/isp116x-hcd.c
index 92de71dc7729..d9e82123de2a 100644
--- a/drivers/usb/host/isp116x-hcd.c
+++ b/drivers/usb/host/isp116x-hcd.c
@@ -65,6 +65,7 @@
 #include <linux/slab.h>
 #include <linux/usb.h>
 #include <linux/usb/isp116x.h>
+#include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
@@ -72,7 +73,6 @@
 #include <asm/system.h>
 #include <asm/byteorder.h>
 
-#include "../core/hcd.h"
 #include "isp116x.h"
 
 #define DRIVER_VERSION	"03 Nov 2005"
diff --git a/drivers/usb/host/isp1362-hcd.c b/drivers/usb/host/isp1362-hcd.c
index 217fb5170200..acc157da7275 100644
--- a/drivers/usb/host/isp1362-hcd.c
+++ b/drivers/usb/host/isp1362-hcd.c
@@ -77,6 +77,7 @@
 #include <linux/interrupt.h>
 #include <linux/usb.h>
 #include <linux/usb/isp1362.h>
+#include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
 #include <linux/pm.h>
 #include <linux/io.h>
@@ -95,7 +96,6 @@ module_param(dbg_level, int, 0);
 #define	STUB_DEBUG_FILE
 #endif
 
-#include "../core/hcd.h"
 #include "../core/usb.h"
 #include "isp1362.h"
 
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index 9f01293600b0..c7ac1d97d176 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/list.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/io.h>
@@ -21,7 +22,6 @@
 #include <asm/unaligned.h>
 #include <asm/cacheflush.h>
 
-#include "../core/hcd.h"
 #include "isp1760-hcd.h"
 
 static struct kmem_cache *qtd_cachep;
diff --git a/drivers/usb/host/isp1760-if.c b/drivers/usb/host/isp1760-if.c
index 4293cfd28d61..42a094605c92 100644
--- a/drivers/usb/host/isp1760-if.c
+++ b/drivers/usb/host/isp1760-if.c
@@ -13,8 +13,8 @@
 #include <linux/io.h>
 #include <linux/platform_device.h>
 #include <linux/usb/isp1760.h>
+#include <linux/usb/hcd.h>
 
-#include "../core/hcd.h"
 #include "isp1760-hcd.h"
 
 #ifdef CONFIG_PPC_OF
diff --git a/drivers/usb/host/ohci-hcd.c b/drivers/usb/host/ohci-hcd.c
index afe59be23645..d15d2478493e 100644
--- a/drivers/usb/host/ohci-hcd.c
+++ b/drivers/usb/host/ohci-hcd.c
@@ -32,6 +32,7 @@
 #include <linux/list.h>
 #include <linux/usb.h>
 #include <linux/usb/otg.h>
+#include <linux/usb/hcd.h>
 #include <linux/dma-mapping.h>
 #include <linux/dmapool.h>
 #include <linux/workqueue.h>
@@ -43,7 +44,6 @@
 #include <asm/unaligned.h>
 #include <asm/byteorder.h>
 
-#include "../core/hcd.h"
 
 #define DRIVER_AUTHOR "Roman Weissgaerber, David Brownell"
 #define DRIVER_DESC "USB 1.1 'Open' Host Controller (OHCI) Driver"
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index e62b30b3e429..2891203200ce 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -34,12 +34,11 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/moduleparam.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
 
-#include "../core/hcd.h"
-
 #include <asm/irq.h>
 #include <asm/system.h>
 #include <asm/unaligned.h>
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index d478ffad59b4..1398de140ead 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -33,6 +33,7 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
 #include <linux/io.h>
 #include <linux/mm.h>
@@ -40,7 +41,6 @@
 #include <linux/slab.h>
 #include <asm/cacheflush.h>
 
-#include "../core/hcd.h"
 #include "r8a66597.h"
 
 MODULE_DESCRIPTION("R8A66597 USB Host Controller Driver");
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 3b867a8af7b2..8f2f477890c4 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -45,6 +45,7 @@
 #include <linux/interrupt.h>
 #include <linux/usb.h>
 #include <linux/usb/sl811.h>
+#include <linux/usb/hcd.h>
 #include <linux/platform_device.h>
 
 #include <asm/io.h>
@@ -53,7 +54,6 @@
 #include <asm/byteorder.h>
 #include <asm/unaligned.h>
 
-#include "../core/hcd.h"
 #include "sl811.h"
 
 
diff --git a/drivers/usb/host/u132-hcd.c b/drivers/usb/host/u132-hcd.c
index 228f2b070f2b..fbd7adafd61c 100644
--- a/drivers/usb/host/u132-hcd.c
+++ b/drivers/usb/host/u132-hcd.c
@@ -49,6 +49,7 @@
 #include <linux/list.h>
 #include <linux/interrupt.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/workqueue.h>
 #include <linux/platform_device.h>
 #include <linux/mutex.h>
@@ -56,7 +57,6 @@
 #include <asm/irq.h>
 #include <asm/system.h>
 #include <asm/byteorder.h>
-#include "../core/hcd.h"
 
 	/* FIXME ohci.h is ONLY for internal use by the OHCI driver.
 	 * If you're going to try stuff like this, you need to split
diff --git a/drivers/usb/host/uhci-hcd.c b/drivers/usb/host/uhci-hcd.c
index 09197067fe6b..6637e52736dd 100644
--- a/drivers/usb/host/uhci-hcd.c
+++ b/drivers/usb/host/uhci-hcd.c
@@ -38,6 +38,7 @@
 #include <linux/dmapool.h>
 #include <linux/dma-mapping.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/bitops.h>
 #include <linux/dmi.h>
 
@@ -46,7 +47,6 @@
 #include <asm/irq.h>
 #include <asm/system.h>
 
-#include "../core/hcd.h"
 #include "uhci-hcd.h"
 #include "pci-quirks.h"
 
diff --git a/drivers/usb/host/xhci.h b/drivers/usb/host/xhci.h
index ea389e9a4931..a7c4e1122902 100644
--- a/drivers/usb/host/xhci.h
+++ b/drivers/usb/host/xhci.h
@@ -26,8 +26,8 @@
 #include <linux/usb.h>
 #include <linux/timer.h>
 #include <linux/kernel.h>
+#include <linux/usb/hcd.h>
 
-#include "../core/hcd.h"
 /* Code sharing between pci-quirks and xhci hcd */
 #include	"xhci-ext-caps.h"
 
diff --git a/drivers/usb/misc/ftdi-elan.c b/drivers/usb/misc/ftdi-elan.c
index 1edb6d361896..2300a51d48b7 100644
--- a/drivers/usb/misc/ftdi-elan.c
+++ b/drivers/usb/misc/ftdi-elan.c
@@ -73,7 +73,7 @@ static struct list_head ftdi_static_list;
 */
 #include "usb_u132.h"
 #include <asm/io.h>
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 
 	/* FIXME ohci.h is ONLY for internal use by the OHCI driver.
 	 * If you're going to try stuff like this, you need to split
diff --git a/drivers/usb/mon/mon_main.c b/drivers/usb/mon/mon_main.c
index e4af18b93c7d..812dc288bb8c 100644
--- a/drivers/usb/mon/mon_main.c
+++ b/drivers/usb/mon/mon_main.c
@@ -9,12 +9,13 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/usb.h>
+#include <linux/usb/hcd.h>
 #include <linux/slab.h>
 #include <linux/notifier.h>
 #include <linux/mutex.h>
 
 #include "usb_mon.h"
-#include "../core/hcd.h"
+
 
 static void mon_stop(struct mon_bus *mbus);
 static void mon_dissolve(struct mon_bus *mbus, struct usb_bus *ubus);
diff --git a/drivers/usb/musb/musb_core.h b/drivers/usb/musb/musb_core.h
index ac17b004909b..1da14bf17a65 100644
--- a/drivers/usb/musb/musb_core.h
+++ b/drivers/usb/musb/musb_core.h
@@ -69,7 +69,7 @@ struct musb_ep;
 #include "musb_regs.h"
 
 #include "musb_gadget.h"
-#include "../core/hcd.h"
+#include <linux/usb/hcd.h>
 #include "musb_host.h"
 
 
diff --git a/drivers/usb/wusbcore/wusbhc.h b/drivers/usb/wusbcore/wusbhc.h
index 759cda55f7c3..3d94c4247f46 100644
--- a/drivers/usb/wusbcore/wusbhc.h
+++ b/drivers/usb/wusbcore/wusbhc.h
@@ -58,9 +58,7 @@
 #include <linux/mutex.h>
 #include <linux/kref.h>
 #include <linux/workqueue.h>
-/* FIXME: Yes, I know: BAD--it's not my fault the USB HC iface is not
- *        public */
-#include <linux/../../drivers/usb/core/hcd.h>
+#include <linux/usb/hcd.h>
 #include <linux/uwb.h>
 #include <linux/usb/wusb.h>
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
new file mode 100644
index 000000000000..c7e575cb3149
--- /dev/null
+++ b/include/linux/usb/hcd.h
@@ -0,0 +1,578 @@
+/*
+ * Copyright (c) 2001-2002 by David Brownell
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef __USB_CORE_HCD_H
+#define __USB_CORE_HCD_H
+
+#ifdef __KERNEL__
+
+#include <linux/rwsem.h>
+
+#define MAX_TOPO_LEVEL		6
+
+/* This file contains declarations of usbcore internals that are mostly
+ * used or exposed by Host Controller Drivers.
+ */
+
+/*
+ * USB Packet IDs (PIDs)
+ */
+#define USB_PID_EXT			0xf0	/* USB 2.0 LPM ECN */
+#define USB_PID_OUT			0xe1
+#define USB_PID_ACK			0xd2
+#define USB_PID_DATA0			0xc3
+#define USB_PID_PING			0xb4	/* USB 2.0 */
+#define USB_PID_SOF			0xa5
+#define USB_PID_NYET			0x96	/* USB 2.0 */
+#define USB_PID_DATA2			0x87	/* USB 2.0 */
+#define USB_PID_SPLIT			0x78	/* USB 2.0 */
+#define USB_PID_IN			0x69
+#define USB_PID_NAK			0x5a
+#define USB_PID_DATA1			0x4b
+#define USB_PID_PREAMBLE		0x3c	/* Token mode */
+#define USB_PID_ERR			0x3c	/* USB 2.0: handshake mode */
+#define USB_PID_SETUP			0x2d
+#define USB_PID_STALL			0x1e
+#define USB_PID_MDATA			0x0f	/* USB 2.0 */
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * USB Host Controller Driver (usb_hcd) framework
+ *
+ * Since "struct usb_bus" is so thin, you can't share much code in it.
+ * This framework is a layer over that, and should be more sharable.
+ *
+ * @authorized_default: Specifies if new devices are authorized to
+ *                      connect by default or they require explicit
+ *                      user space authorization; this bit is settable
+ *                      through /sys/class/usb_host/X/authorized_default.
+ *                      For the rest is RO, so we don't lock to r/w it.
+ */
+
+/*-------------------------------------------------------------------------*/
+
+struct usb_hcd {
+
+	/*
+	 * housekeeping
+	 */
+	struct usb_bus		self;		/* hcd is-a bus */
+	struct kref		kref;		/* reference counter */
+
+	const char		*product_desc;	/* product/vendor string */
+	char			irq_descr[24];	/* driver + bus # */
+
+	struct timer_list	rh_timer;	/* drives root-hub polling */
+	struct urb		*status_urb;	/* the current status urb */
+#ifdef CONFIG_USB_SUSPEND
+	struct work_struct	wakeup_work;	/* for remote wakeup */
+#endif
+
+	/*
+	 * hardware info/state
+	 */
+	const struct hc_driver	*driver;	/* hw-specific hooks */
+
+	/* Flags that need to be manipulated atomically */
+	unsigned long		flags;
+#define HCD_FLAG_HW_ACCESSIBLE	0x00000001
+#define HCD_FLAG_SAW_IRQ	0x00000002
+
+	unsigned		rh_registered:1;/* is root hub registered? */
+
+	/* The next flag is a stopgap, to be removed when all the HCDs
+	 * support the new root-hub polling mechanism. */
+	unsigned		uses_new_polling:1;
+	unsigned		poll_rh:1;	/* poll for rh status? */
+	unsigned		poll_pending:1;	/* status has changed? */
+	unsigned		wireless:1;	/* Wireless USB HCD */
+	unsigned		authorized_default:1;
+	unsigned		has_tt:1;	/* Integrated TT in root hub */
+
+	int			irq;		/* irq allocated */
+	void __iomem		*regs;		/* device memory/io */
+	u64			rsrc_start;	/* memory/io resource start */
+	u64			rsrc_len;	/* memory/io resource length */
+	unsigned		power_budget;	/* in mA, 0 = no limit */
+
+	/* bandwidth_mutex should be taken before adding or removing
+	 * any new bus bandwidth constraints:
+	 *   1. Before adding a configuration for a new device.
+	 *   2. Before removing the configuration to put the device into
+	 *      the addressed state.
+	 *   3. Before selecting a different configuration.
+	 *   4. Before selecting an alternate interface setting.
+	 *
+	 * bandwidth_mutex should be dropped after a successful control message
+	 * to the device, or resetting the bandwidth after a failed attempt.
+	 */
+	struct mutex		bandwidth_mutex;
+
+
+#define HCD_BUFFER_POOLS	4
+	struct dma_pool		*pool [HCD_BUFFER_POOLS];
+
+	int			state;
+#	define	__ACTIVE		0x01
+#	define	__SUSPEND		0x04
+#	define	__TRANSIENT		0x80
+
+#	define	HC_STATE_HALT		0
+#	define	HC_STATE_RUNNING	(__ACTIVE)
+#	define	HC_STATE_QUIESCING	(__SUSPEND|__TRANSIENT|__ACTIVE)
+#	define	HC_STATE_RESUMING	(__SUSPEND|__TRANSIENT)
+#	define	HC_STATE_SUSPENDED	(__SUSPEND)
+
+#define	HC_IS_RUNNING(state) ((state) & __ACTIVE)
+#define	HC_IS_SUSPENDED(state) ((state) & __SUSPEND)
+
+	/* more shared queuing code would be good; it should support
+	 * smarter scheduling, handle transaction translators, etc;
+	 * input size of periodic table to an interrupt scheduler.
+	 * (ohci 32, uhci 1024, ehci 256/512/1024).
+	 */
+
+	/* The HC driver's private data is stored at the end of
+	 * this structure.
+	 */
+	unsigned long hcd_priv[0]
+			__attribute__ ((aligned(sizeof(unsigned long))));
+};
+
+/* Return the usb_bus embedded in @hcd ('self'); 2.4 does this a bit differently ... */
+static inline struct usb_bus *hcd_to_bus(struct usb_hcd *hcd)
+{
+	return &hcd->self;	/* address of the embedded member, not a copy */
+}
+
+static inline struct usb_hcd *bus_to_hcd(struct usb_bus *bus)
+{	/* map a usb_bus back to the usb_hcd that contains it */
+	return container_of(bus, struct usb_hcd, self);	/* @bus must be the 'self' member of a usb_hcd */
+}
+
+struct hcd_timeout {	/* timeouts we allocate */
+	struct list_head	timeout_list;
+	struct timer_list	timer;
+};
+
+/*-------------------------------------------------------------------------*/
+
+
+struct hc_driver {
+	const char	*description;	/* "ehci-hcd" etc */
+	const char	*product_desc;	/* product/vendor string */
+	size_t		hcd_priv_size;	/* size of private data */
+
+	/* irq handler */
+	irqreturn_t	(*irq) (struct usb_hcd *hcd);
+
+	int	flags;
+#define	HCD_MEMORY	0x0001		/* HC regs use memory (else I/O) */
+#define	HCD_LOCAL_MEM	0x0002		/* HC needs local memory */
+#define	HCD_USB11	0x0010		/* USB 1.1 */
+#define	HCD_USB2	0x0020		/* USB 2.0 */
+#define	HCD_USB3	0x0040		/* USB 3.0 */
+#define	HCD_MASK	0x0070
+
+	/* called to init HCD and root hub */
+	int	(*reset) (struct usb_hcd *hcd);
+	int	(*start) (struct usb_hcd *hcd);
+
+	/* NOTE:  these suspend/resume calls relate to the HC as
+	 * a whole, not just the root hub; they're for PCI bus glue.
+	 */
+	/* called after suspending the hub, before entering D3 etc */
+	int	(*pci_suspend)(struct usb_hcd *hcd);
+
+	/* called after entering D0 (etc), before resuming the hub */
+	int	(*pci_resume)(struct usb_hcd *hcd, bool hibernated);
+
+	/* cleanly make HCD stop writing memory and doing I/O */
+	void	(*stop) (struct usb_hcd *hcd);
+
+	/* shutdown HCD */
+	void	(*shutdown) (struct usb_hcd *hcd);
+
+	/* return current frame number */
+	int	(*get_frame_number) (struct usb_hcd *hcd);
+
+	/* manage i/o requests, device state */
+	int	(*urb_enqueue)(struct usb_hcd *hcd,
+				struct urb *urb, gfp_t mem_flags);
+	int	(*urb_dequeue)(struct usb_hcd *hcd,
+				struct urb *urb, int status);
+
+	/* hw synch, freeing endpoint resources that urb_dequeue can't */
+	void 	(*endpoint_disable)(struct usb_hcd *hcd,
+			struct usb_host_endpoint *ep);
+
+	/* (optional) reset any endpoint state such as sequence number
+	   and current window */
+	void 	(*endpoint_reset)(struct usb_hcd *hcd,
+			struct usb_host_endpoint *ep);
+
+	/* root hub support */
+	int	(*hub_status_data) (struct usb_hcd *hcd, char *buf);
+	int	(*hub_control) (struct usb_hcd *hcd,
+				u16 typeReq, u16 wValue, u16 wIndex,
+				char *buf, u16 wLength);
+	int	(*bus_suspend)(struct usb_hcd *);
+	int	(*bus_resume)(struct usb_hcd *);
+	int	(*start_port_reset)(struct usb_hcd *, unsigned port_num);
+
+		/* force handover of high-speed port to full-speed companion */
+	void	(*relinquish_port)(struct usb_hcd *, int);
+		/* has a port been handed over to a companion? */
+	int	(*port_handed_over)(struct usb_hcd *, int);
+
+		/* CLEAR_TT_BUFFER completion callback */
+	void	(*clear_tt_buffer_complete)(struct usb_hcd *,
+				struct usb_host_endpoint *);
+
+	/* xHCI specific functions */
+		/* Called by usb_alloc_dev to alloc HC device structures */
+	int	(*alloc_dev)(struct usb_hcd *, struct usb_device *);
+		/* Called by usb_disconnect to free HC device structures */
+	void	(*free_dev)(struct usb_hcd *, struct usb_device *);
+
+	/* Bandwidth computation functions */
+	/* Note that add_endpoint() can only be called once per endpoint before
+	 * check_bandwidth() or reset_bandwidth() must be called.
+	 * drop_endpoint() can only be called once per endpoint also.
+	 * A call to xhci_drop_endpoint() followed by a call to xhci_add_endpoint() will
+	 * add the endpoint to the schedule with possibly new parameters denoted by a
+	 * different endpoint descriptor in usb_host_endpoint.
+	 * A call to xhci_add_endpoint() followed by a call to xhci_drop_endpoint() is
+	 * not allowed.
+	 */
+		/* Allocate endpoint resources and add them to a new schedule */
+	int 	(*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
+		/* Drop an endpoint from a new schedule */
+	int 	(*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
+		/* Check that a new hardware configuration, set using
+		 * endpoint_enable and endpoint_disable, does not exceed bus
+		 * bandwidth.  This must be called before any set configuration
+		 * or set interface requests are sent to the device.
+		 */
+	int	(*check_bandwidth)(struct usb_hcd *, struct usb_device *);
+		/* Reset the device schedule to the last known good schedule,
+		 * which was set from a previous successful call to
+		 * check_bandwidth().  This reverts any add_endpoint() and
+		 * drop_endpoint() calls since that last successful call.
+		 * Used for when a check_bandwidth() call fails due to resource
+		 * or bandwidth constraints.
+		 */
+	void	(*reset_bandwidth)(struct usb_hcd *, struct usb_device *);
+		/* Returns the hardware-chosen device address */
+	int	(*address_device)(struct usb_hcd *, struct usb_device *udev);
+		/* Notifies the HCD after a hub descriptor is fetched.
+		 * Will block.
+		 */
+	int	(*update_hub_device)(struct usb_hcd *, struct usb_device *hdev,
+			struct usb_tt *tt, gfp_t mem_flags);
+	int	(*reset_device)(struct usb_hcd *, struct usb_device *);
+};
+
+extern int usb_hcd_link_urb_to_ep(struct usb_hcd *hcd, struct urb *urb);
+extern int usb_hcd_check_unlink_urb(struct usb_hcd *hcd, struct urb *urb,
+		int status);
+extern void usb_hcd_unlink_urb_from_ep(struct usb_hcd *hcd, struct urb *urb);
+
+extern int usb_hcd_submit_urb(struct urb *urb, gfp_t mem_flags);
+extern int usb_hcd_unlink_urb(struct urb *urb, int status);
+extern void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb,
+		int status);
+extern void usb_hcd_flush_endpoint(struct usb_device *udev,
+		struct usb_host_endpoint *ep);
+extern void usb_hcd_disable_endpoint(struct usb_device *udev,
+		struct usb_host_endpoint *ep);
+extern void usb_hcd_reset_endpoint(struct usb_device *udev,
+		struct usb_host_endpoint *ep);
+extern void usb_hcd_synchronize_unlinks(struct usb_device *udev);
+extern int usb_hcd_alloc_bandwidth(struct usb_device *udev,
+		struct usb_host_config *new_config,
+		struct usb_host_interface *old_alt,
+		struct usb_host_interface *new_alt);
+extern int usb_hcd_get_frame_number(struct usb_device *udev);
+
+extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver,
+		struct device *dev, const char *bus_name);
+extern struct usb_hcd *usb_get_hcd(struct usb_hcd *hcd);
+extern void usb_put_hcd(struct usb_hcd *hcd);
+extern int usb_add_hcd(struct usb_hcd *hcd,
+		unsigned int irqnum, unsigned long irqflags);
+extern void usb_remove_hcd(struct usb_hcd *hcd);
+
+struct platform_device;
+extern void usb_hcd_platform_shutdown(struct platform_device *dev);
+
+#ifdef CONFIG_PCI
+struct pci_dev;
+struct pci_device_id;
+extern int usb_hcd_pci_probe(struct pci_dev *dev,
+				const struct pci_device_id *id);
+extern void usb_hcd_pci_remove(struct pci_dev *dev);
+extern void usb_hcd_pci_shutdown(struct pci_dev *dev);
+
+#ifdef CONFIG_PM_SLEEP
+extern const struct dev_pm_ops usb_hcd_pci_pm_ops;
+#endif
+#endif /* CONFIG_PCI */
+
+/* pci-ish (pdev null is ok) buffer alloc/mapping support */
+int hcd_buffer_create(struct usb_hcd *hcd);
+void hcd_buffer_destroy(struct usb_hcd *hcd);
+
+void *hcd_buffer_alloc(struct usb_bus *bus, size_t size,
+	gfp_t mem_flags, dma_addr_t *dma);
+void hcd_buffer_free(struct usb_bus *bus, size_t size,
+	void *addr, dma_addr_t dma);
+
+/* generic bus glue, needed for host controllers that don't use PCI */
+extern irqreturn_t usb_hcd_irq(int irq, void *__hcd);
+
+extern void usb_hc_died(struct usb_hcd *hcd);
+extern void usb_hcd_poll_rh_status(struct usb_hcd *hcd);
+
+/* The D0/D1 toggle bits ... USE WITH CAUTION (they're almost hcd-internal) */
+#define usb_gettoggle(dev, ep, out) (((dev)->toggle[out] >> (ep)) & 1)
+#define	usb_dotoggle(dev, ep, out)  ((dev)->toggle[out] ^= (1 << (ep)))
+#define usb_settoggle(dev, ep, out, bit) \
+		((dev)->toggle[out] = ((dev)->toggle[out] & ~(1 << (ep))) | \
+		 ((bit) << (ep)))
+
+/* -------------------------------------------------------------------------- */
+
+/* Enumeration is only for the hub driver, or HCD virtual root hubs */
+extern struct usb_device *usb_alloc_dev(struct usb_device *parent,
+					struct usb_bus *, unsigned port);
+extern int usb_new_device(struct usb_device *dev);
+extern void usb_disconnect(struct usb_device **);
+
+extern int usb_get_configuration(struct usb_device *dev);
+extern void usb_destroy_configuration(struct usb_device *dev);
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * HCD Root Hub support
+ */
+
+#include <linux/../../drivers/usb/core/hub.h>
+
+/* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */
+#define DeviceRequest \
+	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
+#define DeviceOutRequest \
+	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_DEVICE)<<8)
+
+#define InterfaceRequest \
+	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_INTERFACE)<<8)
+
+#define EndpointRequest \
+	((USB_DIR_IN|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)	/* IN, standard, recipient = endpoint */
+#define EndpointOutRequest \
+	((USB_DIR_OUT|USB_TYPE_STANDARD|USB_RECIP_ENDPOINT)<<8)	/* OUT, standard, recipient = endpoint */
+
+/* class requests from the USB 2.0 hub spec, table 11-15 */
+/* GetBusState and SetHubDescriptor are optional, omitted */
+#define ClearHubFeature		(0x2000 | USB_REQ_CLEAR_FEATURE)
+#define ClearPortFeature	(0x2300 | USB_REQ_CLEAR_FEATURE)
+#define GetHubDescriptor	(0xa000 | USB_REQ_GET_DESCRIPTOR)
+#define GetHubStatus		(0xa000 | USB_REQ_GET_STATUS)
+#define GetPortStatus		(0xa300 | USB_REQ_GET_STATUS)
+#define SetHubFeature		(0x2000 | USB_REQ_SET_FEATURE)
+#define SetPortFeature		(0x2300 | USB_REQ_SET_FEATURE)
+
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Generic bandwidth allocation constants/support
+ */
+#define FRAME_TIME_USECS	1000L
+#define BitTime(bytecount) (7 * 8 * (bytecount) / 6) /* with integer truncation */
+		/* Trying not to use worst-case bit-stuffing
+		 * of (7/6 * 8 * bytecount) = 9.33 * bytecount */
+		/* bytecount = data payload byte count; parenthesized so expression arguments expand correctly */
+
+#define NS_TO_US(ns)	(((ns) + 500L) / 1000L)
+			/* convert & round nanoseconds to microseconds; (ns) is parenthesized so expression arguments are safe */
+
+
+/*
+ * Full/low speed bandwidth allocation constants/support.
+ */
+#define BW_HOST_DELAY	1000L		/* nanoseconds */
+#define BW_HUB_LS_SETUP	333L		/* nanoseconds */
+			/* 4 full-speed bit times (est.) */
+
+#define FRAME_TIME_BITS			12000L	/* frame = 1 millisecond */
+#define FRAME_TIME_MAX_BITS_ALLOC	(90L * FRAME_TIME_BITS / 100L)
+#define FRAME_TIME_MAX_USECS_ALLOC	(90L * FRAME_TIME_USECS / 100L)
+
+/*
+ * Ceiling [nano/micro]seconds (typical) for that many bytes at high speed
+ * ISO is a bit less, no ACK ... from USB 2.0 spec, 5.11.3 (and needed
+ * to preallocate bandwidth)
+ */
+#define USB2_HOST_DELAY	5	/* nsec, guess */
+#define HS_NSECS(bytes) (((55 * 8 * 2083) \
+	+ (2083UL * (3 + BitTime(bytes))))/1000 \
+	+ USB2_HOST_DELAY)
+#define HS_NSECS_ISO(bytes) (((38 * 8 * 2083) \
+	+ (2083UL * (3 + BitTime(bytes))))/1000 \
+	+ USB2_HOST_DELAY)
+#define HS_USECS(bytes) NS_TO_US (HS_NSECS(bytes))
+#define HS_USECS_ISO(bytes) NS_TO_US (HS_NSECS_ISO(bytes))
+
+extern long usb_calc_bus_time(int speed, int is_input,
+			int isoc, int bytecount);
+
+/*-------------------------------------------------------------------------*/
+
+extern void usb_set_device_state(struct usb_device *udev,
+		enum usb_device_state new_state);
+
+/*-------------------------------------------------------------------------*/
+
+/* exported only within usbcore */
+
+extern struct list_head usb_bus_list;
+extern struct mutex usb_bus_list_lock;
+extern wait_queue_head_t usb_kill_urb_queue;
+
+extern int usb_find_interface_driver(struct usb_device *dev,
+	struct usb_interface *interface);
+
+#define usb_endpoint_out(ep_dir)	(!((ep_dir) & USB_DIR_IN))
+
+#ifdef CONFIG_PM
+extern void usb_root_hub_lost_power(struct usb_device *rhdev);
+extern int hcd_bus_suspend(struct usb_device *rhdev, pm_message_t msg);
+extern int hcd_bus_resume(struct usb_device *rhdev, pm_message_t msg);
+#endif /* CONFIG_PM */
+
+#ifdef CONFIG_USB_SUSPEND
+extern void usb_hcd_resume_root_hub(struct usb_hcd *hcd);
+#else
+static inline void usb_hcd_resume_root_hub(struct usb_hcd *hcd)
+{	/* stub used when CONFIG_USB_SUSPEND is not defined */
+	return;	/* intentionally does nothing */
+}
+#endif /* CONFIG_USB_SUSPEND */
+
+
+/*
+ * USB device fs stuff
+ */
+
+#ifdef CONFIG_USB_DEVICEFS
+
+/*
+ * these are expected to be called from the USB core/hub thread
+ * with the kernel lock held
+ */
+extern void usbfs_update_special(void);
+extern int usbfs_init(void);
+extern void usbfs_cleanup(void);
+
+#else /* CONFIG_USB_DEVICEFS */
+
+static inline void usbfs_update_special(void) {}
+static inline int usbfs_init(void) { return 0; }
+static inline void usbfs_cleanup(void) { }
+
+#endif /* CONFIG_USB_DEVICEFS */
+
+/*-------------------------------------------------------------------------*/
+
+#if defined(CONFIG_USB_MON) || defined(CONFIG_USB_MON_MODULE)
+
+struct usb_mon_operations {
+	void (*urb_submit)(struct usb_bus *bus, struct urb *urb);
+	void (*urb_submit_error)(struct usb_bus *bus, struct urb *urb, int err);
+	void (*urb_complete)(struct usb_bus *bus, struct urb *urb, int status);
+	/* void (*urb_unlink)(struct usb_bus *bus, struct urb *urb); */
+};
+
+extern struct usb_mon_operations *mon_ops;
+
+static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb)
+{	/* forward an URB submit event to mon_ops, only when @bus is flagged as monitored */
+	if (bus->monitored)
+		(*mon_ops->urb_submit)(bus, urb);
+}
+
+static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb,
+    int error)
+{	/* forward a failed-submit event (with its error code) to mon_ops when @bus is monitored */
+	if (bus->monitored)
+		(*mon_ops->urb_submit_error)(bus, urb, error);
+}
+
+static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
+		int status)
+{	/* forward an URB completion event (with its status) to mon_ops when @bus is monitored */
+	if (bus->monitored)
+		(*mon_ops->urb_complete)(bus, urb, status);
+}
+
+int usb_mon_register(struct usb_mon_operations *ops);
+void usb_mon_deregister(void);
+
+#else
+
+static inline void usbmon_urb_submit(struct usb_bus *bus, struct urb *urb) {}
+static inline void usbmon_urb_submit_error(struct usb_bus *bus, struct urb *urb,
+    int error) {}
+static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
+		int status) {}
+
+#endif /* CONFIG_USB_MON || CONFIG_USB_MON_MODULE */
+
+/*-------------------------------------------------------------------------*/
+
+/* hub.h ... DeviceRemovable in 2.4.2-ac11, gone in 2.4.10 */
+/* bleech -- resurfaced in 2.4.11 or 2.4.12 */
+#define bitmap 	DeviceRemovable
+
+
+/*-------------------------------------------------------------------------*/
+
+/* random stuff */
+
+#define	RUN_CONTEXT (in_irq() ? "in_irq" \
+		: (in_interrupt() ? "in_interrupt" : "can sleep"))
+
+
+/* This rwsem is for use only by the hub driver and ehci-hcd.
+ * Nobody else should touch it.
+ */
+extern struct rw_semaphore ehci_cf_port_reset_rwsem;
+
+/* Keep track of which host controller drivers are loaded */
+#define USB_UHCI_LOADED		0
+#define USB_OHCI_LOADED		1
+#define USB_EHCI_LOADED		2
+extern unsigned long usb_hcds_loaded;
+
+#endif /* __KERNEL__ */
+
+#endif /* __USB_CORE_HCD_H */
-- 
cgit v1.2.3


From d65d7e7ef3dc5e61e4dfaac54ec6d3e97f9a1108 Mon Sep 17 00:00:00 2001
From: Eric Lescouet <Eric.Lescouet@virtuallogix.com>
Date: Sat, 24 Apr 2010 23:34:27 +0200
Subject: USB: make hub.h public (drivers dependency)

The usbcore headers: hcd.h and hub.h are shared between usbcore,
HCDs and a couple of other drivers (e.g. USBIP modules).
So, it makes sense to move them into a more public location and
to clean up the dependency of those modules on kernel-internal headers.
This patch moves hub.h from drivers/usb/core into include/linux/usb/

Signed-of-by: Eric Lescouet <eric@lescouet.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hub.h  | 205 ------------------------------------------------
 include/linux/usb/hcd.h |   2 +-
 include/linux/usb/hub.h | 205 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 206 insertions(+), 206 deletions(-)
 delete mode 100644 drivers/usb/core/hub.h
 create mode 100644 include/linux/usb/hub.h

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.h b/drivers/usb/core/hub.h
deleted file mode 100644
index de8081f065ed..000000000000
--- a/drivers/usb/core/hub.h
+++ /dev/null
@@ -1,205 +0,0 @@
-#ifndef __LINUX_HUB_H
-#define __LINUX_HUB_H
-
-/*
- * Hub protocol and driver data structures.
- *
- * Some of these are known to the "virtual root hub" code
- * in host controller drivers.
- */
-
-#include <linux/list.h>
-#include <linux/workqueue.h>
-#include <linux/compiler.h>	/* likely()/unlikely() */
-
-/*
- * Hub request types
- */
-
-#define USB_RT_HUB	(USB_TYPE_CLASS | USB_RECIP_DEVICE)
-#define USB_RT_PORT	(USB_TYPE_CLASS | USB_RECIP_OTHER)
-
-/*
- * Hub class requests
- * See USB 2.0 spec Table 11-16
- */
-#define HUB_CLEAR_TT_BUFFER	8
-#define HUB_RESET_TT		9
-#define HUB_GET_TT_STATE	10
-#define HUB_STOP_TT		11
-
-/*
- * Hub Class feature numbers
- * See USB 2.0 spec Table 11-17
- */
-#define C_HUB_LOCAL_POWER	0
-#define C_HUB_OVER_CURRENT	1
-
-/*
- * Port feature numbers
- * See USB 2.0 spec Table 11-17
- */
-#define USB_PORT_FEAT_CONNECTION	0
-#define USB_PORT_FEAT_ENABLE		1
-#define USB_PORT_FEAT_SUSPEND		2	/* L2 suspend */
-#define USB_PORT_FEAT_OVER_CURRENT	3
-#define USB_PORT_FEAT_RESET		4
-#define USB_PORT_FEAT_L1		5	/* L1 suspend */
-#define USB_PORT_FEAT_POWER		8
-#define USB_PORT_FEAT_LOWSPEED		9
-/* This value was never in Table 11-17 */
-#define USB_PORT_FEAT_HIGHSPEED		10
-/* This value is also fake */
-#define USB_PORT_FEAT_SUPERSPEED	11
-#define USB_PORT_FEAT_C_CONNECTION	16
-#define USB_PORT_FEAT_C_ENABLE		17
-#define USB_PORT_FEAT_C_SUSPEND		18
-#define USB_PORT_FEAT_C_OVER_CURRENT	19
-#define USB_PORT_FEAT_C_RESET		20
-#define USB_PORT_FEAT_TEST              21
-#define USB_PORT_FEAT_INDICATOR         22
-#define USB_PORT_FEAT_C_PORT_L1         23
-
-/*
- * Hub Status and Hub Change results
- * See USB 2.0 spec Table 11-19 and Table 11-20
- */
-struct usb_port_status {
-	__le16 wPortStatus;
-	__le16 wPortChange;
-} __attribute__ ((packed));
-
-/*
- * wPortStatus bit field
- * See USB 2.0 spec Table 11-21
- */
-#define USB_PORT_STAT_CONNECTION	0x0001
-#define USB_PORT_STAT_ENABLE		0x0002
-#define USB_PORT_STAT_SUSPEND		0x0004
-#define USB_PORT_STAT_OVERCURRENT	0x0008
-#define USB_PORT_STAT_RESET		0x0010
-#define USB_PORT_STAT_L1		0x0020
-/* bits 6 to 7 are reserved */
-#define USB_PORT_STAT_POWER		0x0100
-#define USB_PORT_STAT_LOW_SPEED		0x0200
-#define USB_PORT_STAT_HIGH_SPEED        0x0400
-#define USB_PORT_STAT_TEST              0x0800
-#define USB_PORT_STAT_INDICATOR         0x1000
-/* bits 13 to 15 are reserved */
-
-/*
- * wPortChange bit field
- * See USB 2.0 spec Table 11-22
- * Bits 0 to 4 shown, bits 5 to 15 are reserved
- */
-#define USB_PORT_STAT_C_CONNECTION	0x0001
-#define USB_PORT_STAT_C_ENABLE		0x0002
-#define USB_PORT_STAT_C_SUSPEND		0x0004
-#define USB_PORT_STAT_C_OVERCURRENT	0x0008
-#define USB_PORT_STAT_C_RESET		0x0010
-#define USB_PORT_STAT_C_L1		0x0020
-
-/*
- * wHubCharacteristics (masks)
- * See USB 2.0 spec Table 11-13, offset 3
- */
-#define HUB_CHAR_LPSM		0x0003 /* D1 .. D0 */
-#define HUB_CHAR_COMPOUND	0x0004 /* D2       */
-#define HUB_CHAR_OCPM		0x0018 /* D4 .. D3 */
-#define HUB_CHAR_TTTT           0x0060 /* D6 .. D5 */
-#define HUB_CHAR_PORTIND        0x0080 /* D7       */
-
-struct usb_hub_status {
-	__le16 wHubStatus;
-	__le16 wHubChange;
-} __attribute__ ((packed));
-
-/*
- * Hub Status & Hub Change bit masks
- * See USB 2.0 spec Table 11-19 and Table 11-20
- * Bits 0 and 1 for wHubStatus and wHubChange
- * Bits 2 to 15 are reserved for both
- */
-#define HUB_STATUS_LOCAL_POWER	0x0001
-#define HUB_STATUS_OVERCURRENT	0x0002
-#define HUB_CHANGE_LOCAL_POWER	0x0001
-#define HUB_CHANGE_OVERCURRENT	0x0002
-
-
-/*
- * Hub descriptor
- * See USB 2.0 spec Table 11-13
- */
-
-#define USB_DT_HUB			(USB_TYPE_CLASS | 0x09)
-#define USB_DT_HUB_NONVAR_SIZE		7
-
-struct usb_hub_descriptor {
-	__u8  bDescLength;
-	__u8  bDescriptorType;
-	__u8  bNbrPorts;
-	__le16 wHubCharacteristics;
-	__u8  bPwrOn2PwrGood;
-	__u8  bHubContrCurrent;
-		/* add 1 bit for hub status change; round to bytes */
-	__u8  DeviceRemovable[(USB_MAXCHILDREN + 1 + 7) / 8];
-	__u8  PortPwrCtrlMask[(USB_MAXCHILDREN + 1 + 7) / 8];
-} __attribute__ ((packed));
-
-
-/* port indicator status selectors, tables 11-7 and 11-25 */
-#define HUB_LED_AUTO	0
-#define HUB_LED_AMBER	1
-#define HUB_LED_GREEN	2
-#define HUB_LED_OFF	3
-
-enum hub_led_mode {
-	INDICATOR_AUTO = 0,
-	INDICATOR_CYCLE,
-	/* software blinks for attention:  software, hardware, reserved */
-	INDICATOR_GREEN_BLINK, INDICATOR_GREEN_BLINK_OFF,
-	INDICATOR_AMBER_BLINK, INDICATOR_AMBER_BLINK_OFF,
-	INDICATOR_ALT_BLINK, INDICATOR_ALT_BLINK_OFF
-} __attribute__ ((packed));
-
-struct usb_device;
-
-/* Transaction Translator Think Times, in bits */
-#define HUB_TTTT_8_BITS		0x00
-#define HUB_TTTT_16_BITS	0x20
-#define HUB_TTTT_24_BITS	0x40
-#define HUB_TTTT_32_BITS	0x60
-
-/*
- * As of USB 2.0, full/low speed devices are segregated into trees.
- * One type grows from USB 1.1 host controllers (OHCI, UHCI etc).
- * The other type grows from high speed hubs when they connect to
- * full/low speed devices using "Transaction Translators" (TTs).
- *
- * TTs should only be known to the hub driver, and high speed bus
- * drivers (only EHCI for now).  They affect periodic scheduling and
- * sometimes control/bulk error recovery.
- */
-struct usb_tt {
-	struct usb_device	*hub;	/* upstream highspeed hub */
-	int			multi;	/* true means one TT per port */
-	unsigned		think_time;	/* think time in ns */
-
-	/* for control/bulk error recovery (CLEAR_TT_BUFFER) */
-	spinlock_t		lock;
-	struct list_head	clear_list;	/* of usb_tt_clear */
-	struct work_struct	clear_work;
-};
-
-struct usb_tt_clear {
-	struct list_head	clear_list;
-	unsigned		tt;
-	u16			devinfo;
-	struct usb_hcd		*hcd;
-	struct usb_host_endpoint	*ep;
-};
-
-extern int usb_hub_clear_tt_buffer(struct urb *urb);
-extern void usb_ep0_reinit(struct usb_device *);
-
-#endif /* __LINUX_HUB_H */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index c7e575cb3149..ca228f00b826 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -374,7 +374,7 @@ extern void usb_destroy_configuration(struct usb_device *dev);
  * HCD Root Hub support
  */
 
-#include <linux/../../drivers/usb/core/hub.h>
+#include <linux/usb/hub.h>
 
 /* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */
 #define DeviceRequest \
diff --git a/include/linux/usb/hub.h b/include/linux/usb/hub.h
new file mode 100644
index 000000000000..de8081f065ed
--- /dev/null
+++ b/include/linux/usb/hub.h
@@ -0,0 +1,205 @@
+#ifndef __LINUX_HUB_H
+#define __LINUX_HUB_H
+
+/*
+ * Hub protocol and driver data structures.
+ *
+ * Some of these are known to the "virtual root hub" code
+ * in host controller drivers.
+ */
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/compiler.h>	/* likely()/unlikely() */
+
+/*
+ * Hub request types
+ */
+
+#define USB_RT_HUB	(USB_TYPE_CLASS | USB_RECIP_DEVICE)
+#define USB_RT_PORT	(USB_TYPE_CLASS | USB_RECIP_OTHER)
+
+/*
+ * Hub class requests
+ * See USB 2.0 spec Table 11-16
+ */
+#define HUB_CLEAR_TT_BUFFER	8
+#define HUB_RESET_TT		9
+#define HUB_GET_TT_STATE	10
+#define HUB_STOP_TT		11
+
+/*
+ * Hub Class feature numbers
+ * See USB 2.0 spec Table 11-17
+ */
+#define C_HUB_LOCAL_POWER	0
+#define C_HUB_OVER_CURRENT	1
+
+/*
+ * Port feature numbers
+ * See USB 2.0 spec Table 11-17
+ */
+#define USB_PORT_FEAT_CONNECTION	0
+#define USB_PORT_FEAT_ENABLE		1
+#define USB_PORT_FEAT_SUSPEND		2	/* L2 suspend */
+#define USB_PORT_FEAT_OVER_CURRENT	3
+#define USB_PORT_FEAT_RESET		4
+#define USB_PORT_FEAT_L1		5	/* L1 suspend */
+#define USB_PORT_FEAT_POWER		8
+#define USB_PORT_FEAT_LOWSPEED		9
+/* This value was never in Table 11-17 */
+#define USB_PORT_FEAT_HIGHSPEED		10
+/* This value is also fake */
+#define USB_PORT_FEAT_SUPERSPEED	11
+#define USB_PORT_FEAT_C_CONNECTION	16
+#define USB_PORT_FEAT_C_ENABLE		17
+#define USB_PORT_FEAT_C_SUSPEND		18
+#define USB_PORT_FEAT_C_OVER_CURRENT	19
+#define USB_PORT_FEAT_C_RESET		20
+#define USB_PORT_FEAT_TEST              21
+#define USB_PORT_FEAT_INDICATOR         22
+#define USB_PORT_FEAT_C_PORT_L1         23
+
+/*
+ * Hub Status and Hub Change results
+ * See USB 2.0 spec Table 11-19 and Table 11-20
+ */
+struct usb_port_status {
+	__le16 wPortStatus;
+	__le16 wPortChange;
+} __attribute__ ((packed));
+
+/*
+ * wPortStatus bit field
+ * See USB 2.0 spec Table 11-21
+ */
+#define USB_PORT_STAT_CONNECTION	0x0001
+#define USB_PORT_STAT_ENABLE		0x0002
+#define USB_PORT_STAT_SUSPEND		0x0004
+#define USB_PORT_STAT_OVERCURRENT	0x0008
+#define USB_PORT_STAT_RESET		0x0010
+#define USB_PORT_STAT_L1		0x0020
+/* bits 6 to 7 are reserved */
+#define USB_PORT_STAT_POWER		0x0100
+#define USB_PORT_STAT_LOW_SPEED		0x0200
+#define USB_PORT_STAT_HIGH_SPEED        0x0400
+#define USB_PORT_STAT_TEST              0x0800
+#define USB_PORT_STAT_INDICATOR         0x1000
+/* bits 13 to 15 are reserved */
+
+/*
+ * wPortChange bit field
+ * See USB 2.0 spec Table 11-22
+ * Bits 0 to 4 shown, bits 5 to 15 are reserved
+ */
+#define USB_PORT_STAT_C_CONNECTION	0x0001
+#define USB_PORT_STAT_C_ENABLE		0x0002
+#define USB_PORT_STAT_C_SUSPEND		0x0004
+#define USB_PORT_STAT_C_OVERCURRENT	0x0008
+#define USB_PORT_STAT_C_RESET		0x0010
+#define USB_PORT_STAT_C_L1		0x0020
+
+/*
+ * wHubCharacteristics (masks)
+ * See USB 2.0 spec Table 11-13, offset 3
+ */
+#define HUB_CHAR_LPSM		0x0003 /* D1 .. D0 */
+#define HUB_CHAR_COMPOUND	0x0004 /* D2       */
+#define HUB_CHAR_OCPM		0x0018 /* D4 .. D3 */
+#define HUB_CHAR_TTTT           0x0060 /* D6 .. D5 */
+#define HUB_CHAR_PORTIND        0x0080 /* D7       */
+
+struct usb_hub_status {
+	__le16 wHubStatus;
+	__le16 wHubChange;
+} __attribute__ ((packed));
+
+/*
+ * Hub Status & Hub Change bit masks
+ * See USB 2.0 spec Table 11-19 and Table 11-20
+ * Bits 0 and 1 for wHubStatus and wHubChange
+ * Bits 2 to 15 are reserved for both
+ */
+#define HUB_STATUS_LOCAL_POWER	0x0001
+#define HUB_STATUS_OVERCURRENT	0x0002
+#define HUB_CHANGE_LOCAL_POWER	0x0001
+#define HUB_CHANGE_OVERCURRENT	0x0002
+
+
+/*
+ * Hub descriptor
+ * See USB 2.0 spec Table 11-13
+ */
+
+#define USB_DT_HUB			(USB_TYPE_CLASS | 0x09)
+#define USB_DT_HUB_NONVAR_SIZE		7
+
+struct usb_hub_descriptor {
+	__u8  bDescLength;
+	__u8  bDescriptorType;
+	__u8  bNbrPorts;
+	__le16 wHubCharacteristics;
+	__u8  bPwrOn2PwrGood;
+	__u8  bHubContrCurrent;
+		/* add 1 bit for hub status change; round to bytes */
+	__u8  DeviceRemovable[(USB_MAXCHILDREN + 1 + 7) / 8];
+	__u8  PortPwrCtrlMask[(USB_MAXCHILDREN + 1 + 7) / 8];
+} __attribute__ ((packed));
+
+
+/* port indicator status selectors, tables 11-7 and 11-25 */
+#define HUB_LED_AUTO	0
+#define HUB_LED_AMBER	1
+#define HUB_LED_GREEN	2
+#define HUB_LED_OFF	3
+
+enum hub_led_mode {
+	INDICATOR_AUTO = 0,
+	INDICATOR_CYCLE,
+	/* software blinks for attention:  software, hardware, reserved */
+	INDICATOR_GREEN_BLINK, INDICATOR_GREEN_BLINK_OFF,
+	INDICATOR_AMBER_BLINK, INDICATOR_AMBER_BLINK_OFF,
+	INDICATOR_ALT_BLINK, INDICATOR_ALT_BLINK_OFF
+} __attribute__ ((packed));
+
+struct usb_device;
+
+/* Transaction Translator Think Times, in bits */
+#define HUB_TTTT_8_BITS		0x00
+#define HUB_TTTT_16_BITS	0x20
+#define HUB_TTTT_24_BITS	0x40
+#define HUB_TTTT_32_BITS	0x60
+
+/*
+ * As of USB 2.0, full/low speed devices are segregated into trees.
+ * One type grows from USB 1.1 host controllers (OHCI, UHCI etc).
+ * The other type grows from high speed hubs when they connect to
+ * full/low speed devices using "Transaction Translators" (TTs).
+ *
+ * TTs should only be known to the hub driver, and high speed bus
+ * drivers (only EHCI for now).  They affect periodic scheduling and
+ * sometimes control/bulk error recovery.
+ */
+struct usb_tt {
+	struct usb_device	*hub;	/* upstream highspeed hub */
+	int			multi;	/* true means one TT per port */
+	unsigned		think_time;	/* think time in ns */
+
+	/* for control/bulk error recovery (CLEAR_TT_BUFFER) */
+	spinlock_t		lock;
+	struct list_head	clear_list;	/* of usb_tt_clear */
+	struct work_struct	clear_work;
+};
+
+struct usb_tt_clear {
+	struct list_head	clear_list;
+	unsigned		tt;
+	u16			devinfo;
+	struct usb_hcd		*hcd;
+	struct usb_host_endpoint	*ep;
+};
+
+extern int usb_hub_clear_tt_buffer(struct urb *urb);
+extern void usb_ep0_reinit(struct usb_device *);
+
+#endif /* __LINUX_HUB_H */
-- 
cgit v1.2.3


From d20db4b4e938aa8e4e5735b5a1b202de5800400e Mon Sep 17 00:00:00 2001
From: Eric Lescouet <Eric.Lescouet@virtuallogix.com>
Date: Sat, 24 Apr 2010 23:38:17 +0200
Subject: USB: split hub.h into ch11.h and merge-in hcd.h

Based on input from Alan Stern, split the hub.h header into:
- new ch11.h header (most of it) containing constants and
  structures from chapter 11 of the USB 2.0 spec.
- a small remaining part being merged into hcd.h.

Signed-of-by: Eric Lescouet <eric@lescouet.org>
Cc: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch11.h | 172 +++++++++++++++++++++++++++++++++++++++
 include/linux/usb/hcd.h  |  37 ++++++++-
 include/linux/usb/hub.h  | 205 -----------------------------------------------
 3 files changed, 208 insertions(+), 206 deletions(-)
 create mode 100644 include/linux/usb/ch11.h
 delete mode 100644 include/linux/usb/hub.h

(limited to 'include/linux')

diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
new file mode 100644
index 000000000000..35cb7357ff72
--- /dev/null
+++ b/include/linux/usb/ch11.h
@@ -0,0 +1,172 @@
+/*
+ * This file holds Hub protocol constants and data structures that are
+ * defined in chapter 11 (Hub Specification) of the USB 2.0 specification.
+ *
+ * It is used/shared between the USB core, the HCDs and couple of other USB
+ * drivers.
+ */
+
+#ifndef __LINUX_HUB_H
+#define __LINUX_HUB_H
+
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/compiler.h>	/* likely()/unlikely() */
+
+/*
+ * Hub request types
+ */
+
+#define USB_RT_HUB	(USB_TYPE_CLASS | USB_RECIP_DEVICE)
+#define USB_RT_PORT	(USB_TYPE_CLASS | USB_RECIP_OTHER)
+
+/*
+ * Hub class requests
+ * See USB 2.0 spec Table 11-16
+ */
+#define HUB_CLEAR_TT_BUFFER	8
+#define HUB_RESET_TT		9
+#define HUB_GET_TT_STATE	10
+#define HUB_STOP_TT		11
+
+/*
+ * Hub Class feature numbers
+ * See USB 2.0 spec Table 11-17
+ */
+#define C_HUB_LOCAL_POWER	0
+#define C_HUB_OVER_CURRENT	1
+
+/*
+ * Port feature numbers
+ * See USB 2.0 spec Table 11-17
+ */
+#define USB_PORT_FEAT_CONNECTION	0
+#define USB_PORT_FEAT_ENABLE		1
+#define USB_PORT_FEAT_SUSPEND		2	/* L2 suspend */
+#define USB_PORT_FEAT_OVER_CURRENT	3
+#define USB_PORT_FEAT_RESET		4
+#define USB_PORT_FEAT_L1		5	/* L1 suspend */
+#define USB_PORT_FEAT_POWER		8
+#define USB_PORT_FEAT_LOWSPEED		9
+/* This value was never in Table 11-17 */
+#define USB_PORT_FEAT_HIGHSPEED		10
+/* This value is also fake */
+#define USB_PORT_FEAT_SUPERSPEED	11
+#define USB_PORT_FEAT_C_CONNECTION	16
+#define USB_PORT_FEAT_C_ENABLE		17
+#define USB_PORT_FEAT_C_SUSPEND		18
+#define USB_PORT_FEAT_C_OVER_CURRENT	19
+#define USB_PORT_FEAT_C_RESET		20
+#define USB_PORT_FEAT_TEST              21
+#define USB_PORT_FEAT_INDICATOR         22
+#define USB_PORT_FEAT_C_PORT_L1         23
+
+/*
+ * Hub Status and Hub Change results
+ * See USB 2.0 spec Table 11-19 and Table 11-20
+ */
+struct usb_port_status {
+	__le16 wPortStatus;
+	__le16 wPortChange;
+} __attribute__ ((packed));
+
+/*
+ * wPortStatus bit field
+ * See USB 2.0 spec Table 11-21
+ */
+#define USB_PORT_STAT_CONNECTION	0x0001
+#define USB_PORT_STAT_ENABLE		0x0002
+#define USB_PORT_STAT_SUSPEND		0x0004
+#define USB_PORT_STAT_OVERCURRENT	0x0008
+#define USB_PORT_STAT_RESET		0x0010
+#define USB_PORT_STAT_L1		0x0020
+/* bits 6 to 7 are reserved */
+#define USB_PORT_STAT_POWER		0x0100
+#define USB_PORT_STAT_LOW_SPEED		0x0200
+#define USB_PORT_STAT_HIGH_SPEED        0x0400
+#define USB_PORT_STAT_TEST              0x0800
+#define USB_PORT_STAT_INDICATOR         0x1000
+/* bits 13 to 15 are reserved */
+
+/*
+ * wPortChange bit field
+ * See USB 2.0 spec Table 11-22
+ * Bits 0 to 4 shown, bits 5 to 15 are reserved
+ */
+#define USB_PORT_STAT_C_CONNECTION	0x0001
+#define USB_PORT_STAT_C_ENABLE		0x0002
+#define USB_PORT_STAT_C_SUSPEND		0x0004
+#define USB_PORT_STAT_C_OVERCURRENT	0x0008
+#define USB_PORT_STAT_C_RESET		0x0010
+#define USB_PORT_STAT_C_L1		0x0020
+
+/*
+ * wHubCharacteristics (masks)
+ * See USB 2.0 spec Table 11-13, offset 3
+ */
+#define HUB_CHAR_LPSM		0x0003 /* D1 .. D0 */
+#define HUB_CHAR_COMPOUND	0x0004 /* D2       */
+#define HUB_CHAR_OCPM		0x0018 /* D4 .. D3 */
+#define HUB_CHAR_TTTT           0x0060 /* D6 .. D5 */
+#define HUB_CHAR_PORTIND        0x0080 /* D7       */
+
+struct usb_hub_status {
+	__le16 wHubStatus;
+	__le16 wHubChange;
+} __attribute__ ((packed));
+
+/*
+ * Hub Status & Hub Change bit masks
+ * See USB 2.0 spec Table 11-19 and Table 11-20
+ * Bits 0 and 1 for wHubStatus and wHubChange
+ * Bits 2 to 15 are reserved for both
+ */
+#define HUB_STATUS_LOCAL_POWER	0x0001
+#define HUB_STATUS_OVERCURRENT	0x0002
+#define HUB_CHANGE_LOCAL_POWER	0x0001
+#define HUB_CHANGE_OVERCURRENT	0x0002
+
+
+/*
+ * Hub descriptor
+ * See USB 2.0 spec Table 11-13
+ */
+
+#define USB_DT_HUB			(USB_TYPE_CLASS | 0x09)
+#define USB_DT_HUB_NONVAR_SIZE		7
+
+struct usb_hub_descriptor {
+	__u8  bDescLength;
+	__u8  bDescriptorType;
+	__u8  bNbrPorts;
+	__le16 wHubCharacteristics;
+	__u8  bPwrOn2PwrGood;
+	__u8  bHubContrCurrent;
+		/* add 1 bit for hub status change; round to bytes */
+	__u8  DeviceRemovable[(USB_MAXCHILDREN + 1 + 7) / 8];
+	__u8  PortPwrCtrlMask[(USB_MAXCHILDREN + 1 + 7) / 8];
+} __attribute__ ((packed));
+
+
+/* port indicator status selectors, tables 11-7 and 11-25 */
+#define HUB_LED_AUTO	0
+#define HUB_LED_AMBER	1
+#define HUB_LED_GREEN	2
+#define HUB_LED_OFF	3
+
+enum hub_led_mode {
+	INDICATOR_AUTO = 0,
+	INDICATOR_CYCLE,
+	/* software blinks for attention:  software, hardware, reserved */
+	INDICATOR_GREEN_BLINK, INDICATOR_GREEN_BLINK_OFF,
+	INDICATOR_AMBER_BLINK, INDICATOR_AMBER_BLINK_OFF,
+	INDICATOR_ALT_BLINK, INDICATOR_ALT_BLINK_OFF
+} __attribute__ ((packed));
+
+/* Transaction Translator Think Times, in bits */
+#define HUB_TTTT_8_BITS		0x00
+#define HUB_TTTT_16_BITS	0x20
+#define HUB_TTTT_24_BITS	0x40
+#define HUB_TTTT_32_BITS	0x60
+
+#endif /* __LINUX_HUB_H */
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index ca228f00b826..d268415b7a40 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -374,7 +374,42 @@ extern void usb_destroy_configuration(struct usb_device *dev);
  * HCD Root Hub support
  */
 
-#include <linux/usb/hub.h>
+#include <linux/usb/ch11.h>
+
+/*
+ * As of USB 2.0, full/low speed devices are segregated into trees.
+ * One type grows from USB 1.1 host controllers (OHCI, UHCI etc).
+ * The other type grows from high speed hubs when they connect to
+ * full/low speed devices using "Transaction Translators" (TTs).
+ *
+ * TTs should only be known to the hub driver, and high speed bus
+ * drivers (only EHCI for now).  They affect periodic scheduling and
+ * sometimes control/bulk error recovery.
+ */
+
+struct usb_device;
+
+struct usb_tt {
+	struct usb_device	*hub;	/* upstream highspeed hub */
+	int			multi;	/* true means one TT per port */
+	unsigned		think_time;	/* think time in ns */
+
+	/* for control/bulk error recovery (CLEAR_TT_BUFFER) */
+	spinlock_t		lock;
+	struct list_head	clear_list;	/* of usb_tt_clear */
+	struct work_struct	clear_work;
+};
+
+struct usb_tt_clear {
+	struct list_head	clear_list;
+	unsigned		tt;
+	u16			devinfo;
+	struct usb_hcd		*hcd;
+	struct usb_host_endpoint	*ep;
+};
+
+extern int usb_hub_clear_tt_buffer(struct urb *urb);
+extern void usb_ep0_reinit(struct usb_device *);
 
 /* (shifted) direction/type/recipient from the USB 2.0 spec, table 9.2 */
 #define DeviceRequest \
diff --git a/include/linux/usb/hub.h b/include/linux/usb/hub.h
deleted file mode 100644
index de8081f065ed..000000000000
--- a/include/linux/usb/hub.h
+++ /dev/null
@@ -1,205 +0,0 @@
-#ifndef __LINUX_HUB_H
-#define __LINUX_HUB_H
-
-/*
- * Hub protocol and driver data structures.
- *
- * Some of these are known to the "virtual root hub" code
- * in host controller drivers.
- */
-
-#include <linux/list.h>
-#include <linux/workqueue.h>
-#include <linux/compiler.h>	/* likely()/unlikely() */
-
-/*
- * Hub request types
- */
-
-#define USB_RT_HUB	(USB_TYPE_CLASS | USB_RECIP_DEVICE)
-#define USB_RT_PORT	(USB_TYPE_CLASS | USB_RECIP_OTHER)
-
-/*
- * Hub class requests
- * See USB 2.0 spec Table 11-16
- */
-#define HUB_CLEAR_TT_BUFFER	8
-#define HUB_RESET_TT		9
-#define HUB_GET_TT_STATE	10
-#define HUB_STOP_TT		11
-
-/*
- * Hub Class feature numbers
- * See USB 2.0 spec Table 11-17
- */
-#define C_HUB_LOCAL_POWER	0
-#define C_HUB_OVER_CURRENT	1
-
-/*
- * Port feature numbers
- * See USB 2.0 spec Table 11-17
- */
-#define USB_PORT_FEAT_CONNECTION	0
-#define USB_PORT_FEAT_ENABLE		1
-#define USB_PORT_FEAT_SUSPEND		2	/* L2 suspend */
-#define USB_PORT_FEAT_OVER_CURRENT	3
-#define USB_PORT_FEAT_RESET		4
-#define USB_PORT_FEAT_L1		5	/* L1 suspend */
-#define USB_PORT_FEAT_POWER		8
-#define USB_PORT_FEAT_LOWSPEED		9
-/* This value was never in Table 11-17 */
-#define USB_PORT_FEAT_HIGHSPEED		10
-/* This value is also fake */
-#define USB_PORT_FEAT_SUPERSPEED	11
-#define USB_PORT_FEAT_C_CONNECTION	16
-#define USB_PORT_FEAT_C_ENABLE		17
-#define USB_PORT_FEAT_C_SUSPEND		18
-#define USB_PORT_FEAT_C_OVER_CURRENT	19
-#define USB_PORT_FEAT_C_RESET		20
-#define USB_PORT_FEAT_TEST              21
-#define USB_PORT_FEAT_INDICATOR         22
-#define USB_PORT_FEAT_C_PORT_L1         23
-
-/*
- * Hub Status and Hub Change results
- * See USB 2.0 spec Table 11-19 and Table 11-20
- */
-struct usb_port_status {
-	__le16 wPortStatus;
-	__le16 wPortChange;
-} __attribute__ ((packed));
-
-/*
- * wPortStatus bit field
- * See USB 2.0 spec Table 11-21
- */
-#define USB_PORT_STAT_CONNECTION	0x0001
-#define USB_PORT_STAT_ENABLE		0x0002
-#define USB_PORT_STAT_SUSPEND		0x0004
-#define USB_PORT_STAT_OVERCURRENT	0x0008
-#define USB_PORT_STAT_RESET		0x0010
-#define USB_PORT_STAT_L1		0x0020
-/* bits 6 to 7 are reserved */
-#define USB_PORT_STAT_POWER		0x0100
-#define USB_PORT_STAT_LOW_SPEED		0x0200
-#define USB_PORT_STAT_HIGH_SPEED        0x0400
-#define USB_PORT_STAT_TEST              0x0800
-#define USB_PORT_STAT_INDICATOR         0x1000
-/* bits 13 to 15 are reserved */
-
-/*
- * wPortChange bit field
- * See USB 2.0 spec Table 11-22
- * Bits 0 to 4 shown, bits 5 to 15 are reserved
- */
-#define USB_PORT_STAT_C_CONNECTION	0x0001
-#define USB_PORT_STAT_C_ENABLE		0x0002
-#define USB_PORT_STAT_C_SUSPEND		0x0004
-#define USB_PORT_STAT_C_OVERCURRENT	0x0008
-#define USB_PORT_STAT_C_RESET		0x0010
-#define USB_PORT_STAT_C_L1		0x0020
-
-/*
- * wHubCharacteristics (masks)
- * See USB 2.0 spec Table 11-13, offset 3
- */
-#define HUB_CHAR_LPSM		0x0003 /* D1 .. D0 */
-#define HUB_CHAR_COMPOUND	0x0004 /* D2       */
-#define HUB_CHAR_OCPM		0x0018 /* D4 .. D3 */
-#define HUB_CHAR_TTTT           0x0060 /* D6 .. D5 */
-#define HUB_CHAR_PORTIND        0x0080 /* D7       */
-
-struct usb_hub_status {
-	__le16 wHubStatus;
-	__le16 wHubChange;
-} __attribute__ ((packed));
-
-/*
- * Hub Status & Hub Change bit masks
- * See USB 2.0 spec Table 11-19 and Table 11-20
- * Bits 0 and 1 for wHubStatus and wHubChange
- * Bits 2 to 15 are reserved for both
- */
-#define HUB_STATUS_LOCAL_POWER	0x0001
-#define HUB_STATUS_OVERCURRENT	0x0002
-#define HUB_CHANGE_LOCAL_POWER	0x0001
-#define HUB_CHANGE_OVERCURRENT	0x0002
-
-
-/*
- * Hub descriptor
- * See USB 2.0 spec Table 11-13
- */
-
-#define USB_DT_HUB			(USB_TYPE_CLASS | 0x09)
-#define USB_DT_HUB_NONVAR_SIZE		7
-
-struct usb_hub_descriptor {
-	__u8  bDescLength;
-	__u8  bDescriptorType;
-	__u8  bNbrPorts;
-	__le16 wHubCharacteristics;
-	__u8  bPwrOn2PwrGood;
-	__u8  bHubContrCurrent;
-		/* add 1 bit for hub status change; round to bytes */
-	__u8  DeviceRemovable[(USB_MAXCHILDREN + 1 + 7) / 8];
-	__u8  PortPwrCtrlMask[(USB_MAXCHILDREN + 1 + 7) / 8];
-} __attribute__ ((packed));
-
-
-/* port indicator status selectors, tables 11-7 and 11-25 */
-#define HUB_LED_AUTO	0
-#define HUB_LED_AMBER	1
-#define HUB_LED_GREEN	2
-#define HUB_LED_OFF	3
-
-enum hub_led_mode {
-	INDICATOR_AUTO = 0,
-	INDICATOR_CYCLE,
-	/* software blinks for attention:  software, hardware, reserved */
-	INDICATOR_GREEN_BLINK, INDICATOR_GREEN_BLINK_OFF,
-	INDICATOR_AMBER_BLINK, INDICATOR_AMBER_BLINK_OFF,
-	INDICATOR_ALT_BLINK, INDICATOR_ALT_BLINK_OFF
-} __attribute__ ((packed));
-
-struct usb_device;
-
-/* Transaction Translator Think Times, in bits */
-#define HUB_TTTT_8_BITS		0x00
-#define HUB_TTTT_16_BITS	0x20
-#define HUB_TTTT_24_BITS	0x40
-#define HUB_TTTT_32_BITS	0x60
-
-/*
- * As of USB 2.0, full/low speed devices are segregated into trees.
- * One type grows from USB 1.1 host controllers (OHCI, UHCI etc).
- * The other type grows from high speed hubs when they connect to
- * full/low speed devices using "Transaction Translators" (TTs).
- *
- * TTs should only be known to the hub driver, and high speed bus
- * drivers (only EHCI for now).  They affect periodic scheduling and
- * sometimes control/bulk error recovery.
- */
-struct usb_tt {
-	struct usb_device	*hub;	/* upstream highspeed hub */
-	int			multi;	/* true means one TT per port */
-	unsigned		think_time;	/* think time in ns */
-
-	/* for control/bulk error recovery (CLEAR_TT_BUFFER) */
-	spinlock_t		lock;
-	struct list_head	clear_list;	/* of usb_tt_clear */
-	struct work_struct	clear_work;
-};
-
-struct usb_tt_clear {
-	struct list_head	clear_list;
-	unsigned		tt;
-	u16			devinfo;
-	struct usb_hcd		*hcd;
-	struct usb_host_endpoint	*ep;
-};
-
-extern int usb_hub_clear_tt_buffer(struct urb *urb);
-extern void usb_ep0_reinit(struct usb_device *);
-
-#endif /* __LINUX_HUB_H */
-- 
cgit v1.2.3


From 288ead45fa6637e959015d055304f521cbbc0575 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Thu, 4 Mar 2010 11:32:30 -0500
Subject: USB: remove bogus USB_PORT_FEAT_*_SPEED symbols

This patch (as1348) removes the bogus
USB_PORT_FEAT_{HIGHSPEED,SUPERSPEED} symbols from ch11.h.  No such
features are defined by the USB spec.  (There is a PORT_LOWSPEED
feature, but the spec doesn't mention it except to say that host
software should never use it.)  The speed indicators are port
statuses, not port features.

As a temporary workaround for the xhci-hcd driver, a fictional
USB_PORT_STAT_SUPER_SPEED symbol is added.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hub.c          | 8 ++++----
 drivers/usb/host/ehci.h         | 8 ++++----
 drivers/usb/host/isp1760-hcd.c  | 2 +-
 drivers/usb/host/oxu210hp-hcd.c | 4 ++--
 drivers/usb/host/r8a66597-hcd.c | 7 +++----
 drivers/usb/host/sl811-hcd.c    | 6 +++---
 drivers/usb/host/xhci-hub.c     | 8 ++++----
 include/linux/usb/ch11.h        | 7 ++-----
 8 files changed, 23 insertions(+), 27 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
index 1883c3c7b69b..5ac27ed0c635 100644
--- a/drivers/usb/core/hub.c
+++ b/drivers/usb/core/hub.c
@@ -153,11 +153,11 @@ static int usb_reset_and_verify_device(struct usb_device *udev);
 
 static inline char *portspeed(int portstatus)
 {
-	if (portstatus & (1 << USB_PORT_FEAT_HIGHSPEED))
+	if (portstatus & USB_PORT_STAT_HIGH_SPEED)
     		return "480 Mb/s";
-	else if (portstatus & (1 << USB_PORT_FEAT_LOWSPEED))
+	else if (portstatus & USB_PORT_STAT_LOW_SPEED)
 		return "1.5 Mb/s";
-	else if (portstatus & (1 << USB_PORT_FEAT_SUPERSPEED))
+	else if (portstatus & USB_PORT_STAT_SUPER_SPEED)
 		return "5.0 Gb/s";
 	else
 		return "12 Mb/s";
@@ -3075,7 +3075,7 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
 		if (!(hcd->driver->flags & HCD_USB3))
 			udev->speed = USB_SPEED_UNKNOWN;
 		else if ((hdev->parent == NULL) &&
-				(portstatus & (1 << USB_PORT_FEAT_SUPERSPEED)))
+				(portstatus & USB_PORT_STAT_SUPER_SPEED))
 			udev->speed = USB_SPEED_SUPER;
 		else
 			udev->speed = USB_SPEED_UNKNOWN;
diff --git a/drivers/usb/host/ehci.h b/drivers/usb/host/ehci.h
index 556c0b48f3ab..4ebe9ad209e4 100644
--- a/drivers/usb/host/ehci.h
+++ b/drivers/usb/host/ehci.h
@@ -556,20 +556,20 @@ ehci_port_speed(struct ehci_hcd *ehci, unsigned int portsc)
 		case 0:
 			return 0;
 		case 1:
-			return (1<<USB_PORT_FEAT_LOWSPEED);
+			return USB_PORT_STAT_LOW_SPEED;
 		case 2:
 		default:
-			return (1<<USB_PORT_FEAT_HIGHSPEED);
+			return USB_PORT_STAT_HIGH_SPEED;
 		}
 	}
-	return (1<<USB_PORT_FEAT_HIGHSPEED);
+	return USB_PORT_STAT_HIGH_SPEED;
 }
 
 #else
 
 #define	ehci_is_TDI(e)			(0)
 
-#define	ehci_port_speed(ehci, portsc)	(1<<USB_PORT_FEAT_HIGHSPEED)
+#define	ehci_port_speed(ehci, portsc)	USB_PORT_STAT_HIGH_SPEED
 #endif
 
 /*-------------------------------------------------------------------------*/
diff --git a/drivers/usb/host/isp1760-hcd.c b/drivers/usb/host/isp1760-hcd.c
index c7ac1d97d176..cfdac6da9556 100644
--- a/drivers/usb/host/isp1760-hcd.c
+++ b/drivers/usb/host/isp1760-hcd.c
@@ -111,7 +111,7 @@ struct isp1760_qh {
 	u32 ping;
 };
 
-#define ehci_port_speed(priv, portsc) (1 << USB_PORT_FEAT_HIGHSPEED)
+#define ehci_port_speed(priv, portsc) USB_PORT_STAT_HIGH_SPEED
 
 static unsigned int isp1760_readl(__u32 __iomem *regs)
 {
diff --git a/drivers/usb/host/oxu210hp-hcd.c b/drivers/usb/host/oxu210hp-hcd.c
index 2891203200ce..8f04c0afa59f 100644
--- a/drivers/usb/host/oxu210hp-hcd.c
+++ b/drivers/usb/host/oxu210hp-hcd.c
@@ -3153,10 +3153,10 @@ static inline unsigned int oxu_port_speed(struct oxu_hcd *oxu,
 	case 0:
 		return 0;
 	case 1:
-		return 1 << USB_PORT_FEAT_LOWSPEED;
+		return USB_PORT_STAT_LOW_SPEED;
 	case 2:
 	default:
-		return 1 << USB_PORT_FEAT_HIGHSPEED;
+		return USB_PORT_STAT_HIGH_SPEED;
 	}
 }
 
diff --git a/drivers/usb/host/r8a66597-hcd.c b/drivers/usb/host/r8a66597-hcd.c
index 1398de140ead..a004a1220848 100644
--- a/drivers/usb/host/r8a66597-hcd.c
+++ b/drivers/usb/host/r8a66597-hcd.c
@@ -1059,12 +1059,11 @@ static void r8a66597_usb_connect(struct r8a66597 *r8a66597, int port)
 	u16 speed = get_rh_usb_speed(r8a66597, port);
 	struct r8a66597_root_hub *rh = &r8a66597->root_hub[port];
 
-	rh->port &= ~((1 << USB_PORT_FEAT_HIGHSPEED) |
-		      (1 << USB_PORT_FEAT_LOWSPEED));
+	rh->port &= ~(USB_PORT_STAT_HIGH_SPEED | USB_PORT_STAT_LOW_SPEED);
 	if (speed == HSMODE)
-		rh->port |= (1 << USB_PORT_FEAT_HIGHSPEED);
+		rh->port |= USB_PORT_STAT_HIGH_SPEED;
 	else if (speed == LSMODE)
-		rh->port |= (1 << USB_PORT_FEAT_LOWSPEED);
+		rh->port |= USB_PORT_STAT_LOW_SPEED;
 
 	rh->port &= ~(1 << USB_PORT_FEAT_RESET);
 	rh->port |= 1 << USB_PORT_FEAT_ENABLE;
diff --git a/drivers/usb/host/sl811-hcd.c b/drivers/usb/host/sl811-hcd.c
index 8f2f477890c4..dcd7fab7179c 100644
--- a/drivers/usb/host/sl811-hcd.c
+++ b/drivers/usb/host/sl811-hcd.c
@@ -1121,7 +1121,7 @@ sl811h_timer(unsigned long _sl811)
 	u8		signaling = sl811->ctrl1 & SL11H_CTL1MASK_FORCE;
 	const u32	mask = (1 << USB_PORT_FEAT_CONNECTION)
 				| (1 << USB_PORT_FEAT_ENABLE)
-				| (1 << USB_PORT_FEAT_LOWSPEED);
+				| USB_PORT_STAT_LOW_SPEED;
 
 	spin_lock_irqsave(&sl811->lock, flags);
 
@@ -1162,7 +1162,7 @@ sl811h_timer(unsigned long _sl811)
 	} else {
 		sl811->port1 |= mask;
 		if (irqstat & SL11H_INTMASK_DP)
-			sl811->port1 &= ~(1 << USB_PORT_FEAT_LOWSPEED);
+			sl811->port1 &= ~USB_PORT_STAT_LOW_SPEED;
 		sl811->irq_enable = SL11H_INTMASK_INSRMV | SL11H_INTMASK_RD;
 	}
 
@@ -1173,7 +1173,7 @@ sl811h_timer(unsigned long _sl811)
 #ifdef USE_B
 		sl811->irq_enable |= SL11H_INTMASK_DONE_B;
 #endif
-		if (sl811->port1 & (1 << USB_PORT_FEAT_LOWSPEED)) {
+		if (sl811->port1 & USB_PORT_STAT_LOW_SPEED) {
 			sl811->ctrl1 |= SL11H_CTL1MASK_LSPD;
 			ctrl2 |= SL811HS_CTL2MASK_DSWAP;
 		}
diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c
index 208b805b80eb..dd69df1e4558 100644
--- a/drivers/usb/host/xhci-hub.c
+++ b/drivers/usb/host/xhci-hub.c
@@ -64,15 +64,15 @@ static void xhci_hub_descriptor(struct xhci_hcd *xhci,
 static unsigned int xhci_port_speed(unsigned int port_status)
 {
 	if (DEV_LOWSPEED(port_status))
-		return 1 << USB_PORT_FEAT_LOWSPEED;
+		return USB_PORT_STAT_LOW_SPEED;
 	if (DEV_HIGHSPEED(port_status))
-		return 1 << USB_PORT_FEAT_HIGHSPEED;
+		return USB_PORT_STAT_HIGH_SPEED;
 	if (DEV_SUPERSPEED(port_status))
-		return 1 << USB_PORT_FEAT_SUPERSPEED;
+		return USB_PORT_STAT_SUPER_SPEED;
 	/*
 	 * FIXME: Yes, we should check for full speed, but the core uses that as
 	 * a default in portspeed() in usb/core/hub.c (which is the only place
-	 * USB_PORT_FEAT_*SPEED is used).
+	 * USB_PORT_STAT_*_SPEED is used).
 	 */
 	return 0;
 }
diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index 35cb7357ff72..bd3185aba4ee 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -47,11 +47,7 @@
 #define USB_PORT_FEAT_RESET		4
 #define USB_PORT_FEAT_L1		5	/* L1 suspend */
 #define USB_PORT_FEAT_POWER		8
-#define USB_PORT_FEAT_LOWSPEED		9
-/* This value was never in Table 11-17 */
-#define USB_PORT_FEAT_HIGHSPEED		10
-/* This value is also fake */
-#define USB_PORT_FEAT_SUPERSPEED	11
+#define USB_PORT_FEAT_LOWSPEED		9	/* Should never be used */
 #define USB_PORT_FEAT_C_CONNECTION	16
 #define USB_PORT_FEAT_C_ENABLE		17
 #define USB_PORT_FEAT_C_SUSPEND		18
@@ -87,6 +83,7 @@ struct usb_port_status {
 #define USB_PORT_STAT_TEST              0x0800
 #define USB_PORT_STAT_INDICATOR         0x1000
 /* bits 13 to 15 are reserved */
+#define USB_PORT_STAT_SUPER_SPEED	0x8000	/* Linux-internal */
 
 /*
  * wPortChange bit field
-- 
cgit v1.2.3


From bd5afa9eac6daa408412a31a6c69e87e8bd28c7e Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 8 Mar 2010 21:50:12 -0600
Subject: usb-serial: Use tty_port version console instead of usb_serial_port

Replace all instances of using the console variable in struct
usb_serial_port with the struct tty_port version.

CC: Alan Cox <alan@linux.intel.com>
CC: Alan Stern <stern@rowland.harvard.edu>
CC: Oliver Neukum <oliver@neukum.org>
CC: Andrew Morton <akpm@linux-foundation.org>
CC: linux-usb@vger.kernel.org
CC: linux-kernel@vger.kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/console.c    | 5 ++---
 drivers/usb/serial/ftdi_sio.c   | 2 +-
 drivers/usb/serial/generic.c    | 4 ++--
 drivers/usb/serial/pl2303.c     | 2 +-
 drivers/usb/serial/usb-serial.c | 4 ++--
 include/linux/usb/serial.h      | 2 --
 6 files changed, 8 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/console.c b/drivers/usb/serial/console.c
index f347da2ef00a..4ea64fef6718 100644
--- a/drivers/usb/serial/console.c
+++ b/drivers/usb/serial/console.c
@@ -180,7 +180,6 @@ static int usb_console_setup(struct console *co, char *options)
 	--port->port.count;
 	/* The console is special in terms of closing the device so
 	 * indicate this port is now acting as a system console. */
-	port->console = 1;
 	port->port.console = 1;
 
 	mutex_unlock(&serial->disc_mutex);
@@ -217,7 +216,7 @@ static void usb_console_write(struct console *co,
 
 	dbg("%s - port %d, %d byte(s)", __func__, port->number, count);
 
-	if (!port->console) {
+	if (!port->port.console) {
 		dbg("%s - port not opened", __func__);
 		return;
 	}
@@ -313,7 +312,7 @@ void usb_serial_console_exit(void)
 {
 	if (usbcons_info.port) {
 		unregister_console(&usbcons);
-		usbcons_info.port->console = 0;
+		usbcons_info.port->port.console = 0;
 		usbcons_info.port = NULL;
 	}
 }
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 1d7c4fac02e8..46a88ae9c46a 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -2074,7 +2074,7 @@ static int ftdi_process_packet(struct tty_struct *tty,
 		return 0;	/* status only */
 	ch = packet + 2;
 
-	if (!(port->console && port->sysrq) && flag == TTY_NORMAL)
+	if (!(port->port.console && port->sysrq) && flag == TTY_NORMAL)
 		tty_insert_flip_string(tty, ch, len);
 	else {
 		for (i = 0; i < len; i++, ch++) {
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index f804acb138ec..ba61c745df0b 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -447,7 +447,7 @@ static void flush_and_resubmit_read_urb(struct usb_serial_port *port)
 	/* The per character mucking around with sysrq path it too slow for
 	   stuff like 3G modems, so shortcircuit it in the 99.9999999% of cases
 	   where the USB serial is not a console anyway */
-	if (!port->console || !port->sysrq)
+	if (!port->port.console || !port->sysrq)
 		tty_insert_flip_string(tty, ch, urb->actual_length);
 	else {
 		/* Push data to tty */
@@ -561,7 +561,7 @@ void usb_serial_generic_unthrottle(struct tty_struct *tty)
 int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 			struct usb_serial_port *port, unsigned int ch)
 {
-	if (port->sysrq && port->console) {
+	if (port->sysrq && port->port.console) {
 		if (ch && time_before(jiffies, port->sysrq)) {
 			handle_sysrq(ch, tty);
 			port->sysrq = 0;
diff --git a/drivers/usb/serial/pl2303.c b/drivers/usb/serial/pl2303.c
index c28b1607eacc..b10b0efe3e06 100644
--- a/drivers/usb/serial/pl2303.c
+++ b/drivers/usb/serial/pl2303.c
@@ -1058,7 +1058,7 @@ static void pl2303_push_data(struct tty_struct *tty,
 	if (line_status & UART_OVERRUN_ERROR)
 		tty_insert_flip_char(tty, 0, TTY_OVERRUN);
 
-	if (tty_flag == TTY_NORMAL && !(port->console && port->sysrq))
+	if (tty_flag == TTY_NORMAL && !(port->port.console && port->sysrq))
 		tty_insert_flip_string(tty, data, urb->actual_length);
 	else {
 		int i;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 3873660d8217..f3f65171de38 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -289,7 +289,7 @@ static void serial_down(struct tty_port *tport)
 	 * The console is magical.  Do not hang up the console hardware
 	 * or there will be tears.
 	 */
-	if (port->console)
+	if (port->port.console)
 		return;
 	if (drv->close)
 		drv->close(port);
@@ -328,7 +328,7 @@ static void serial_cleanup(struct tty_struct *tty)
 	/* The console is magical.  Do not hang up the console hardware
 	 * or there will be tears.
 	 */
-	if (port->console)
+	if (port->port.console)
 		return;
 
 	dbg("%s - port %d", __func__, port->number);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 0a458b861933..b7682fed1d9c 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -66,7 +66,6 @@ enum port_dev_state {
  * @work: work queue entry for the line discipline waking up.
  * @throttled: nonzero if the read urb is inactive to throttle the device
  * @throttle_req: nonzero if the tty wants to throttle us
- * @console: attached usb serial console
  * @dev: pointer to the serial device
  *
  * This structure is used by the usb-serial core and drivers for the specific
@@ -106,7 +105,6 @@ struct usb_serial_port {
 	struct work_struct	work;
 	char			throttled;
 	char			throttle_req;
-	char			console;
 	unsigned long		sysrq; /* sysrq timeout */
 	struct device		dev;
 	enum port_dev_state	dev_state;
-- 
cgit v1.2.3


From e6c213b2968cbee4cfb8f89f2d685b9ad07eefbd Mon Sep 17 00:00:00 2001
From: Felipe Balbi <felipe.balbi@nokia.com>
Date: Fri, 12 Mar 2010 10:29:06 +0200
Subject: usb: musb: allow board to pass down fifo mode

Boards might want to optimize their FIFO configuration
for the particular needs of that specific board. Allow
that by moving all related data structures to
<linux/usb/musb.h>.

Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/musb/musb_core.c | 36 ++++++++++++++++--------------------
 include/linux/usb/musb.h     | 35 +++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/musb_core.c b/drivers/usb/musb/musb_core.c
index ced6d9ea9af3..3f4c158a9f5e 100644
--- a/drivers/usb/musb/musb_core.c
+++ b/drivers/usb/musb/musb_core.c
@@ -995,24 +995,13 @@ static ushort __initdata fifo_mode = 2;
 module_param(fifo_mode, ushort, 0);
 MODULE_PARM_DESC(fifo_mode, "initial endpoint configuration");
 
-
-enum fifo_style { FIFO_RXTX, FIFO_TX, FIFO_RX } __attribute__ ((packed));
-enum buf_mode { BUF_SINGLE, BUF_DOUBLE } __attribute__ ((packed));
-
-struct fifo_cfg {
-	u8		hw_ep_num;
-	enum fifo_style	style;
-	enum buf_mode	mode;
-	u16		maxpacket;
-};
-
 /*
  * tables defining fifo_mode values.  define more if you like.
  * for host side, make sure both halves of ep1 are set up.
  */
 
 /* mode 0 - fits in 2KB */
-static struct fifo_cfg __initdata mode_0_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_0_cfg[] = {
 { .hw_ep_num = 1, .style = FIFO_TX,   .maxpacket = 512, },
 { .hw_ep_num = 1, .style = FIFO_RX,   .maxpacket = 512, },
 { .hw_ep_num = 2, .style = FIFO_RXTX, .maxpacket = 512, },
@@ -1021,7 +1010,7 @@ static struct fifo_cfg __initdata mode_0_cfg[] = {
 };
 
 /* mode 1 - fits in 4KB */
-static struct fifo_cfg __initdata mode_1_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_1_cfg[] = {
 { .hw_ep_num = 1, .style = FIFO_TX,   .maxpacket = 512, .mode = BUF_DOUBLE, },
 { .hw_ep_num = 1, .style = FIFO_RX,   .maxpacket = 512, .mode = BUF_DOUBLE, },
 { .hw_ep_num = 2, .style = FIFO_RXTX, .maxpacket = 512, .mode = BUF_DOUBLE, },
@@ -1030,7 +1019,7 @@ static struct fifo_cfg __initdata mode_1_cfg[] = {
 };
 
 /* mode 2 - fits in 4KB */
-static struct fifo_cfg __initdata mode_2_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_2_cfg[] = {
 { .hw_ep_num = 1, .style = FIFO_TX,   .maxpacket = 512, },
 { .hw_ep_num = 1, .style = FIFO_RX,   .maxpacket = 512, },
 { .hw_ep_num = 2, .style = FIFO_TX,   .maxpacket = 512, },
@@ -1040,7 +1029,7 @@ static struct fifo_cfg __initdata mode_2_cfg[] = {
 };
 
 /* mode 3 - fits in 4KB */
-static struct fifo_cfg __initdata mode_3_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_3_cfg[] = {
 { .hw_ep_num = 1, .style = FIFO_TX,   .maxpacket = 512, .mode = BUF_DOUBLE, },
 { .hw_ep_num = 1, .style = FIFO_RX,   .maxpacket = 512, .mode = BUF_DOUBLE, },
 { .hw_ep_num = 2, .style = FIFO_TX,   .maxpacket = 512, },
@@ -1050,7 +1039,7 @@ static struct fifo_cfg __initdata mode_3_cfg[] = {
 };
 
 /* mode 4 - fits in 16KB */
-static struct fifo_cfg __initdata mode_4_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_4_cfg[] = {
 { .hw_ep_num =  1, .style = FIFO_TX,   .maxpacket = 512, },
 { .hw_ep_num =  1, .style = FIFO_RX,   .maxpacket = 512, },
 { .hw_ep_num =  2, .style = FIFO_TX,   .maxpacket = 512, },
@@ -1081,7 +1070,7 @@ static struct fifo_cfg __initdata mode_4_cfg[] = {
 };
 
 /* mode 5 - fits in 8KB */
-static struct fifo_cfg __initdata mode_5_cfg[] = {
+static struct musb_fifo_cfg __initdata mode_5_cfg[] = {
 { .hw_ep_num =  1, .style = FIFO_TX,   .maxpacket = 512, },
 { .hw_ep_num =  1, .style = FIFO_RX,   .maxpacket = 512, },
 { .hw_ep_num =  2, .style = FIFO_TX,   .maxpacket = 512, },
@@ -1119,7 +1108,7 @@ static struct fifo_cfg __initdata mode_5_cfg[] = {
  */
 static int __init
 fifo_setup(struct musb *musb, struct musb_hw_ep  *hw_ep,
-		const struct fifo_cfg *cfg, u16 offset)
+		const struct musb_fifo_cfg *cfg, u16 offset)
 {
 	void __iomem	*mbase = musb->mregs;
 	int	size = 0;
@@ -1190,17 +1179,23 @@ fifo_setup(struct musb *musb, struct musb_hw_ep  *hw_ep,
 	return offset + (maxpacket << ((c_size & MUSB_FIFOSZ_DPB) ? 1 : 0));
 }
 
-static struct fifo_cfg __initdata ep0_cfg = {
+static struct musb_fifo_cfg __initdata ep0_cfg = {
 	.style = FIFO_RXTX, .maxpacket = 64,
 };
 
 static int __init ep_config_from_table(struct musb *musb)
 {
-	const struct fifo_cfg	*cfg;
+	const struct musb_fifo_cfg	*cfg;
 	unsigned		i, n;
 	int			offset;
 	struct musb_hw_ep	*hw_ep = musb->endpoints;
 
+	if (musb->config->fifo_cfg) {
+		cfg = musb->config->fifo_cfg;
+		n = musb->config->fifo_cfg_size;
+		goto done;
+	}
+
 	switch (fifo_mode) {
 	default:
 		fifo_mode = 0;
@@ -1235,6 +1230,7 @@ static int __init ep_config_from_table(struct musb *musb)
 			musb_driver_name, fifo_mode);
 
 
+done:
 	offset = fifo_setup(musb, hw_ep, &ep0_cfg, 0);
 	/* assert(offset > 0) */
 
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index 7acef0234c0e..f3d68f62dae8 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -22,12 +22,47 @@ enum musb_mode {
 
 struct clk;
 
+enum musb_fifo_style {
+	FIFO_RXTX,
+	FIFO_TX,
+	FIFO_RX
+} __attribute__ ((packed));
+
+enum musb_buf_mode {
+	BUF_SINGLE,
+	BUF_DOUBLE
+} __attribute__ ((packed));
+
+struct musb_fifo_cfg {
+	u8			hw_ep_num;
+	enum musb_fifo_style	style;
+	enum musb_buf_mode	mode;
+	u16			maxpacket;
+};
+
+#define MUSB_EP_FIFO(ep, st, m, pkt)		\
+{						\
+	.hw_ep_num	= ep,			\
+	.style		= st,			\
+	.mode		= m,			\
+	.maxpacket	= pkt,			\
+}
+
+#define MUSB_EP_FIFO_SINGLE(ep, st, pkt)	\
+	MUSB_EP_FIFO(ep, st, BUF_SINGLE, pkt)
+
+#define MUSB_EP_FIFO_DOUBLE(ep, st, pkt)	\
+	MUSB_EP_FIFO(ep, st, BUF_DOUBLE, pkt)
+
 struct musb_hdrc_eps_bits {
 	const char	name[16];
 	u8		bits;
 };
 
 struct musb_hdrc_config {
+	struct musb_fifo_cfg	*fifo_cfg;	/* board fifo configuration */
+	unsigned		fifo_cfg_size;	/* size of the fifo configuration */
+
 	/* MUSB configuration-specific details */
 	unsigned	multipoint:1;	/* multipoint device */
 	unsigned	dyn_fifo:1 __deprecated; /* supports dynamic fifo sizing */
-- 
cgit v1.2.3


From 6ddc6dae4ab559b648bc348e3a48e113e92ab4a2 Mon Sep 17 00:00:00 2001
From: Cliff Cai <cliff.cai@analog.com>
Date: Fri, 12 Mar 2010 10:29:10 +0200
Subject: USB: musb: allow the Blackfin vrsel gpio to be active low

Rather than hardcoding the gpio levels for vrsel, allow the platform
resources to handle this so boards can be active high or low.

Signed-off-by: Cliff Cai <cliff.cai@analog.com>
Signed-off-by: Mike Frysinger <vapier@gentoo.org>
Signed-off-by: Felipe Balbi <felipe.balbi@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/musb/blackfin.c | 8 ++++----
 include/linux/usb/musb.h    | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/musb/blackfin.c b/drivers/usb/musb/blackfin.c
index 7c71eb422a9c..eb4392f904e4 100644
--- a/drivers/usb/musb/blackfin.c
+++ b/drivers/usb/musb/blackfin.c
@@ -238,10 +238,10 @@ static void bfin_vbus_power(struct musb *musb, int is_on, int sleeping)
 
 static void bfin_set_vbus(struct musb *musb, int is_on)
 {
-	if (is_on)
-		gpio_set_value(musb->config->gpio_vrsel, 1);
-	else
-		gpio_set_value(musb->config->gpio_vrsel, 0);
+	int value = musb->config->gpio_vrsel_active;
+	if (!is_on)
+		value = !value;
+	gpio_set_value(musb->config->gpio_vrsel, value);
 
 	DBG(1, "VBUS %s, devctl %02x "
 		/* otg %3x conf %08x prcm %08x */ "\n",
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index f3d68f62dae8..fbb83fe21851 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -88,6 +88,7 @@ struct musb_hdrc_config {
 #ifdef CONFIG_BLACKFIN
         /* A GPIO controlling VRSEL in Blackfin */
         unsigned int    gpio_vrsel;
+	unsigned int	gpio_vrsel_active;
 #endif
 
 };
-- 
cgit v1.2.3


From 317149c655defedfaf432143b86a720cfc12a424 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Mon, 29 Mar 2010 12:03:17 +0200
Subject: USB: Add a new quirk: USB_QUIRK_HONOR_BNUMINTERFACES

Add a new quirk, USB_QUIRK_HONOR_BNUMINTERFACES. When this quirk is
set and a device has more interface descriptors in a configuration
than it claims to have in config->bNumInterfaces, ignore all additional
interfaces.

This is needed for devices which try to hide unused interfaces by only
lowering config->bNumInterfaces, and which can't handle it if you try to
talk to the "hidden" interfaces.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/config.c  | 16 ++++++++++++++--
 drivers/usb/core/quirks.c  |  4 ++++
 include/linux/usb/quirks.h |  4 ++++
 3 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index 77e0dda3a2fb..16c1157be3fc 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -1,6 +1,7 @@
 #include <linux/usb.h>
 #include <linux/usb/ch9.h>
 #include <linux/usb/hcd.h>
+#include <linux/usb/quirks.h>
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
@@ -479,9 +480,10 @@ skip_to_next_interface_descriptor:
 	return buffer - buffer0 + i;
 }
 
-static int usb_parse_configuration(struct device *ddev, int cfgidx,
+static int usb_parse_configuration(struct usb_device *dev, int cfgidx,
     struct usb_host_config *config, unsigned char *buffer, int size)
 {
+	struct device *ddev = &dev->dev;
 	unsigned char *buffer0 = buffer;
 	int cfgno;
 	int nintf, nintf_orig;
@@ -550,6 +552,16 @@ static int usb_parse_configuration(struct device *ddev, int cfgidx,
 			}
 
 			inum = d->bInterfaceNumber;
+
+			if ((dev->quirks & USB_QUIRK_HONOR_BNUMINTERFACES) &&
+			    n >= nintf_orig) {
+				dev_warn(ddev, "config %d has more interface "
+				    "descriptors, than it declares in "
+				    "bNumInterfaces, ignoring interface "
+				    "number: %d\n", cfgno, inum);
+				continue;
+			}
+
 			if (inum >= nintf_orig)
 				dev_warn(ddev, "config %d has an invalid "
 				    "interface number: %d but max is %d\n",
@@ -801,7 +813,7 @@ int usb_get_configuration(struct usb_device *dev)
 
 		dev->rawdescriptors[cfgno] = bigbuffer;
 
-		result = usb_parse_configuration(&dev->dev, cfgno,
+		result = usb_parse_configuration(dev, cfgno,
 		    &dev->config[cfgno], bigbuffer, length);
 		if (result < 0) {
 			++cfgno;
diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
index f073c5cb4e7b..f22d03df8b17 100644
--- a/drivers/usb/core/quirks.c
+++ b/drivers/usb/core/quirks.c
@@ -71,6 +71,10 @@ static const struct usb_device_id usb_quirk_list[] = {
 	/* SKYMEDI USB_DRIVE */
 	{ USB_DEVICE(0x1516, 0x8628), .driver_info = USB_QUIRK_RESET_RESUME },
 
+	/* BUILDWIN Photo Frame */
+	{ USB_DEVICE(0x1908, 0x1315), .driver_info =
+			USB_QUIRK_HONOR_BNUMINTERFACES },
+
 	/* INTEL VALUE SSD */
 	{ USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
 
diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
index 0a555dd131fc..16b7f3347545 100644
--- a/include/linux/usb/quirks.h
+++ b/include/linux/usb/quirks.h
@@ -22,4 +22,8 @@
 /*device will morph if reset, don't use reset for handling errors */
 #define USB_QUIRK_RESET_MORPHS		0x00000010
 
+/* device has more interface descriptions than the bNumInterfaces count,
+   and can't handle talking to these interfaces */
+#define USB_QUIRK_HONOR_BNUMINTERFACES	0x00000020
+
 #endif /* __LINUX_USB_QUIRKS_H */
-- 
cgit v1.2.3


From bbcb2b907415a90334521a31a8767cd77462c716 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 17 Mar 2010 23:00:37 +0100
Subject: USB: serial: allow drivers to define bulk buffer sizes

Allow drivers to define custom bulk in/out buffer sizes in struct
usb_serial_driver. If not set, fall back to the default buffer size
which matches the endpoint size.

Three drivers are currently freeing the pre-allocated buffers and
allocating larger ones to achieve this at port probe (ftdi_sio) or even
at port open (ipaq and iuu_phoenix), which, needless to say, is suboptimal.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/ftdi_sio.c    | 16 +---------------
 drivers/usb/serial/ipaq.c        | 28 ++--------------------------
 drivers/usb/serial/iuu_phoenix.c | 30 ++----------------------------
 drivers/usb/serial/usb-serial.c  |  8 ++++++--
 include/linux/usb/serial.h       |  5 +++++
 5 files changed, 16 insertions(+), 71 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index 46a88ae9c46a..ab4ad18d6ef3 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -768,9 +768,6 @@ static const char *ftdi_chip_name[] = {
 };
 
 
-/* Constants for read urb and write urb */
-#define BUFSZ 512
-
 /* Used for TIOCMIWAIT */
 #define FTDI_STATUS_B0_MASK	(FTDI_RS0_CTS | FTDI_RS0_DSR | FTDI_RS0_RI | FTDI_RS0_RLSD)
 #define FTDI_STATUS_B1_MASK	(FTDI_RS_BI)
@@ -821,6 +818,7 @@ static struct usb_serial_driver ftdi_sio_device = {
 	.usb_driver = 		&ftdi_driver,
 	.id_table =		id_table_combined,
 	.num_ports =		1,
+	.bulk_in_size =		512,
 	.probe =		ftdi_sio_probe,
 	.port_probe =		ftdi_sio_port_probe,
 	.port_remove =		ftdi_sio_port_remove,
@@ -1552,18 +1550,6 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
 	if (quirk && quirk->port_probe)
 		quirk->port_probe(priv);
 
-	/* Increase the size of read buffers */
-	kfree(port->bulk_in_buffer);
-	port->bulk_in_buffer = kmalloc(BUFSZ, GFP_KERNEL);
-	if (!port->bulk_in_buffer) {
-		kfree(priv);
-		return -ENOMEM;
-	}
-	if (port->read_urb) {
-		port->read_urb->transfer_buffer = port->bulk_in_buffer;
-		port->read_urb->transfer_buffer_length = BUFSZ;
-	}
-
 	priv->port = port;
 
 	/* Free port's existing write urb and transfer buffer. */
diff --git a/drivers/usb/serial/ipaq.c b/drivers/usb/serial/ipaq.c
index 3fea9298eb15..87b11461bf11 100644
--- a/drivers/usb/serial/ipaq.c
+++ b/drivers/usb/serial/ipaq.c
@@ -571,6 +571,8 @@ static struct usb_serial_driver ipaq_device = {
 	.description =		"PocketPC PDA",
 	.usb_driver = 		&ipaq_driver,
 	.id_table =		ipaq_id_table,
+	.bulk_in_size =		URBDATA_SIZE,
+	.bulk_out_size =	URBDATA_SIZE,
 	.open =			ipaq_open,
 	.close =		ipaq_close,
 	.attach =		ipaq_startup,
@@ -628,32 +630,6 @@ static int ipaq_open(struct tty_struct *tty,
 		priv->free_len += PACKET_SIZE;
 	}
 
-	/*
-	 * Lose the small buffers usbserial provides. Make larger ones.
-	 */
-
-	kfree(port->bulk_in_buffer);
-	kfree(port->bulk_out_buffer);
-	/* make sure the generic serial code knows */
-	port->bulk_out_buffer = NULL;
-
-	port->bulk_in_buffer = kmalloc(URBDATA_SIZE, GFP_KERNEL);
-	if (port->bulk_in_buffer == NULL)
-		goto enomem;
-
-	port->bulk_out_buffer = kmalloc(URBDATA_SIZE, GFP_KERNEL);
-	if (port->bulk_out_buffer == NULL) {
-		/* the buffer is useless, free it */
-		kfree(port->bulk_in_buffer);
-		port->bulk_in_buffer = NULL;
-		goto enomem;
-	}
-	port->read_urb->transfer_buffer = port->bulk_in_buffer;
-	port->write_urb->transfer_buffer = port->bulk_out_buffer;
-	port->read_urb->transfer_buffer_length = URBDATA_SIZE;
-	port->bulk_out_size = port->write_urb->transfer_buffer_length
-							= URBDATA_SIZE;
-
 	msleep(1000*initial_wait);
 
 	/*
diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
index 43f13cf2f016..74551cb2e8ee 100644
--- a/drivers/usb/serial/iuu_phoenix.c
+++ b/drivers/usb/serial/iuu_phoenix.c
@@ -1044,34 +1044,6 @@ static int iuu_open(struct tty_struct *tty, struct usb_serial_port *port)
 	if (buf == NULL)
 		return -ENOMEM;
 
-	/* fixup the endpoint buffer size */
-	kfree(port->bulk_out_buffer);
-	port->bulk_out_buffer = kmalloc(512, GFP_KERNEL);
-	port->bulk_out_size = 512;
-	kfree(port->bulk_in_buffer);
-	port->bulk_in_buffer = kmalloc(512, GFP_KERNEL);
-	port->bulk_in_size = 512;
-
-	if (!port->bulk_out_buffer || !port->bulk_in_buffer) {
-		kfree(port->bulk_out_buffer);
-		kfree(port->bulk_in_buffer);
-		kfree(buf);
-		return -ENOMEM;
-	}
-
-	usb_fill_bulk_urb(port->write_urb, port->serial->dev,
-			  usb_sndbulkpipe(port->serial->dev,
-					  port->bulk_out_endpointAddress),
-			  port->bulk_out_buffer, 512,
-			  NULL, NULL);
-
-
-	usb_fill_bulk_urb(port->read_urb, port->serial->dev,
-			  usb_rcvbulkpipe(port->serial->dev,
-					  port->bulk_in_endpointAddress),
-			  port->bulk_in_buffer, 512,
-			  NULL, NULL);
-
 	priv->poll = 0;
 
 	/* initialize writebuf */
@@ -1277,6 +1249,8 @@ static struct usb_serial_driver iuu_device = {
 		   },
 	.id_table = id_table,
 	.num_ports = 1,
+	.bulk_in_size = 512,
+	.bulk_out_size = 512,
 	.port_probe = iuu_create_sysfs_attrs,
 	.port_remove = iuu_remove_sysfs_attrs,
 	.open = iuu_open,
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index f3f65171de38..538924627eba 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -901,7 +901,9 @@ int usb_serial_probe(struct usb_interface *interface,
 			dev_err(&interface->dev, "No free urbs available\n");
 			goto probe_error;
 		}
-		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
+		buffer_size = serial->type->bulk_in_size;
+		if (!buffer_size)
+			buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_in_size = buffer_size;
 		port->bulk_in_endpointAddress = endpoint->bEndpointAddress;
 		port->bulk_in_buffer = kmalloc(buffer_size, GFP_KERNEL);
@@ -927,7 +929,9 @@ int usb_serial_probe(struct usb_interface *interface,
 		}
 		if (kfifo_alloc(&port->write_fifo, PAGE_SIZE, GFP_KERNEL))
 			goto probe_error;
-		buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
+		buffer_size = serial->type->bulk_out_size;
+		if (!buffer_size)
+			buffer_size = le16_to_cpu(endpoint->wMaxPacketSize);
 		port->bulk_out_size = buffer_size;
 		port->bulk_out_endpointAddress = endpoint->bEndpointAddress;
 		port->bulk_out_buffer = kmalloc(buffer_size, GFP_KERNEL);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index b7682fed1d9c..ab311dab3383 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -179,6 +179,8 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data)
  * @id_table: pointer to a list of usb_device_id structures that define all
  *	of the devices this structure can support.
  * @num_ports: the number of different ports this device will have.
+ * @bulk_in_size: bytes to allocate for bulk-in buffer (0 = end-point size)
+ * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size)
  * @calc_num_ports: pointer to a function to determine how many ports this
  *	device has dynamically.  It will be called after the probe()
  *	callback is called, but before attach()
@@ -223,6 +225,9 @@ struct usb_serial_driver {
 	struct usb_dynids	dynids;
 	int			max_in_flight_urbs;
 
+	size_t			bulk_in_size;
+	size_t			bulk_out_size;
+
 	int (*probe)(struct usb_serial *serial, const struct usb_device_id *id);
 	int (*attach)(struct usb_serial *serial);
 	int (*calc_num_ports) (struct usb_serial *serial);
-- 
cgit v1.2.3


From 41bd72f9041def8e3334d3e3693862d078f5cb9a Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 17 Mar 2010 23:05:53 +0100
Subject: USB: serial: refactor read urb submission in generic driver

Use the already exported function for submitting the read urb associated
with a usb_serial_port.

Make sure it returns the result of usb_submit_urb and rename it to the
more descriptive usb_serial_generic_submit_read_urb.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c   | 32 ++++++++------------------------
 drivers/usb/serial/usb_debug.c |  2 +-
 include/linux/usb/serial.h     |  2 +-
 3 files changed, 10 insertions(+), 26 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index e1d245aa4a1d..d8dd3a59f56a 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -118,7 +118,6 @@ void usb_serial_generic_deregister(void)
 
 int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port)
 {
-	struct usb_serial *serial = port->serial;
 	int result = 0;
 	unsigned long flags;
 
@@ -131,23 +130,8 @@ int usb_serial_generic_open(struct tty_struct *tty, struct usb_serial_port *port
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	/* if we have a bulk endpoint, start reading from it */
-	if (port->bulk_in_size) {
-		/* Start reading from the device */
-		usb_fill_bulk_urb(port->read_urb, serial->dev,
-				   usb_rcvbulkpipe(serial->dev,
-						port->bulk_in_endpointAddress),
-				   port->read_urb->transfer_buffer,
-				   port->read_urb->transfer_buffer_length,
-				   ((serial->type->read_bulk_callback) ?
-				     serial->type->read_bulk_callback :
-				     usb_serial_generic_read_bulk_callback),
-				   port);
-		result = usb_submit_urb(port->read_urb, GFP_KERNEL);
-		if (result)
-			dev_err(&port->dev,
-			    "%s - failed resubmitting read urb, error %d\n",
-							__func__, result);
-	}
+	if (port->bulk_in_size)
+		result = usb_serial_generic_submit_read_urb(port, GFP_KERNEL);
 
 	return result;
 }
@@ -418,9 +402,8 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 	return chars;
 }
 
-
-void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
-			gfp_t mem_flags)
+int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
+					gfp_t mem_flags)
 {
 	struct urb *urb = port->read_urb;
 	struct usb_serial *serial = port->serial;
@@ -439,11 +422,12 @@ void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
 	result = usb_submit_urb(urb, mem_flags);
 	if (result && result != -EPERM) {
 		dev_err(&port->dev,
-			"%s - failed resubmitting read urb, error %d\n",
+			"%s - failed submitting read urb, error %d\n",
 							__func__, result);
 	}
+	return result;
 }
-EXPORT_SYMBOL_GPL(usb_serial_generic_resubmit_read_urb);
+EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urb);
 
 /* Push data to tty layer and resubmit the bulk read URB */
 static void flush_and_resubmit_read_urb(struct usb_serial_port *port)
@@ -471,7 +455,7 @@ static void flush_and_resubmit_read_urb(struct usb_serial_port *port)
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 done:
-	usb_serial_generic_resubmit_read_urb(port, GFP_ATOMIC);
+	usb_serial_generic_submit_read_urb(port, GFP_ATOMIC);
 }
 
 void usb_serial_generic_read_bulk_callback(struct urb *urb)
diff --git a/drivers/usb/serial/usb_debug.c b/drivers/usb/serial/usb_debug.c
index 28026b47344a..63c2734e764a 100644
--- a/drivers/usb/serial/usb_debug.c
+++ b/drivers/usb/serial/usb_debug.c
@@ -69,7 +69,7 @@ static void usb_debug_read_bulk_callback(struct urb *urb)
 	    memcmp(urb->transfer_buffer, USB_DEBUG_BRK,
 		   USB_DEBUG_BRK_SIZE) == 0) {
 		usb_serial_handle_break(port);
-		usb_serial_generic_resubmit_read_urb(port, GFP_ATOMIC);
+		usb_serial_generic_submit_read_urb(port, GFP_ATOMIC);
 		return;
 	}
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index ab311dab3383..53f6dc65e87e 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -321,7 +321,7 @@ extern void usb_serial_generic_disconnect(struct usb_serial *serial);
 extern void usb_serial_generic_release(struct usb_serial *serial);
 extern int usb_serial_generic_register(int debug);
 extern void usb_serial_generic_deregister(void);
-extern void usb_serial_generic_resubmit_read_urb(struct usb_serial_port *port,
+extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 						 gfp_t mem_flags);
 extern int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 					struct usb_serial_port *port,
-- 
cgit v1.2.3


From 231543206452f5c42bace54b5c13dfe5a0313812 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 17 Mar 2010 23:05:57 +0100
Subject: USB: serial: generalise generic read implementation

Add process_read_urb to usb_serial_driver so that a driver can rely on
the generic read (and throttle) mechanism but still do device specific
processing of incoming data (such as adding tty_flags before pushing to
line discipline).

The default generic implementation handles sysrq for consoles but
otherwise simply pushes to tty.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c    | 5 +++--
 drivers/usb/serial/usb-serial.c | 1 +
 include/linux/usb/serial.h      | 3 +++
 3 files changed, 7 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index e16c0b234cc9..176f1257b664 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -408,7 +408,7 @@ int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_submit_read_urb);
 
-static void usb_serial_generic_process_read_urb(struct urb *urb)
+void usb_serial_generic_process_read_urb(struct urb *urb)
 {
 	struct usb_serial_port *port = urb->context;
 	struct tty_struct *tty;
@@ -433,6 +433,7 @@ static void usb_serial_generic_process_read_urb(struct urb *urb)
 	tty_flip_buffer_push(tty);
 	tty_kref_put(tty);
 }
+EXPORT_SYMBOL_GPL(usb_serial_generic_process_read_urb);
 
 void usb_serial_generic_read_bulk_callback(struct urb *urb)
 {
@@ -451,7 +452,7 @@ void usb_serial_generic_read_bulk_callback(struct urb *urb)
 
 	usb_serial_debug_data(debug, &port->dev, __func__,
 						urb->actual_length, data);
-	usb_serial_generic_process_read_urb(urb);
+	port->serial->type->process_read_urb(urb);
 
 	/* Throttle the device if requested by tty */
 	spin_lock_irqsave(&port->lock, flags);
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 538924627eba..1b924425089f 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1298,6 +1298,7 @@ static void fixup_generic(struct usb_serial_driver *device)
 	set_to_generic_if_null(device, write_bulk_callback);
 	set_to_generic_if_null(device, disconnect);
 	set_to_generic_if_null(device, release);
+	set_to_generic_if_null(device, process_read_urb);
 }
 
 int usb_serial_register(struct usb_serial_driver *driver)
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 53f6dc65e87e..ff8872eba3ac 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -272,6 +272,8 @@ struct usb_serial_driver {
 	void (*write_int_callback)(struct urb *urb);
 	void (*read_bulk_callback)(struct urb *urb);
 	void (*write_bulk_callback)(struct urb *urb);
+	/* Called by the generic read bulk callback */
+	void (*process_read_urb)(struct urb *urb);
 };
 #define to_usb_serial_driver(d) \
 	container_of(d, struct usb_serial_driver, driver)
@@ -323,6 +325,7 @@ extern int usb_serial_generic_register(int debug);
 extern void usb_serial_generic_deregister(void);
 extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 						 gfp_t mem_flags);
+extern void usb_serial_generic_process_read_urb(struct urb *urb);
 extern int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 					struct usb_serial_port *port,
 					unsigned int ch);
-- 
cgit v1.2.3


From 25d514ca227e1ac81d0906a4ccf2aa171f50a600 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 17 Mar 2010 23:06:07 +0100
Subject: USB: serial: re-implement multi-urb writes in generic driver

Use dynamic transfer buffer sizes since it is more efficient to let the
host controller do the partitioning to fit endpoint size. This way we
also do not use more than one urb per write request.

Replace max_in_flight_urbs with multi_urb_write flag in struct
usb_serial_driver to enable multi-urb writes.

Use MAX_TX_URBS=40 and a max buffer size of PAGE_SIZE to prevent DoS
attacks.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c | 132 ++++++++++++++++++-------------------------
 include/linux/usb/serial.h   |   9 ++-
 2 files changed, 62 insertions(+), 79 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index be52c748bccb..ad4823bbfa19 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -25,6 +25,8 @@
 
 static int debug;
 
+#define MAX_TX_URBS		40
+
 #ifdef CONFIG_USB_SERIAL_GENERIC
 
 static int generic_probe(struct usb_interface *interface,
@@ -172,78 +174,63 @@ static int usb_serial_multi_urb_write(struct tty_struct *tty,
 	struct urb *urb;
 	unsigned char *buffer;
 	int status;
-	int towrite;
-	int bwrite = 0;
-
-	dbg("%s - port %d", __func__, port->number);
-
-	if (count == 0)
-		dbg("%s - write request of 0 bytes", __func__);
 
-	while (count > 0) {
-		towrite = (count > port->bulk_out_size) ?
-			port->bulk_out_size : count;
-		spin_lock_irqsave(&port->lock, flags);
-		if (port->urbs_in_flight >
-		    port->serial->type->max_in_flight_urbs) {
-			spin_unlock_irqrestore(&port->lock, flags);
-			dbg("%s - write limit hit", __func__);
-			return bwrite;
-		}
-		port->tx_bytes_flight += towrite;
-		port->urbs_in_flight++;
+	spin_lock_irqsave(&port->lock, flags);
+	if (port->tx_urbs == MAX_TX_URBS) {
 		spin_unlock_irqrestore(&port->lock, flags);
+		dbg("%s - write limit hit", __func__);
+		return 0;
+	}
+	port->tx_urbs++;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-		buffer = kmalloc(towrite, GFP_ATOMIC);
-		if (!buffer) {
-			dev_err(&port->dev,
-			"%s ran out of kernel memory for urb ...\n", __func__);
-			goto error_no_buffer;
-		}
+	urb = usb_alloc_urb(0, GFP_ATOMIC);
+	if (!urb) {
+		dev_err(&port->dev, "%s - no free urbs available\n", __func__);
+		status = -ENOMEM;
+		goto err_urb;
+	}
 
-		urb = usb_alloc_urb(0, GFP_ATOMIC);
-		if (!urb) {
-			dev_err(&port->dev, "%s - no more free urbs\n",
+	count = min_t(int, count, PAGE_SIZE);
+	buffer = kmalloc(count, GFP_ATOMIC);
+	if (!buffer) {
+		dev_err(&port->dev, "%s - could not allocate buffer\n",
 				__func__);
-			goto error_no_urb;
-		}
+		status = -ENOMEM;
+		goto err_buf;
+	}
 
-		/* Copy data */
-		memcpy(buffer, buf + bwrite, towrite);
-		usb_serial_debug_data(debug, &port->dev, __func__,
-				      towrite, buffer);
-		/* fill the buffer and send it */
-		usb_fill_bulk_urb(urb, port->serial->dev,
+	memcpy(buffer, buf, count);
+	usb_serial_debug_data(debug, &port->dev, __func__, count, buffer);
+	usb_fill_bulk_urb(urb, port->serial->dev,
 			usb_sndbulkpipe(port->serial->dev,
 					port->bulk_out_endpointAddress),
-			buffer, towrite,
+			buffer, count,
 			port->serial->type->write_bulk_callback, port);
 
-		status = usb_submit_urb(urb, GFP_ATOMIC);
-		if (status) {
-			dev_err(&port->dev, "%s - error submitting urb: %d\n",
+	status = usb_submit_urb(urb, GFP_ATOMIC);
+	if (status) {
+		dev_err(&port->dev, "%s - error submitting urb: %d\n",
 				__func__, status);
-			goto error;
-		}
-
-		/* This urb is the responsibility of the host driver now */
-		usb_free_urb(urb);
-		dbg("%s write: %d", __func__, towrite);
-		count -= towrite;
-		bwrite += towrite;
+		goto err;
 	}
-	return bwrite;
+	spin_lock_irqsave(&port->lock, flags);
+	port->tx_bytes += urb->transfer_buffer_length;
+	spin_unlock_irqrestore(&port->lock, flags);
 
-error:
 	usb_free_urb(urb);
-error_no_urb:
+
+	return count;
+err:
 	kfree(buffer);
-error_no_buffer:
+err_buf:
+	usb_free_urb(urb);
+err_urb:
 	spin_lock_irqsave(&port->lock, flags);
-	port->urbs_in_flight--;
-	port->tx_bytes_flight -= towrite;
+	port->tx_urbs--;
 	spin_unlock_irqrestore(&port->lock, flags);
-	return bwrite;
+
+	return status;
 }
 
 /**
@@ -286,7 +273,7 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	}
 
 	spin_lock_irqsave(&port->lock, flags);
-	port->tx_bytes_flight += count;
+	port->tx_bytes += count;
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	return count;
@@ -318,9 +305,8 @@ int usb_serial_generic_write(struct tty_struct *tty,
 	if (!count)
 		return 0;
 
-	if (serial->type->max_in_flight_urbs)
-		return usb_serial_multi_urb_write(tty, port,
-						  buf, count);
+	if (serial->type->multi_urb_write)
+		return usb_serial_multi_urb_write(tty, port, buf, count);
 
 	count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
@@ -337,7 +323,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 	struct usb_serial_port *port = tty->driver_data;
 	struct usb_serial *serial = port->serial;
 	unsigned long flags;
-	int room = 0;
+	int room;
 
 	dbg("%s - port %d", __func__, port->number);
 
@@ -345,14 +331,10 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 		return 0;
 
 	spin_lock_irqsave(&port->lock, flags);
-	if (serial->type->max_in_flight_urbs) {
-		if (port->urbs_in_flight < serial->type->max_in_flight_urbs)
-			room = port->bulk_out_size *
-				(serial->type->max_in_flight_urbs -
-				 port->urbs_in_flight);
-	} else {
+	if (serial->type->multi_urb_write)
+		room = (MAX_TX_URBS - port->tx_urbs) * PAGE_SIZE;
+	else
 		room = kfifo_avail(&port->write_fifo);
-	}
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
@@ -372,10 +354,10 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 		return 0;
 
 	spin_lock_irqsave(&port->lock, flags);
-	if (serial->type->max_in_flight_urbs)
-		chars = port->tx_bytes_flight;
+	if (serial->type->multi_urb_write)
+		chars = port->tx_bytes;
 	else
-		chars = kfifo_len(&port->write_fifo) + port->tx_bytes_flight;
+		chars = kfifo_len(&port->write_fifo) + port->tx_bytes;
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, chars);
@@ -461,18 +443,16 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (port->serial->type->max_in_flight_urbs) {
+	if (port->serial->type->multi_urb_write) {
 		kfree(urb->transfer_buffer);
 
 		spin_lock_irqsave(&port->lock, flags);
-		--port->urbs_in_flight;
-		port->tx_bytes_flight -= urb->transfer_buffer_length;
-		if (port->urbs_in_flight < 0)
-			port->urbs_in_flight = 0;
+		port->tx_bytes -= urb->transfer_buffer_length;
+		port->tx_urbs--;
 		spin_unlock_irqrestore(&port->lock, flags);
 	} else {
 		spin_lock_irqsave(&port->lock, flags);
-		port->tx_bytes_flight -= urb->transfer_buffer_length;
+		port->tx_bytes -= urb->transfer_buffer_length;
 		port->write_urb_busy = 0;
 		spin_unlock_irqrestore(&port->lock, flags);
 
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index ff8872eba3ac..2a3283761600 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -60,6 +60,8 @@ enum port_dev_state {
  * @write_urb: pointer to the bulk out struct urb for this port.
  * @write_fifo: kfifo used to buffer outgoing data
  * @write_urb_busy: port`s writing status
+ * @tx_bytes: number of bytes currently in host stack queues
+ * @tx_urbs: number of urbs currently in host stack queues
  * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this
  *	port.
  * @write_wait: a wait_queue_head_t used by the port.
@@ -98,8 +100,8 @@ struct usb_serial_port {
 	int			write_urb_busy;
 	__u8			bulk_out_endpointAddress;
 
-	int			tx_bytes_flight;
-	int			urbs_in_flight;
+	int			tx_bytes;
+	int			tx_urbs;
 
 	wait_queue_head_t	write_wait;
 	struct work_struct	work;
@@ -223,7 +225,8 @@ struct usb_serial_driver {
 	struct device_driver	driver;
 	struct usb_driver	*usb_driver;
 	struct usb_dynids	dynids;
-	int			max_in_flight_urbs;
+
+	unsigned char		multi_urb_write:1;
 
 	size_t			bulk_in_size;
 	size_t			bulk_out_size;
-- 
cgit v1.2.3


From eaa3bcb06aed1ac1d6d9e3edd3b5f72ea57a6ac0 Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 17 Mar 2010 23:06:08 +0100
Subject: USB: serial: generalise write buffer preparation

Generalise write buffer preparation.

This allows drivers to manipulate bulk-out data (e.g. add headers)
before it is sent.

This adds a new function pointer to usb_serial_driver:

int (*prepare_write_buffer)(struct usb_serial_port *port,
		void **dest, size_t size, const void *src, size_t count);

The function is generic and can be used with either kfifo-based or
multi-urb writes:

If *dest is NULL the implementation should allocate dest.
If src is NULL the implementation should use the port write fifo.

If not set, a generic implementation is used which simply uses memcpy or
kfifo_out.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c    | 47 +++++++++++++++++++++++++++++------------
 drivers/usb/serial/usb-serial.c |  1 +
 include/linux/usb/serial.h      |  5 +++++
 3 files changed, 40 insertions(+), 13 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index ad4823bbfa19..1a134f9c64f3 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -167,12 +167,35 @@ void usb_serial_generic_close(struct usb_serial_port *port)
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_close);
 
+int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
+		void **dest, size_t size, const void *src, size_t count)
+{
+	if (!*dest) {
+		size = count;
+		*dest = kmalloc(count, GFP_ATOMIC);
+		if (!*dest) {
+			dev_err(&port->dev, "%s - could not allocate buffer\n",
+					__func__);
+			return -ENOMEM;
+		}
+	}
+	if (src) {
+		count = size;
+		memcpy(*dest, src, size);
+	} else {
+		count = kfifo_out_locked(&port->write_fifo, *dest, size,
+								&port->lock);
+	}
+	return count;
+}
+EXPORT_SYMBOL_GPL(usb_serial_generic_prepare_write_buffer);
+
 static int usb_serial_multi_urb_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count)
 {
 	unsigned long flags;
 	struct urb *urb;
-	unsigned char *buffer;
+	void *buffer;
 	int status;
 
 	spin_lock_irqsave(&port->lock, flags);
@@ -191,16 +214,14 @@ static int usb_serial_multi_urb_write(struct tty_struct *tty,
 		goto err_urb;
 	}
 
+	buffer = NULL;
 	count = min_t(int, count, PAGE_SIZE);
-	buffer = kmalloc(count, GFP_ATOMIC);
-	if (!buffer) {
-		dev_err(&port->dev, "%s - could not allocate buffer\n",
-				__func__);
-		status = -ENOMEM;
+	count = port->serial->type->prepare_write_buffer(port, &buffer, 0,
+								buf, count);
+	if (count < 0) {
+		status = count;
 		goto err_buf;
 	}
-
-	memcpy(buffer, buf, count);
 	usb_serial_debug_data(debug, &port->dev, __func__, count, buffer);
 	usb_fill_bulk_urb(urb, port->serial->dev,
 			usb_sndbulkpipe(port->serial->dev,
@@ -242,7 +263,6 @@ err_urb:
  */
 static int usb_serial_generic_write_start(struct usb_serial_port *port)
 {
-	unsigned char *data;
 	int result;
 	int count;
 	unsigned long flags;
@@ -255,10 +275,11 @@ static int usb_serial_generic_write_start(struct usb_serial_port *port)
 	port->write_urb_busy = 1;
 	spin_unlock_irqrestore(&port->lock, flags);
 
-	data = port->write_urb->transfer_buffer;
-	count = kfifo_out_locked(&port->write_fifo, data, port->bulk_out_size, &port->lock);
-	usb_serial_debug_data(debug, &port->dev, __func__, count, data);
-
+	count = port->serial->type->prepare_write_buffer(port,
+					&port->write_urb->transfer_buffer,
+					port->bulk_out_size, NULL, 0);
+	usb_serial_debug_data(debug, &port->dev, __func__,
+				count, port->write_urb->transfer_buffer);
 	port->write_urb->transfer_buffer_length = count;
 
 	/* send the data out the bulk port */
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 1b924425089f..8249fd8381fb 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -1299,6 +1299,7 @@ static void fixup_generic(struct usb_serial_driver *device)
 	set_to_generic_if_null(device, disconnect);
 	set_to_generic_if_null(device, release);
 	set_to_generic_if_null(device, process_read_urb);
+	set_to_generic_if_null(device, prepare_write_buffer);
 }
 
 int usb_serial_register(struct usb_serial_driver *driver)
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 2a3283761600..a4c99ea390e7 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -277,6 +277,9 @@ struct usb_serial_driver {
 	void (*write_bulk_callback)(struct urb *urb);
 	/* Called by the generic read bulk callback */
 	void (*process_read_urb)(struct urb *urb);
+	/* Called by the generic write implementation */
+	int (*prepare_write_buffer)(struct usb_serial_port *port,
+		void **dest, size_t size, const void *src, size_t count);
 };
 #define to_usb_serial_driver(d) \
 	container_of(d, struct usb_serial_driver, driver)
@@ -329,6 +332,8 @@ extern void usb_serial_generic_deregister(void);
 extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 						 gfp_t mem_flags);
 extern void usb_serial_generic_process_read_urb(struct urb *urb);
+extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
+		void **dest, size_t size, const void *src, size_t count);
 extern int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 					struct usb_serial_port *port,
 					unsigned int ch);
-- 
cgit v1.2.3


From 71adf118946957839a13aa4d1094183e05c6c094 Mon Sep 17 00:00:00 2001
From: Fabien Chouteau <fabien.chouteau@barco.com>
Date: Thu, 8 Apr 2010 09:31:15 +0200
Subject: USB: gadget: add HID gadget driver

g_hid is a USB gadget driver implementing the Human Interface Device class
specification. The driver handles basic HID protocol handling in the
kernel, and allows userspace to read/write HID reports through /dev/hidgX
character devices.

Signed-off-by: Fabien Chouteau <fabien.chouteau@barco.com>
Signed-off-by: Peter Korsgaard <peter.korsgaard@barco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/gadget_hid.txt | 445 ++++++++++++++++++++++++++
 drivers/usb/gadget/Kconfig       |  11 +
 drivers/usb/gadget/Makefile      |   2 +
 drivers/usb/gadget/f_hid.c       | 673 +++++++++++++++++++++++++++++++++++++++
 drivers/usb/gadget/hid.c         | 288 +++++++++++++++++
 include/linux/usb/g_hid.h        |  32 ++
 6 files changed, 1451 insertions(+)
 create mode 100644 Documentation/usb/gadget_hid.txt
 create mode 100644 drivers/usb/gadget/f_hid.c
 create mode 100644 drivers/usb/gadget/hid.c
 create mode 100644 include/linux/usb/g_hid.h

(limited to 'include/linux')

diff --git a/Documentation/usb/gadget_hid.txt b/Documentation/usb/gadget_hid.txt
new file mode 100644
index 000000000000..f4a51f567427
--- /dev/null
+++ b/Documentation/usb/gadget_hid.txt
@@ -0,0 +1,445 @@
+
+		     Linux USB HID gadget driver
+
+Introduction
+
+	The HID Gadget driver provides emulation of USB Human Interface
+	Devices (HID). The basic HID handling is done in the kernel,
+	and HID reports can be sent/received through I/O on the
+	/dev/hidgX character devices.
+
+	For more details about HID, see the developer page on
+	http://www.usb.org/developers/hidpage/
+
+Configuration
+
+	g_hid is a platform driver, so to use it you need to add
+	struct platform_device(s) to your platform code defining the
+	HID function descriptors you want to use - E.G. something
+	like:
+
+#include <linux/platform_device.h>
+#include <linux/usb/g_hid.h>
+
+/* hid descriptor for a keyboard */
+static struct hidg_func_descriptor my_hid_data = {
+	.subclass		= 0, /* No subclass */
+	.protocol		= 1, /* Keyboard */
+	.report_length		= 8,
+	.report_desc_length	= 63,
+	.report_desc		= {
+		0x05, 0x01,	/* USAGE_PAGE (Generic Desktop)	          */
+		0x09, 0x06,	/* USAGE (Keyboard)                       */
+		0xa1, 0x01,	/* COLLECTION (Application)               */
+		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
+		0x19, 0xe0,	/*   USAGE_MINIMUM (Keyboard LeftControl) */
+		0x29, 0xe7,	/*   USAGE_MAXIMUM (Keyboard Right GUI)   */
+		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
+		0x25, 0x01,	/*   LOGICAL_MAXIMUM (1)                  */
+		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
+		0x95, 0x08,	/*   REPORT_COUNT (8)                     */
+		0x81, 0x02,	/*   INPUT (Data,Var,Abs)                 */
+		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
+		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
+		0x81, 0x03,	/*   INPUT (Cnst,Var,Abs)                 */
+		0x95, 0x05,	/*   REPORT_COUNT (5)                     */
+		0x75, 0x01,	/*   REPORT_SIZE (1)                      */
+		0x05, 0x08,	/*   USAGE_PAGE (LEDs)                    */
+		0x19, 0x01,	/*   USAGE_MINIMUM (Num Lock)             */
+		0x29, 0x05,	/*   USAGE_MAXIMUM (Kana)                 */
+		0x91, 0x02,	/*   OUTPUT (Data,Var,Abs)                */
+		0x95, 0x01,	/*   REPORT_COUNT (1)                     */
+		0x75, 0x03,	/*   REPORT_SIZE (3)                      */
+		0x91, 0x03,	/*   OUTPUT (Cnst,Var,Abs)                */
+		0x95, 0x06,	/*   REPORT_COUNT (6)                     */
+		0x75, 0x08,	/*   REPORT_SIZE (8)                      */
+		0x15, 0x00,	/*   LOGICAL_MINIMUM (0)                  */
+		0x25, 0x65,	/*   LOGICAL_MAXIMUM (101)                */
+		0x05, 0x07,	/*   USAGE_PAGE (Keyboard)                */
+		0x19, 0x00,	/*   USAGE_MINIMUM (Reserved)             */
+		0x29, 0x65,	/*   USAGE_MAXIMUM (Keyboard Application) */
+		0x81, 0x00,	/*   INPUT (Data,Ary,Abs)                 */
+		0xc0		/* END_COLLECTION                         */
+	}
+};
+
+static struct platform_device my_hid = {
+	.name			= "hidg",
+	.id			= 0,
+	.num_resources		= 0,
+	.resource		= 0,
+	.dev.platform_data	= &my_hid_data,
+};
+
+	You can add as many HID functions as you want, only limited by
+	the amount of interrupt endpoints your gadget driver supports.
+
+Send and receive HID reports
+
+	HID reports can be sent/received using read/write on the
+	/dev/hidgX character devices. See below for an example program
+	to do this.
+
+	hid_gadget_test is a small interactive program to test the HID
+ 	gadget driver. To use, point it at a hidg device and set the
+ 	device type (keyboard / mouse / joystick) - E.G.:
+
+		# hid_gadget_test /dev/hidg0 keyboard
+
+	You are now in the prompt of hid_gadget_test. You can type any
+	combination of options and values. Available options and
+	values are listed at program start. In keyboard mode you can
+	send up to six values.
+
+	For example type: g i s t r --left-shift
+
+	Hit return and the corresponding report will be sent by the
+	HID gadget.
+
+	Another interesting example is the caps lock test. Type
+	--caps-lock and hit return. A report is then sent by the
+	gadget and you should receive the host answer, corresponding
+	to the caps lock LED status.
+
+		--caps-lock
+		recv report:2
+
+	With this command:
+
+		# hid_gadget_test /dev/hidg1 mouse
+
+	You can test the mouse emulation. Values are two signed numbers.
+
+
+Sample code
+
+/* hid_gadget_test */
+
+#include <pthread.h>
+#include <string.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#define BUF_LEN 512
+
+struct options {
+	const char    *opt;
+	unsigned char val;
+};
+
+static struct options kmod[] = {
+	{.opt = "--left-ctrl",		.val = 0x01},
+	{.opt = "--right-ctrl",		.val = 0x10},
+	{.opt = "--left-shift",		.val = 0x02},
+	{.opt = "--right-shift",	.val = 0x20},
+	{.opt = "--left-alt",		.val = 0x04},
+	{.opt = "--right-alt",		.val = 0x40},
+	{.opt = "--left-meta",		.val = 0x08},
+	{.opt = "--right-meta",		.val = 0x80},
+	{.opt = NULL}
+};
+
+static struct options kval[] = {
+	{.opt = "--return",	.val = 0x28},
+	{.opt = "--esc",	.val = 0x29},
+	{.opt = "--bckspc",	.val = 0x2a},
+	{.opt = "--tab",	.val = 0x2b},
+	{.opt = "--spacebar",	.val = 0x2c},
+	{.opt = "--caps-lock",	.val = 0x39},
+	{.opt = "--f1",		.val = 0x3a},
+	{.opt = "--f2",		.val = 0x3b},
+	{.opt = "--f3",		.val = 0x3c},
+	{.opt = "--f4",		.val = 0x3d},
+	{.opt = "--f5",		.val = 0x3e},
+	{.opt = "--f6",		.val = 0x3f},
+	{.opt = "--f7",		.val = 0x40},
+	{.opt = "--f8",		.val = 0x41},
+	{.opt = "--f9",		.val = 0x42},
+	{.opt = "--f10",	.val = 0x43},
+	{.opt = "--f11",	.val = 0x44},
+	{.opt = "--f12",	.val = 0x45},
+	{.opt = "--insert",	.val = 0x49},
+	{.opt = "--home",	.val = 0x4a},
+	{.opt = "--pageup",	.val = 0x4b},
+	{.opt = "--del",	.val = 0x4c},
+	{.opt = "--end",	.val = 0x4d},
+	{.opt = "--pagedown",	.val = 0x4e},
+	{.opt = "--right",	.val = 0x4f},
+	{.opt = "--left",	.val = 0x50},
+	{.opt = "--down",	.val = 0x51},
+	{.opt = "--kp-enter",	.val = 0x58},
+	{.opt = "--up",		.val = 0x52},
+	{.opt = "--num-lock",	.val = 0x53},
+	{.opt = NULL}
+};
+
+int keyboard_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+	char *tok = strtok(buf, " ");
+	int key = 0;
+	int i = 0;
+
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		if (strcmp(tok, "--hold") == 0) {
+			*hold = 1;
+			continue;
+		}
+
+		if (key < 6) {
+			for (i = 0; kval[i].opt != NULL; i++)
+				if (strcmp(tok, kval[i].opt) == 0) {
+					report[2 + key++] = kval[i].val;
+					break;
+				}
+			if (kval[i].opt != NULL)
+				continue;
+		}
+
+		if (key < 6)
+			if (islower((unsigned char)tok[0])) { /* ctype needs uchar */
+				report[2 + key++] = (tok[0] - ('a' - 0x04));
+				continue;
+			}
+
+		for (i = 0; kmod[i].opt != NULL; i++)
+			if (strcmp(tok, kmod[i].opt) == 0) {
+				report[0] = report[0] | kmod[i].val;
+				break;
+			}
+		if (kmod[i].opt != NULL)
+			continue;
+
+		if (key < 6)
+			fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 8;
+}
+
+static struct options mmod[] = {
+	{.opt = "--b1", .val = 0x01},
+	{.opt = "--b2", .val = 0x02},
+	{.opt = "--b3", .val = 0x04},
+	{.opt = NULL}
+};
+
+int mouse_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+	char *tok = strtok(buf, " ");
+	int mvt = 0;
+	int i = 0;
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		if (strcmp(tok, "--hold") == 0) {
+			*hold = 1;
+			continue;
+		}
+
+		for (i = 0; mmod[i].opt != NULL; i++)
+			if (strcmp(tok, mmod[i].opt) == 0) {
+				report[0] = report[0] | mmod[i].val;
+				break;
+			}
+		if (mmod[i].opt != NULL)
+			continue;
+
+		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 2) {
+			errno = 0;
+			report[1 + mvt++] = (char)strtol(tok, NULL, 0);
+			if (errno != 0) {
+				fprintf(stderr, "Bad value:'%s'\n", tok);
+				report[1 + --mvt] = 0;	/* undo this slot */
+			}
+			continue;
+		}
+
+		fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 3;
+}
+
+static struct options jmod[] = {
+	{.opt = "--b1",		.val = 0x10},
+	{.opt = "--b2",		.val = 0x20},
+	{.opt = "--b3",		.val = 0x40},
+	{.opt = "--b4",		.val = 0x80},
+	{.opt = "--hat1",	.val = 0x00},
+	{.opt = "--hat2",	.val = 0x01},
+	{.opt = "--hat3",	.val = 0x02},
+	{.opt = "--hat4",	.val = 0x03},
+	{.opt = "--hatneutral",	.val = 0x04},
+	{.opt = NULL}
+};
+
+int joystick_fill_report(char report[8], char buf[BUF_LEN], int *hold)
+{
+	char *tok = strtok(buf, " ");
+	int mvt = 0;
+	int i = 0;
+
+	*hold = 1;
+
+	/* set default hat position: neutral */
+	report[3] = 0x04;
+
+	for (; tok != NULL; tok = strtok(NULL, " ")) {
+
+		if (strcmp(tok, "--quit") == 0)
+			return -1;
+
+		for (i = 0; jmod[i].opt != NULL; i++)
+			if (strcmp(tok, jmod[i].opt) == 0) {
+				report[3] = (report[3] & 0xF0) | jmod[i].val;
+				break;
+			}
+		if (jmod[i].opt != NULL)
+			continue;
+
+		if (!(tok[0] == '-' && tok[1] == '-') && mvt < 3) {
+			errno = 0;
+			report[mvt++] = (char)strtol(tok, NULL, 0);
+			if (errno != 0) {
+				fprintf(stderr, "Bad value:'%s'\n", tok);
+				report[--mvt] = 0;	/* undo this slot */
+			}
+			continue;
+		}
+
+		fprintf(stderr, "unknown option: %s\n", tok);
+	}
+	return 4;
+}
+
+void print_options(char c)
+{
+	int i = 0;
+
+	if (c == 'k') {
+		printf("	keyboard options:\n"
+		       "		--hold\n");
+		for (i = 0; kmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", kmod[i].opt);
+		printf("\n	keyboard values:\n"
+		       "		[a-z] or\n");
+		for (i = 0; kval[i].opt != NULL; i++)
+			printf("\t\t%-8s%s", kval[i].opt, i % 2 ? "\n" : "");
+		printf("\n");
+	} else if (c == 'm') {
+		printf("	mouse options:\n"
+		       "		--hold\n");
+		for (i = 0; mmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", mmod[i].opt);
+		printf("\n	mouse values:\n"
+		       "		Two signed numbers\n"
+		       "--quit to close\n");
+	} else {
+		printf("	joystick options:\n");
+		for (i = 0; jmod[i].opt != NULL; i++)
+			printf("\t\t%s\n", jmod[i].opt);
+		printf("\n	joystick values:\n"
+		       "		three signed numbers\n"
+		       "--quit to close\n");
+	}
+}
+
+int main(int argc, const char *argv[])
+{
+	const char *filename = NULL;
+	int fd = 0;
+	char buf[BUF_LEN];
+	int cmd_len;
+	char report[8];
+	int to_send = 8;
+	int hold = 0;
+	fd_set rfds;
+	int retval, i;
+
+	if (argc < 3) {
+		fprintf(stderr, "Usage: %s devname mouse|keyboard|joystick\n",
+			argv[0]);
+		return 1;
+	}
+
+	if (argv[2][0] != 'k' && argv[2][0] != 'm' && argv[2][0] != 'j')
+		return 2;
+
+	filename = argv[1];
+
+	if ((fd = open(filename, O_RDWR, 0666)) == -1) {
+		perror(filename);
+		return 3;
+	}
+
+	print_options(argv[2][0]);
+
+	while (42) {
+
+		FD_ZERO(&rfds);
+		FD_SET(STDIN_FILENO, &rfds);
+		FD_SET(fd, &rfds);
+
+		retval = select(fd + 1, &rfds, NULL, NULL, NULL);
+		if (retval == -1 && errno == EINTR)
+			continue;
+		if (retval < 0) {
+			perror("select()");
+			return 4;
+		}
+
+		if (FD_ISSET(fd, &rfds)) {
+			cmd_len = read(fd, buf, BUF_LEN - 1);
+			printf("recv report:");
+			for (i = 0; i < cmd_len; i++)
+				printf(" %02x", (unsigned char)buf[i]);
+			printf("\n");
+		}
+
+		if (FD_ISSET(STDIN_FILENO, &rfds)) {
+			memset(report, 0x0, sizeof(report));
+			cmd_len = read(STDIN_FILENO, buf, BUF_LEN - 1);
+
+			if (cmd_len <= 0)	/* EOF or read error */
+				break;
+
+			buf[cmd_len - 1] = '\0';
+			hold = 0;
+
+			memset(report, 0x0, sizeof(report));
+			if (argv[2][0] == 'k')
+				to_send = keyboard_fill_report(report, buf, &hold);
+			else if (argv[2][0] == 'm')
+				to_send = mouse_fill_report(report, buf, &hold);
+			else
+				to_send = joystick_fill_report(report, buf, &hold);
+
+			if (to_send == -1)
+				break;
+
+			if (write(fd, report, to_send) != to_send) {
+				perror(filename);
+				return 5;
+			}
+			if (!hold) {
+				memset(report, 0x0, sizeof(report));
+				if (write(fd, report, to_send) != to_send) {
+					perror(filename);
+					return 6;
+				}
+			}
+		}
+	}
+
+	close(fd);
+	return 0;
+}
diff --git a/drivers/usb/gadget/Kconfig b/drivers/usb/gadget/Kconfig
index 11a3e0fa4331..390c13372bdc 100644
--- a/drivers/usb/gadget/Kconfig
+++ b/drivers/usb/gadget/Kconfig
@@ -863,6 +863,17 @@ config USB_G_MULTI_CDC
 
 	  If unsure, say "y".
 
+config USB_G_HID
+	tristate "HID Gadget"
+	help
+	  The HID gadget driver provides generic emulation of USB
+	  Human Interface Devices (HID).
+
+	  For more information, see Documentation/usb/gadget_hid.txt which
+	  includes sample code for accessing the device files.
+
+	  Say "y" to link the driver statically, or "m" to build a
+	  dynamically linked module called "g_hid".
 
 # put drivers that need isochronous transfer support (for audio
 # or video class gadget drivers), or specific hardware, here.
diff --git a/drivers/usb/gadget/Makefile b/drivers/usb/gadget/Makefile
index 43b51da8d727..3075ff9cae62 100644
--- a/drivers/usb/gadget/Makefile
+++ b/drivers/usb/gadget/Makefile
@@ -43,6 +43,7 @@ g_mass_storage-objs		:= mass_storage.o
 g_printer-objs			:= printer.o
 g_cdc-objs			:= cdc2.o
 g_multi-objs			:= multi.o
+g_hid-objs			:= hid.o
 g_nokia-objs			:= nokia.o
 
 obj-$(CONFIG_USB_ZERO)		+= g_zero.o
@@ -55,6 +56,7 @@ obj-$(CONFIG_USB_G_SERIAL)	+= g_serial.o
 obj-$(CONFIG_USB_G_PRINTER)	+= g_printer.o
 obj-$(CONFIG_USB_MIDI_GADGET)	+= g_midi.o
 obj-$(CONFIG_USB_CDC_COMPOSITE) += g_cdc.o
+obj-$(CONFIG_USB_G_HID)		+= g_hid.o
 obj-$(CONFIG_USB_G_MULTI)	+= g_multi.o
 obj-$(CONFIG_USB_G_NOKIA)	+= g_nokia.o
 
diff --git a/drivers/usb/gadget/f_hid.c b/drivers/usb/gadget/f_hid.c
new file mode 100644
index 000000000000..1e00ff9866af
--- /dev/null
+++ b/drivers/usb/gadget/f_hid.c
@@ -0,0 +1,673 @@
+/*
+ * f_hid.c -- USB HID function driver
+ *
+ * Copyright (C) 2010 Fabien Chouteau <fabien.chouteau@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/utsname.h>
+#include <linux/module.h>
+#include <linux/hid.h>
+#include <linux/cdev.h>
+#include <linux/mutex.h>
+#include <linux/poll.h>
+#include <linux/smp_lock.h>
+#include <linux/uaccess.h>
+#include <linux/wait.h>
+#include <linux/usb/g_hid.h>
+
+static int major, minors;
+static struct class *hidg_class;
+
+/*-------------------------------------------------------------------------*/
+/*                            HID gadget struct                            */
+
+struct f_hidg {
+	/* configuration */
+	unsigned char			bInterfaceSubClass;
+	unsigned char			bInterfaceProtocol;
+	unsigned short			report_desc_length;
+	char				*report_desc;
+	unsigned short			report_length;
+
+	/* recv report */
+	char				*set_report_buff;
+	unsigned short			set_report_length;
+	spinlock_t			spinlock;
+	wait_queue_head_t		read_queue;
+
+	/* send report */
+	struct mutex			lock;
+	bool				write_pending;
+	wait_queue_head_t		write_queue;
+	struct usb_request		*req;
+
+	int				minor;
+	struct cdev			cdev;
+	struct usb_function		func;
+	struct usb_ep			*in_ep;
+	struct usb_endpoint_descriptor	*fs_in_ep_desc;
+	struct usb_endpoint_descriptor	*hs_in_ep_desc;
+};
+
+static inline struct f_hidg *func_to_hidg(struct usb_function *f)
+{
+	return container_of(f, struct f_hidg, func);
+}
+
+/*-------------------------------------------------------------------------*/
+/*                           Static descriptors                            */
+
+static struct usb_interface_descriptor hidg_interface_desc = {
+	.bLength		= sizeof hidg_interface_desc,
+	.bDescriptorType	= USB_DT_INTERFACE,
+	/* .bInterfaceNumber	= DYNAMIC */
+	.bAlternateSetting	= 0,
+	.bNumEndpoints		= 1,
+	.bInterfaceClass	= USB_CLASS_HID,
+	/* .bInterfaceSubClass	= DYNAMIC */
+	/* .bInterfaceProtocol	= DYNAMIC */
+	/* .iInterface		= DYNAMIC */
+};
+
+static struct hid_descriptor hidg_desc = {
+	.bLength			= sizeof hidg_desc,
+	.bDescriptorType		= HID_DT_HID,
+	.bcdHID				= 0x0101,
+	.bCountryCode			= 0x00,
+	.bNumDescriptors		= 0x1,
+	/*.desc[0].bDescriptorType	= DYNAMIC */
+	/*.desc[0].wDescriptorLength	= DYNAMIC */
+};
+
+/* High-Speed Support */
+
+static struct usb_endpoint_descriptor hidg_hs_in_ep_desc = {
+	.bLength		= USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType	= USB_DT_ENDPOINT,
+	.bEndpointAddress	= USB_DIR_IN,
+	.bmAttributes		= USB_ENDPOINT_XFER_INT,
+	/*.wMaxPacketSize	= DYNAMIC */
+	.bInterval		= 4, /* FIXME: Add this field in the
+				      * HID gadget configuration?
+				      * (struct hidg_func_descriptor)
+				      */
+};
+
+static struct usb_descriptor_header *hidg_hs_descriptors[] = {
+	(struct usb_descriptor_header *)&hidg_interface_desc,
+	(struct usb_descriptor_header *)&hidg_desc,
+	(struct usb_descriptor_header *)&hidg_hs_in_ep_desc,
+	NULL,
+};
+
+/* Full-Speed Support */
+
+static struct usb_endpoint_descriptor hidg_fs_in_ep_desc = {
+	.bLength		= USB_DT_ENDPOINT_SIZE,
+	.bDescriptorType	= USB_DT_ENDPOINT,
+	.bEndpointAddress	= USB_DIR_IN,
+	.bmAttributes		= USB_ENDPOINT_XFER_INT,
+	/*.wMaxPacketSize	= DYNAMIC */
+	.bInterval		= 10, /* FIXME: Add this field in the
+				       * HID gadget configuration?
+				       * (struct hidg_func_descriptor)
+				       */
+};
+
+static struct usb_descriptor_header *hidg_fs_descriptors[] = {
+	(struct usb_descriptor_header *)&hidg_interface_desc,
+	(struct usb_descriptor_header *)&hidg_desc,
+	(struct usb_descriptor_header *)&hidg_fs_in_ep_desc,
+	NULL,
+};
+
+/*-------------------------------------------------------------------------*/
+/*                              Char Device                                */
+
+static ssize_t f_hidg_read(struct file *file, char __user *buffer,
+			size_t count, loff_t *ptr)
+{
+	struct f_hidg	*hidg     = (struct f_hidg *)file->private_data;
+	char		*tmp_buff = NULL;
+	unsigned long	flags;
+
+	if (!count)
+		return 0;
+
+	if (!access_ok(VERIFY_WRITE, buffer, count))
+		return -EFAULT;
+
+	spin_lock_irqsave(&hidg->spinlock, flags);
+
+#define READ_COND (hidg->set_report_buff != NULL)
+
+	while (!READ_COND) {
+		spin_unlock_irqrestore(&hidg->spinlock, flags);
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		if (wait_event_interruptible(hidg->read_queue, READ_COND))
+			return -ERESTARTSYS;
+
+		spin_lock_irqsave(&hidg->spinlock, flags);
+	}
+
+
+	count = min_t(unsigned, count, hidg->set_report_length);
+	tmp_buff = hidg->set_report_buff;
+	hidg->set_report_buff = NULL;
+
+	spin_unlock_irqrestore(&hidg->spinlock, flags);
+
+	if (tmp_buff != NULL) {
+		/* copy to user outside spinlock */
+		count -= copy_to_user(buffer, tmp_buff, count);
+		kfree(tmp_buff);
+	} else
+		count = -ENOMEM;
+
+	return count;
+}
+
+static void f_hidg_req_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct f_hidg *hidg = (struct f_hidg *)ep->driver_data;
+
+	if (req->status != 0) {
+		ERROR(hidg->func.config->cdev,
+			"End Point Request ERROR: %d\n", req->status);
+	}
+
+	hidg->write_pending = 0;
+	wake_up(&hidg->write_queue);
+}
+
+static ssize_t f_hidg_write(struct file *file, const char __user *buffer,
+			    size_t count, loff_t *offp)
+{
+	struct f_hidg *hidg  = (struct f_hidg *)file->private_data;
+	ssize_t status = -ENOMEM;
+
+	if (!access_ok(VERIFY_READ, buffer, count))
+		return -EFAULT;
+
+	mutex_lock(&hidg->lock);
+
+#define WRITE_COND (!hidg->write_pending)
+
+	/* write queue */
+	while (!WRITE_COND) {
+		mutex_unlock(&hidg->lock);
+		if (file->f_flags & O_NONBLOCK)
+			return -EAGAIN;
+
+		if (wait_event_interruptible_exclusive(
+				hidg->write_queue, WRITE_COND))
+			return -ERESTARTSYS;
+
+		mutex_lock(&hidg->lock);
+	}
+
+	count  = min_t(unsigned, count, hidg->report_length);
+	status = copy_from_user(hidg->req->buf, buffer, count);
+
+	if (status != 0) {
+		ERROR(hidg->func.config->cdev,
+			"copy_from_user error\n");
+		mutex_unlock(&hidg->lock);
+		return -EINVAL;
+	}
+
+	hidg->req->status   = 0;
+	hidg->req->zero     = 0;
+	hidg->req->length   = count;
+	hidg->req->complete = f_hidg_req_complete;
+	hidg->req->context  = hidg;
+	hidg->write_pending = 1;
+
+	status = usb_ep_queue(hidg->in_ep, hidg->req, GFP_ATOMIC);
+	if (status < 0) {
+		ERROR(hidg->func.config->cdev,
+			"usb_ep_queue error on int endpoint %zd\n", status);
+		hidg->write_pending = 0;
+		wake_up(&hidg->write_queue);
+	} else {
+		status = count;
+	}
+
+	mutex_unlock(&hidg->lock);
+
+	return status;
+}
+
+static unsigned int f_hidg_poll(struct file *file, poll_table *wait)
+{
+	struct f_hidg	*hidg  = (struct f_hidg *)file->private_data;
+	unsigned int	ret = 0;
+
+	poll_wait(file, &hidg->read_queue, wait);
+	poll_wait(file, &hidg->write_queue, wait);
+
+	if (WRITE_COND)
+		ret |= POLLOUT | POLLWRNORM;
+
+	if (READ_COND)
+		ret |= POLLIN | POLLRDNORM;
+
+	return ret;
+}
+
+#undef WRITE_COND
+#undef READ_COND
+
+static int f_hidg_release(struct inode *inode, struct file *fd)
+{
+	fd->private_data = NULL;
+	return 0;
+}
+
+static int f_hidg_open(struct inode *inode, struct file *fd)
+{
+	struct f_hidg *hidg =
+		container_of(inode->i_cdev, struct f_hidg, cdev);
+
+	fd->private_data = hidg;
+
+	return 0;
+}
+
+/*-------------------------------------------------------------------------*/
+/*                                usb_function                             */
+
+static void hidg_set_report_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct f_hidg *hidg = (struct f_hidg *)req->context;
+	char *new_buff;
+
+	if (req->status != 0 || req->buf == NULL || req->actual == 0) {
+		ERROR(hidg->func.config->cdev, "%s FAILED\n", __func__);
+		return;
+	}
+
+	spin_lock(&hidg->spinlock);
+
+	/* krealloc() leaves the old buffer intact on failure: keep it */
+	new_buff = krealloc(hidg->set_report_buff, req->actual, GFP_ATOMIC);
+	if (new_buff == NULL) {
+		spin_unlock(&hidg->spinlock);
+		return;
+	}
+
+	hidg->set_report_buff = new_buff;
+	hidg->set_report_length = req->actual;
+	memcpy(hidg->set_report_buff, req->buf, req->actual);
+
+	spin_unlock(&hidg->spinlock);
+
+	wake_up(&hidg->read_queue);
+}
+
+static int hidg_setup(struct usb_function *f,
+		const struct usb_ctrlrequest *ctrl)
+{
+	struct f_hidg			*hidg = func_to_hidg(f);
+	struct usb_composite_dev	*cdev = f->config->cdev;
+	struct usb_request		*req  = cdev->req;
+	int status = 0;
+	__u16 value, length;
+
+	value	= __le16_to_cpu(ctrl->wValue);
+	length	= __le16_to_cpu(ctrl->wLength);
+
+	VDBG(cdev, "hid_setup crtl_request : bRequestType:0x%x bRequest:0x%x "
+		"Value:0x%x\n", ctrl->bRequestType, ctrl->bRequest, value);
+
+	switch ((ctrl->bRequestType << 8) | ctrl->bRequest) {
+	case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_GET_REPORT):
+		VDBG(cdev, "get_report\n");
+
+		/* send an empty report */
+		length = min_t(unsigned, length, hidg->report_length);
+		memset(req->buf, 0x0, length);
+
+		goto respond;
+		break;
+
+	case ((USB_DIR_IN | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_GET_PROTOCOL):
+		VDBG(cdev, "get_protocol\n");
+		goto stall;
+		break;
+
+	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_SET_REPORT):
+		VDBG(cdev, "set_report | wLenght=%d\n", ctrl->wLength);
+		req->context  = hidg;
+		req->complete = hidg_set_report_complete;
+		goto respond;
+		break;
+
+	case ((USB_DIR_OUT | USB_TYPE_CLASS | USB_RECIP_INTERFACE) << 8
+		  | HID_REQ_SET_PROTOCOL):
+		VDBG(cdev, "set_protocol\n");
+		goto stall;
+		break;
+
+	case ((USB_DIR_IN | USB_TYPE_STANDARD | USB_RECIP_INTERFACE) << 8
+		  | USB_REQ_GET_DESCRIPTOR):
+		switch (value >> 8) {
+		case HID_DT_REPORT:
+			VDBG(cdev, "USB_REQ_GET_DESCRIPTOR: REPORT\n");
+			length = min_t(unsigned short, length,
+						   hidg->report_desc_length);
+			memcpy(req->buf, hidg->report_desc, length);
+			goto respond;
+			break;
+
+		default:
+			VDBG(cdev, "Unknown decriptor request 0x%x\n",
+				 value >> 8);
+			goto stall;
+			break;
+		}
+		break;
+
+	default:
+		VDBG(cdev, "Unknown request 0x%x\n",
+			 ctrl->bRequest);
+		goto stall;
+		break;
+	}
+
+stall:
+	return -EOPNOTSUPP;
+
+respond:
+	req->zero = 0;
+	req->length = length;
+	status = usb_ep_queue(cdev->gadget->ep0, req, GFP_ATOMIC);
+	if (status < 0)
+		ERROR(cdev, "usb_ep_queue error on ep0 %d\n", status);
+	return status;
+}
+
+static void hidg_disable(struct usb_function *f)
+{
+	struct f_hidg *hidg = func_to_hidg(f);
+
+	usb_ep_disable(hidg->in_ep);
+	hidg->in_ep->driver_data = NULL;
+
+	return;
+}
+
+static int hidg_set_alt(struct usb_function *f, unsigned intf, unsigned alt)
+{
+	struct usb_composite_dev		*cdev = f->config->cdev;
+	struct f_hidg				*hidg = func_to_hidg(f);
+	const struct usb_endpoint_descriptor	*ep_desc;
+	int status = 0;
+
+	VDBG(cdev, "hidg_set_alt intf:%d alt:%d\n", intf, alt);
+
+	if (hidg->in_ep != NULL) {
+		/* restart endpoint */
+		if (hidg->in_ep->driver_data != NULL)
+			usb_ep_disable(hidg->in_ep);
+
+		ep_desc = ep_choose(f->config->cdev->gadget,
+				hidg->hs_in_ep_desc, hidg->fs_in_ep_desc);
+		status = usb_ep_enable(hidg->in_ep, ep_desc);
+		if (status < 0) {
+			ERROR(cdev, "Enable endpoint FAILED!\n");
+			goto fail;
+		}
+		hidg->in_ep->driver_data = hidg;
+	}
+fail:
+	return status;
+}
+
+const struct file_operations f_hidg_fops = {
+	.owner		= THIS_MODULE,
+	.open		= f_hidg_open,
+	.release	= f_hidg_release,
+	.write		= f_hidg_write,
+	.read		= f_hidg_read,
+	.poll		= f_hidg_poll,
+};
+
+static int __init hidg_bind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct usb_ep		*ep;
+	struct f_hidg		*hidg = func_to_hidg(f);
+	int			status;
+	dev_t			dev;
+
+	/* allocate instance-specific interface IDs, and patch descriptors */
+	status = usb_interface_id(c, f);
+	if (status < 0)
+		goto fail;
+	hidg_interface_desc.bInterfaceNumber = status;
+
+
+	/* allocate instance-specific endpoints */
+	status = -ENODEV;
+	ep = usb_ep_autoconfig(c->cdev->gadget, &hidg_fs_in_ep_desc);
+	if (!ep)
+		goto fail;
+	ep->driver_data = c->cdev;	/* claim */
+	hidg->in_ep = ep;
+
+	/* preallocate request and buffer */
+	status = -ENOMEM;
+	hidg->req = usb_ep_alloc_request(hidg->in_ep, GFP_KERNEL);
+	if (!hidg->req)
+		goto fail;
+
+
+	hidg->req->buf = kmalloc(hidg->report_length, GFP_KERNEL);
+	if (!hidg->req->buf)
+		goto fail;
+
+	/* set descriptor dynamic values */
+	hidg_interface_desc.bInterfaceSubClass = hidg->bInterfaceSubClass;
+	hidg_interface_desc.bInterfaceProtocol = hidg->bInterfaceProtocol;
+	hidg_hs_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
+	hidg_fs_in_ep_desc.wMaxPacketSize = cpu_to_le16(hidg->report_length);
+	hidg_desc.desc[0].bDescriptorType = HID_DT_REPORT;
+	hidg_desc.desc[0].wDescriptorLength =
+		cpu_to_le16(hidg->report_desc_length);
+
+	hidg->set_report_buff = NULL;
+
+	/* copy descriptors */
+	f->descriptors = usb_copy_descriptors(hidg_fs_descriptors);
+	if (!f->descriptors)
+		goto fail;
+
+	hidg->fs_in_ep_desc = usb_find_endpoint(hidg_fs_descriptors,
+						f->descriptors,
+						&hidg_fs_in_ep_desc);
+
+	if (gadget_is_dualspeed(c->cdev->gadget)) {
+		hidg_hs_in_ep_desc.bEndpointAddress =
+			hidg_fs_in_ep_desc.bEndpointAddress;
+		f->hs_descriptors = usb_copy_descriptors(hidg_hs_descriptors);
+		if (!f->hs_descriptors)
+			goto fail;
+		hidg->hs_in_ep_desc = usb_find_endpoint(hidg_hs_descriptors,
+							f->hs_descriptors,
+							&hidg_hs_in_ep_desc);
+	} else {
+		hidg->hs_in_ep_desc = NULL;
+	}
+
+	mutex_init(&hidg->lock);
+	spin_lock_init(&hidg->spinlock);
+	init_waitqueue_head(&hidg->write_queue);
+	init_waitqueue_head(&hidg->read_queue);
+
+	/* create char device */
+	cdev_init(&hidg->cdev, &f_hidg_fops);
+	dev = MKDEV(major, hidg->minor);
+	status = cdev_add(&hidg->cdev, dev, 1);
+	if (status)
+		goto fail;
+
+	device_create(hidg_class, NULL, dev, NULL, "%s%d", "hidg", hidg->minor);
+
+	return 0;
+
+fail:
+	ERROR(f->config->cdev, "hidg_bind FAILED\n");
+	if (hidg->req != NULL) {
+		kfree(hidg->req->buf);
+		if (hidg->in_ep != NULL)
+			usb_ep_free_request(hidg->in_ep, hidg->req);
+	}
+
+	usb_free_descriptors(f->hs_descriptors);
+	usb_free_descriptors(f->descriptors);
+
+	return status;
+}
+
+static void hidg_unbind(struct usb_configuration *c, struct usb_function *f)
+{
+	struct f_hidg *hidg = func_to_hidg(f);
+
+	device_destroy(hidg_class, MKDEV(major, hidg->minor));
+	cdev_del(&hidg->cdev);
+
+	/* disable/free request and end point */
+	usb_ep_disable(hidg->in_ep);
+	usb_ep_dequeue(hidg->in_ep, hidg->req);
+	kfree(hidg->req->buf);
+	usb_ep_free_request(hidg->in_ep, hidg->req);
+
+	/* free descriptors copies */
+	usb_free_descriptors(f->hs_descriptors);
+	usb_free_descriptors(f->descriptors);
+
+	kfree(hidg->report_desc);
+	kfree(hidg->set_report_buff);
+	kfree(hidg);
+}
+
+/*-------------------------------------------------------------------------*/
+/*                                 Strings                                 */
+
+#define CT_FUNC_HID_IDX	0
+
+static struct usb_string ct_func_string_defs[] = {
+	[CT_FUNC_HID_IDX].s	= "HID Interface",
+	{},			/* end of list */
+};
+
+static struct usb_gadget_strings ct_func_string_table = {
+	.language	= 0x0409,	/* en-US */
+	.strings	= ct_func_string_defs,
+};
+
+static struct usb_gadget_strings *ct_func_strings[] = {
+	&ct_func_string_table,
+	NULL,
+};
+
+/*-------------------------------------------------------------------------*/
+/*                             usb_configuration                           */
+
+int __init hidg_bind_config(struct usb_configuration *c,
+			    struct hidg_func_descriptor *fdesc, int index)
+{
+	struct f_hidg *hidg;
+	int status;
+
+	if (index >= minors)
+		return -ENOENT;
+
+	/* maybe allocate device-global string IDs, and patch descriptors */
+	if (ct_func_string_defs[CT_FUNC_HID_IDX].id == 0) {
+		status = usb_string_id(c->cdev);
+		if (status < 0)
+			return status;
+		ct_func_string_defs[CT_FUNC_HID_IDX].id = status;
+		hidg_interface_desc.iInterface = status;
+	}
+
+	/* allocate and initialize one new instance */
+	hidg = kzalloc(sizeof *hidg, GFP_KERNEL);
+	if (!hidg)
+		return -ENOMEM;
+
+	hidg->minor = index;
+	hidg->bInterfaceSubClass = fdesc->subclass;
+	hidg->bInterfaceProtocol = fdesc->protocol;
+	hidg->report_length = fdesc->report_length;
+	hidg->report_desc_length = fdesc->report_desc_length;
+	hidg->report_desc = kmemdup(fdesc->report_desc,
+				    fdesc->report_desc_length,
+				    GFP_KERNEL);
+	if (!hidg->report_desc) {
+		kfree(hidg);
+		return -ENOMEM;
+	}
+
+	hidg->func.name    = "hid";
+	hidg->func.strings = ct_func_strings;
+	hidg->func.bind    = hidg_bind;
+	hidg->func.unbind  = hidg_unbind;
+	hidg->func.set_alt = hidg_set_alt;
+	hidg->func.disable = hidg_disable;
+	hidg->func.setup   = hidg_setup;
+	status = usb_add_function(c, &hidg->func);
+	if (status) {
+		kfree(hidg->report_desc); /* else freed only by hidg_unbind */
+		kfree(hidg);
+	}
+	return status;
+}
+
+int __init ghid_setup(struct usb_gadget *g, int count)
+{
+	int status;
+	dev_t dev;
+
+	hidg_class = class_create(THIS_MODULE, "hidg");
+
+	status = alloc_chrdev_region(&dev, 0, count, "hidg");
+	if (!status) {
+		major = MAJOR(dev);
+		minors = count;
+	}
+
+	return status;
+}
+
+void ghid_cleanup(void)
+{
+	if (major) {
+		unregister_chrdev_region(MKDEV(major, 0), minors);
+		major = minors = 0;
+	}
+
+	class_destroy(hidg_class);
+	hidg_class = NULL;
+}
diff --git a/drivers/usb/gadget/hid.c b/drivers/usb/gadget/hid.c
new file mode 100644
index 000000000000..b77e97754b4a
--- /dev/null
+++ b/drivers/usb/gadget/hid.c
@@ -0,0 +1,330 @@
+/*
+ * hid.c -- HID Composite driver
+ *
+ * Based on multi.c
+ *
+ * Copyright (C) 2010 Fabien Chouteau <fabien.chouteau@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+#include <linux/kernel.h>
+#include <linux/platform_device.h>
+#include <linux/list.h>
+
+#define DRIVER_DESC		"HID Gadget"
+#define DRIVER_VERSION		"2010/03/16"
+
+/*-------------------------------------------------------------------------*/
+
+#define HIDG_VENDOR_NUM		0x0525	/* XXX NetChip */
+#define HIDG_PRODUCT_NUM	0xa4ac	/* Linux-USB HID gadget */
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * kbuild is not very cooperative with respect to linking separately
+ * compiled library objects into one module.  So for now we won't use
+ * separate compilation ... ensuring init/exit sections work to shrink
+ * the runtime footprint, and giving us at least some parts of what
+ * a "gcc --combine ... part1.c part2.c part3.c ... " build would.
+ */
+
+#include "composite.c"
+#include "usbstring.c"
+#include "config.c"
+#include "epautoconf.c"
+
+#include "f_hid.c"
+
+
+/* One HID function handed over by a "hidg" platform device at probe time */
+struct hidg_func_node {
+	struct list_head node;
+	struct hidg_func_descriptor *func;
+};
+
+/* Filled by the platform probe; walked by hid_bind() and do_config() */
+static LIST_HEAD(hidg_func_list);
+
+/*-------------------------------------------------------------------------*/
+
+static struct usb_device_descriptor device_desc = {
+	.bLength =		sizeof device_desc,
+	.bDescriptorType =	USB_DT_DEVICE,
+
+	.bcdUSB =		cpu_to_le16(0x0200),
+
+	/* .bDeviceClass =		USB_CLASS_COMM, */
+	/* .bDeviceSubClass =	0, */
+	/* .bDeviceProtocol =	0, */
+	.bDeviceClass =		0xEF,
+	.bDeviceSubClass =	2,
+	.bDeviceProtocol =	1,
+	/* .bMaxPacketSize0 = f(hardware) */
+
+	/* Vendor and product id can be overridden by module parameters.  */
+	.idVendor =		cpu_to_le16(HIDG_VENDOR_NUM),
+	.idProduct =		cpu_to_le16(HIDG_PRODUCT_NUM),
+	/* .bcdDevice = f(hardware) */
+	/* .iManufacturer = DYNAMIC */
+	/* .iProduct = DYNAMIC */
+	/* NO SERIAL NUMBER */
+	.bNumConfigurations =	1,
+};
+
+static struct usb_otg_descriptor otg_descriptor = {
+	.bLength =		sizeof otg_descriptor,
+	.bDescriptorType =	USB_DT_OTG,
+
+	/* REVISIT SRP-only hardware is possible, although
+	 * it would not be called "OTG" ...
+	 */
+	.bmAttributes =		USB_OTG_SRP | USB_OTG_HNP,
+};
+
+static const struct usb_descriptor_header *otg_desc[] = {
+	(struct usb_descriptor_header *) &otg_descriptor,
+	NULL,
+};
+
+
+/* string IDs are assigned dynamically */
+
+#define STRING_MANUFACTURER_IDX		0
+#define STRING_PRODUCT_IDX		1
+
+static char manufacturer[50];
+
+static struct usb_string strings_dev[] = {
+	[STRING_MANUFACTURER_IDX].s = manufacturer,
+	[STRING_PRODUCT_IDX].s = DRIVER_DESC,
+	{  } /* end of list */
+};
+
+static struct usb_gadget_strings stringtab_dev = {
+	.language	= 0x0409,	/* en-us */
+	.strings	= strings_dev,
+};
+
+static struct usb_gadget_strings *dev_strings[] = {
+	&stringtab_dev,
+	NULL,
+};
+
+
+
+/****************************** Configurations ******************************/
+
+/*
+ * Configuration bind callback: attach the OTG descriptor when the gadget
+ * is OTG-capable, then add one HID function per hidg_func_list entry,
+ * passing 0, 1, 2, ... as the index.  Stops at the first
+ * hidg_bind_config() failure and returns its status (0 on success).
+ */
+static int __init do_config(struct usb_configuration *c)
+{
+	struct hidg_func_node *e;
+	int func = 0, status = 0;
+
+	if (gadget_is_otg(c->cdev->gadget)) {
+		c->descriptors = otg_desc;
+		c->bmAttributes |= USB_CONFIG_ATT_WAKEUP;
+	}
+
+	list_for_each_entry(e, &hidg_func_list, node) {
+		status = hidg_bind_config(c, e->func, func++);
+		if (status)
+			break;
+	}
+
+	return status;
+}
+
+static struct usb_configuration config_driver = {
+	.label			= "HID Gadget",
+	.bind			= do_config,
+	.bConfigurationValue	= 1,
+	/* .iConfiguration = DYNAMIC */
+	.bmAttributes		= USB_CONFIG_ATT_SELFPOWER,
+};
+
+/****************************** Gadget Bind ******************************/
+
+/*
+ * Composite bind callback: count the registered HID functions, hand that
+ * count to ghid_setup(), fill in the device descriptor (bcdDevice plus the
+ * manufacturer and product string IDs) and register the configuration.
+ * Returns -ENODEV when hidg_func_list is empty, otherwise 0 or the first
+ * negative status reported by a helper.
+ */
+static int __init hid_bind(struct usb_composite_dev *cdev)
+{
+	struct usb_gadget *gadget = cdev->gadget;
+	struct list_head *tmp;
+	int status, gcnum, funcs = 0;
+
+	list_for_each(tmp, &hidg_func_list)
+		funcs++;
+
+	if (!funcs)
+		return -ENODEV;
+
+	/* set up HID */
+	status = ghid_setup(cdev->gadget, funcs);
+	if (status < 0)
+		return status;
+
+	gcnum = usb_gadget_controller_number(gadget);
+	if (gcnum >= 0)
+		device_desc.bcdDevice = cpu_to_le16(0x0300 | gcnum);
+	else
+		device_desc.bcdDevice = cpu_to_le16(0x0300 | 0x0099);
+
+
+	/* Allocate string descriptor numbers ... note that string
+	 * contents can be overridden by the composite_dev glue.
+	 */
+
+	/* device descriptor strings: manufacturer, product */
+	snprintf(manufacturer, sizeof manufacturer, "%s %s with %s",
+		init_utsname()->sysname, init_utsname()->release,
+		gadget->name);
+	status = usb_string_id(cdev);
+	if (status < 0)
+		return status;
+	strings_dev[STRING_MANUFACTURER_IDX].id = status;
+	device_desc.iManufacturer = status;
+
+	status = usb_string_id(cdev);
+	if (status < 0)
+		return status;
+	strings_dev[STRING_PRODUCT_IDX].id = status;
+	device_desc.iProduct = status;
+
+	/* register our configuration */
+	status = usb_add_config(cdev, &config_driver);
+	if (status < 0)
+		return status;
+
+	dev_info(&gadget->dev, DRIVER_DESC ", version: " DRIVER_VERSION "\n");
+
+	return 0;
+}
+
+/* Composite unbind callback: release what ghid_setup() acquired */
+static int __exit hid_unbind(struct usb_composite_dev *cdev)
+{
+	ghid_cleanup();
+	return 0;
+}
+
+/*
+ * Platform probe: queue the device's platform data (a struct
+ * hidg_func_descriptor) on hidg_func_list for hid_bind()/do_config().
+ * Only the pointer is stored; the descriptor is not copied here.
+ */
+static int __init hidg_plat_driver_probe(struct platform_device *pdev)
+{
+	struct hidg_func_descriptor *func = pdev->dev.platform_data;
+	struct hidg_func_node *entry;
+
+	if (!func) {
+		dev_err(&pdev->dev, "Platform data missing\n");
+		return -ENODEV;
+	}
+
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->func = func;
+	list_add_tail(&entry->node, &hidg_func_list);
+
+	return 0;
+}
+
+/*
+ * Platform remove: unlinks and frees every node on hidg_func_list, not
+ * only the one that was added for @pdev.
+ */
+static int __devexit hidg_plat_driver_remove(struct platform_device *pdev)
+{
+	struct hidg_func_node *e, *n;
+
+	list_for_each_entry_safe(e, n, &hidg_func_list, node) {
+		list_del(&e->node);
+		kfree(e);
+	}
+
+	return 0;
+}
+
+
+/****************************** Some noise ******************************/
+
+
+static struct usb_composite_driver hidg_driver = {
+	.name		= "g_hid",
+	.dev		= &device_desc,
+	.strings	= dev_strings,
+	.bind		= hid_bind,
+	.unbind		= __exit_p(hid_unbind),
+};
+
+static struct platform_driver hidg_plat_driver = {
+	.remove		= __devexit_p(hidg_plat_driver_remove),
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "hidg",
+	},
+};
+
+
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_AUTHOR("Fabien Chouteau, Peter Korsgaard");
+MODULE_LICENSE("GPL");
+
+/*
+ * Module init: probe the "hidg" platform devices first so that
+ * hidg_func_list is populated, then register the composite driver.  Both
+ * results are checked, and the platform driver is unregistered again when
+ * the composite registration fails.
+ */
+static int __init hidg_init(void)
+{
+	int status;
+
+	status = platform_driver_probe(&hidg_plat_driver,
+				       hidg_plat_driver_probe);
+	if (status < 0)
+		return status;
+
+	status = usb_composite_register(&hidg_driver);
+	if (status < 0)
+		platform_driver_unregister(&hidg_plat_driver);
+
+	return status;
+}
+module_init(hidg_init);
+
+/* Module exit: tear down both registrations made by hidg_init() */
+static void __exit hidg_cleanup(void)
+{
+	platform_driver_unregister(&hidg_plat_driver);
+	usb_composite_unregister(&hidg_driver);
+}
+module_exit(hidg_cleanup);
diff --git a/include/linux/usb/g_hid.h b/include/linux/usb/g_hid.h
new file mode 100644
index 000000000000..50f5745df28c
--- /dev/null
+++ b/include/linux/usb/g_hid.h
@@ -0,0 +1,32 @@
+/*
+ * g_hid.h -- Header file for USB HID gadget driver
+ *
+ * Copyright (C) 2010 Fabien Chouteau <fabien.chouteau@barco.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#ifndef __LINUX_USB_G_HID_H
+#define __LINUX_USB_G_HID_H
+
+struct hidg_func_descriptor {
+	unsigned char		subclass;	/* -> bInterfaceSubClass */
+	unsigned char		protocol;	/* -> bInterfaceProtocol */
+	unsigned short		report_length;
+	unsigned short		report_desc_length;	/* in bytes */
+	unsigned char		report_desc[];	/* duplicated at bind time */
+};
+
+#endif /* __LINUX_USB_G_HID_H */
-- 
cgit v1.2.3


From 3a229eb13984a2950ee47fb827c8ed1d654b9d68 Mon Sep 17 00:00:00 2001
From: Heikki Krogerus <ext-heikki.krogerus@nokia.com>
Date: Mon, 3 May 2010 09:13:01 +0300
Subject: usb: otg: add global ULPI register definitions

Definitions for registers defined by ULPI specification v1.1.

Signed-off-by: Heikki Krogerus <ext-heikki.krogerus@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ulpi.h | 140 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 140 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h
index 20675c6ebc4d..2369d07c3c87 100644
--- a/include/linux/usb/ulpi.h
+++ b/include/linux/usb/ulpi.h
@@ -1,6 +1,146 @@
+/*
+ * ulpi.h -- ULPI defines and function prototypes
+ *
+ * Copyright (C) 2010 Nokia Corporation
+ *
+ * This software is distributed under the terms of the GNU General
+ * Public License ("GPL") as published by the Free Software Foundation,
+ * version 2 of that License.
+ */
+
 #ifndef __LINUX_USB_ULPI_H
 #define __LINUX_USB_ULPI_H
 
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Set and Clear aliases of a register: its base address + 1 and + 2.
+ * See ULPI 1.1 specification to find the registers with Set and Clear offsets
+ */
+#define ULPI_SET(a)				((a) + 1)
+#define ULPI_CLR(a)				((a) + 2)
+
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Register Map
+ */
+#define ULPI_VENDOR_ID_LOW			0x00
+#define ULPI_VENDOR_ID_HIGH			0x01
+#define ULPI_PRODUCT_ID_LOW			0x02
+#define ULPI_PRODUCT_ID_HIGH			0x03
+#define ULPI_FUNC_CTRL				0x04
+#define ULPI_IFC_CTRL				0x07
+#define ULPI_OTG_CTRL				0x0a
+#define ULPI_USB_INT_EN_RISE			0x0d
+#define ULPI_USB_INT_EN_FALL			0x10
+#define ULPI_USB_INT_STS			0x13
+#define ULPI_USB_INT_LATCH			0x14
+#define ULPI_DEBUG				0x15
+#define ULPI_SCRATCH				0x16
+/* Optional Carkit Registers */
+#define ULPI_CARCIT_CTRL			0x19
+#define ULPI_CARCIT_INT_DELAY			0x1c
+#define ULPI_CARCIT_INT_EN			0x1d
+#define ULPI_CARCIT_INT_STS			0x20
+#define ULPI_CARCIT_INT_LATCH			0x21
+#define ULPI_CARCIT_PLS_CTRL			0x22
+/* Other Optional Registers */
+#define ULPI_TX_POS_WIDTH			0x25
+#define ULPI_TX_NEG_WIDTH			0x26
+#define ULPI_POLARITY_RECOVERY			0x27
+/* Access Extended Register Set */
+#define ULPI_ACCESS_EXTENDED			0x2f
+/* Vendor Specific */
+#define ULPI_VENDOR_SPECIFIC			0x30
+/* Extended Registers */
+#define ULPI_EXT_VENDOR_SPECIFIC		0x80
+
+/*-------------------------------------------------------------------------*/
+
+/* Function Control */
+#define ULPI_FUNC_CTRL_XCVRSEL			(1 << 0)
+#define  ULPI_FUNC_CTRL_XCVRSEL_MASK		(3 << 0)
+#define  ULPI_FUNC_CTRL_HIGH_SPEED		(0 << 0)
+#define  ULPI_FUNC_CTRL_FULL_SPEED		(1 << 0)
+#define  ULPI_FUNC_CTRL_LOW_SPEED		(2 << 0)
+#define  ULPI_FUNC_CTRL_FS4LS			(3 << 0)
+#define ULPI_FUNC_CTRL_TERMSELECT		(1 << 2)
+#define ULPI_FUNC_CTRL_OPMODE			(1 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_MASK		(3 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NORMAL		(0 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NONDRIVING	(1 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_DISABLE_NRZI	(2 << 3)
+#define  ULPI_FUNC_CTRL_OPMODE_NOSYNC_NOEOP	(3 << 3)
+#define ULPI_FUNC_CTRL_RESET			(1 << 5)
+#define ULPI_FUNC_CTRL_SUSPENDM			(1 << 6)
+
+/* Interface Control */
+#define ULPI_IFC_CTRL_6_PIN_SERIAL_MODE		(1 << 0)
+#define ULPI_IFC_CTRL_3_PIN_SERIAL_MODE		(1 << 1)
+#define ULPI_IFC_CTRL_CARKITMODE		(1 << 2)
+#define ULPI_IFC_CTRL_CLOCKSUSPENDM		(1 << 3)
+#define ULPI_IFC_CTRL_AUTORESUME		(1 << 4)
+#define ULPI_IFC_CTRL_EXTERNAL_VBUS		(1 << 5)
+#define ULPI_IFC_CTRL_PASSTHRU			(1 << 6)
+#define ULPI_IFC_CTRL_PROTECT_IFC_DISABLE	(1 << 7)
+
+/* OTG Control */
+#define ULPI_OTG_CTRL_ID_PULLUP			(1 << 0)
+#define ULPI_OTG_CTRL_DP_PULLDOWN		(1 << 1)
+#define ULPI_OTG_CTRL_DM_PULLDOWN		(1 << 2)
+#define ULPI_OTG_CTRL_DISCHRGVBUS		(1 << 3)
+#define ULPI_OTG_CTRL_CHRGVBUS			(1 << 4)
+#define ULPI_OTG_CTRL_DRVVBUS			(1 << 5)
+#define ULPI_OTG_CTRL_DRVVBUS_EXT		(1 << 6)
+#define ULPI_OTG_CTRL_EXTVBUSIND		(1 << 7)
+
+/* USB Interrupt Enable Rising,
+ * USB Interrupt Enable Falling,
+ * USB Interrupt Status and
+ * USB Interrupt Latch
+ */
+#define ULPI_INT_HOST_DISCONNECT		(1 << 0)
+#define ULPI_INT_VBUS_VALID			(1 << 1)
+#define ULPI_INT_SESS_VALID			(1 << 2)
+#define ULPI_INT_SESS_END			(1 << 3)
+#define ULPI_INT_IDGRD				(1 << 4)
+
+/* Debug */
+#define ULPI_DEBUG_LINESTATE0			(1 << 0)
+#define ULPI_DEBUG_LINESTATE1			(1 << 1)
+
+/* Carkit Control */
+#define ULPI_CARKIT_CTRL_CARKITPWR		(1 << 0)
+#define ULPI_CARKIT_CTRL_IDGNDDRV		(1 << 1)
+#define ULPI_CARKIT_CTRL_TXDEN			(1 << 2)
+#define ULPI_CARKIT_CTRL_RXDEN			(1 << 3)
+#define ULPI_CARKIT_CTRL_SPKLEFTEN		(1 << 4)
+#define ULPI_CARKIT_CTRL_SPKRIGHTEN		(1 << 5)
+#define ULPI_CARKIT_CTRL_MICEN			(1 << 6)
+
+/* Carkit Interrupt Enable */
+#define ULPI_CARKIT_INT_EN_IDFLOAT_RISE		(1 << 0)
+#define ULPI_CARKIT_INT_EN_IDFLOAT_FALL		(1 << 1)
+#define ULPI_CARKIT_INT_EN_CARINTDET		(1 << 2)
+#define ULPI_CARKIT_INT_EN_DP_RISE		(1 << 3)
+#define ULPI_CARKIT_INT_EN_DP_FALL		(1 << 4)
+
+/* Carkit Interrupt Status and
+ * Carkit Interrupt Latch
+ */
+#define ULPI_CARKIT_INT_IDFLOAT			(1 << 0)
+#define ULPI_CARKIT_INT_CARINTDET		(1 << 1)
+#define ULPI_CARKIT_INT_DP			(1 << 2)
+
+/* Carkit Pulse Control */
+#define ULPI_CARKIT_PLS_CTRL_TXPLSEN		(1 << 0)
+#define ULPI_CARKIT_PLS_CTRL_RXPLSEN		(1 << 1)
+#define ULPI_CARKIT_PLS_CTRL_SPKRLEFT_BIASEN	(1 << 2)
+#define ULPI_CARKIT_PLS_CTRL_SPKRRIGHT_BIASEN	(1 << 3)
+
+/*-------------------------------------------------------------------------*/
+
 struct otg_transceiver *otg_ulpi_create(struct otg_io_access_ops *ops,
 					unsigned int flags);
 
-- 
cgit v1.2.3


From ff9c895f07d36193c75533bda8193bde8ca99d02 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 2 Apr 2010 13:27:28 -0400
Subject: USB: fix usbmon and DMA mapping for scatter-gather URBs

This patch (as1368) fixes a rather obscure bug in usbmon: When tracing
URBs sent by the scatter-gather library, it accesses the data buffers
while they are still mapped for DMA.

The solution is to move the mapping and unmapping out of the s-g
library and into the usual place in hcd.c.  This requires the addition
of new URB flag bits to describe the kind of mapping needed, since we
have to call dma_map_sg() if the HCD supports native scatter-gather
operation and dma_map_page() if it doesn't.  The nice thing about
having the new flags is that they simplify the testing for unmapping.

The patch removes the only caller of usb_buffer_[un]map_sg(), so those
functions are #if'ed out.  A later patch will remove them entirely.

As a result of this change, urb->sg will be set in situations where
it wasn't set previously.  Hence the xhci and whci drivers are
adjusted to test urb->num_sgs instead, which retains its original
meaning and is nonzero only when the HCD has to handle a scatterlist.

Finally, even when a submission error occurs we don't want to hand
URBs to usbmon before they are unmapped.  The submission path is
rearranged so that map_urb_for_dma() is called only for non-root-hub
URBs and unmap_urb_for_dma() is called immediately after a submission
error.  This simplifies the error handling.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
CC: <stable@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c       | 169 +++++++++++++++++++++++++++----------------
 drivers/usb/core/message.c   |  45 +++---------
 drivers/usb/core/urb.c       |   9 ++-
 drivers/usb/core/usb.c       |   4 +
 drivers/usb/host/whci/qset.c |   2 +-
 drivers/usb/host/xhci-ring.c |   2 +-
 drivers/usb/mon/mon_bin.c    |   2 +-
 drivers/usb/mon/mon_text.c   |   4 +-
 include/linux/usb.h          |   9 +++
 9 files changed, 138 insertions(+), 108 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 38d4700926f7..6a05e6934455 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1259,6 +1259,51 @@ static void hcd_free_coherent(struct usb_bus *bus, dma_addr_t *dma_handle,
 	*dma_handle = 0;
 }
 
+static void unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
+{
+	enum dma_data_direction dir;
+
+	if (urb->transfer_flags & URB_SETUP_MAP_SINGLE) /* setup packet */
+		dma_unmap_single(hcd->self.controller,
+				urb->setup_dma,
+				sizeof(struct usb_ctrlrequest),
+				DMA_TO_DEVICE);
+	else if (urb->transfer_flags & URB_SETUP_MAP_LOCAL)
+		hcd_free_coherent(urb->dev->bus,
+				&urb->setup_dma,
+				(void **) &urb->setup_packet,
+				sizeof(struct usb_ctrlrequest),
+				DMA_TO_DEVICE);
+
+	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
+	if (urb->transfer_flags & URB_DMA_MAP_SG)	/* transfer buffer */
+		dma_unmap_sg(hcd->self.controller,
+				urb->sg->sg,
+				urb->num_sgs,
+				dir);
+	else if (urb->transfer_flags & URB_DMA_MAP_PAGE)
+		dma_unmap_page(hcd->self.controller,
+				urb->transfer_dma,
+				urb->transfer_buffer_length,
+				dir);
+	else if (urb->transfer_flags & URB_DMA_MAP_SINGLE)
+		dma_unmap_single(hcd->self.controller,
+				urb->transfer_dma,
+				urb->transfer_buffer_length,
+				dir);
+	else if (urb->transfer_flags & URB_MAP_LOCAL)
+		hcd_free_coherent(urb->dev->bus,
+				&urb->transfer_dma,
+				&urb->transfer_buffer,
+				urb->transfer_buffer_length,
+				dir);
+
+	/* Clear the mapping flags so that a repeated call undoes nothing */
+	urb->transfer_flags &= ~(URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL |
+			URB_DMA_MAP_SG | URB_DMA_MAP_PAGE |
+			URB_DMA_MAP_SINGLE | URB_MAP_LOCAL);
+}
+
 static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 			   gfp_t mem_flags)
 {
@@ -1270,8 +1315,6 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	 * unless it uses pio or talks to another transport,
 	 * or uses the provided scatter gather list for bulk.
 	 */
-	if (is_root_hub(urb->dev))
-		return 0;
 
 	if (usb_endpoint_xfer_control(&urb->ep->desc)
 	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
@@ -1284,6 +1327,7 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 			if (dma_mapping_error(hcd->self.controller,
 						urb->setup_dma))
 				return -EAGAIN;
+			urb->transfer_flags |= URB_SETUP_MAP_SINGLE;
 		} else if (hcd->driver->flags & HCD_LOCAL_MEM)
 			ret = hcd_alloc_coherent(
 					urb->dev->bus, mem_flags,
@@ -1291,20 +1335,57 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 					(void **)&urb->setup_packet,
 					sizeof(struct usb_ctrlrequest),
 					DMA_TO_DEVICE);
+			if (ret)
+				return ret;
+			urb->transfer_flags |= URB_SETUP_MAP_LOCAL;
 	}
 
 	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	if (ret == 0 && urb->transfer_buffer_length != 0
+	if (urb->transfer_buffer_length != 0
 	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
 		if (hcd->self.uses_dma) {
-			urb->transfer_dma = dma_map_single (
-					hcd->self.controller,
-					urb->transfer_buffer,
-					urb->transfer_buffer_length,
-					dir);
-			if (dma_mapping_error(hcd->self.controller,
+			if (urb->num_sgs) {
+				int n = dma_map_sg(
+						hcd->self.controller,
+						urb->sg->sg,
+						urb->num_sgs,
+						dir);
+				if (n <= 0)
+					ret = -EAGAIN;
+				else
+					urb->transfer_flags |= URB_DMA_MAP_SG;
+				if (n != urb->num_sgs) {
+					urb->num_sgs = n;
+					urb->transfer_flags |=
+							URB_DMA_SG_COMBINED;
+				}
+			} else if (urb->sg) {
+				struct scatterlist *sg;
+
+				sg = (struct scatterlist *) urb->sg;
+				urb->transfer_dma = dma_map_page(
+						hcd->self.controller,
+						sg_page(sg),
+						sg->offset,
+						urb->transfer_buffer_length,
+						dir);
+				if (dma_mapping_error(hcd->self.controller,
 						urb->transfer_dma))
-				return -EAGAIN;
+					ret = -EAGAIN;
+				else
+					urb->transfer_flags |= URB_DMA_MAP_PAGE;
+			} else {
+				urb->transfer_dma = dma_map_single(
+						hcd->self.controller,
+						urb->transfer_buffer,
+						urb->transfer_buffer_length,
+						dir);
+				if (dma_mapping_error(hcd->self.controller,
+						urb->transfer_dma))
+					ret = -EAGAIN;
+				else
+					urb->transfer_flags |= URB_DMA_MAP_SINGLE;
+			}
 		} else if (hcd->driver->flags & HCD_LOCAL_MEM) {
 			ret = hcd_alloc_coherent(
 					urb->dev->bus, mem_flags,
@@ -1312,55 +1393,16 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 					&urb->transfer_buffer,
 					urb->transfer_buffer_length,
 					dir);
-
-			if (ret && usb_endpoint_xfer_control(&urb->ep->desc)
-			    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP))
-				hcd_free_coherent(urb->dev->bus,
-					&urb->setup_dma,
-					(void **)&urb->setup_packet,
-					sizeof(struct usb_ctrlrequest),
-					DMA_TO_DEVICE);
+			if (ret == 0)
+				urb->transfer_flags |= URB_MAP_LOCAL;
 		}
+		if (ret && (urb->transfer_flags & (URB_SETUP_MAP_SINGLE |
+				URB_SETUP_MAP_LOCAL)))
+			unmap_urb_for_dma(hcd, urb);
 	}
 	return ret;
 }
 
-static void unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
-{
-	enum dma_data_direction dir;
-
-	if (is_root_hub(urb->dev))
-		return;
-
-	if (usb_endpoint_xfer_control(&urb->ep->desc)
-	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
-		if (hcd->self.uses_dma)
-			dma_unmap_single(hcd->self.controller, urb->setup_dma,
-					sizeof(struct usb_ctrlrequest),
-					DMA_TO_DEVICE);
-		else if (hcd->driver->flags & HCD_LOCAL_MEM)
-			hcd_free_coherent(urb->dev->bus, &urb->setup_dma,
-					(void **)&urb->setup_packet,
-					sizeof(struct usb_ctrlrequest),
-					DMA_TO_DEVICE);
-	}
-
-	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
-	if (urb->transfer_buffer_length != 0
-	    && !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP)) {
-		if (hcd->self.uses_dma)
-			dma_unmap_single(hcd->self.controller,
-					urb->transfer_dma,
-					urb->transfer_buffer_length,
-					dir);
-		else if (hcd->driver->flags & HCD_LOCAL_MEM)
-			hcd_free_coherent(urb->dev->bus, &urb->transfer_dma,
-					&urb->transfer_buffer,
-					urb->transfer_buffer_length,
-					dir);
-	}
-}
-
 /*-------------------------------------------------------------------------*/
 
 /* may be called in any context with a valid urb->dev usecount
@@ -1389,21 +1431,20 @@ int usb_hcd_submit_urb (struct urb *urb, gfp_t mem_flags)
 	 * URBs must be submitted in process context with interrupts
 	 * enabled.
 	 */
-	status = map_urb_for_dma(hcd, urb, mem_flags);
-	if (unlikely(status)) {
-		usbmon_urb_submit_error(&hcd->self, urb, status);
-		goto error;
-	}
 
-	if (is_root_hub(urb->dev))
+	if (is_root_hub(urb->dev)) {
 		status = rh_urb_enqueue(hcd, urb);
-	else
-		status = hcd->driver->urb_enqueue(hcd, urb, mem_flags);
+	} else {
+		status = map_urb_for_dma(hcd, urb, mem_flags);
+		if (likely(status == 0)) {
+			status = hcd->driver->urb_enqueue(hcd, urb, mem_flags);
+			if (unlikely(status))
+				unmap_urb_for_dma(hcd, urb);
+		}
+	}
 
 	if (unlikely(status)) {
 		usbmon_urb_submit_error(&hcd->self, urb, status);
-		unmap_urb_for_dma(hcd, urb);
- error:
 		urb->hcpriv = NULL;
 		INIT_LIST_HEAD(&urb->urb_list);
 		atomic_dec(&urb->use_count);
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 619c44fb8a96..79d1cdf4a635 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -259,9 +259,6 @@ static void sg_clean(struct usb_sg_request *io)
 		kfree(io->urbs);
 		io->urbs = NULL;
 	}
-	if (io->dev->dev.dma_mask != NULL)
-		usb_buffer_unmap_sg(io->dev, usb_pipein(io->pipe),
-				    io->sg, io->nents);
 	io->dev = NULL;
 }
 
@@ -364,7 +361,6 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 {
 	int i;
 	int urb_flags;
-	int dma;
 	int use_sg;
 
 	if (!io || !dev || !sg
@@ -378,21 +374,9 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	io->pipe = pipe;
 	io->sg = sg;
 	io->nents = nents;
-
-	/* not all host controllers use DMA (like the mainstream pci ones);
-	 * they can use PIO (sl811) or be software over another transport.
-	 */
-	dma = (dev->dev.dma_mask != NULL);
-	if (dma)
-		io->entries = usb_buffer_map_sg(dev, usb_pipein(pipe),
-						sg, nents);
-	else
-		io->entries = nents;
+	io->entries = nents;
 
 	/* initialize all the urbs we'll use */
-	if (io->entries <= 0)
-		return io->entries;
-
 	if (dev->bus->sg_tablesize > 0) {
 		io->urbs = kmalloc(sizeof *io->urbs, mem_flags);
 		use_sg = true;
@@ -404,8 +388,6 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 		goto nomem;
 
 	urb_flags = 0;
-	if (dma)
-		urb_flags |= URB_NO_TRANSFER_DMA_MAP;
 	if (usb_pipein(pipe))
 		urb_flags |= URB_SHORT_NOT_OK;
 
@@ -423,12 +405,13 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 
 		io->urbs[0]->complete = sg_complete;
 		io->urbs[0]->context = io;
+
 		/* A length of zero means transfer the whole sg list */
 		io->urbs[0]->transfer_buffer_length = length;
 		if (length == 0) {
 			for_each_sg(sg, sg, io->entries, i) {
 				io->urbs[0]->transfer_buffer_length +=
-					sg_dma_len(sg);
+					sg->length;
 			}
 		}
 		io->urbs[0]->sg = io;
@@ -454,26 +437,16 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 			io->urbs[i]->context = io;
 
 			/*
-			 * Some systems need to revert to PIO when DMA is temporarily
-			 * unavailable.  For their sakes, both transfer_buffer and
-			 * transfer_dma are set when possible.
-			 *
-			 * Note that if IOMMU coalescing occurred, we cannot
-			 * trust sg_page anymore, so check if S/G list shrunk.
+			 * Some systems can't use DMA; they use PIO instead.
+			 * For their sakes, transfer_buffer is set whenever
+			 * possible.
 			 */
-			if (io->nents == io->entries && !PageHighMem(sg_page(sg)))
+			if (!PageHighMem(sg_page(sg)))
 				io->urbs[i]->transfer_buffer = sg_virt(sg);
 			else
 				io->urbs[i]->transfer_buffer = NULL;
 
-			if (dma) {
-				io->urbs[i]->transfer_dma = sg_dma_address(sg);
-				len = sg_dma_len(sg);
-			} else {
-				/* hc may use _only_ transfer_buffer */
-				len = sg->length;
-			}
-
+			len = sg->length;
 			if (length) {
 				len = min_t(unsigned, len, length);
 				length -= len;
@@ -481,6 +454,8 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 					io->entries = i + 1;
 			}
 			io->urbs[i]->transfer_buffer_length = len;
+
+			io->urbs[i]->sg = (struct usb_sg_request *) sg;
 		}
 		io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
 	}
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index 2532a0917f8c..a760e46871c5 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -333,9 +333,12 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 		is_out = usb_endpoint_dir_out(&ep->desc);
 	}
 
-	/* Cache the direction for later use */
-	urb->transfer_flags = (urb->transfer_flags & ~URB_DIR_MASK) |
-			(is_out ? URB_DIR_OUT : URB_DIR_IN);
+	/* Clear the internal flags and cache the direction for later use */
+	urb->transfer_flags &= ~(URB_DIR_MASK | URB_DMA_MAP_SINGLE |
+			URB_DMA_MAP_PAGE | URB_DMA_MAP_SG | URB_MAP_LOCAL |
+			URB_SETUP_MAP_SINGLE | URB_SETUP_MAP_LOCAL |
+			URB_DMA_SG_COMBINED);
+	urb->transfer_flags |= (is_out ? URB_DIR_OUT : URB_DIR_IN);
 
 	if (xfertype != USB_ENDPOINT_XFER_CONTROL &&
 			dev->state < USB_STATE_CONFIGURED)
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 097172e2ba06..8180ce533ebf 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -881,6 +881,7 @@ void usb_buffer_unmap(struct urb *urb)
 EXPORT_SYMBOL_GPL(usb_buffer_unmap);
 #endif  /*  0  */
 
+#if 0
 /**
  * usb_buffer_map_sg - create scatterlist DMA mapping(s) for an endpoint
  * @dev: device to which the scatterlist will be mapped
@@ -924,6 +925,7 @@ int usb_buffer_map_sg(const struct usb_device *dev, int is_in,
 			is_in ? DMA_FROM_DEVICE : DMA_TO_DEVICE) ? : -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(usb_buffer_map_sg);
+#endif
 
 /* XXX DISABLED, no users currently.  If you wish to re-enable this
  * XXX please determine whether the sync is to transfer ownership of
@@ -960,6 +962,7 @@ void usb_buffer_dmasync_sg(const struct usb_device *dev, int is_in,
 EXPORT_SYMBOL_GPL(usb_buffer_dmasync_sg);
 #endif
 
+#if 0
 /**
  * usb_buffer_unmap_sg - free DMA mapping(s) for a scatterlist
  * @dev: device to which the scatterlist will be mapped
@@ -985,6 +988,7 @@ void usb_buffer_unmap_sg(const struct usb_device *dev, int is_in,
 			is_in ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
 }
 EXPORT_SYMBOL_GPL(usb_buffer_unmap_sg);
+#endif
 
 /* To disable USB, kernel command line is 'nousb' not 'usbcore.nousb' */
 #ifdef MODULE
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index 141d049beb3e..b388dd1fb4c4 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -646,7 +646,7 @@ int qset_add_urb(struct whc *whc, struct whc_qset *qset, struct urb *urb,
 	wurb->urb = urb;
 	INIT_WORK(&wurb->dequeue_work, urb_dequeue_work);
 
-	if (urb->sg) {
+	if (urb->num_sgs) {
 		ret = qset_add_urb_sg(whc, qset, urb, mem_flags);
 		if (ret == -EINVAL) {
 			qset_free_stds(qset, urb);
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 407d33fb5e84..c1359ed310b5 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1962,7 +1962,7 @@ int xhci_queue_bulk_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	int running_total, trb_buff_len, ret;
 	u64 addr;
 
-	if (urb->sg)
+	if (urb->num_sgs)
 		return queue_bulk_sg_tx(xhci, mem_flags, urb, slot_id, ep_index);
 
 	ep_ring = xhci->devs[slot_id]->eps[ep_index].ring;
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index ddf7f9a1b336..8a7968df278f 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -416,7 +416,7 @@ static unsigned int mon_bin_get_data(const struct mon_reader_bin *rp,
 
 	} else {
 		/* If IOMMU coalescing occurred, we cannot trust sg_page */
-		if (urb->sg->nents != urb->num_sgs) {
+		if (urb->transfer_flags & URB_DMA_SG_COMBINED) {
 			*flag = 'D';
 			return length;
 		}
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index 4d0be130f49b..d56260280f54 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -161,9 +161,7 @@ static inline char mon_text_get_data(struct mon_event_text *ep, struct urb *urb,
 	} else {
 		struct scatterlist *sg = urb->sg->sg;
 
-		/* If IOMMU coalescing occurred, we cannot trust sg_page */
-		if (urb->sg->nents != urb->num_sgs ||
-				PageHighMem(sg_page(sg)))
+		if (PageHighMem(sg_page(sg)))
 			return 'D';
 
 		/* For the text interface we copy only the first sg buffer */
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 739f1fd1cc15..99833029e5a8 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -965,10 +965,19 @@ extern int usb_disabled(void);
 					 * needed */
 #define URB_FREE_BUFFER		0x0100	/* Free transfer buffer with the URB */
 
+/* The following flags are used internally by usbcore and HCDs */
 #define URB_DIR_IN		0x0200	/* Transfer from device to host */
 #define URB_DIR_OUT		0
 #define URB_DIR_MASK		URB_DIR_IN
 
+#define URB_DMA_MAP_SINGLE	0x00010000	/* Non-scatter-gather mapping */
+#define URB_DMA_MAP_PAGE	0x00020000	/* HCD-unsupported S-G */
+#define URB_DMA_MAP_SG		0x00040000	/* HCD-supported S-G */
+#define URB_MAP_LOCAL		0x00080000	/* HCD-local-memory mapping */
+#define URB_SETUP_MAP_SINGLE	0x00100000	/* Setup packet DMA mapped */
+#define URB_SETUP_MAP_LOCAL	0x00200000	/* HCD-local setup packet */
+#define URB_DMA_SG_COMBINED	0x00400000	/* S-G entries were combined */
+
 struct usb_iso_packet_descriptor {
 	unsigned int offset;
 	unsigned int length;		/* expected length */
-- 
cgit v1.2.3


From 9e18c821659d836bd63f88df3c19729327728496 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 2 Apr 2010 13:22:09 -0400
Subject: USB: use PM core routines to enable/disable autosuspend

This patch (as1366) replaces the private routines
usb_enable_autosuspend() and usb_disable_autosuspend() with calls to
the standard pm_runtime_allow() and pm_runtime_forbid() functions in
the runtime PM framework.  They do the same thing.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/driver.c | 21 +++++----------------
 drivers/usb/core/sysfs.c  | 10 +++++-----
 include/linux/usb.h       |  6 ++----
 3 files changed, 12 insertions(+), 25 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index 271e857be0fa..207146743ea7 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -1356,13 +1356,9 @@ int usb_resume(struct device *dev, pm_message_t msg)
  *
  * The caller must hold @udev's device lock.
  */
-int usb_enable_autosuspend(struct usb_device *udev)
+void usb_enable_autosuspend(struct usb_device *udev)
 {
-	if (udev->autosuspend_disabled) {
-		udev->autosuspend_disabled = 0;
-		usb_autosuspend_device(udev);
-	}
-	return 0;
+	pm_runtime_allow(&udev->dev);
 }
 EXPORT_SYMBOL_GPL(usb_enable_autosuspend);
 
@@ -1375,16 +1371,9 @@ EXPORT_SYMBOL_GPL(usb_enable_autosuspend);
  *
  * The caller must hold @udev's device lock.
  */
-int usb_disable_autosuspend(struct usb_device *udev)
+void usb_disable_autosuspend(struct usb_device *udev)
 {
-	int rc = 0;
-
-	if (!udev->autosuspend_disabled) {
-		rc = usb_autoresume_device(udev);
-		if (rc == 0)
-			udev->autosuspend_disabled = 1;
-	}
-	return rc;
+	pm_runtime_forbid(&udev->dev);
 }
 EXPORT_SYMBOL_GPL(usb_disable_autosuspend);
 
@@ -1528,7 +1517,7 @@ void usb_autopm_put_interface_async(struct usb_interface *intf)
 	atomic_dec(&intf->pm_usage_cnt);
 	pm_runtime_put_noidle(&intf->dev);
 
-	if (!udev->autosuspend_disabled) {
+	if (udev->dev.power.runtime_auto) {
 		/* Optimization: Don't schedule a delayed autosuspend if
 		 * the timer is already running and the expiration time
 		 * wouldn't change.
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 43c002e3a9aa..b65c1eaf3aba 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -389,7 +389,7 @@ show_level(struct device *dev, struct device_attribute *attr, char *buf)
 	struct usb_device *udev = to_usb_device(dev);
 	const char *p = auto_string;
 
-	if (udev->state != USB_STATE_SUSPENDED && udev->autosuspend_disabled)
+	if (udev->state != USB_STATE_SUSPENDED && !udev->dev.power.runtime_auto)
 		p = on_string;
 	return sprintf(buf, "%s\n", p);
 }
@@ -401,7 +401,7 @@ set_level(struct device *dev, struct device_attribute *attr,
 	struct usb_device *udev = to_usb_device(dev);
 	int len = count;
 	char *cp;
-	int rc;
+	int rc = count;
 
 	cp = memchr(buf, '\n', count);
 	if (cp)
@@ -411,17 +411,17 @@ set_level(struct device *dev, struct device_attribute *attr,
 
 	if (len == sizeof on_string - 1 &&
 			strncmp(buf, on_string, len) == 0)
-		rc = usb_disable_autosuspend(udev);
+		usb_disable_autosuspend(udev);
 
 	else if (len == sizeof auto_string - 1 &&
 			strncmp(buf, auto_string, len) == 0)
-		rc = usb_enable_autosuspend(udev);
+		usb_enable_autosuspend(udev);
 
 	else
 		rc = -EINVAL;
 
 	usb_unlock_device(udev);
-	return (rc < 0 ? rc : count);
+	return rc;
 }
 
 static DEVICE_ATTR(level, S_IRUGO | S_IWUSR, show_level, set_level);
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 99833029e5a8..e32a849f81ce 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -425,7 +425,6 @@ struct usb_tt;
  * @connect_time: time device was first connected
  * @do_remote_wakeup:  remote wakeup should be enabled
  * @reset_resume: needs reset instead of resume
- * @autosuspend_disabled: autosuspend disabled by the user
  * @wusb_dev: if this is a Wireless USB device, link to the WUSB
  *	specific data for the device.
  * @slot_id: Slot ID assigned by xHCI
@@ -501,7 +500,6 @@ struct usb_device {
 
 	unsigned do_remote_wakeup:1;
 	unsigned reset_resume:1;
-	unsigned autosuspend_disabled:1;
 #endif
 	struct wusb_dev *wusb_dev;
 	int slot_id;
@@ -526,8 +524,8 @@ extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
 /* USB autosuspend and autoresume */
 #ifdef CONFIG_USB_SUSPEND
-extern int usb_enable_autosuspend(struct usb_device *udev);
-extern int usb_disable_autosuspend(struct usb_device *udev);
+extern void usb_enable_autosuspend(struct usb_device *udev);
+extern void usb_disable_autosuspend(struct usb_device *udev);
 
 extern int usb_autopm_get_interface(struct usb_interface *intf);
 extern void usb_autopm_put_interface(struct usb_interface *intf);
-- 
cgit v1.2.3


From 22ad1e7f86e1fb96488d71512e2797153a6ad839 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 2 Apr 2010 15:33:56 -0700
Subject: USB: Add parsing of SuperSpeed endpoint companion descriptor.

Allow the xHCI drivers (and any new USB 3.0 drivers) to parse the
SuperSpeed endpoint companion descriptor to find the maximum number of
bulk endpoint streams the endpoint supports.  This is used to calculate
the maximum total number of streams the driver can allocate.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch9.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index e58369ff8168..603b61aadd65 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -556,6 +556,8 @@ struct usb_ss_ep_comp_descriptor {
 } __attribute__ ((packed));
 
 #define USB_DT_SS_EP_COMP_SIZE		6
+/* Bits 4:0 of bmAttributes (bulk endpoints only): log2 of max streams */
+#define USB_SS_MAX_STREAMS(p)		(1 << ((p) & 0x1f))
 
 /*-------------------------------------------------------------------------*/
 
-- 
cgit v1.2.3


From 94af1220985c71cd80d6c161b7a42c51ef08b923 Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Fri, 2 Apr 2010 15:34:10 -0700
Subject: USB: Add stream ID field to struct urb.

Bulk endpoint streams were added in the USB 3.0 specification.  Streams
allow a device driver to overload a bulk endpoint so that multiple
transfers can be queued at once.

Add a new field, stream_id, to struct urb so that USB 3.0 drivers can
specify which stream they want the URB to be queued to.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index e32a849f81ce..191af498c4f5 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1196,6 +1196,7 @@ struct urb {
 	struct usb_device *dev; 	/* (in) pointer to associated device */
 	struct usb_host_endpoint *ep;	/* (internal) pointer to endpoint */
 	unsigned int pipe;		/* (in) pipe information */
+	unsigned int stream_id;		/* (in) stream ID */
 	int status;			/* (return) non-ISO status */
 	unsigned int transfer_flags;	/* (in) URB_SHORT_NOT_OK | ...*/
 	void *transfer_buffer;		/* (in) associated data buffer */
-- 
cgit v1.2.3


From eab1cafc3b524b714b0567ab98fc75ace09db98c Mon Sep 17 00:00:00 2001
From: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Date: Mon, 5 Apr 2010 10:55:58 -0700
Subject: USB: Support for allocating USB 3.0 streams.

Bulk endpoint streams were added in the USB 3.0 specification.  Streams
allow a device driver to overload a bulk endpoint so that multiple
transfers can be queued at once.

The device then decides which transfer it wants to work on first, and can
queue part of a transfer before it switches to a new stream.  All this
switching is invisible to the device driver, which just gets a completion
for the URB.  Drivers that use streams must be able to handle URBs
completing in a different order than they were submitted to the endpoint.

This requires adding new API to set up xHCI data structures to support
multiple queues ("stream rings") per endpoint.  Drivers will allocate a
number of stream IDs before enqueueing URBs to the bulk endpoints of the
device, and free the stream IDs in their disconnect function.  See
Documentation/usb/bulk-streams.txt for details.

The new mass storage device class, USB Attached SCSI Protocol (UASP), uses
these streams API.

Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/bulk-streams.txt | 78 ++++++++++++++++++++++++++++++++++++++
 drivers/usb/core/hcd.c             | 69 +++++++++++++++++++++++++++++++++
 drivers/usb/host/xhci-pci.c        |  2 +
 include/linux/usb.h                | 10 +++++
 include/linux/usb/hcd.h            | 10 +++++
 5 files changed, 169 insertions(+)
 create mode 100644 Documentation/usb/bulk-streams.txt

(limited to 'include/linux')

diff --git a/Documentation/usb/bulk-streams.txt b/Documentation/usb/bulk-streams.txt
new file mode 100644
index 000000000000..ffc02021863e
--- /dev/null
+++ b/Documentation/usb/bulk-streams.txt
@@ -0,0 +1,78 @@
+Background
+==========
+
+Bulk endpoint streams were added in the USB 3.0 specification.  Streams allow a
+device driver to overload a bulk endpoint so that multiple transfers can be
+queued at once.
+
+Streams are defined in sections 4.4.6.4 and 8.12.1.4 of the Universal Serial Bus
+3.0 specification at http://www.usb.org/developers/docs/  The USB Attached SCSI
+Protocol, which uses streams to queue multiple SCSI commands, can be found on
+the T10 website (http://t10.org/).
+
+
+Device-side implications
+========================
+
+Once a buffer has been queued to a stream ring, the device is notified (through
+an out-of-band mechanism on another endpoint) that data is ready for that stream
+ID.  The device then tells the host which "stream" it wants to start.  The host
+can also initiate a transfer on a stream without the device asking, but the
+device can refuse that transfer.  Devices can switch between streams at any
+time.
+
+
+Driver implications
+===================
+
+int usb_alloc_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags);
+
+Device drivers will call this API to request that the host controller driver
+allocate memory so the driver can use up to num_streams stream IDs.  They must
+pass an array of usb_host_endpoints that need to be setup with similar stream
+IDs.  This is to ensure that a UASP driver will be able to use the same stream
+ID for the bulk IN and OUT endpoints used in a Bi-directional command sequence.
+
+The return value is an error condition (if one of the endpoints doesn't support
+streams, or the xHCI driver ran out of memory), or the number of streams the
+host controller allocated for this endpoint.  The xHCI host controller hardware
+declares how many stream IDs it can support, and each bulk endpoint on a
+SuperSpeed device will say how many stream IDs it can handle.  Therefore,
+drivers should be able to deal with being allocated fewer stream IDs than they
+requested.
+
+Do NOT call this function if you have URBs enqueued for any of the endpoints
+passed in as arguments.  Do not call this function to request fewer than two
+streams.
+
+Drivers will only be allowed to call this API once for the same endpoint
+without calling usb_free_streams().  This is a simplification for the xHCI host
+controller driver, and may change in the future.
+
+
+Picking new Stream IDs to use
+=============================
+
+Stream ID 0 is reserved, and should not be used to communicate with devices.  If
+usb_alloc_streams() returns with a value of N, you may use streams 1 through N.
+To queue an URB for a specific stream, set the urb->stream_id value.  If the
+endpoint does not support streams, an error will be returned.
+
+Note that new API to choose the next stream ID will have to be added if the xHCI
+driver supports secondary stream IDs.
+
+
+Clean up
+========
+
+If a driver wishes to stop using streams to communicate with the device, it
+should call
+
+void usb_free_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags);
+
+All stream IDs will be deallocated when the driver releases the interface, to
+ensure that drivers that don't support streams will be able to use the endpoint.
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 6a05e6934455..3aaee2811f01 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1814,6 +1814,75 @@ void usb_hcd_reset_endpoint(struct usb_device *udev,
 	}
 }
 
+/**
+ * usb_alloc_streams - allocate bulk endpoint stream IDs.
+ * @interface:		alternate setting that includes all endpoints.
+ * @eps:		array of endpoints that need streams.
+ * @num_eps:		number of endpoints in the array.
+ * @num_streams:	number of streams to allocate.
+ * @mem_flags:		flags hcd should use to allocate memory.
+ *
+ * Sets up a group of bulk endpoints to have num_streams stream IDs available.
+ * Drivers may queue multiple transfers to different stream IDs, which may
+ * complete in a different order than they were queued.
+ */
+int usb_alloc_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags)
+{
+	struct usb_hcd *hcd;
+	struct usb_device *dev;
+	int i;
+
+	dev = interface_to_usbdev(interface);
+	hcd = bus_to_hcd(dev->bus);
+	if (!hcd->driver->alloc_streams || !hcd->driver->free_streams)
+		return -EINVAL;
+	if (dev->speed != USB_SPEED_SUPER)
+		return -EINVAL;
+
+	/* Streams only apply to bulk endpoints. */
+	for (i = 0; i < num_eps; i++)
+		if (!usb_endpoint_xfer_bulk(&eps[i]->desc))
+			return -EINVAL;
+
+	return hcd->driver->alloc_streams(hcd, dev, eps, num_eps,
+			num_streams, mem_flags);
+}
+EXPORT_SYMBOL_GPL(usb_alloc_streams);
+
+/**
+ * usb_free_streams - free bulk endpoint stream IDs.
+ * @interface:	alternate setting that includes all endpoints.
+ * @eps:	array of endpoints to remove streams from.
+ * @num_eps:	number of endpoints in the array.
+ * @mem_flags:	flags hcd should use to allocate memory.
+ *
+ * Reverts a group of bulk endpoints back to not using stream IDs.
+ * Can fail if we are given bad arguments, or HCD is broken.
+ */
+void usb_free_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags)
+{
+	struct usb_hcd *hcd;
+	struct usb_device *dev;
+	int i;
+
+	dev = interface_to_usbdev(interface);
+	hcd = bus_to_hcd(dev->bus);
+	if (dev->speed != USB_SPEED_SUPER)
+		return;
+
+	/* Streams only apply to bulk endpoints. */
+	for (i = 0; i < num_eps; i++)
+		if (!usb_endpoint_xfer_bulk(&eps[i]->desc))
+			return;
+
+	hcd->driver->free_streams(hcd, dev, eps, num_eps, mem_flags);
+}
+EXPORT_SYMBOL_GPL(usb_free_streams);
+
 /* Protect against drivers that try to unlink URBs after the device
  * is gone, by waiting until all unlinks for @udev are finished.
  * Since we don't currently track URBs by device, simply wait until
diff --git a/drivers/usb/host/xhci-pci.c b/drivers/usb/host/xhci-pci.c
index 98a73cd20cc6..d295bbc15eb7 100644
--- a/drivers/usb/host/xhci-pci.c
+++ b/drivers/usb/host/xhci-pci.c
@@ -132,6 +132,8 @@ static const struct hc_driver xhci_pci_hc_driver = {
 	.urb_dequeue =		xhci_urb_dequeue,
 	.alloc_dev =		xhci_alloc_dev,
 	.free_dev =		xhci_free_dev,
+	.alloc_streams =	xhci_alloc_streams,
+	.free_streams =		xhci_free_streams,
 	.add_endpoint =		xhci_add_endpoint,
 	.drop_endpoint =	xhci_drop_endpoint,
 	.endpoint_reset =	xhci_endpoint_reset,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 191af498c4f5..1ea25377ca0d 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -570,6 +570,16 @@ static inline void usb_mark_last_busy(struct usb_device *udev)
 /* for drivers using iso endpoints */
 extern int usb_get_current_frame_number(struct usb_device *usb_dev);
 
+/* Sets up a group of bulk endpoints to support multiple stream IDs. */
+extern int usb_alloc_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags);
+
+/* Reverts a group of bulk endpoints back to not using stream IDs. */
+extern void usb_free_streams(struct usb_interface *interface,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags);
+
 /* used these for multi-interface device registration */
 extern int usb_driver_claim_interface(struct usb_driver *driver,
 			struct usb_interface *iface, void *priv);
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index d268415b7a40..aca73a5c3af7 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -250,6 +250,16 @@ struct hc_driver {
 	int	(*alloc_dev)(struct usb_hcd *, struct usb_device *);
 		/* Called by usb_disconnect to free HC device structures */
 	void	(*free_dev)(struct usb_hcd *, struct usb_device *);
+	/* Change a group of bulk endpoints to support multiple stream IDs */
+	int	(*alloc_streams)(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		unsigned int num_streams, gfp_t mem_flags);
+	/* Reverts a group of bulk endpoints back to not using stream IDs.
+	 * Can fail if we run out of memory.
+	 */
+	int	(*free_streams)(struct usb_hcd *hcd, struct usb_device *udev,
+		struct usb_host_endpoint **eps, unsigned int num_eps,
+		gfp_t mem_flags);
 
 	/* Bandwidth computation functions */
 	/* Note that add_endpoint() can only be called once per endpoint before
-- 
cgit v1.2.3


From 01154aa714240a9be12a0ed69b047e53d258f5b1 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@nokia.com>
Date: Fri, 16 Apr 2010 16:13:34 +0300
Subject: usb: cdc: ncm constants and structures added

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/cdc.h | 94 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 94 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h
index c24124a42ce5..c117a68d04a7 100644
--- a/include/linux/usb/cdc.h
+++ b/include/linux/usb/cdc.h
@@ -18,6 +18,7 @@
 #define USB_CDC_SUBCLASS_MDLM			0x0a
 #define USB_CDC_SUBCLASS_OBEX			0x0b
 #define USB_CDC_SUBCLASS_EEM			0x0c
+#define USB_CDC_SUBCLASS_NCM			0x0d
 
 #define USB_CDC_PROTO_NONE			0
 
@@ -49,6 +50,7 @@
 #define USB_CDC_MDLM_DETAIL_TYPE	0x13	/* mdlm_detail_desc */
 #define USB_CDC_DMM_TYPE		0x14
 #define USB_CDC_OBEX_TYPE		0x15
+#define USB_CDC_NCM_TYPE		0x1a
 
 /* "Header Functional Descriptor" from CDC spec  5.2.3.1 */
 struct usb_cdc_header_desc {
@@ -174,6 +176,15 @@ struct usb_cdc_obex_desc {
 	__le16	bcdVersion;
 } __attribute__ ((packed));
 
+/* "NCM Control Model Functional Descriptor" */
+struct usb_cdc_ncm_desc {
+	__u8	bLength;
+	__u8	bDescriptorType;
+	__u8	bDescriptorSubType;
+
+	__le16	bcdNcmVersion;
+	__u8	bmNetworkCapabilities;
+} __attribute__ ((packed));
 /*-------------------------------------------------------------------------*/
 
 /*
@@ -197,6 +208,17 @@ struct usb_cdc_obex_desc {
 #define USB_CDC_GET_ETHERNET_PM_PATTERN_FILTER	0x42
 #define USB_CDC_SET_ETHERNET_PACKET_FILTER	0x43
 #define USB_CDC_GET_ETHERNET_STATISTIC		0x44
+#define USB_CDC_GET_NTB_PARAMETERS		0x80
+#define USB_CDC_GET_NET_ADDRESS			0x81
+#define USB_CDC_SET_NET_ADDRESS			0x82
+#define USB_CDC_GET_NTB_FORMAT			0x83
+#define USB_CDC_SET_NTB_FORMAT			0x84
+#define USB_CDC_GET_NTB_INPUT_SIZE		0x85
+#define USB_CDC_SET_NTB_INPUT_SIZE		0x86
+#define USB_CDC_GET_MAX_DATAGRAM_SIZE		0x87
+#define USB_CDC_SET_MAX_DATAGRAM_SIZE		0x88
+#define USB_CDC_GET_CRC_MODE			0x89
+#define USB_CDC_SET_CRC_MODE			0x8a
 
 /* Line Coding Structure from CDC spec 6.2.13 */
 struct usb_cdc_line_coding {
@@ -247,4 +269,76 @@ struct usb_cdc_notification {
 	__le16	wLength;
 } __attribute__ ((packed));
 
+/*-------------------------------------------------------------------------*/
+
+/*
+ * Class Specific structures and constants
+ *
+ * CDC NCM parameter structure, CDC NCM subclass 6.2.1
+ *
+ */
+
+struct usb_cdc_ncm_ntb_parameter {
+	__le16	wLength;
+	__le16	bmNtbFormatSupported;
+	__le32	dwNtbInMaxSize;
+	__le16	wNdpInDivisor;
+	__le16	wNdpInPayloadRemainder;
+	__le16	wNdpInAlignment;
+	__le16	wPadding1;
+	__le32	dwNtbOutMaxSize;
+	__le16	wNdpOutDivisor;
+	__le16	wNdpOutPayloadRemainder;
+	__le16	wNdpOutAlignment;
+	__le16	wPadding2;
+} __attribute__ ((packed));
+
+/*
+ * CDC NCM transfer headers, CDC NCM subclass 3.2
+ */
+
+#define NCM_NTH16_SIGN		0x484D434E /* NCMH */
+#define NCM_NTH32_SIGN		0x686D636E /* ncmh */
+
+struct usb_cdc_ncm_nth16 {
+	__le32	dwSignature;
+	__le16	wHeaderLength;
+	__le16	wSequence;
+	__le16	wBlockLength;
+	__le16	wFpIndex;
+} __attribute__ ((packed));
+
+struct usb_cdc_ncm_nth32 {
+	__le32	dwSignature;
+	__le16	wHeaderLength;
+	__le16	wSequence;
+	__le32	dwBlockLength;
+	__le32	dwFpIndex;
+} __attribute__ ((packed));
+
+/*
+ * CDC NCM datagram pointers, CDC NCM subclass 3.3
+ */
+
+#define NCM_NDP16_CRC_SIGN	0x314D434E /* NCM1 */
+#define NCM_NDP16_NOCRC_SIGN	0x304D434E /* NCM0 */
+#define NCM_NDP32_CRC_SIGN	0x316D636E /* ncm1 */
+#define NCM_NDP32_NOCRC_SIGN	0x306D636E /* ncm0 */
+
+struct usb_cdc_ncm_ndp16 {
+	__le32	dwSignature;
+	__le16	wLength;
+	__le16	wNextFpIndex;
+	__u8	data[0];
+} __attribute__ ((packed));
+
+struct usb_cdc_ncm_ndp32 {
+	__le32	dwSignature;
+	__le16	wLength;
+	__le16	wReserved6;
+	__le32	dwNextFpIndex;
+	__le32	dwReserved12;
+	__u8	data[0];
+} __attribute__ ((packed));
+
 #endif /* __LINUX_USB_CDC_H */
-- 
cgit v1.2.3


From 65e0b499105ec8ff3bc4ab7680873dec20127f9d Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@nokia.com>
Date: Fri, 16 Apr 2010 16:13:35 +0300
Subject: USB: ncm: added ncm.h with auxiliary definitions

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@nokia.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ncm.h | 114 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 include/linux/usb/ncm.h

(limited to 'include/linux')

diff --git a/include/linux/usb/ncm.h b/include/linux/usb/ncm.h
new file mode 100644
index 000000000000..006d1064c8b2
--- /dev/null
+++ b/include/linux/usb/ncm.h
@@ -0,0 +1,114 @@
+/*
+ * USB CDC NCM auxiliary definitions
+ */
+
+#ifndef __LINUX_USB_NCM_H
+#define __LINUX_USB_NCM_H
+
+#include <linux/types.h>
+#include <linux/usb/cdc.h>
+#include <asm/unaligned.h>
+
+#define NCM_NTB_MIN_IN_SIZE		2048
+#define NCM_NTB_MIN_OUT_SIZE		2048
+
+#define NCM_CONTROL_TIMEOUT		(5 * 1000)
+
+/* bmNetworkCapabilities */
+
+#define NCM_NCAP_ETH_FILTER	(1 << 0)
+#define NCM_NCAP_NET_ADDRESS	(1 << 1)
+#define NCM_NCAP_ENCAP_COMM	(1 << 2)
+#define NCM_NCAP_MAX_DGRAM	(1 << 3)
+#define NCM_NCAP_CRC_MODE	(1 << 4)
+
+/*
+ * Here are options for NCM Datagram Pointer table (NDP) parser.
+ * There are 2 different formats: NDP16 and NDP32 in the spec (ch. 3),
+ * in NDP16 offsets and sizes fields are 1 16bit word wide,
+ * in NDP32 -- 2 16bit words wide. Also signatures are different.
+ * To make the parser code the same, put the differences in the structure,
+ * and switch pointers to the structures when the format is changed.
+ */
+
+struct ndp_parser_opts {
+	u32		nth_sign;
+	u32		ndp_sign;
+	unsigned	nth_size;
+	unsigned	ndp_size;
+	unsigned	ndplen_align;
+	/* sizes in u16 units */
+	unsigned	dgram_item_len; /* index or length */
+	unsigned	block_length;
+	unsigned	fp_index;
+	unsigned	reserved1;
+	unsigned	reserved2;
+	unsigned	next_fp_index;
+};
+
+#define INIT_NDP16_OPTS {					\
+		.nth_sign = NCM_NTH16_SIGN,			\
+		.ndp_sign = NCM_NDP16_NOCRC_SIGN,		\
+		.nth_size = sizeof(struct usb_cdc_ncm_nth16),	\
+		.ndp_size = sizeof(struct usb_cdc_ncm_ndp16),	\
+		.ndplen_align = 4,				\
+		.dgram_item_len = 1,				\
+		.block_length = 1,				\
+		.fp_index = 1,					\
+		.reserved1 = 0,					\
+		.reserved2 = 0,					\
+		.next_fp_index = 1,				\
+	}
+
+
+#define INIT_NDP32_OPTS {					\
+		.nth_sign = NCM_NTH32_SIGN,			\
+		.ndp_sign = NCM_NDP32_NOCRC_SIGN,		\
+		.nth_size = sizeof(struct usb_cdc_ncm_nth32),	\
+		.ndp_size = sizeof(struct usb_cdc_ncm_ndp32),	\
+		.ndplen_align = 8,				\
+		.dgram_item_len = 2,				\
+		.block_length = 2,				\
+		.fp_index = 2,					\
+		.reserved1 = 1,					\
+		.reserved2 = 2,					\
+		.next_fp_index = 2,				\
+	}
+
+static inline void put_ncm(__le16 **p, unsigned size, unsigned val)
+{
+	switch (size) {
+	case 1:
+		put_unaligned_le16((u16)val, *p);
+		break;
+	case 2:
+		put_unaligned_le32((u32)val, *p);
+
+		break;
+	default:
+		BUG();
+	}
+
+	*p += size;
+}
+
+static inline unsigned get_ncm(__le16 **p, unsigned size)
+{
+	unsigned tmp;
+
+	switch (size) {
+	case 1:
+		tmp = get_unaligned_le16(*p);
+		break;
+	case 2:
+		tmp = get_unaligned_le32(*p);
+		break;
+	default:
+		BUG();
+	}
+
+	*p += size;
+	return tmp;
+}
+
+#endif /* __LINUX_USB_NCM_H */
-- 
cgit v1.2.3


From f48cf80f93ba974eb3201ab2d0f2c4cef950f3fc Mon Sep 17 00:00:00 2001
From: Fabien Chouteau <fabien.chouteau@barco.com>
Date: Fri, 23 Apr 2010 14:21:26 +0200
Subject: USB: Composite framework: Add suspended sysfs entry

This patch adds a sysfs entry (/sys/devices/platform/_UDC_/gadget/suspended) to
show the suspend state of an USB composite gadget.

Signed-off-by: Fabien Chouteau <fabien.chouteau@barco.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 .../ABI/testing/sysfs-devices-platform-_UDC_-gadget |  9 +++++++++
 drivers/usb/gadget/composite.c                      | 21 +++++++++++++++++++++
 include/linux/usb/composite.h                       |  1 +
 include/linux/usb/gadget.h                          |  4 ++++
 4 files changed, 35 insertions(+)
 create mode 100644 Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget

(limited to 'include/linux')

diff --git a/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
new file mode 100644
index 000000000000..34034027b13c
--- /dev/null
+++ b/Documentation/ABI/testing/sysfs-devices-platform-_UDC_-gadget
@@ -0,0 +1,9 @@
+What:		/sys/devices/platform/_UDC_/gadget/suspended
+Date:		April 2010
+Contact:	Fabien Chouteau <fabien.chouteau@barco.com>
+Description:
+		Show the suspend state of a USB composite gadget.
+		1 -> suspended
+		0 -> resumed
+
+		(_UDC_ is the name of the USB Device Controller driver)
diff --git a/drivers/usb/gadget/composite.c b/drivers/usb/gadget/composite.c
index 5465d8767f9a..f9aff1bbcb3e 100644
--- a/drivers/usb/gadget/composite.c
+++ b/drivers/usb/gadget/composite.c
@@ -898,6 +898,18 @@ static void composite_disconnect(struct usb_gadget *gadget)
 
 /*-------------------------------------------------------------------------*/
 
+static ssize_t composite_show_suspended(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct usb_gadget *gadget = dev_to_usb_gadget(dev);
+	struct usb_composite_dev *cdev = get_gadget_data(gadget);
+
+	return sprintf(buf, "%d\n", cdev->suspended);
+}
+
+static DEVICE_ATTR(suspended, 0444, composite_show_suspended, NULL);
+
 static void /* __init_or_exit */
 composite_unbind(struct usb_gadget *gadget)
 {
@@ -944,6 +956,7 @@ composite_unbind(struct usb_gadget *gadget)
 	}
 	kfree(cdev);
 	set_gadget_data(gadget, NULL);
+	device_remove_file(&gadget->dev, &dev_attr_suspended);
 	composite = NULL;
 }
 
@@ -1036,6 +1049,10 @@ static int __init composite_bind(struct usb_gadget *gadget)
 		string_override(composite->strings,
 			cdev->desc.iSerialNumber, iSerialNumber);
 
+	status = device_create_file(&gadget->dev, &dev_attr_suspended);
+	if (status)
+		goto fail;
+
 	INFO(cdev, "%s ready\n", composite->name);
 	return 0;
 
@@ -1064,6 +1081,8 @@ composite_suspend(struct usb_gadget *gadget)
 	}
 	if (composite->suspend)
 		composite->suspend(cdev);
+
+	cdev->suspended = 1;
 }
 
 static void
@@ -1084,6 +1103,8 @@ composite_resume(struct usb_gadget *gadget)
 				f->resume(f);
 		}
 	}
+
+	cdev->suspended = 0;
 }
 
 /*-------------------------------------------------------------------------*/
diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h
index 738ea1a691cb..139353efad34 100644
--- a/include/linux/usb/composite.h
+++ b/include/linux/usb/composite.h
@@ -326,6 +326,7 @@ struct usb_composite_dev {
 
 	/* private: */
 	/* internals */
+	unsigned int			suspended:1;
 	struct usb_device_descriptor	desc;
 	struct list_head		configs;
 	struct usb_composite_driver	*driver;
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index f4b7ca516cdd..db6141cdb77b 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -494,6 +494,10 @@ static inline void set_gadget_data(struct usb_gadget *gadget, void *data)
 	{ dev_set_drvdata(&gadget->dev, data); }
 static inline void *get_gadget_data(struct usb_gadget *gadget)
 	{ return dev_get_drvdata(&gadget->dev); }
+static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev)
+{
+	return container_of(dev, struct usb_gadget, dev);
+}
 
 /* iterates the non-control endpoints; 'tmp' is a struct usb_ep pointer */
 #define gadget_for_each_ep(tmp,gadget) \
-- 
cgit v1.2.3


From 3b02ca3218cf878e206545041aa811073e4f6c79 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 30 Apr 2010 12:42:23 -0400
Subject: USB: export the new ch11.h file to userspace

This patch (as1374) cleans up a few loose ends in the
include/linux/usb/ch11.h header file and exports it to userspace.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Cc: Eric Lescouet <Eric.Lescouet@virtuallogix.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/Kbuild |  1 +
 include/linux/usb/ch11.h | 10 ++++------
 2 files changed, 5 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/Kbuild b/include/linux/usb/Kbuild
index 29fd73b0bffc..51410e0200cf 100644
--- a/include/linux/usb/Kbuild
+++ b/include/linux/usb/Kbuild
@@ -1,6 +1,7 @@
 header-y += audio.h
 header-y += cdc.h
 header-y += ch9.h
+header-y += ch11.h
 header-y += gadgetfs.h
 header-y += midi.h
 header-y += g_printer.h
diff --git a/include/linux/usb/ch11.h b/include/linux/usb/ch11.h
index bd3185aba4ee..119194c85d10 100644
--- a/include/linux/usb/ch11.h
+++ b/include/linux/usb/ch11.h
@@ -6,12 +6,10 @@
  * drivers.
  */
 
-#ifndef __LINUX_HUB_H
-#define __LINUX_HUB_H
+#ifndef __LINUX_CH11_H
+#define __LINUX_CH11_H
 
-#include <linux/list.h>
-#include <linux/workqueue.h>
-#include <linux/compiler.h>	/* likely()/unlikely() */
+#include <linux/types.h>	/* __u8 etc */
 
 /*
  * Hub request types
@@ -166,4 +164,4 @@ enum hub_led_mode {
 #define HUB_TTTT_24_BITS	0x40
 #define HUB_TTTT_32_BITS	0x60
 
-#endif /* __LINUX_HUB_H */
+#endif /* __LINUX_CH11_H */
-- 
cgit v1.2.3


From 842f16905dfc6743c1dd80c3d29b49ba3ab7f7c8 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 30 Apr 2010 12:44:46 -0400
Subject: USB: remove the usb_host_ss_ep_comp structure

This patch (as1375) eliminates the usb_host_ss_ep_comp structure used
for storing a dynamically-allocated copy of the SuperSpeed endpoint
companion descriptor.  The SuperSpeed descriptor is placed directly in
the usb_host_endpoint structure, alongside the standard endpoint
descriptor.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Sarah Sharp <sarah.a.sharp@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/config.c   | 185 +++++++++++++-------------------------------
 drivers/usb/host/xhci-mem.c |  22 ++----
 drivers/usb/host/xhci.c     |  13 +---
 include/linux/usb.h         |  19 +----
 4 files changed, 68 insertions(+), 171 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/config.c b/drivers/usb/core/config.c
index e4909c26becb..83126b03e7cf 100644
--- a/drivers/usb/core/config.c
+++ b/drivers/usb/core/config.c
@@ -21,32 +21,6 @@ static inline const char *plural(int n)
 	return (n == 1 ? "" : "s");
 }
 
-/* FIXME: this is a kludge */
-static int find_next_descriptor_more(unsigned char *buffer, int size,
-    int dt1, int dt2, int dt3, int *num_skipped)
-{
-	struct usb_descriptor_header *h;
-	int n = 0;
-	unsigned char *buffer0 = buffer;
-
-	/* Find the next descriptor of type dt1 or dt2 or dt3 */
-	while (size > 0) {
-		h = (struct usb_descriptor_header *) buffer;
-		if (h->bDescriptorType == dt1 || h->bDescriptorType == dt2 ||
-				h->bDescriptorType == dt3)
-			break;
-		buffer += h->bLength;
-		size -= h->bLength;
-		++n;
-	}
-
-	/* Store the number of descriptors skipped and return the
-	 * number of bytes skipped */
-	if (num_skipped)
-		*num_skipped = n;
-	return buffer - buffer0;
-}
-
 static int find_next_descriptor(unsigned char *buffer, int size,
     int dt1, int dt2, int *num_skipped)
 {
@@ -71,47 +45,41 @@ static int find_next_descriptor(unsigned char *buffer, int size,
 	return buffer - buffer0;
 }
 
-static int usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
+static void usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 		int inum, int asnum, struct usb_host_endpoint *ep,
-		int num_ep, unsigned char *buffer, int size)
+		unsigned char *buffer, int size)
 {
-	unsigned char *buffer_start = buffer;
-	struct usb_ss_ep_comp_descriptor	*desc;
-	int retval;
-	int num_skipped;
+	struct usb_ss_ep_comp_descriptor *desc;
 	int max_tx;
-	int i;
 
+	/* The SuperSpeed endpoint companion descriptor is supposed to
+	 * be the first thing immediately following the endpoint descriptor.
+	 */
 	desc = (struct usb_ss_ep_comp_descriptor *) buffer;
-	if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP) {
+	if (desc->bDescriptorType != USB_DT_SS_ENDPOINT_COMP ||
+			size < USB_DT_SS_EP_COMP_SIZE) {
 		dev_warn(ddev, "No SuperSpeed endpoint companion for config %d "
 				" interface %d altsetting %d ep %d: "
 				"using minimum values\n",
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		/*
-		 * The next descriptor is for an Endpoint or Interface,
-		 * no extra descriptors to copy into the companion structure,
-		 * and we didn't eat up any of the buffer.
+
+		/* Fill in some default values.
+		 * Leave bmAttributes as zero, which will mean no streams for
+		 * bulk, and isoc won't support multiple bursts of packets.
+		 * With bursts of only one packet, and a Mult of 1, the max
+		 * amount of data moved per endpoint service interval is one
+		 * packet.
 		 */
-		return 0;
+		ep->ss_ep_comp.bLength = USB_DT_SS_EP_COMP_SIZE;
+		ep->ss_ep_comp.bDescriptorType = USB_DT_SS_ENDPOINT_COMP;
+		if (usb_endpoint_xfer_isoc(&ep->desc) ||
+				usb_endpoint_xfer_int(&ep->desc))
+			ep->ss_ep_comp.wBytesPerInterval =
+					ep->desc.wMaxPacketSize;
+		return;
 	}
-	memcpy(&ep->ss_ep_comp->desc, desc, USB_DT_SS_EP_COMP_SIZE);
-	desc = &ep->ss_ep_comp->desc;
-	buffer += desc->bLength;
-	size -= desc->bLength;
 
-	/* Eat up the other descriptors we don't care about */
-	ep->ss_ep_comp->extra = buffer;
-	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
-			USB_DT_INTERFACE, &num_skipped);
-	ep->ss_ep_comp->extralen = i;
-	buffer += i;
-	size -= i;
-	retval = buffer - buffer_start;
-	if (num_skipped > 0)
-		dev_dbg(ddev, "skipped %d descriptor%s after %s\n",
-				num_skipped, plural(num_skipped),
-				"SuperSpeed endpoint companion");
+	memcpy(&ep->ss_ep_comp, desc, USB_DT_SS_EP_COMP_SIZE);
 
 	/* Check the various values */
 	if (usb_endpoint_xfer_control(&ep->desc) && desc->bMaxBurst != 0) {
@@ -119,47 +87,48 @@ static int usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to zero\n", desc->bMaxBurst,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		desc->bMaxBurst = 0;
-	}
-	if (desc->bMaxBurst > 15) {
+		ep->ss_ep_comp.bMaxBurst = 0;
+	} else if (desc->bMaxBurst > 15) {
 		dev_warn(ddev, "Endpoint with bMaxBurst = %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to 15\n", desc->bMaxBurst,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		desc->bMaxBurst = 15;
+		ep->ss_ep_comp.bMaxBurst = 15;
 	}
-	if ((usb_endpoint_xfer_control(&ep->desc) || usb_endpoint_xfer_int(&ep->desc))
-			&& desc->bmAttributes != 0) {
+
+	if ((usb_endpoint_xfer_control(&ep->desc) ||
+			usb_endpoint_xfer_int(&ep->desc)) &&
+				desc->bmAttributes != 0) {
 		dev_warn(ddev, "%s endpoint with bmAttributes = %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to zero\n",
 				usb_endpoint_xfer_control(&ep->desc) ? "Control" : "Bulk",
 				desc->bmAttributes,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		desc->bmAttributes = 0;
-	}
-	if (usb_endpoint_xfer_bulk(&ep->desc) && desc->bmAttributes > 16) {
+		ep->ss_ep_comp.bmAttributes = 0;
+	} else if (usb_endpoint_xfer_bulk(&ep->desc) &&
+			desc->bmAttributes > 16) {
 		dev_warn(ddev, "Bulk endpoint with more than 65536 streams in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to max\n",
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		desc->bmAttributes = 16;
-	}
-	if (usb_endpoint_xfer_isoc(&ep->desc) && desc->bmAttributes > 2) {
+		ep->ss_ep_comp.bmAttributes = 16;
+	} else if (usb_endpoint_xfer_isoc(&ep->desc) &&
+			desc->bmAttributes > 2) {
 		dev_warn(ddev, "Isoc endpoint has Mult of %d in "
 				"config %d interface %d altsetting %d ep %d: "
 				"setting to 3\n", desc->bmAttributes + 1,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress);
-		desc->bmAttributes = 2;
+		ep->ss_ep_comp.bmAttributes = 2;
 	}
-	if (usb_endpoint_xfer_isoc(&ep->desc)) {
+
+	if (usb_endpoint_xfer_isoc(&ep->desc))
 		max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1) *
 			(desc->bmAttributes + 1);
-	} else if (usb_endpoint_xfer_int(&ep->desc)) {
+	else if (usb_endpoint_xfer_int(&ep->desc))
 		max_tx = ep->desc.wMaxPacketSize * (desc->bMaxBurst + 1);
-	} else {
-		goto valid;
-	}
+	else
+		max_tx = 999999;
 	if (desc->wBytesPerInterval > max_tx) {
 		dev_warn(ddev, "%s endpoint with wBytesPerInterval of %d in "
 				"config %d interface %d altsetting %d ep %d: "
@@ -168,10 +137,8 @@ static int usb_parse_ss_endpoint_companion(struct device *ddev, int cfgno,
 				desc->wBytesPerInterval,
 				cfgno, inum, asnum, ep->desc.bEndpointAddress,
 				max_tx);
-		desc->wBytesPerInterval = max_tx;
+		ep->ss_ep_comp.wBytesPerInterval = max_tx;
 	}
-valid:
-	return retval;
 }
 
 static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
@@ -293,61 +260,19 @@ static int usb_parse_endpoint(struct device *ddev, int cfgno, int inum,
 				cfgno, inum, asnum, d->bEndpointAddress,
 				maxp);
 	}
-	/* Allocate room for and parse any SS endpoint companion descriptors */
-	if (to_usb_device(ddev)->speed == USB_SPEED_SUPER) {
-		endpoint->extra = buffer;
-		i = find_next_descriptor_more(buffer, size, USB_DT_SS_ENDPOINT_COMP,
-				USB_DT_ENDPOINT, USB_DT_INTERFACE, &n);
-		endpoint->extralen = i;
-		buffer += i;
-		size -= i;
-
-		/* Allocate space for the SS endpoint companion descriptor */
-		endpoint->ss_ep_comp = kzalloc(sizeof(struct usb_host_ss_ep_comp),
-				GFP_KERNEL);
-		if (!endpoint->ss_ep_comp)
-			return -ENOMEM;
 
-		/* Fill in some default values (may be overwritten later) */
-		endpoint->ss_ep_comp->desc.bLength = USB_DT_SS_EP_COMP_SIZE;
-		endpoint->ss_ep_comp->desc.bDescriptorType = USB_DT_SS_ENDPOINT_COMP;
-		endpoint->ss_ep_comp->desc.bMaxBurst = 0;
-		/*
-		 * Leave bmAttributes as zero, which will mean no streams for
-		 * bulk, and isoc won't support multiple bursts of packets.
-		 * With bursts of only one packet, and a Mult of 1, the max
-		 * amount of data moved per endpoint service interval is one
-		 * packet.
-		 */
-		if (usb_endpoint_xfer_isoc(&endpoint->desc) ||
-				usb_endpoint_xfer_int(&endpoint->desc))
-			endpoint->ss_ep_comp->desc.wBytesPerInterval =
-				endpoint->desc.wMaxPacketSize;
-
-		if (size > 0) {
-			retval = usb_parse_ss_endpoint_companion(ddev, cfgno,
-					inum, asnum, endpoint, num_ep, buffer,
-					size);
-			if (retval >= 0) {
-				buffer += retval;
-				retval = buffer - buffer0;
-			}
-		} else {
-			dev_warn(ddev, "config %d interface %d altsetting %d "
-				"endpoint 0x%X has no "
-				"SuperSpeed companion descriptor\n",
-				cfgno, inum, asnum, d->bEndpointAddress);
-			retval = buffer - buffer0;
-		}
-	} else {
-		/* Skip over any Class Specific or Vendor Specific descriptors;
-		 * find the next endpoint or interface descriptor */
-		endpoint->extra = buffer;
-		i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
-				USB_DT_INTERFACE, &n);
-		endpoint->extralen = i;
-		retval = buffer - buffer0 + i;
-	}
+	/* Parse a possible SuperSpeed endpoint companion descriptor */
+	if (to_usb_device(ddev)->speed == USB_SPEED_SUPER)
+		usb_parse_ss_endpoint_companion(ddev, cfgno,
+				inum, asnum, endpoint, buffer, size);
+
+	/* Skip over any Class Specific or Vendor Specific descriptors;
+	 * find the next endpoint or interface descriptor */
+	endpoint->extra = buffer;
+	i = find_next_descriptor(buffer, size, USB_DT_ENDPOINT,
+			USB_DT_INTERFACE, &n);
+	endpoint->extralen = i;
+	retval = buffer - buffer0 + i;
 	if (n > 0)
 		dev_dbg(ddev, "skipped %d descriptor%s after %s\n",
 		    n, plural(n), "endpoint");
diff --git a/drivers/usb/host/xhci-mem.c b/drivers/usb/host/xhci-mem.c
index 4df752cb0f78..fd9e03afd91c 100644
--- a/drivers/usb/host/xhci-mem.c
+++ b/drivers/usb/host/xhci-mem.c
@@ -1010,9 +1010,9 @@ static inline unsigned int xhci_get_endpoint_interval(struct usb_device *udev,
 static inline u32 xhci_get_endpoint_mult(struct usb_device *udev,
 		struct usb_host_endpoint *ep)
 {
-	if (udev->speed != USB_SPEED_SUPER || !ep->ss_ep_comp)
+	if (udev->speed != USB_SPEED_SUPER)
 		return 0;
-	return ep->ss_ep_comp->desc.bmAttributes;
+	return ep->ss_ep_comp.bmAttributes;
 }
 
 static inline u32 xhci_get_endpoint_type(struct usb_device *udev,
@@ -1061,13 +1061,8 @@ static inline u32 xhci_get_max_esit_payload(struct xhci_hcd *xhci,
 			usb_endpoint_xfer_bulk(&ep->desc))
 		return 0;
 
-	if (udev->speed == USB_SPEED_SUPER) {
-		if (ep->ss_ep_comp)
-			return ep->ss_ep_comp->desc.wBytesPerInterval;
-		xhci_warn(xhci, "WARN no SS endpoint companion descriptor.\n");
-		/* Assume no bursts, no multiple opportunities to send. */
-		return ep->desc.wMaxPacketSize;
-	}
+	if (udev->speed == USB_SPEED_SUPER)
+		return ep->ss_ep_comp.wBytesPerInterval;
 
 	max_packet = ep->desc.wMaxPacketSize & 0x3ff;
 	max_burst = (ep->desc.wMaxPacketSize & 0x1800) >> 11;
@@ -1131,12 +1126,9 @@ int xhci_endpoint_init(struct xhci_hcd *xhci,
 		max_packet = ep->desc.wMaxPacketSize;
 		ep_ctx->ep_info2 |= MAX_PACKET(max_packet);
 		/* dig out max burst from ep companion desc */
-		if (!ep->ss_ep_comp) {
-			xhci_warn(xhci, "WARN no SS endpoint companion descriptor.\n");
-			max_packet = 0;
-		} else {
-			max_packet = ep->ss_ep_comp->desc.bMaxBurst;
-		}
+		max_packet = ep->ss_ep_comp.bMaxBurst;
+		if (!max_packet)
+			xhci_warn(xhci, "WARN no SS endpoint bMaxBurst\n");
 		ep_ctx->ep_info2 |= MAX_BURST(max_packet);
 		break;
 	case USB_SPEED_HIGH:
diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c
index 3cac2ff8b50a..59f38a5f2fe6 100644
--- a/drivers/usb/host/xhci.c
+++ b/drivers/usb/host/xhci.c
@@ -1476,13 +1476,7 @@ static int xhci_check_streams_endpoint(struct xhci_hcd *xhci,
 	ret = xhci_check_args(xhci_to_hcd(xhci), udev, ep, 1, __func__);
 	if (ret <= 0)
 		return -EINVAL;
-	if (!ep->ss_ep_comp) {
-		xhci_warn(xhci, "WARN: No SuperSpeed Endpoint Companion"
-				" descriptor for ep 0x%x\n",
-				ep->desc.bEndpointAddress);
-		return -EINVAL;
-	}
-	if (ep->ss_ep_comp->desc.bmAttributes == 0) {
+	if (ep->ss_ep_comp.bmAttributes == 0) {
 		xhci_warn(xhci, "WARN: SuperSpeed Endpoint Companion"
 				" descriptor for ep 0x%x does not support streams\n",
 				ep->desc.bEndpointAddress);
@@ -1540,7 +1534,6 @@ static int xhci_calculate_streams_and_bitmask(struct xhci_hcd *xhci,
 		struct usb_host_endpoint **eps, unsigned int num_eps,
 		unsigned int *num_streams, u32 *changed_ep_bitmask)
 {
-	struct usb_host_ss_ep_comp *ss_ep_comp;
 	unsigned int max_streams;
 	unsigned int endpoint_flag;
 	int i;
@@ -1552,8 +1545,8 @@ static int xhci_calculate_streams_and_bitmask(struct xhci_hcd *xhci,
 		if (ret < 0)
 			return ret;
 
-		ss_ep_comp = eps[i]->ss_ep_comp;
-		max_streams = USB_SS_MAX_STREAMS(ss_ep_comp->desc.bmAttributes);
+		max_streams = USB_SS_MAX_STREAMS(
+				eps[i]->ss_ep_comp.bmAttributes);
 		if (max_streams < (*num_streams - 1)) {
 			xhci_dbg(xhci, "Ep 0x%x only supports %u stream IDs.\n",
 					eps[i]->desc.bEndpointAddress,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 1ea25377ca0d..a748815ee629 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -45,27 +45,14 @@ struct wusb_dev;
 
 struct ep_device;
 
-/* For SS devices */
-/**
- * struct usb_host_ss_ep_comp - Valid for SuperSpeed devices only
- * @desc: endpoint companion descriptor, wMaxPacketSize in native byteorder
- * @extra: descriptors following this endpoint companion descriptor
- * @extralen: how many bytes of "extra" are valid
- */
-struct usb_host_ss_ep_comp {
-	struct usb_ss_ep_comp_descriptor	desc;
-	unsigned char				*extra;   /* Extra descriptors */
-	int					extralen;
-};
-
 /**
  * struct usb_host_endpoint - host-side endpoint descriptor and queue
  * @desc: descriptor for this endpoint, wMaxPacketSize in native byteorder
+ * @ss_ep_comp: SuperSpeed companion descriptor for this endpoint
  * @urb_list: urbs queued to this endpoint; maintained by usbcore
  * @hcpriv: for use by HCD; typically holds hardware dma queue head (QH)
  *	with one or more transfer descriptors (TDs) per urb
  * @ep_dev: ep_device for sysfs info
- * @ss_ep_comp: companion descriptor information for this endpoint
  * @extra: descriptors following this endpoint in the configuration
  * @extralen: how many bytes of "extra" are valid
  * @enabled: URBs may be submitted to this endpoint
@@ -74,11 +61,11 @@ struct usb_host_ss_ep_comp {
  * descriptor within an active interface in a given USB configuration.
  */
 struct usb_host_endpoint {
-	struct usb_endpoint_descriptor	desc;
+	struct usb_endpoint_descriptor		desc;
+	struct usb_ss_ep_comp_descriptor	ss_ep_comp;
 	struct list_head		urb_list;
 	void				*hcpriv;
 	struct ep_device 		*ep_dev;	/* For sysfs info */
-	struct usb_host_ss_ep_comp	*ss_ep_comp;	/* For SS devices */
 
 	unsigned char *extra;   /* Extra descriptors */
 	int extralen;
-- 
cgit v1.2.3


From 85bcb5ee889e0ebb9154718939e049de265fcdfb Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Fri, 30 Apr 2010 16:35:37 -0400
Subject: USB: remove URB_NO_SETUP_DMA_MAP

Now that URB_NO_SETUP_DMA_MAP is no longer in use, this patch (as1376)
removes all references to it.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 Documentation/usb/dma.txt            | 18 ++++++------------
 drivers/staging/usbip/usbip_common.c |  2 +-
 drivers/usb/core/hcd.c               |  3 +--
 include/linux/usb.h                  | 28 +++++++++++-----------------
 4 files changed, 19 insertions(+), 32 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/usb/dma.txt b/Documentation/usb/dma.txt
index a37e59cf2786..84ef865237db 100644
--- a/Documentation/usb/dma.txt
+++ b/Documentation/usb/dma.txt
@@ -16,11 +16,11 @@ OR:  they can now be DMA-aware.
   manage dma mappings for existing dma-ready buffers (see below).
 
 - URBs have an additional "transfer_dma" field, as well as a transfer_flags
-  bit saying if it's valid.  (Control requests also have "setup_dma" and a
-  corresponding transfer_flags bit.)
+  bit saying if it's valid.  (Control requests also have "setup_dma", but
+  drivers must not use it.)
 
-- "usbcore" will map those DMA addresses, if a DMA-aware driver didn't do
-  it first and set URB_NO_TRANSFER_DMA_MAP or URB_NO_SETUP_DMA_MAP.  HCDs
+- "usbcore" will map this DMA address, if a DMA-aware driver didn't do
+  it first and set URB_NO_TRANSFER_DMA_MAP.  HCDs
   don't manage dma mappings for URBs.
 
 - There's a new "generic DMA API", parts of which are usable by USB device
@@ -53,12 +53,6 @@ and effects like cache-trashing can impose subtle penalties.
   to use this type of memory ("dma-coherent"), and memory returned from
   kmalloc() will work just fine.
 
-  For control transfers you can use the buffer primitives or not for each
-  of the transfer buffer and setup buffer independently.  Set the flag bits
-  URB_NO_TRANSFER_DMA_MAP and URB_NO_SETUP_DMA_MAP to indicate which
-  buffers you have prepared.  For non-control transfers URB_NO_SETUP_DMA_MAP
-  is ignored.
-
   The memory buffer returned is "dma-coherent"; sometimes you might need to
   force a consistent memory access ordering by using memory barriers.  It's
   not using a streaming DMA mapping, so it's good for small transfers on
@@ -130,8 +124,8 @@ of Documentation/PCI/PCI-DMA-mapping.txt, titled "What memory is DMA-able?")
 	void usb_buffer_unmap (struct urb *urb);
 
   The calls manage urb->transfer_dma for you, and set URB_NO_TRANSFER_DMA_MAP
-  so that usbcore won't map or unmap the buffer.  The same goes for
-  urb->setup_dma and URB_NO_SETUP_DMA_MAP for control requests.
+  so that usbcore won't map or unmap the buffer.  They cannot be used for
+  setup_packet buffers in control requests.
 
 Note that several of those interfaces are currently commented out, since
 they don't have current users.  See the source code.  Other than the dmasync
diff --git a/drivers/staging/usbip/usbip_common.c b/drivers/staging/usbip/usbip_common.c
index e3fa4216c1cd..52408164036f 100644
--- a/drivers/staging/usbip/usbip_common.c
+++ b/drivers/staging/usbip/usbip_common.c
@@ -562,7 +562,7 @@ EXPORT_SYMBOL_GPL(sockfd_to_socket);
 /* there may be more cases to tweak the flags. */
 static unsigned int tweak_transfer_flags(unsigned int flags)
 {
-	flags &= ~(URB_NO_TRANSFER_DMA_MAP|URB_NO_SETUP_DMA_MAP);
+	flags &= ~URB_NO_TRANSFER_DMA_MAP;
 	return flags;
 }
 
diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 3aaee2811f01..0abc5c537f39 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1316,8 +1316,7 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 	 * or uses the provided scatter gather list for bulk.
 	 */
 
-	if (usb_endpoint_xfer_control(&urb->ep->desc)
-	    && !(urb->transfer_flags & URB_NO_SETUP_DMA_MAP)) {
+	if (usb_endpoint_xfer_control(&urb->ep->desc)) {
 		if (hcd->self.uses_dma) {
 			urb->setup_dma = dma_map_single(
 					hcd->self.controller,
diff --git a/include/linux/usb.h b/include/linux/usb.h
index a748815ee629..1eb4762d9ea8 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -953,7 +953,6 @@ extern int usb_disabled(void);
 #define URB_ISO_ASAP		0x0002	/* iso-only, urb->start_frame
 					 * ignored */
 #define URB_NO_TRANSFER_DMA_MAP	0x0004	/* urb->transfer_dma valid on submit */
-#define URB_NO_SETUP_DMA_MAP	0x0008	/* urb->setup_dma valid on submit */
 #define URB_NO_FSBR		0x0020	/* UHCI-specific */
 #define URB_ZERO_PACKET		0x0040	/* Finish bulk OUT with short packet */
 #define URB_NO_INTERRUPT	0x0080	/* HINT: no non-error interrupt
@@ -1049,12 +1048,8 @@ typedef void (*usb_complete_t)(struct urb *);
  * @setup_packet: Only used for control transfers, this points to eight bytes
  *	of setup data.  Control transfers always start by sending this data
  *	to the device.  Then transfer_buffer is read or written, if needed.
- * @setup_dma: For control transfers with URB_NO_SETUP_DMA_MAP set, the
- *	device driver has provided this DMA address for the setup packet.
- *	The host controller driver should use this in preference to
- *	setup_packet, but the HCD may chose to ignore the address if it must
- *	copy the setup packet into internal structures.  Therefore, setup_packet
- *	must always point to a valid buffer.
+ * @setup_dma: DMA pointer for the setup packet.  The caller must not use
+ *	this field; setup_packet must point to a valid buffer.
  * @start_frame: Returns the initial frame for isochronous transfers.
  * @number_of_packets: Lists the number of ISO transfer buffers.
  * @interval: Specifies the polling interval for interrupt or isochronous
@@ -1086,13 +1081,14 @@ typedef void (*usb_complete_t)(struct urb *);
  * bounce buffer or talking to an IOMMU),
  * although they're cheap on commodity x86 and ppc hardware.
  *
- * Alternatively, drivers may pass the URB_NO_xxx_DMA_MAP transfer flags,
- * which tell the host controller driver that no such mapping is needed since
+ * Alternatively, drivers may pass the URB_NO_TRANSFER_DMA_MAP transfer flag,
+ * which tells the host controller driver that no such mapping is needed for
+ * the transfer_buffer since
  * the device driver is DMA-aware.  For example, a device driver might
  * allocate a DMA buffer with usb_alloc_coherent() or call usb_buffer_map().
- * When these transfer flags are provided, host controller drivers will
- * attempt to use the dma addresses found in the transfer_dma and/or
- * setup_dma fields rather than determining a dma address themselves.
+ * When this transfer flag is provided, host controller drivers will
+ * attempt to use the dma address found in the transfer_dma
+ * field rather than determining a dma address themselves.
  *
  * Note that transfer_buffer must still be set if the controller
  * does not support DMA (as indicated by bus.uses_dma) and when talking
@@ -1115,11 +1111,9 @@ typedef void (*usb_complete_t)(struct urb *);
  * should always terminate with a short packet, even if it means adding an
  * extra zero length packet.
  *
- * Control URBs must provide a setup_packet.  The setup_packet and
- * transfer_buffer may each be mapped for DMA or not, independently of
- * the other.  The transfer_flags bits URB_NO_TRANSFER_DMA_MAP and
- * URB_NO_SETUP_DMA_MAP indicate which buffers have already been mapped.
- * URB_NO_SETUP_DMA_MAP is ignored for non-control URBs.
+ * Control URBs must provide a valid pointer in the setup_packet field.
+ * Unlike the transfer_buffer, the setup_packet may not be mapped for DMA
+ * beforehand.
  *
  * Interrupt URBs must provide an interval, saying how often (in milliseconds
  * or, for highspeed devices, 125 microsecond units)
-- 
cgit v1.2.3


From b18a23495fd4492081a61373fc3ef9f5c59e094a Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 30 Apr 2010 13:11:28 -0600
Subject: USB: Add definition for the Pipe Usage descriptor

The Pipe Usage descriptor is needed for USB Attached SCSI

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/ch9.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index 603b61aadd65..e779af5c800b 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -191,6 +191,8 @@ struct usb_ctrlrequest {
 #define USB_DT_WIRE_ADAPTER		0x21
 #define USB_DT_RPIPE			0x22
 #define USB_DT_CS_RADIO_CONTROL		0x23
+/* From the T10 UAS specification */
+#define USB_DT_PIPE_USAGE		0x24
 /* From the USB 3.0 spec */
 #define	USB_DT_SS_ENDPOINT_COMP		0x30
 
-- 
cgit v1.2.3


From fe54b058de9d1df5fef9e2a559651f4b7c9f04b1 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 30 Apr 2010 13:11:29 -0600
Subject: USB: Add a usb_pipe_endpoint() convenience function

Converting a pipe number to a struct usb_host_endpoint pointer is a little
messy.  Introduce a new convenience function to hide the mess.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/message.c | 3 +--
 drivers/usb/core/urb.c     | 3 +--
 include/linux/usb.h        | 8 ++++++++
 3 files changed, 10 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 79d1cdf4a635..d8329eb47d4f 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -226,8 +226,7 @@ int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe,
 	struct urb *urb;
 	struct usb_host_endpoint *ep;
 
-	ep = (usb_pipein(pipe) ? usb_dev->ep_in : usb_dev->ep_out)
-			[usb_pipeendpoint(pipe)];
+	ep = usb_pipe_endpoint(usb_dev, pipe);
 	if (!ep || len < 0)
 		return -EINVAL;
 
diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
index a760e46871c5..7c0555548ac8 100644
--- a/drivers/usb/core/urb.c
+++ b/drivers/usb/core/urb.c
@@ -308,8 +308,7 @@ int usb_submit_urb(struct urb *urb, gfp_t mem_flags)
 	 * will be required to set urb->ep directly and we will eliminate
 	 * urb->pipe.
 	 */
-	ep = (usb_pipein(urb->pipe) ? dev->ep_in : dev->ep_out)
-			[usb_pipeendpoint(urb->pipe)];
+	ep = usb_pipe_endpoint(dev, urb->pipe);
 	if (!ep)
 		return -ENOENT;
 
diff --git a/include/linux/usb.h b/include/linux/usb.h
index 1eb4762d9ea8..fd7d61e58198 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1552,6 +1552,14 @@ static inline unsigned int __create_pipe(struct usb_device *dev,
 #define usb_rcvintpipe(dev,endpoint)	\
 	((PIPE_INTERRUPT << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN)
 
+static inline struct usb_host_endpoint *
+usb_pipe_endpoint(struct usb_device *dev, unsigned int pipe)
+{
+	struct usb_host_endpoint **eps;
+	eps = usb_pipein(pipe) ? dev->ep_in : dev->ep_out;
+	return eps[usb_pipeendpoint(pipe)];
+}
+
 /*-------------------------------------------------------------------------*/
 
 static inline __u16
-- 
cgit v1.2.3


From 1e429018b646bdf903554e92ead1cda96cc552dc Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@linux.intel.com>
Date: Fri, 30 Apr 2010 13:11:30 -0600
Subject: USB: Turn interface_to_usbdev into an inline function

The stronger type-checking would have prevented a bug I had.

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index fd7d61e58198..cf3b289a87a2 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -184,8 +184,6 @@ struct usb_interface {
 	struct work_struct reset_ws;	/* for resets in atomic context */
 };
 #define	to_usb_interface(d) container_of(d, struct usb_interface, dev)
-#define	interface_to_usbdev(intf) \
-	container_of(intf->dev.parent, struct usb_device, dev)
 
 static inline void *usb_get_intfdata(struct usb_interface *intf)
 {
@@ -493,6 +491,11 @@ struct usb_device {
 };
 #define	to_usb_device(d) container_of(d, struct usb_device, dev)
 
+static inline struct usb_device *interface_to_usbdev(struct usb_interface *intf)
+{
+	return to_usb_device(intf->dev.parent);
+}
+
 extern struct usb_device *usb_get_dev(struct usb_device *dev);
 extern void usb_put_dev(struct usb_device *dev);
 
-- 
cgit v1.2.3


From 910f8d0cede74beff1eee93cf9cf2a28d7600e66 Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <matthew@wil.cx>
Date: Sat, 1 May 2010 12:20:01 -0600
Subject: USB: Change the scatterlist type in struct urb

Change the type of the URB's 'sg' pointer from a usb_sg_request to
a scatterlist.  This allows drivers to submit scatter-gather lists
without using the usb_sg_wait() interface.  It has the added benefit
of removing the typecasts that were added as part of patch as1368 (and
slightly decreasing the number of pointer dereferences).

Signed-off-by: Matthew Wilcox <willy@linux.intel.com>
Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
Tested-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/hcd.c       | 8 +++-----
 drivers/usb/core/message.c   | 4 ++--
 drivers/usb/host/ehci-q.c    | 2 +-
 drivers/usb/host/whci/qset.c | 4 ++--
 drivers/usb/host/xhci-ring.c | 4 ++--
 drivers/usb/mon/mon_bin.c    | 2 +-
 drivers/usb/mon/mon_text.c   | 2 +-
 include/linux/usb.h          | 2 +-
 8 files changed, 13 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
index 0abc5c537f39..297c92e4bf8d 100644
--- a/drivers/usb/core/hcd.c
+++ b/drivers/usb/core/hcd.c
@@ -1278,7 +1278,7 @@ static void unmap_urb_for_dma(struct usb_hcd *hcd, struct urb *urb)
 	dir = usb_urb_dir_in(urb) ? DMA_FROM_DEVICE : DMA_TO_DEVICE;
 	if (urb->transfer_flags & URB_DMA_MAP_SG)
 		dma_unmap_sg(hcd->self.controller,
-				urb->sg->sg,
+				urb->sg,
 				urb->num_sgs,
 				dir);
 	else if (urb->transfer_flags & URB_DMA_MAP_PAGE)
@@ -1346,7 +1346,7 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 			if (urb->num_sgs) {
 				int n = dma_map_sg(
 						hcd->self.controller,
-						urb->sg->sg,
+						urb->sg,
 						urb->num_sgs,
 						dir);
 				if (n <= 0)
@@ -1359,9 +1359,7 @@ static int map_urb_for_dma(struct usb_hcd *hcd, struct urb *urb,
 							URB_DMA_SG_COMBINED;
 				}
 			} else if (urb->sg) {
-				struct scatterlist *sg;
-
-				sg = (struct scatterlist *) urb->sg;
+				struct scatterlist *sg = urb->sg;
 				urb->transfer_dma = dma_map_page(
 						hcd->self.controller,
 						sg_page(sg),
diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index d8329eb47d4f..63919b8abee1 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -413,7 +413,7 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 					sg->length;
 			}
 		}
-		io->urbs[0]->sg = io;
+		io->urbs[0]->sg = sg;
 		io->urbs[0]->num_sgs = io->entries;
 		io->entries = 1;
 	} else {
@@ -454,7 +454,7 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 			}
 			io->urbs[i]->transfer_buffer_length = len;
 
-			io->urbs[i]->sg = (struct usb_sg_request *) sg;
+			io->urbs[i]->sg = sg;
 		}
 		io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
 	}
diff --git a/drivers/usb/host/ehci-q.c b/drivers/usb/host/ehci-q.c
index 89521775c567..11a79c4f4a9d 100644
--- a/drivers/usb/host/ehci-q.c
+++ b/drivers/usb/host/ehci-q.c
@@ -663,7 +663,7 @@ qh_urb_transaction (
 	 */
 	i = urb->num_sgs;
 	if (len > 0 && i > 0) {
-		sg = urb->sg->sg;
+		sg = urb->sg;
 		buf = sg_dma_address(sg);
 
 		/* urb->transfer_buffer_length may be smaller than the
diff --git a/drivers/usb/host/whci/qset.c b/drivers/usb/host/whci/qset.c
index b388dd1fb4c4..ab5a14fbfeeb 100644
--- a/drivers/usb/host/whci/qset.c
+++ b/drivers/usb/host/whci/qset.c
@@ -443,7 +443,7 @@ static int qset_add_urb_sg(struct whc *whc, struct whc_qset *qset, struct urb *u
 
 	remaining = urb->transfer_buffer_length;
 
-	for_each_sg(urb->sg->sg, sg, urb->num_sgs, i) {
+	for_each_sg(urb->sg, sg, urb->num_sgs, i) {
 		dma_addr_t dma_addr;
 		size_t dma_remaining;
 		dma_addr_t sp, ep;
@@ -561,7 +561,7 @@ static int qset_add_urb_sg_linearize(struct whc *whc, struct whc_qset *qset,
 
 	remaining = urb->transfer_buffer_length;
 
-	for_each_sg(urb->sg->sg, sg, urb->sg->nents, i) {
+	for_each_sg(urb->sg, sg, urb->num_sgs, i) {
 		size_t len;
 		size_t sg_remaining;
 		void *orig;
diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c
index 9e27eb0c7004..a67caef265b1 100644
--- a/drivers/usb/host/xhci-ring.c
+++ b/drivers/usb/host/xhci-ring.c
@@ -1788,7 +1788,7 @@ static unsigned int count_sg_trbs_needed(struct xhci_hcd *xhci, struct urb *urb)
 
 	xhci_dbg(xhci, "count sg list trbs: \n");
 	num_trbs = 0;
-	for_each_sg(urb->sg->sg, sg, num_sgs, i) {
+	for_each_sg(urb->sg, sg, num_sgs, i) {
 		unsigned int previous_total_trbs = num_trbs;
 		unsigned int len = sg_dma_len(sg);
 
@@ -1951,7 +1951,7 @@ static int queue_bulk_sg_tx(struct xhci_hcd *xhci, gfp_t mem_flags,
 	 *    the amount of memory allocated for this scatter-gather list.
 	 * 3. TRBs buffers can't cross 64KB boundaries.
 	 */
-	sg = urb->sg->sg;
+	sg = urb->sg;
 	addr = (u64) sg_dma_address(sg);
 	this_sg_len = sg_dma_len(sg);
 	trb_buff_len = TRB_MAX_BUFF_SIZE -
diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
index 8a7968df278f..e7fa3644ba6a 100644
--- a/drivers/usb/mon/mon_bin.c
+++ b/drivers/usb/mon/mon_bin.c
@@ -422,7 +422,7 @@ static unsigned int mon_bin_get_data(const struct mon_reader_bin *rp,
 		}
 
 		/* Copy up to the first non-addressable segment */
-		for_each_sg(urb->sg->sg, sg, urb->num_sgs, i) {
+		for_each_sg(urb->sg, sg, urb->num_sgs, i) {
 			if (length == 0 || PageHighMem(sg_page(sg)))
 				break;
 			this_len = min_t(unsigned int, sg->length, length);
diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c
index d56260280f54..a545d65f6e57 100644
--- a/drivers/usb/mon/mon_text.c
+++ b/drivers/usb/mon/mon_text.c
@@ -159,7 +159,7 @@ static inline char mon_text_get_data(struct mon_event_text *ep, struct urb *urb,
 		if (src == NULL)
 			return 'Z';	/* '0' would be not as pretty. */
 	} else {
-		struct scatterlist *sg = urb->sg->sg;
+		struct scatterlist *sg = urb->sg;
 
 		if (PageHighMem(sg_page(sg)))
 			return 'D';
diff --git a/include/linux/usb.h b/include/linux/usb.h
index cf3b289a87a2..eec9e74f332f 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1195,7 +1195,7 @@ struct urb {
 	unsigned int transfer_flags;	/* (in) URB_SHORT_NOT_OK | ...*/
 	void *transfer_buffer;		/* (in) associated data buffer */
 	dma_addr_t transfer_dma;	/* (in) dma addr for transfer_buffer */
-	struct usb_sg_request *sg;	/* (in) scatter gather buffer list */
+	struct scatterlist *sg;		/* (in) scatter gather buffer list */
 	int num_sgs;			/* (in) number of entries in the sg list */
 	u32 transfer_buffer_length;	/* (in) data buffer length */
 	u32 actual_length;		/* (return) actual transfer length */
-- 
cgit v1.2.3


From 0ba169aff9181389f30f225ad92e113eeb2290b9 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>
Date: Wed, 5 May 2010 15:26:17 -0400
Subject: USB: simplify usb_sg_init()

This patch (as1377) simplifies the code in usb_sg_init(), without
changing its functionality.  It also removes a couple of unused fields
from the usb_sg_request structure.

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/message.c | 93 +++++++++++++++++++---------------------------
 include/linux/usb.h        |  2 -
 2 files changed, 38 insertions(+), 57 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/message.c b/drivers/usb/core/message.c
index 63919b8abee1..a73e08fdab36 100644
--- a/drivers/usb/core/message.c
+++ b/drivers/usb/core/message.c
@@ -371,79 +371,64 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 	spin_lock_init(&io->lock);
 	io->dev = dev;
 	io->pipe = pipe;
-	io->sg = sg;
-	io->nents = nents;
-	io->entries = nents;
 
-	/* initialize all the urbs we'll use */
 	if (dev->bus->sg_tablesize > 0) {
-		io->urbs = kmalloc(sizeof *io->urbs, mem_flags);
 		use_sg = true;
+		io->entries = 1;
 	} else {
-		io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags);
 		use_sg = false;
+		io->entries = nents;
 	}
+
+	/* initialize all the urbs we'll use */
+	io->urbs = kmalloc(io->entries * sizeof *io->urbs, mem_flags);
 	if (!io->urbs)
 		goto nomem;
 
-	urb_flags = 0;
+	urb_flags = URB_NO_INTERRUPT;
 	if (usb_pipein(pipe))
 		urb_flags |= URB_SHORT_NOT_OK;
 
-	if (use_sg) {
-		io->urbs[0] = usb_alloc_urb(0, mem_flags);
-		if (!io->urbs[0]) {
-			io->entries = 0;
-			goto nomem;
-		}
-
-		io->urbs[0]->dev = NULL;
-		io->urbs[0]->pipe = pipe;
-		io->urbs[0]->interval = period;
-		io->urbs[0]->transfer_flags = urb_flags;
+	for_each_sg(sg, sg, io->entries, i) {
+		struct urb *urb;
+		unsigned len;
 
-		io->urbs[0]->complete = sg_complete;
-		io->urbs[0]->context = io;
-
-		/* A length of zero means transfer the whole sg list */
-		io->urbs[0]->transfer_buffer_length = length;
-		if (length == 0) {
-			for_each_sg(sg, sg, io->entries, i) {
-				io->urbs[0]->transfer_buffer_length +=
-					sg->length;
-			}
+		urb = usb_alloc_urb(0, mem_flags);
+		if (!urb) {
+			io->entries = i;
+			goto nomem;
 		}
-		io->urbs[0]->sg = sg;
-		io->urbs[0]->num_sgs = io->entries;
-		io->entries = 1;
-	} else {
-		urb_flags |= URB_NO_INTERRUPT;
-		for_each_sg(sg, sg, io->entries, i) {
-			unsigned len;
-
-			io->urbs[i] = usb_alloc_urb(0, mem_flags);
-			if (!io->urbs[i]) {
-				io->entries = i;
-				goto nomem;
+		io->urbs[i] = urb;
+
+		urb->dev = NULL;
+		urb->pipe = pipe;
+		urb->interval = period;
+		urb->transfer_flags = urb_flags;
+		urb->complete = sg_complete;
+		urb->context = io;
+		urb->sg = sg;
+
+		if (use_sg) {
+			/* There is no single transfer buffer */
+			urb->transfer_buffer = NULL;
+			urb->num_sgs = nents;
+
+			/* A length of zero means transfer the whole sg list */
+			len = length;
+			if (len == 0) {
+				for_each_sg(sg, sg, nents, i)
+					len += sg->length;
 			}
-
-			io->urbs[i]->dev = NULL;
-			io->urbs[i]->pipe = pipe;
-			io->urbs[i]->interval = period;
-			io->urbs[i]->transfer_flags = urb_flags;
-
-			io->urbs[i]->complete = sg_complete;
-			io->urbs[i]->context = io;
-
+		} else {
 			/*
 			 * Some systems can't use DMA; they use PIO instead.
 			 * For their sakes, transfer_buffer is set whenever
 			 * possible.
 			 */
 			if (!PageHighMem(sg_page(sg)))
-				io->urbs[i]->transfer_buffer = sg_virt(sg);
+				urb->transfer_buffer = sg_virt(sg);
 			else
-				io->urbs[i]->transfer_buffer = NULL;
+				urb->transfer_buffer = NULL;
 
 			len = sg->length;
 			if (length) {
@@ -452,12 +437,10 @@ int usb_sg_init(struct usb_sg_request *io, struct usb_device *dev,
 				if (length == 0)
 					io->entries = i + 1;
 			}
-			io->urbs[i]->transfer_buffer_length = len;
-
-			io->urbs[i]->sg = sg;
 		}
-		io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
+		urb->transfer_buffer_length = len;
 	}
+	io->urbs[--i]->transfer_flags &= ~URB_NO_INTERRUPT;
 
 	/* transaction state */
 	io->count = io->entries;
diff --git a/include/linux/usb.h b/include/linux/usb.h
index eec9e74f332f..ce07062ebc28 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1469,8 +1469,6 @@ struct usb_sg_request {
 
 	struct usb_device	*dev;
 	int			pipe;
-	struct scatterlist	*sg;
-	int			nents;
 
 	int			entries;
 	struct urb		**urbs;
-- 
cgit v1.2.3


From 27c7acf22047fbe4ec4cc36b7c2610dba227697c Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 5 May 2010 23:57:37 +0200
Subject: USB: serial: reimplement generic fifo-based writes

Reimplement fifo-based writes in the generic driver using a multiple
pre-allocated urb scheme.

In contrast to multi-urb writes, no allocations (of urbs or buffers) are
made during run-time and there is less pressure on the host stack
queues as currently only two urbs are used (implementation is generic
and can handle more than two urbs as well, though).

Initial tests using ftdi_sio show that the implementation achieves the
same (maximum) throughput at high baudrates as multi-urb writes. The CPU
usage is much lower than for multi-urb writes for small write requests
and only slightly higher for large (e.g. 2k) requests (due to extra copy
via fifo?).

Also outperforms multi-urb writes for small write requests on an
embedded arm-9 system, where multi-urb writes are CPU-bound at high
baudrates (perf reveals that a lot of time is spent in the host stack
enqueue function -- could perhaps be a bug as well).

Keeping the original write_urb, buffer and flag for now as there are
other drivers depending on them.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/generic.c    | 59 +++++++++++++++++++++++++++--------------
 drivers/usb/serial/usb-serial.c | 33 +++++++++++++++++++++++
 include/linux/usb/serial.h      | 12 +++++++++
 3 files changed, 84 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 1a134f9c64f3..3ae17840175c 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -1,6 +1,7 @@
 /*
  * USB Serial Converter Generic functions
  *
+ * Copyright (C) 2010 Johan Hovold (jhovold@gmail.com)
  * Copyright (C) 1999 - 2002 Greg Kroah-Hartman (greg@kroah.com)
  *
  *	This program is free software; you can redistribute it and/or
@@ -143,6 +144,7 @@ static void generic_cleanup(struct usb_serial_port *port)
 {
 	struct usb_serial *serial = port->serial;
 	unsigned long flags;
+	int i;
 
 	dbg("%s - port %d", __func__, port->number);
 
@@ -150,6 +152,8 @@ static void generic_cleanup(struct usb_serial_port *port)
 		/* shutdown any bulk transfers that might be going on */
 		if (port->bulk_out_size) {
 			usb_kill_urb(port->write_urb);
+			for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
+				usb_kill_urb(port->write_urbs[i]);
 
 			spin_lock_irqsave(&port->lock, flags);
 			kfifo_reset_out(&port->write_fifo);
@@ -258,46 +262,56 @@ err_urb:
  * usb_serial_generic_write_start - kick off an URB write
  * @port:	Pointer to the &struct usb_serial_port data
  *
- * Returns the number of bytes queued on success. This will be zero if there
- * was nothing to send. Otherwise, it returns a negative errno value
+ * Returns zero on success, or a negative errno value
  */
 static int usb_serial_generic_write_start(struct usb_serial_port *port)
 {
-	int result;
-	int count;
+	struct urb *urb;
+	int count, result;
 	unsigned long flags;
+	int i;
 
+	if (test_and_set_bit_lock(USB_SERIAL_WRITE_BUSY, &port->flags))
+		return 0;
+retry:
 	spin_lock_irqsave(&port->lock, flags);
-	if (port->write_urb_busy || !kfifo_len(&port->write_fifo)) {
+	if (!port->write_urbs_free || !kfifo_len(&port->write_fifo)) {
+		clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags);
 		spin_unlock_irqrestore(&port->lock, flags);
 		return 0;
 	}
-	port->write_urb_busy = 1;
+	i = (int)find_first_bit(&port->write_urbs_free,
+						ARRAY_SIZE(port->write_urbs));
 	spin_unlock_irqrestore(&port->lock, flags);
 
+	urb = port->write_urbs[i];
 	count = port->serial->type->prepare_write_buffer(port,
-					&port->write_urb->transfer_buffer,
-					port->bulk_out_size, NULL, 0);
-	usb_serial_debug_data(debug, &port->dev, __func__,
-				count, port->write_urb->transfer_buffer);
-	port->write_urb->transfer_buffer_length = count;
-
-	/* send the data out the bulk port */
-	result = usb_submit_urb(port->write_urb, GFP_ATOMIC);
+						&urb->transfer_buffer,
+						port->bulk_out_size, NULL, 0);
+	urb->transfer_buffer_length = count;
+	usb_serial_debug_data(debug, &port->dev, __func__, count,
+						urb->transfer_buffer);
+	result = usb_submit_urb(urb, GFP_ATOMIC);
 	if (result) {
 		dev_err(&port->dev, "%s - error submitting urb: %d\n",
 						__func__, result);
-		/* don't have to grab the lock here, as we will
-		   retry if != 0 */
-		port->write_urb_busy = 0;
+		clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags);
 		return result;
 	}
+	clear_bit(i, &port->write_urbs_free);
 
 	spin_lock_irqsave(&port->lock, flags);
 	port->tx_bytes += count;
 	spin_unlock_irqrestore(&port->lock, flags);
 
-	return count;
+	/* Try sending off another urb, unless in irq context (in which case
+	 * there will be no free urb). */
+	if (!in_irq())
+		goto retry;
+
+	clear_bit_unlock(USB_SERIAL_WRITE_BUSY, &port->flags);
+
+	return 0;
 }
 
 /**
@@ -461,6 +475,7 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 	unsigned long flags;
 	struct usb_serial_port *port = urb->context;
 	int status = urb->status;
+	int i;
 
 	dbg("%s - port %d", __func__, port->number);
 
@@ -472,9 +487,13 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 		port->tx_urbs--;
 		spin_unlock_irqrestore(&port->lock, flags);
 	} else {
+		for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
+			if (port->write_urbs[i] == urb)
+				break;
+
 		spin_lock_irqsave(&port->lock, flags);
 		port->tx_bytes -= urb->transfer_buffer_length;
-		port->write_urb_busy = 0;
+		set_bit(i, &port->write_urbs_free);
 		spin_unlock_irqrestore(&port->lock, flags);
 
 		if (status) {
@@ -576,7 +595,7 @@ int usb_serial_generic_resume(struct usb_serial *serial)
 				c++;
 		}
 
-		if (port->write_urb) {
+		if (port->bulk_out_size) {
 			r = usb_serial_generic_write_start(port);
 			if (r < 0)
 				c++;
diff --git a/drivers/usb/serial/usb-serial.c b/drivers/usb/serial/usb-serial.c
index 8249fd8381fb..941c2d409f85 100644
--- a/drivers/usb/serial/usb-serial.c
+++ b/drivers/usb/serial/usb-serial.c
@@ -548,8 +548,12 @@ static void usb_serial_port_work(struct work_struct *work)
 
 static void kill_traffic(struct usb_serial_port *port)
 {
+	int i;
+
 	usb_kill_urb(port->read_urb);
 	usb_kill_urb(port->write_urb);
+	for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
+		usb_kill_urb(port->write_urbs[i]);
 	/*
 	 * This is tricky.
 	 * Some drivers submit the read_urb in the
@@ -568,6 +572,7 @@ static void kill_traffic(struct usb_serial_port *port)
 static void port_release(struct device *dev)
 {
 	struct usb_serial_port *port = to_usb_serial_port(dev);
+	int i;
 
 	dbg ("%s - %s", __func__, dev_name(dev));
 
@@ -582,6 +587,10 @@ static void port_release(struct device *dev)
 	usb_free_urb(port->write_urb);
 	usb_free_urb(port->interrupt_in_urb);
 	usb_free_urb(port->interrupt_out_urb);
+	for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i) {
+		usb_free_urb(port->write_urbs[i]);
+		kfree(port->bulk_out_buffers[i]);
+	}
 	kfifo_free(&port->write_fifo);
 	kfree(port->bulk_in_buffer);
 	kfree(port->bulk_out_buffer);
@@ -920,6 +929,8 @@ int usb_serial_probe(struct usb_interface *interface,
 	}
 
 	for (i = 0; i < num_bulk_out; ++i) {
+		int j;
+
 		endpoint = bulk_out_endpoint[i];
 		port = serial->port[i];
 		port->write_urb = usb_alloc_urb(0, GFP_KERNEL);
@@ -945,6 +956,28 @@ int usb_serial_probe(struct usb_interface *interface,
 					endpoint->bEndpointAddress),
 				port->bulk_out_buffer, buffer_size,
 				serial->type->write_bulk_callback, port);
+		for (j = 0; j < ARRAY_SIZE(port->write_urbs); ++j) {
+			set_bit(j, &port->write_urbs_free);
+			port->write_urbs[j] = usb_alloc_urb(0, GFP_KERNEL);
+			if (!port->write_urbs[j]) {
+				dev_err(&interface->dev,
+						"No free urbs available\n");
+				goto probe_error;
+			}
+			port->bulk_out_buffers[j] = kmalloc(buffer_size,
+								GFP_KERNEL);
+			if (!port->bulk_out_buffers[j]) {
+				dev_err(&interface->dev,
+					"Couldn't allocate bulk_out_buffer\n");
+				goto probe_error;
+			}
+			usb_fill_bulk_urb(port->write_urbs[j], dev,
+					usb_sndbulkpipe(dev,
+						endpoint->bEndpointAddress),
+					port->bulk_out_buffers[j], buffer_size,
+					serial->type->write_bulk_callback,
+					port);
+		}
 	}
 
 	if (serial->type->read_int_callback) {
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index a4c99ea390e7..70b6d6b28997 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -35,6 +35,9 @@ enum port_dev_state {
 	PORT_UNREGISTERING,
 };
 
+/* USB serial flags */
+#define USB_SERIAL_WRITE_BUSY	0
+
 /**
  * usb_serial_port: structure for the specific ports of a device.
  * @serial: pointer back to the struct usb_serial owner of this port.
@@ -60,10 +63,14 @@ enum port_dev_state {
  * @write_urb: pointer to the bulk out struct urb for this port.
  * @write_fifo: kfifo used to buffer outgoing data
  * @write_urb_busy: port`s writing status
+ * @bulk_out_buffers: pointers to the bulk out buffers for this port
+ * @write_urbs: pointers to the bulk out urbs for this port
+ * @write_urbs_free: status bitmap the for bulk out urbs
  * @tx_bytes: number of bytes currently in host stack queues
  * @tx_urbs: number of urbs currently in host stack queues
  * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this
  *	port.
+ * @flags: usb serial port flags
  * @write_wait: a wait_queue_head_t used by the port.
  * @work: work queue entry for the line discipline waking up.
  * @throttled: nonzero if the read urb is inactive to throttle the device
@@ -98,11 +105,16 @@ struct usb_serial_port {
 	struct urb		*write_urb;
 	struct kfifo		write_fifo;
 	int			write_urb_busy;
+
+	unsigned char		*bulk_out_buffers[2];
+	struct urb		*write_urbs[2];
+	unsigned long		write_urbs_free;
 	__u8			bulk_out_endpointAddress;
 
 	int			tx_bytes;
 	int			tx_urbs;
 
+	unsigned long		flags;
 	wait_queue_head_t	write_wait;
 	struct work_struct	work;
 	char			throttled;
-- 
cgit v1.2.3


From c23e5fc1f7dba228558b4a46e68f7af89515b13c Mon Sep 17 00:00:00 2001
From: Johan Hovold <jhovold@gmail.com>
Date: Wed, 5 May 2010 23:58:13 +0200
Subject: USB: serial: remove multi-urb write from generic driver

Remove multi-urb write from the generic driver and simplify the
prepare_write_buffer prototype:

	int (*prepare_write_buffer)(struct usb_serial_port *port,
						void *dest, size_t size);

The default implementation simply fills dest with data from port write
fifo but drivers can override it if they need to process the outgoing
data (e.g. add headers).

Turn ftdi_sio into a generic fifo-based driver, which lowers CPU usage
significantly for small writes while retaining maximum throughput.

Signed-off-by: Johan Hovold <jhovold@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/serial/aircable.c |   7 +-
 drivers/usb/serial/ftdi_sio.c |  59 ++++++-----------
 drivers/usb/serial/generic.c  | 150 +++++++-----------------------------------
 include/linux/usb/serial.h    |   8 +--
 4 files changed, 48 insertions(+), 176 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/serial/aircable.c b/drivers/usb/serial/aircable.c
index cac5162937b3..8a990a763c21 100644
--- a/drivers/usb/serial/aircable.c
+++ b/drivers/usb/serial/aircable.c
@@ -84,9 +84,10 @@ static const struct usb_device_id id_table[] = {
 MODULE_DEVICE_TABLE(usb, id_table);
 
 static int aircable_prepare_write_buffer(struct usb_serial_port *port,
-		void **dest, size_t size, const void *src, size_t count)
+						void *dest, size_t size)
 {
-	unsigned char *buf = *dest;
+	int count;
+	unsigned char *buf = dest;
 
 	count = kfifo_out_locked(&port->write_fifo, buf + HCI_HEADER_LENGTH,
 					size - HCI_HEADER_LENGTH, &port->lock);
@@ -185,8 +186,6 @@ static struct usb_serial_driver aircable_device = {
 	.id_table = 		id_table,
 	.num_ports =		1,
 	.bulk_out_size =	HCI_COMPLETE_FRAME,
-	/* Must modify prepare_write_buffer if multi_urb_write is changed. */
-	.multi_urb_write =	0,
 	.probe =		aircable_probe,
 	.process_read_urb =	aircable_process_read_urb,
 	.prepare_write_buffer =	aircable_prepare_write_buffer,
diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
index f515f32cde68..14f7a34d614c 100644
--- a/drivers/usb/serial/ftdi_sio.c
+++ b/drivers/usb/serial/ftdi_sio.c
@@ -781,7 +781,7 @@ static void ftdi_close(struct usb_serial_port *port);
 static void ftdi_dtr_rts(struct usb_serial_port *port, int on);
 static void ftdi_process_read_urb(struct urb *urb);
 static int ftdi_prepare_write_buffer(struct usb_serial_port *port,
-		void **dest, size_t size, const void *buf, size_t count);
+						void *dest, size_t size);
 static void ftdi_set_termios(struct tty_struct *tty,
 			struct usb_serial_port *port, struct ktermios *old);
 static int  ftdi_tiocmget(struct tty_struct *tty, struct file *file);
@@ -808,8 +808,7 @@ static struct usb_serial_driver ftdi_sio_device = {
 	.id_table =		id_table_combined,
 	.num_ports =		1,
 	.bulk_in_size =		512,
-	/* Must modify prepare_write_buffer if multi_urb_write is changed. */
-	.multi_urb_write =	1,
+	.bulk_out_size =	256,
 	.probe =		ftdi_sio_probe,
 	.port_probe =		ftdi_sio_port_probe,
 	.port_remove =		ftdi_sio_port_remove,
@@ -1531,15 +1530,6 @@ static int ftdi_sio_port_probe(struct usb_serial_port *port)
 		quirk->port_probe(priv);
 
 	priv->port = port;
-
-	/* Free port's existing write urb and transfer buffer. */
-	if (port->write_urb) {
-		usb_free_urb(port->write_urb);
-		port->write_urb = NULL;
-	}
-	kfree(port->bulk_out_buffer);
-	port->bulk_out_buffer = NULL;
-
 	usb_set_serial_port_data(port, priv);
 
 	ftdi_determine_type(port);
@@ -1734,8 +1724,7 @@ static void ftdi_close(struct usb_serial_port *port)
 
 	dbg("%s", __func__);
 
-	/* shutdown our bulk read */
-	usb_kill_urb(port->read_urb);
+	usb_serial_generic_close(port);
 	kref_put(&priv->kref, ftdi_sio_priv_release);
 }
 
@@ -1747,40 +1736,34 @@ static void ftdi_close(struct usb_serial_port *port)
  * The new devices do not require this byte
  */
 static int ftdi_prepare_write_buffer(struct usb_serial_port *port,
-		void **dest, size_t size, const void *src, size_t count)
+						void *dest, size_t size)
 {
 	struct ftdi_private *priv;
-	unsigned char *buffer;
-	int len;
+	int count;
+	unsigned long flags;
 
 	priv = usb_get_serial_port_data(port);
 
-	len = count;
-	if (priv->chip_type == SIO && count != 0)
-		len += ((count - 1) / (priv->max_packet_size - 1)) + 1;
-
-	buffer = kmalloc(len, GFP_ATOMIC);
-	if (!buffer) {
-		dev_err(&port->dev, "%s - could not allocate buffer\n",
-				__func__);
-		return -ENOMEM;
-	}
-
 	if (priv->chip_type == SIO) {
-		int i, msg_len;
-
-		for (i = 0; i < len; i += priv->max_packet_size) {
-			msg_len = min_t(int, len - i, priv->max_packet_size) - 1;
-			buffer[i] = (msg_len << 2) + 1;
-			memcpy(&buffer[i + 1], src, msg_len);
-			src += msg_len;
+		unsigned char *buffer = dest;
+		int i, len, c;
+
+		count = 0;
+		spin_lock_irqsave(&port->lock, flags);
+		for (i = 0; i < size - 1; i += priv->max_packet_size) {
+			len = min_t(int, size - i, priv->max_packet_size) - 1;
+			buffer[i] = (len << 2) + 1;
+			c = kfifo_out(&port->write_fifo, &buffer[i + 1], len);
+			if (!c)
+				break;
+			count += c + 1;
 		}
+		spin_unlock_irqrestore(&port->lock, flags);
 	} else {
-		memcpy(buffer, src, count);
+		count = kfifo_out_locked(&port->write_fifo, dest, size,
+								&port->lock);
 	}
 
-	*dest = buffer;
-
 	return count;
 }
 
diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c
index 3ae17840175c..fcd30b841559 100644
--- a/drivers/usb/serial/generic.c
+++ b/drivers/usb/serial/generic.c
@@ -26,8 +26,6 @@
 
 static int debug;
 
-#define MAX_TX_URBS		40
-
 #ifdef CONFIG_USB_SERIAL_GENERIC
 
 static int generic_probe(struct usb_interface *interface,
@@ -172,90 +170,9 @@ void usb_serial_generic_close(struct usb_serial_port *port)
 EXPORT_SYMBOL_GPL(usb_serial_generic_close);
 
 int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
-		void **dest, size_t size, const void *src, size_t count)
+						void *dest, size_t size)
 {
-	if (!*dest) {
-		size = count;
-		*dest = kmalloc(count, GFP_ATOMIC);
-		if (!*dest) {
-			dev_err(&port->dev, "%s - could not allocate buffer\n",
-					__func__);
-			return -ENOMEM;
-		}
-	}
-	if (src) {
-		count = size;
-		memcpy(*dest, src, size);
-	} else {
-		count = kfifo_out_locked(&port->write_fifo, *dest, size,
-								&port->lock);
-	}
-	return count;
-}
-EXPORT_SYMBOL_GPL(usb_serial_generic_prepare_write_buffer);
-
-static int usb_serial_multi_urb_write(struct tty_struct *tty,
-	struct usb_serial_port *port, const unsigned char *buf, int count)
-{
-	unsigned long flags;
-	struct urb *urb;
-	void *buffer;
-	int status;
-
-	spin_lock_irqsave(&port->lock, flags);
-	if (port->tx_urbs == MAX_TX_URBS) {
-		spin_unlock_irqrestore(&port->lock, flags);
-		dbg("%s - write limit hit", __func__);
-		return 0;
-	}
-	port->tx_urbs++;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!urb) {
-		dev_err(&port->dev, "%s - no free urbs available\n", __func__);
-		status = -ENOMEM;
-		goto err_urb;
-	}
-
-	buffer = NULL;
-	count = min_t(int, count, PAGE_SIZE);
-	count = port->serial->type->prepare_write_buffer(port, &buffer, 0,
-								buf, count);
-	if (count < 0) {
-		status = count;
-		goto err_buf;
-	}
-	usb_serial_debug_data(debug, &port->dev, __func__, count, buffer);
-	usb_fill_bulk_urb(urb, port->serial->dev,
-			usb_sndbulkpipe(port->serial->dev,
-					port->bulk_out_endpointAddress),
-			buffer, count,
-			port->serial->type->write_bulk_callback, port);
-
-	status = usb_submit_urb(urb, GFP_ATOMIC);
-	if (status) {
-		dev_err(&port->dev, "%s - error submitting urb: %d\n",
-				__func__, status);
-		goto err;
-	}
-	spin_lock_irqsave(&port->lock, flags);
-	port->tx_bytes += urb->transfer_buffer_length;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	usb_free_urb(urb);
-
-	return count;
-err:
-	kfree(buffer);
-err_buf:
-	usb_free_urb(urb);
-err_urb:
-	spin_lock_irqsave(&port->lock, flags);
-	port->tx_urbs--;
-	spin_unlock_irqrestore(&port->lock, flags);
-
-	return status;
+	return kfifo_out_locked(&port->write_fifo, dest, size, &port->lock);
 }
 
 /**
@@ -286,8 +203,8 @@ retry:
 
 	urb = port->write_urbs[i];
 	count = port->serial->type->prepare_write_buffer(port,
-						&urb->transfer_buffer,
-						port->bulk_out_size, NULL, 0);
+						urb->transfer_buffer,
+						port->bulk_out_size);
 	urb->transfer_buffer_length = count;
 	usb_serial_debug_data(debug, &port->dev, __func__, count,
 						urb->transfer_buffer);
@@ -328,7 +245,6 @@ retry:
 int usb_serial_generic_write(struct tty_struct *tty,
 	struct usb_serial_port *port, const unsigned char *buf, int count)
 {
-	struct usb_serial *serial = port->serial;
 	int result;
 
 	dbg("%s - port %d", __func__, port->number);
@@ -340,23 +256,18 @@ int usb_serial_generic_write(struct tty_struct *tty,
 	if (!count)
 		return 0;
 
-	if (serial->type->multi_urb_write)
-		return usb_serial_multi_urb_write(tty, port, buf, count);
-
 	count = kfifo_in_locked(&port->write_fifo, buf, count, &port->lock);
 	result = usb_serial_generic_write_start(port);
+	if (result)
+		return result;
 
-	if (result >= 0)
-		result = count;
-
-	return result;
+	return count;
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_write);
 
 int usb_serial_generic_write_room(struct tty_struct *tty)
 {
 	struct usb_serial_port *port = tty->driver_data;
-	struct usb_serial *serial = port->serial;
 	unsigned long flags;
 	int room;
 
@@ -366,10 +277,7 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 		return 0;
 
 	spin_lock_irqsave(&port->lock, flags);
-	if (serial->type->multi_urb_write)
-		room = (MAX_TX_URBS - port->tx_urbs) * PAGE_SIZE;
-	else
-		room = kfifo_avail(&port->write_fifo);
+	room = kfifo_avail(&port->write_fifo);
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, room);
@@ -379,7 +287,6 @@ int usb_serial_generic_write_room(struct tty_struct *tty)
 int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 {
 	struct usb_serial_port *port = tty->driver_data;
-	struct usb_serial *serial = port->serial;
 	unsigned long flags;
 	int chars;
 
@@ -389,10 +296,7 @@ int usb_serial_generic_chars_in_buffer(struct tty_struct *tty)
 		return 0;
 
 	spin_lock_irqsave(&port->lock, flags);
-	if (serial->type->multi_urb_write)
-		chars = port->tx_bytes;
-	else
-		chars = kfifo_len(&port->write_fifo) + port->tx_bytes;
+	chars = kfifo_len(&port->write_fifo) + port->tx_bytes;
 	spin_unlock_irqrestore(&port->lock, flags);
 
 	dbg("%s - returns %d", __func__, chars);
@@ -479,35 +383,25 @@ void usb_serial_generic_write_bulk_callback(struct urb *urb)
 
 	dbg("%s - port %d", __func__, port->number);
 
-	if (port->serial->type->multi_urb_write) {
-		kfree(urb->transfer_buffer);
+	for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
+		if (port->write_urbs[i] == urb)
+			break;
 
-		spin_lock_irqsave(&port->lock, flags);
-		port->tx_bytes -= urb->transfer_buffer_length;
-		port->tx_urbs--;
-		spin_unlock_irqrestore(&port->lock, flags);
-	} else {
-		for (i = 0; i < ARRAY_SIZE(port->write_urbs); ++i)
-			if (port->write_urbs[i] == urb)
-				break;
+	spin_lock_irqsave(&port->lock, flags);
+	port->tx_bytes -= urb->transfer_buffer_length;
+	set_bit(i, &port->write_urbs_free);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	if (status) {
+		dbg("%s - non-zero urb status: %d", __func__, status);
 
 		spin_lock_irqsave(&port->lock, flags);
-		port->tx_bytes -= urb->transfer_buffer_length;
-		set_bit(i, &port->write_urbs_free);
+		kfifo_reset_out(&port->write_fifo);
 		spin_unlock_irqrestore(&port->lock, flags);
-
-		if (status) {
-			spin_lock_irqsave(&port->lock, flags);
-			kfifo_reset_out(&port->write_fifo);
-			spin_unlock_irqrestore(&port->lock, flags);
-		} else {
-			usb_serial_generic_write_start(port);
-		}
+	} else {
+		usb_serial_generic_write_start(port);
 	}
 
-	if (status)
-		dbg("%s - non-zero urb status: %d", __func__, status);
-
 	usb_serial_port_softint(port);
 }
 EXPORT_SYMBOL_GPL(usb_serial_generic_write_bulk_callback);
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 70b6d6b28997..061c997ae0cf 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -67,7 +67,6 @@ enum port_dev_state {
  * @write_urbs: pointers to the bulk out urbs for this port
  * @write_urbs_free: status bitmap the for bulk out urbs
  * @tx_bytes: number of bytes currently in host stack queues
- * @tx_urbs: number of urbs currently in host stack queues
  * @bulk_out_endpointAddress: endpoint address for the bulk out pipe for this
  *	port.
  * @flags: usb serial port flags
@@ -112,7 +111,6 @@ struct usb_serial_port {
 	__u8			bulk_out_endpointAddress;
 
 	int			tx_bytes;
-	int			tx_urbs;
 
 	unsigned long		flags;
 	wait_queue_head_t	write_wait;
@@ -238,8 +236,6 @@ struct usb_serial_driver {
 	struct usb_driver	*usb_driver;
 	struct usb_dynids	dynids;
 
-	unsigned char		multi_urb_write:1;
-
 	size_t			bulk_in_size;
 	size_t			bulk_out_size;
 
@@ -291,7 +287,7 @@ struct usb_serial_driver {
 	void (*process_read_urb)(struct urb *urb);
 	/* Called by the generic write implementation */
 	int (*prepare_write_buffer)(struct usb_serial_port *port,
-		void **dest, size_t size, const void *src, size_t count);
+						void *dest, size_t size);
 };
 #define to_usb_serial_driver(d) \
 	container_of(d, struct usb_serial_driver, driver)
@@ -345,7 +341,7 @@ extern int usb_serial_generic_submit_read_urb(struct usb_serial_port *port,
 						 gfp_t mem_flags);
 extern void usb_serial_generic_process_read_urb(struct urb *urb);
 extern int usb_serial_generic_prepare_write_buffer(struct usb_serial_port *port,
-		void **dest, size_t size, const void *src, size_t count);
+						void *dest, size_t size);
 extern int usb_serial_handle_sysrq_char(struct tty_struct *tty,
 					struct usb_serial_port *port,
 					unsigned int ch);
-- 
cgit v1.2.3


From 22c43c81a51e05f61e90445ceb59d486c12fd921 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <m.nazarewicz@samsung.com>
Date: Wed, 5 May 2010 12:53:11 +0200
Subject: wait_event_interruptible_locked() interface

New wait_event_interruptible{,_exclusive}_locked{,_irq} macros added.
They work just like versions without _locked* suffix but require the
wait queue's lock to be held.  Also __wake_up_locked() is now exported
as to pair it with the above macros.

The use case of this new facility is when one uses wait queue's lock
to  protect a data structure.  This may be advantageous if the
structure needs to be protected by a spinlock anyway.  In particular,
with additional spinlock the following code has to be used to wait
for a condition:

spin_lock(&data.lock);
...
for (ret = 0; !ret && !(condition); ) {
	spin_unlock(&data.lock);
	ret = wait_event_interruptible(data.wqh, (condition));
	spin_lock(&data.lock);
}
...
spin_unlock(&data.lock);

This looks bizarre plus wait_event_interruptible() locks the wait
queue's lock anyway so there is a unlock+lock sequence where it could
be avoided.

To avoid those problems and benefit from wait queue's lock, a code
similar to the following should be used:

/* Waiting */
spin_lock(&data.wqh.lock);
...
ret = wait_event_interruptible_locked(data.wqh, (condition));
...
spin_unlock(&data.wqh.lock);

/* Waiting exclusively */
spin_lock(&data.wqh.lock);
...
ret = wait_event_interruptible_exclusive_locked(data.wqh, (condition));
...
spin_unlock(&data.wqh.lock);

/* Waking up */
spin_lock(&data.wqh.lock);
...
wake_up_locked(&data.wqh);
...
spin_unlock(&data.wqh.lock);

When spin_lock_irq() is used matching versions of macros need to be
used (*_locked_irq()).

Signed-off-by: Michal Nazarewicz <m.nazarewicz@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Takashi Iwai <tiwai@suse.de>
Cc: David Howells <dhowells@redhat.com>
Cc: Andreas Herrmann <andreas.herrmann3@amd.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Mike Galbraith <efault@gmx.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/wait.h | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/sched.c       |   1 +
 2 files changed, 150 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/wait.h b/include/linux/wait.h
index a48e16b77d5e..fc3c040e5e3a 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -362,6 +362,155 @@ do {									\
 	__ret;								\
 })
 
+
+#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \
+({									\
+	int __ret = 0;							\
+	DEFINE_WAIT(__wait);						\
+	if (exclusive)							\
+		__wait.flags |= WQ_FLAG_EXCLUSIVE;			\
+	do {								\
+		if (likely(list_empty(&__wait.task_list)))		\
+			__add_wait_queue_tail(&(wq), &__wait);		\
+		set_current_state(TASK_INTERRUPTIBLE);			\
+		if (signal_pending(current)) {				\
+			__ret = -ERESTARTSYS;				\
+			break;						\
+		}							\
+		if (irq)						\
+			spin_unlock_irq(&(wq).lock);			\
+		else							\
+			spin_unlock(&(wq).lock);			\
+		schedule();						\
+		if (irq)						\
+			spin_lock_irq(&(wq).lock);			\
+		else							\
+			spin_lock(&(wq).lock);				\
+	} while (!(condition));						\
+	__remove_wait_queue(&(wq), &__wait);				\
+	__set_current_state(TASK_RUNNING);				\
+	__ret;								\
+})
+
+
+/**
+ * wait_event_interruptible_locked - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held.  This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock()/spin_unlock()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_locked(wq, condition)			\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0))
+
+/**
+ * wait_event_interruptible_locked_irq - sleep until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held.  This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_locked_irq(wq, condition)		\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+
+/**
+ * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held.  This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock()/spin_unlock()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes are waiting on the list and this process
+ * is awakened, further processes are not considered.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_exclusive_locked(wq, condition)	\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+
+/**
+ * wait_event_interruptible_exclusive_locked_irq - sleep exclusively until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held.  This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * The process is put on the wait queue with the WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes are waiting on the list and this process
+ * is awakened, further processes are not considered.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 1))
+
+
+
 #define __wait_event_killable(wq, condition, ret)			\
 do {									\
 	DEFINE_WAIT(__wait);						\
diff --git a/kernel/sched.c b/kernel/sched.c
index 3c2a54f70ffe..9584b66c249a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3950,6 +3950,7 @@ void __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
 {
 	__wake_up_common(q, mode, 1, 0, NULL);
 }
+EXPORT_SYMBOL_GPL(__wake_up_locked);
 
 void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key)
 {
-- 
cgit v1.2.3


From ddf8abd2599491cbad959c700b90ba72a5dce8d0 Mon Sep 17 00:00:00 2001
From: Michal Nazarewicz <m.nazarewicz@samsung.com>
Date: Wed, 5 May 2010 12:53:14 +0200
Subject: USB: f_fs: the FunctionFS driver

The FunctionFS is a USB composite function that can be used
with the composite framework to create an USB gadget.

>From kernel point of view it is just a composite function with
some unique behaviour.  It may be added to an USB
configuration only after the user space driver has registered
by writing descriptors and strings (the user space program has
to provide the same information that kernel level composite
functions provide when they are added to the configuration).

>From user space point of view it is a file system which when
mounted provide an "ep0" file.  User space driver need to
write descriptors and strings to that file.  It does not need
to worry about endpoints, interfaces or strings numbers but
simply provide descriptors such as if the function was the
only one (endpoints and strings numbers starting from one and
interface numbers starting from zero).  The FunctionFS changes
numbers of those as needed also handling situation when
numbers differ in different configurations.

When descriptors and strings are written "ep#" files appear
(one for each declared endpoint) which handle communication on
a single endpoint.  Again, FunctionFS takes care of the real
numbers and changing of the configuration (which means that
"ep1" file may be really mapped to (say) endpoint 3 (and when
configuration changes to (say) endpoint 2)).  "ep0" is used
for receiving events and handling setup requests.

When all files are closed the function disables itself.

Signed-off-by: Michal Nazarewicz <m.nazarewicz@samsung.com>
Cc: Kyungmin Park <kyungmin.park@samsung.com>
Cc: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/gadget/f_fs.c      | 2441 ++++++++++++++++++++++++++++++++++++++++
 include/linux/usb/functionfs.h |  199 ++++
 2 files changed, 2640 insertions(+)
 create mode 100644 drivers/usb/gadget/f_fs.c
 create mode 100644 include/linux/usb/functionfs.h

(limited to 'include/linux')

diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c
new file mode 100644
index 000000000000..af89ca672c60
--- /dev/null
+++ b/drivers/usb/gadget/f_fs.c
@@ -0,0 +1,2441 @@
+/*
+ * f_fs.c -- user mode filesystem api for usb composite function controllers
+ *
+ * Copyright (C) 2010 Samsung Electronics
+ * Author: Michal Nazarewicz <m.nazarewicz@samsung.com>
+ *
+ * Based on inode.c (GadgetFS):
+ * Copyright (C) 2003-2004 David Brownell
+ * Copyright (C) 2003 Agilent Technologies
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+
+/* #define DEBUG */
+/* #define VERBOSE_DEBUG */
+
+#include <linux/blkdev.h>
+#include <asm/unaligned.h>
+#include <linux/smp_lock.h>
+
+#include <linux/usb/composite.h>
+#include <linux/usb/functionfs.h>
+
+
+#define FUNCTIONFS_MAGIC	0xa647361 /* Chosen by a honest dice roll ;) */
+
+
+/* Debugging ****************************************************************/
+
+#define ffs_printk(level, fmt, args...) printk(level "f_fs: " fmt "\n", ## args)
+
+#define FERR(...)  ffs_printk(KERN_ERR,  __VA_ARGS__)
+#define FINFO(...) ffs_printk(KERN_INFO, __VA_ARGS__)
+
+#ifdef DEBUG
+#  define FDBG(...) ffs_printk(KERN_DEBUG, __VA_ARGS__)
+#else
+#  define FDBG(...) do { } while (0)
+#endif /* DEBUG */
+
+#ifdef VERBOSE_DEBUG
+#  define FVDBG FDBG
+#else
+#  define FVDBG(...) do { } while (0)
+#endif /* VERBOSE_DEBUG */
+
+#define ENTER()    FVDBG("%s()", __func__)
+
+#ifdef VERBOSE_DEBUG
+#  define ffs_dump_mem(prefix, ptr, len) \
+	print_hex_dump_bytes("f_fs" prefix ": ", DUMP_PREFIX_NONE, ptr, len)
+#else
+#  define ffs_dump_mem(prefix, ptr, len) do { } while (0)
+#endif
+
+
+/* The data structure and setup file ****************************************/
+
+enum ffs_state {
+	/* Waiting for descriptors and strings. */
+	/* In this state no open(2), read(2) or write(2) on epfiles
+	 * may succeed (which should not be the problem as there
+	 * should be no such files opened in the first place). */
+	FFS_READ_DESCRIPTORS,
+	FFS_READ_STRINGS,
+
+	/* We've got descriptors and strings.  We are or have called
+	 * functionfs_ready_callback().  functionfs_bind() may have
+	 * been called but we don't know. */
+	/* This is the only state in which operations on epfiles may
+	 * succeed. */
+	FFS_ACTIVE,
+
+	/* All endpoints have been closed.  This state is also set if
+	 * we encounter an unrecoverable error.  The only
+	 * unrecoverable error is situation when after reading strings
+	 * from user space we fail to initialise EP files or
+	 * functionfs_ready_callback() returns with error (<0). */
+	/* In this state no open(2), read(2) or write(2) (both on ep0
+	 * as well as epfile) may succeed (at this point epfiles are
+	 * unlinked and all closed so this is not a problem; ep0 is
+	 * also closed but ep0 file exists and so open(2) on ep0 must
+	 * fail). */
+	FFS_CLOSING
+};
+
+
+enum ffs_setup_state {
+	/* There is no setup request pending. */
+	FFS_NO_SETUP,
+	/* User has read events and there was a setup request event
+	 * there.  The next read/write on ep0 will handle the
+	 * request. */
+	FFS_SETUP_PENDING,
+	/* There was event pending but before user space handled it
+	 * some other event was introduced which canceled existing
+	 * setup.  If this state is set read/write on ep0 return
+	 * -EIDRM.  This state is only set when adding event. */
+	FFS_SETUP_CANCELED
+};
+
+
+
+struct ffs_epfile;
+struct ffs_function;
+
+struct ffs_data {
+	struct usb_gadget		*gadget;
+
+	/* Protect access read/write operations, only one read/write
+	 * at a time.  As a consequence protects ep0req and company.
+	 * While setup request is being processed (queued) this is
+	 * held. */
+	struct mutex			mutex;
+
+	/* Protect access to endpoint related structures (basically
+	 * usb_ep_queue(), usb_ep_dequeue(), etc. calls) except for
+	 * endpoint zero. */
+	spinlock_t			eps_lock;
+
+	/* XXX REVISIT do we need our own request? Since we are not
+	 * handling setup requests immediately user space may be so
+	 * slow that another setup will be sent to the gadget but this
+	 * time not to us but another function and then there could be
+	 * a race.  Is that the case? Or maybe we can use cdev->req
+	 * after all, maybe we just need some spinlock for that? */
+	struct usb_request		*ep0req;		/* P: mutex */
+	struct completion		ep0req_completion;	/* P: mutex */
+	int				ep0req_status;		/* P: mutex */
+
+	/* reference counter */
+	atomic_t			ref;
+	/* how many files are opened (EP0 and others) */
+	atomic_t			opened;
+
+	/* EP0 state */
+	enum ffs_state			state;
+
+	/*
+	 * Possible transitions:
+	 * + FFS_NO_SETUP       -> FFS_SETUP_PENDING  -- P: ev.waitq.lock
+	 *               happens only in ep0 read which is P: mutex
+	 * + FFS_SETUP_PENDING  -> FFS_NO_SETUP       -- P: ev.waitq.lock
+	 *               happens only in ep0 i/o  which is P: mutex
+	 * + FFS_SETUP_PENDING  -> FFS_SETUP_CANCELED -- P: ev.waitq.lock
+	 * + FFS_SETUP_CANCELED -> FFS_NO_SETUP       -- cmpxchg
+	 */
+	enum ffs_setup_state		setup_state;
+
+#define FFS_SETUP_STATE(ffs)					\
+	((enum ffs_setup_state)cmpxchg(&(ffs)->setup_state,	\
+				       FFS_SETUP_CANCELED, FFS_NO_SETUP))
+
+	/* Events & such. */
+	struct {
+		u8				types[4];
+		unsigned short			count;
+		/* XXX REVISIT need to update it in some places, or do we? */
+		unsigned short			can_stall;
+		struct usb_ctrlrequest		setup;
+
+		wait_queue_head_t		waitq;
+	} ev; /* the whole structure, P: ev.waitq.lock */
+
+	/* Flags */
+	unsigned long			flags;
+#define FFS_FL_CALL_CLOSED_CALLBACK 0
+#define FFS_FL_BOUND                1
+
+	/* Active function */
+	struct ffs_function		*func;
+
+	/* Device name, write once when file system is mounted.
+	 * Intended for user to read if she wants. */
+	const char			*dev_name;
+	/* Private data for our user (ie. gadget).  Managed by
+	 * user. */
+	void				*private_data;
+
+	/* filled by __ffs_data_got_descs() */
+	/* real descriptors are 16 bytes after raw_descs (so you need
+	 * to skip 16 bytes (ie. ffs->raw_descs + 16) to get to the
+	 * first full speed descriptor).  raw_descs_length and
+	 * raw_fs_descs_length do not have those 16 bytes added. */
+	const void			*raw_descs;
+	unsigned			raw_descs_length;
+	unsigned			raw_fs_descs_length;
+	unsigned			fs_descs_count;
+	unsigned			hs_descs_count;
+
+	unsigned short			strings_count;
+	unsigned short			interfaces_count;
+	unsigned short			eps_count;
+	unsigned short			_pad1;
+
+	/* filled by __ffs_data_got_strings() */
+	/* ids in stringtabs are set in functionfs_bind() */
+	const void			*raw_strings;
+	struct usb_gadget_strings	**stringtabs;
+
+	/* File system's super block, write once when file system is mounted. */
+	struct super_block		*sb;
+
+	/* File permissions, written once when fs is mounted*/
+	struct ffs_file_perms {
+		umode_t				mode;
+		uid_t				uid;
+		gid_t				gid;
+	}				file_perms;
+
+	/* The endpoint files, filled by ffs_epfiles_create(),
+	 * destroyed by ffs_epfiles_destroy(). */
+	struct ffs_epfile		*epfiles;
+};
+
+/* Reference counter handling */
+static void ffs_data_get(struct ffs_data *ffs);
+static void ffs_data_put(struct ffs_data *ffs);
+/* Creates new ffs_data object. */
+static struct ffs_data *__must_check ffs_data_new(void) __attribute__((malloc));
+
+/* Opened counter handling. */
+static void ffs_data_opened(struct ffs_data *ffs);
+static void ffs_data_closed(struct ffs_data *ffs);
+
+/* Called with ffs->mutex held; take over ownership of data. */
+static int __must_check
+__ffs_data_got_descs(struct ffs_data *ffs, char *data, size_t len);
+static int __must_check
+__ffs_data_got_strings(struct ffs_data *ffs, char *data, size_t len);
+
+
+/* The function structure ***************************************************/
+
+struct ffs_ep;
+
+struct ffs_function {
+	struct usb_configuration	*conf;
+	struct usb_gadget		*gadget;
+	struct ffs_data			*ffs;
+
+	struct ffs_ep			*eps;
+	u8				eps_revmap[16];
+	short				*interfaces_nums;
+
+	struct usb_function		function;
+};
+
+
+static struct ffs_function *ffs_func_from_usb(struct usb_function *f)
+{
+	return container_of(f, struct ffs_function, function);
+}
+
+static void ffs_func_free(struct ffs_function *func);
+
+
+static void ffs_func_eps_disable(struct ffs_function *func);
+static int __must_check ffs_func_eps_enable(struct ffs_function *func);
+
+
+static int ffs_func_bind(struct usb_configuration *,
+			 struct usb_function *);
+static void ffs_func_unbind(struct usb_configuration *,
+			    struct usb_function *);
+static int ffs_func_set_alt(struct usb_function *, unsigned, unsigned);
+static void ffs_func_disable(struct usb_function *);
+static int ffs_func_setup(struct usb_function *,
+			  const struct usb_ctrlrequest *);
+static void ffs_func_suspend(struct usb_function *);
+static void ffs_func_resume(struct usb_function *);
+
+
+static int ffs_func_revmap_ep(struct ffs_function *func, u8 num);
+static int ffs_func_revmap_intf(struct ffs_function *func, u8 intf);
+
+
+
+/* The endpoints structures *************************************************/
+
+struct ffs_ep {
+	struct usb_ep			*ep;	/* P: ffs->eps_lock */
+	struct usb_request		*req;	/* P: epfile->mutex */
+
+	/* [0]: full speed, [1]: high speed */
+	struct usb_endpoint_descriptor	*descs[2];
+
+	u8				num;
+
+	int				status;	/* P: epfile->mutex */
+};
+
+struct ffs_epfile {
+	/* Protects ep->ep and ep->req. */
+	struct mutex			mutex;
+	wait_queue_head_t		wait;
+
+	struct ffs_data			*ffs;
+	struct ffs_ep			*ep;	/* P: ffs->eps_lock */
+
+	struct dentry			*dentry;
+
+	char				name[5];
+
+	unsigned char			in;	/* P: ffs->eps_lock */
+	unsigned char			isoc;	/* P: ffs->eps_lock */
+
+	unsigned char			_pad;
+};
+
+
+static int  __must_check ffs_epfiles_create(struct ffs_data *ffs);
+static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count);
+
+static struct inode *__must_check
+ffs_sb_create_file(struct super_block *sb, const char *name, void *data,
+		   const struct file_operations *fops,
+		   struct dentry **dentry_p);
+
+
+/* Misc helper functions ****************************************************/
+
+static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock)
+	__attribute__((warn_unused_result, nonnull));
+static char *ffs_prepare_buffer(const char * __user buf, size_t len)
+	__attribute__((warn_unused_result, nonnull));
+
+
+/* Control file aka ep0 *****************************************************/
+
+static void ffs_ep0_complete(struct usb_ep *ep, struct usb_request *req)
+{
+	struct ffs_data *ffs = req->context;
+
+	complete_all(&ffs->ep0req_completion);
+}
+
+/* Queue ffs->ep0req carrying @data and wait for it to complete.  Entered
+ * with ev.waitq.lock held and releases it.  Returns the request's error
+ * status, else the bytes transferred; -EINTR if the wait is interrupted. */
+static int __ffs_ep0_queue_wait(struct ffs_data *ffs, char *data, size_t len)
+{
+	struct usb_request *req = ffs->ep0req;
+	int ret;
+
+	req->zero     = len < le16_to_cpu(ffs->ev.setup.wLength);
+	spin_unlock_irq(&ffs->ev.waitq.lock);
+
+	req->buf      = data;
+	req->length   = len;
+	INIT_COMPLETION(ffs->ep0req_completion);
+
+	ret = usb_ep_queue(ffs->gadget->ep0, req, GFP_ATOMIC);
+	if (unlikely(ret < 0))
+		return ret;
+
+	ret = wait_for_completion_interruptible(&ffs->ep0req_completion);
+	if (unlikely(ret)) {
+		usb_ep_dequeue(ffs->gadget->ep0, req);
+		return -EINTR;
+	}
+
+	ffs->setup_state = FFS_NO_SETUP;
+	return req->status ? req->status : req->actual;
+}
+
+/* Halt ep0 if ev.can_stall is set: -EL2HLT once halted, -ESRCH otherwise. */
+static int __ffs_ep0_stall(struct ffs_data *ffs)
+{
+	if (!ffs->ev.can_stall) {
+		FDBG("bogus ep0 stall!");
+		return -ESRCH;
+	}
+	FVDBG("ep0 stall");	/* ffs_printk() appends the newline itself */
+	usb_ep_set_halt(ffs->gadget->ep0);
+	ffs->setup_state = FFS_NO_SETUP;
+	return -EL2HLT;
+}
+
+
+static ssize_t ffs_ep0_write(struct file *file, const char __user *buf,
+			     size_t len, loff_t *ptr)
+{
+	struct ffs_data *ffs = file->private_data;
+	ssize_t ret;
+	char *data;
+
+	ENTER();
+
+	/* Fast check if setup was canceled */
+	if (FFS_SETUP_STATE(ffs) == FFS_SETUP_CANCELED)
+		return -EIDRM;
+
+	/* Acquire mutex */
+	ret = ffs_mutex_lock(&ffs->mutex, file->f_flags & O_NONBLOCK);
+	if (unlikely(ret < 0))
+		return ret;
+
+
+	/* Check state */
+	switch (ffs->state) {
+	case FFS_READ_DESCRIPTORS:
+	case FFS_READ_STRINGS:
+		/* Copy data */
+		if (unlikely(len < 16)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		data = ffs_prepare_buffer(buf, len);
+		if (unlikely(IS_ERR(data))) {
+			ret = PTR_ERR(data);
+			break;
+		}
+
+		/* Handle data */
+		if (ffs->state == FFS_READ_DESCRIPTORS) {
+			FINFO("read descriptors");
+			ret = __ffs_data_got_descs(ffs, data, len);
+			if (unlikely(ret < 0))
+				break;
+
+			ffs->state = FFS_READ_STRINGS;
+			ret = len;
+		} else {
+			FINFO("read strings");
+			ret = __ffs_data_got_strings(ffs, data, len);
+			if (unlikely(ret < 0))
+				break;
+
+			ret = ffs_epfiles_create(ffs);
+			if (unlikely(ret)) {
+				ffs->state = FFS_CLOSING;
+				break;
+			}
+
+			ffs->state = FFS_ACTIVE;
+			mutex_unlock(&ffs->mutex);
+
+			ret = functionfs_ready_callback(ffs);
+			if (unlikely(ret < 0)) {
+				ffs->state = FFS_CLOSING;
+				return ret;
+			}
+
+			set_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags);
+			return len;
+		}
+		break;
+
+
+	case FFS_ACTIVE:
+		data = NULL;
+		/* We're called from user space, we can use _irq
+		 * rather than _irqsave */
+		spin_lock_irq(&ffs->ev.waitq.lock);
+		switch (FFS_SETUP_STATE(ffs)) {
+		case FFS_SETUP_CANCELED:
+			ret = -EIDRM;
+			goto done_spin;
+
+		case FFS_NO_SETUP:
+			ret = -ESRCH;
+			goto done_spin;
+
+		case FFS_SETUP_PENDING:
+			break;
+		}
+
+		/* FFS_SETUP_PENDING */
+		if (!(ffs->ev.setup.bRequestType & USB_DIR_IN)) {
+			spin_unlock_irq(&ffs->ev.waitq.lock);
+			ret = __ffs_ep0_stall(ffs);
+			break;
+		}
+
+		/* FFS_SETUP_PENDING and not stall */
+		len = min(len, (size_t)le16_to_cpu(ffs->ev.setup.wLength));
+
+		spin_unlock_irq(&ffs->ev.waitq.lock);
+
+		data = ffs_prepare_buffer(buf, len);
+		if (unlikely(IS_ERR(data))) {
+			ret = PTR_ERR(data);
+			break;
+		}
+
+		spin_lock_irq(&ffs->ev.waitq.lock);
+
+		/* We are guaranteed to be still in FFS_ACTIVE state
+		 * but the state of setup could have changed from
+		 * FFS_SETUP_PENDING to FFS_SETUP_CANCELED so we need
+		 * to check for that.  If that happened we copied data
+		 * from user space in vain but it's unlikely. */
+		/* For sure we are not in FFS_NO_SETUP since this is
+		 * the only place FFS_SETUP_PENDING -> FFS_NO_SETUP
+		 * transition can be performed and it's protected by
+		 * mutex. */
+
+		if (FFS_SETUP_STATE(ffs) == FFS_SETUP_CANCELED) {
+			ret = -EIDRM;
+done_spin:
+			spin_unlock_irq(&ffs->ev.waitq.lock);
+		} else {
+			/* unlocks spinlock */
+			ret = __ffs_ep0_queue_wait(ffs, data, len);
+		}
+		kfree(data);
+		break;
+
+
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+
+	mutex_unlock(&ffs->mutex);
+	return ret;
+}
+
+
+
+static ssize_t __ffs_ep0_read_events(struct ffs_data *ffs, char __user *buf,
+				     size_t n)
+{
+	/* We are holding ffs->ev.waitq.lock and ffs->mutex and we need
+	 * to release them. */
+
+	struct usb_functionfs_event events[n];
+	unsigned i = 0;
+
+	memset(events, 0, sizeof events);
+
+	do {
+		events[i].type = ffs->ev.types[i];
+		if (events[i].type == FUNCTIONFS_SETUP) {
+			events[i].u.setup = ffs->ev.setup;
+			ffs->setup_state = FFS_SETUP_PENDING;
+		}
+	} while (++i < n);
+
+	if (n < ffs->ev.count) {
+		ffs->ev.count -= n;
+		memmove(ffs->ev.types, ffs->ev.types + n,
+			ffs->ev.count * sizeof *ffs->ev.types);
+	} else {
+		ffs->ev.count = 0;
+	}
+
+	spin_unlock_irq(&ffs->ev.waitq.lock);
+	mutex_unlock(&ffs->mutex);
+
+	return unlikely(__copy_to_user(buf, events, sizeof events))
+		? -EFAULT : sizeof events;
+}
+
+/* read(2) on ep0.  No setup pending: return queued events, sleeping for
+ * one unless O_NONBLOCK.  Setup pending: stall if it is an IN request,
+ * otherwise run its data stage and copy the received bytes to @buf. */
+static ssize_t ffs_ep0_read(struct file *file, char __user *buf,
+			    size_t len, loff_t *ptr)
+{
+	struct ffs_data *ffs = file->private_data;
+	char *data = NULL;
+	size_t n;
+	int ret;
+
+	ENTER();
+
+	/* Fast check if setup was canceled */
+	if (FFS_SETUP_STATE(ffs) == FFS_SETUP_CANCELED)
+		return -EIDRM;
+
+	/* Acquire mutex */
+	ret = ffs_mutex_lock(&ffs->mutex, file->f_flags & O_NONBLOCK);
+	if (unlikely(ret < 0))
+		return ret;
+
+	/* Check state */
+	if (ffs->state != FFS_ACTIVE) {
+		ret = -EBADFD;
+		goto done_mutex;
+	}
+
+	/* We're called from user space, we can use _irq rather than
+	 * _irqsave */
+	spin_lock_irq(&ffs->ev.waitq.lock);
+
+	switch (FFS_SETUP_STATE(ffs)) {
+	case FFS_SETUP_CANCELED:
+		ret = -EIDRM;
+		break;
+
+	case FFS_NO_SETUP:
+		n = len / sizeof(struct usb_functionfs_event);
+		if (unlikely(!n)) {
+			ret = -EINVAL;
+			break;
+		}
+
+		if ((file->f_flags & O_NONBLOCK) && !ffs->ev.count) {
+			ret = -EAGAIN;
+			break;
+		}
+
+		if (unlikely(wait_event_interruptible_exclusive_locked_irq(ffs->ev.waitq, ffs->ev.count))) {
+			ret = -EINTR;
+			break;
+		}
+
+		return __ffs_ep0_read_events(ffs, buf,
+					     min(n, (size_t)ffs->ev.count));
+
+	case FFS_SETUP_PENDING:
+		if (ffs->ev.setup.bRequestType & USB_DIR_IN) {
+			spin_unlock_irq(&ffs->ev.waitq.lock);
+			ret = __ffs_ep0_stall(ffs);
+			goto done_mutex;
+		}
+
+		len = min(len, (size_t)le16_to_cpu(ffs->ev.setup.wLength));
+
+		spin_unlock_irq(&ffs->ev.waitq.lock);
+
+		if (likely(len)) {
+			data = kmalloc(len, GFP_KERNEL);
+			if (unlikely(!data)) {
+				ret = -ENOMEM;
+				goto done_mutex;
+			}
+		}
+
+		spin_lock_irq(&ffs->ev.waitq.lock);
+
+		/* See ffs_ep0_write() */
+		if (FFS_SETUP_STATE(ffs) == FFS_SETUP_CANCELED) {
+			ret = -EIDRM;
+			break;
+		}
+
+		/* unlocks spinlock; data is not zeroed, copy only ret bytes */
+		ret = __ffs_ep0_queue_wait(ffs, data, len);
+		if (likely(ret > 0) &&
+		    unlikely(__copy_to_user(buf, data, min((size_t)ret, len))))
+			ret = -EFAULT;
+		goto done_mutex;
+
+	default:
+		ret = -EBADFD;
+		break;
+	}
+
+	spin_unlock_irq(&ffs->ev.waitq.lock);
+done_mutex:
+	mutex_unlock(&ffs->mutex);
+	kfree(data);
+	return ret;
+}
+
+
+
+static int ffs_ep0_open(struct inode *inode, struct file *file)
+{
+	struct ffs_data *ffs = inode->i_private;
+
+	ENTER();
+
+	if (unlikely(ffs->state == FFS_CLOSING))
+		return -EBUSY;
+
+	file->private_data = ffs;
+	ffs_data_opened(ffs);
+
+	return 0;
+}
+
+
+static int ffs_ep0_release(struct inode *inode, struct file *file)
+{
+	struct ffs_data *ffs = file->private_data;
+
+	ENTER();
+
+	ffs_data_closed(ffs);
+
+	return 0;
+}
+
+
+static long ffs_ep0_ioctl(struct file *file, unsigned code, unsigned long value)
+{
+	struct ffs_data *ffs = file->private_data;
+	struct usb_gadget *gadget = ffs->gadget;
+	long ret;
+
+	ENTER();
+
+	if (code == FUNCTIONFS_INTERFACE_REVMAP) {
+		struct ffs_function *func = ffs->func;
+		ret = func ? ffs_func_revmap_intf(func, value) : -ENODEV;
+	} else if (gadget->ops->ioctl) {
+		lock_kernel();
+		ret = gadget->ops->ioctl(gadget, code, value);
+		unlock_kernel();
+	} else {
+		ret = -ENOTTY;
+	}
+
+	return ret;
+}
+
+
+static const struct file_operations ffs_ep0_operations = {
+	.owner =	THIS_MODULE,
+	.llseek =	no_llseek,
+
+	.open =		ffs_ep0_open,
+	.write =	ffs_ep0_write,
+	.read =		ffs_ep0_read,
+	.release =	ffs_ep0_release,
+	.unlocked_ioctl =	ffs_ep0_ioctl,
+};
+
+
+/* "Normal" endpoints operations ********************************************/
+
+
+static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
+{
+	ENTER();
+	if (likely(req->context)) {
+		struct ffs_ep *ep = _ep->driver_data;
+		ep->status = req->status ? req->status : req->actual;
+		complete(req->context);
+	}
+}
+
+
+/*
+ * Common implementation of read() and write() on an endpoint file.
+ *
+ * @file: the opened endpoint file (private_data is the ffs_epfile)
+ * @buf:  user space buffer to read into / write from
+ * @len:  size of @buf in bytes
+ * @read: non-zero for read(), zero for write()
+ *
+ * Waits (unless O_NONBLOCK is set) for the endpoint to be enabled, then
+ * queues a single USB request of @len bytes and sleeps until it
+ * completes.  If the direction of the call does not match the direction
+ * of the endpoint, the endpoint is halted instead and -EBADMSG is
+ * returned (-EINVAL for isochronous endpoints).
+ *
+ * Returns the value stored by the completion callback in ep->status, or
+ * a negative error code.
+ */
+static ssize_t ffs_epfile_io(struct file *file,
+			     char __user *buf, size_t len, int read)
+{
+	struct ffs_epfile *epfile = file->private_data;
+	struct ffs_ep *ep;
+	char *data = NULL;
+	ssize_t ret;
+	int halt;
+
+	/* The loop body starts by dropping the locks taken at the end of
+	 * the previous iteration; the first pass holds none, so skip
+	 * that part. */
+	goto first_try;
+	do {
+		spin_unlock_irq(&epfile->ffs->eps_lock);
+		mutex_unlock(&epfile->mutex);
+
+first_try:
+		/* Are we still active? */
+		if (WARN_ON(epfile->ffs->state != FFS_ACTIVE)) {
+			ret = -ENODEV;
+			goto error;
+		}
+
+		/* Wait for endpoint to be enabled */
+		ep = epfile->ep;
+		if (!ep) {
+			if (file->f_flags & O_NONBLOCK) {
+				ret = -EAGAIN;
+				goto error;
+			}
+
+			if (unlikely(wait_event_interruptible
+				     (epfile->wait, (ep = epfile->ep)))) {
+				ret = -EINTR;
+				goto error;
+			}
+		}
+
+		/* Do we halt? */
+		/* True when read() is used on an IN endpoint or write() on
+		 * an OUT endpoint, i.e. the direction does not match. */
+		halt = !read == !epfile->in;
+		if (halt && epfile->isoc) {
+			ret = -EINVAL;
+			goto error;
+		}
+
+		/* Allocate & copy */
+		/* Done only once, the buffer survives loop iterations. */
+		if (!halt && !data) {
+			data = kzalloc(len, GFP_KERNEL);
+			/* No lock is held and data is NULL here, so
+			 * returning directly is equivalent to "goto error". */
+			if (unlikely(!data))
+				return -ENOMEM;
+
+			if (!read &&
+			    unlikely(__copy_from_user(data, buf, len))) {
+				ret = -EFAULT;
+				goto error;
+			}
+		}
+
+		/* We will be using request */
+		ret = ffs_mutex_lock(&epfile->mutex,
+				     file->f_flags & O_NONBLOCK);
+		if (unlikely(ret))
+			goto error;
+
+		/* We're called from user space, we can use _irq rather than
+		 * _irqsave */
+		spin_lock_irq(&epfile->ffs->eps_lock);
+
+		/* While we were acquiring mutex endpoint got disabled
+		 * or changed? */
+	} while (unlikely(epfile->ep != ep));
+
+	/* From here on epfile->mutex and ffs->eps_lock are both held. */
+
+	/* Halt */
+	if (unlikely(halt)) {
+		if (likely(epfile->ep == ep) && !WARN_ON(!ep->ep))
+			usb_ep_set_halt(ep->ep);
+		spin_unlock_irq(&epfile->ffs->eps_lock);
+		ret = -EBADMSG;
+	} else {
+		/* Fire the request */
+		DECLARE_COMPLETION_ONSTACK(done);
+
+		struct usb_request *req = ep->req;
+		req->context  = &done;
+		req->complete = ffs_epfile_io_complete;
+		req->buf      = data;
+		req->length   = len;
+
+		/* GFP_ATOMIC because the spinlock is still held. */
+		ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
+
+		spin_unlock_irq(&epfile->ffs->eps_lock);
+
+		if (unlikely(ret < 0)) {
+			/* nop */
+		} else if (unlikely(wait_for_completion_interruptible(&done))) {
+			/* NOTE(review): "done" lives on this stack frame and
+			 * "data" is freed below; this presumably relies on
+			 * usb_ep_dequeue() finishing the request before it
+			 * returns, and ep is used without eps_lock held --
+			 * confirm. */
+			ret = -EINTR;
+			usb_ep_dequeue(ep->ep, req);
+		} else {
+			ret = ep->status;
+			if (read && ret > 0 &&
+			    unlikely(copy_to_user(buf, data, ret)))
+				ret = -EFAULT;
+		}
+	}
+
+	mutex_unlock(&epfile->mutex);
+error:
+	kfree(data);
+	return ret;
+}
+
+
+/* write() on an endpoint file: runs ffs_epfile_io() with read == 0. */
+static ssize_t
+ffs_epfile_write(struct file *file, const char __user *buf, size_t len,
+		 loff_t *ptr)
+{
+	/* ffs_epfile_io() takes a non-const buffer for both directions */
+	char __user *ubuf = (char __user *)buf;
+
+	ENTER();
+
+	return ffs_epfile_io(file, ubuf, len, 0);
+}
+
+/* read() on an endpoint file: runs ffs_epfile_io() with read == 1. */
+static ssize_t
+ffs_epfile_read(struct file *file, char __user *buf, size_t len, loff_t *ptr)
+{
+	const int is_read = 1;
+
+	ENTER();
+
+	return ffs_epfile_io(file, buf, len, is_read);
+}
+
+/*
+ * open() on an endpoint file.  Fails with -ENODEV (and a warning) unless
+ * the function is in the FFS_ACTIVE state; otherwise registers the
+ * opener with the ffs_data.
+ */
+static int
+ffs_epfile_open(struct inode *inode, struct file *file)
+{
+	struct ffs_epfile *epfile = inode->i_private;
+	struct ffs_data *ffs = epfile->ffs;
+
+	ENTER();
+
+	if (WARN_ON(ffs->state != FFS_ACTIVE))
+		return -ENODEV;
+
+	ffs_data_opened(ffs);
+	file->private_data = epfile;
+
+	return 0;
+}
+
+/* release() on an endpoint file: undoes ffs_data_opened() done on open. */
+static int
+ffs_epfile_release(struct inode *inode, struct file *file)
+{
+	struct ffs_epfile *epfile;
+
+	epfile = inode->i_private;
+
+	ENTER();
+
+	ffs_data_closed(epfile->ffs);
+	return 0;
+}
+
+
+/*
+ * ioctl() on an endpoint file.  Supports querying and flushing the FIFO,
+ * clearing a halt and looking up the endpoint number.  Returns -ENODEV
+ * when the function is not active or the endpoint is not enabled and
+ * -ENOTTY for unknown requests.
+ */
+static long ffs_epfile_ioctl(struct file *file, unsigned code,
+			     unsigned long value)
+{
+	struct ffs_epfile *epfile = file->private_data;
+	struct ffs_data *ffs = epfile->ffs;
+	int ret;
+
+	ENTER();
+
+	if (WARN_ON(ffs->state != FFS_ACTIVE))
+		return -ENODEV;
+
+	/* epfile->ep may only be looked at with eps_lock held */
+	spin_lock_irq(&ffs->eps_lock);
+	if (unlikely(!epfile->ep)) {
+		ret = -ENODEV;
+	} else if (code == FUNCTIONFS_FIFO_STATUS) {
+		ret = usb_ep_fifo_status(epfile->ep->ep);
+	} else if (code == FUNCTIONFS_FIFO_FLUSH) {
+		usb_ep_fifo_flush(epfile->ep->ep);
+		ret = 0;
+	} else if (code == FUNCTIONFS_CLEAR_HALT) {
+		ret = usb_ep_clear_halt(epfile->ep->ep);
+	} else if (code == FUNCTIONFS_ENDPOINT_REVMAP) {
+		ret = epfile->ep->num;
+	} else {
+		ret = -ENOTTY;
+	}
+	spin_unlock_irq(&ffs->eps_lock);
+
+	return ret;
+}
+
+
+/* File operations of the "epN" endpoint files; seeking is not supported. */
+static const struct file_operations ffs_epfile_operations = {
+	.owner		= THIS_MODULE,
+
+	.open		= ffs_epfile_open,
+	.release	= ffs_epfile_release,
+	.read		= ffs_epfile_read,
+	.write		= ffs_epfile_write,
+	.unlocked_ioctl	= ffs_epfile_ioctl,
+	.llseek		= no_llseek,
+};
+
+
+
+/* File system and super block operations ***********************************/
+
+/*
+ * Mounting the filesystem creates a controller file, used first for
+ * function configuration then later for event monitoring.
+ */
+
+
+/*
+ * Allocates a new inode on @sb and initialises its mode and ownership
+ * from @perms, all three timestamps to the current time and i_private
+ * to @data.  @fops and @iops are installed only when non-NULL.
+ *
+ * Returns the inode or NULL if it could not be allocated.
+ */
+static struct inode *__must_check
+ffs_sb_make_inode(struct super_block *sb, void *data,
+		  const struct file_operations *fops,
+		  const struct inode_operations *iops,
+		  struct ffs_file_perms *perms)
+{
+	struct timespec now;
+	struct inode *inode;
+
+	ENTER();
+
+	inode = new_inode(sb);
+	if (unlikely(!inode))
+		return NULL;
+
+	now = CURRENT_TIME;
+	inode->i_atime = now;
+	inode->i_mtime = now;
+	inode->i_ctime = now;
+
+	inode->i_mode = perms->mode;
+	inode->i_uid  = perms->uid;
+	inode->i_gid  = perms->gid;
+
+	inode->i_private = data;
+	if (fops)
+		inode->i_fop = fops;
+	if (iops)
+		inode->i_op = iops;
+
+	return inode;
+}
+
+
+/* Create "regular" file */
+
+/*
+ * Creates a file called @name in the root directory of @sb, using the
+ * file permissions stored in the ffs_data and @data as the inode's
+ * private pointer.  If @dentry_p is not NULL the new dentry is stored
+ * there.
+ *
+ * Returns the new inode or NULL on allocation failure.
+ */
+static struct inode *ffs_sb_create_file(struct super_block *sb,
+					const char *name, void *data,
+					const struct file_operations *fops,
+					struct dentry **dentry_p)
+{
+	struct ffs_data	*ffs = sb->s_fs_info;
+	struct inode	*inode;
+	struct dentry	*dentry;
+
+	ENTER();
+
+	dentry = d_alloc_name(sb->s_root, name);
+	if (unlikely(!dentry))
+		return NULL;
+
+	inode = ffs_sb_make_inode(sb, data, fops, NULL, &ffs->file_perms);
+	if (likely(inode)) {
+		d_add(dentry, inode);
+		if (dentry_p)
+			*dentry_p = dentry;
+	} else {
+		dput(dentry);
+	}
+
+	return inode;
+}
+
+
+/* Super block */
+
+/* Super block operations; only generic helpers are needed. */
+static const struct super_operations ffs_sb_operations = {
+	.drop_inode	= generic_delete_inode,
+	.statfs		= simple_statfs,
+};
+
+/* Mount parameters handed from ffs_fs_get_sb() to ffs_sb_fill(). */
+struct ffs_sb_fill_data {
+	/* permissions of regular files; copied to ffs->file_perms */
+	struct ffs_file_perms perms;
+	/* mode of the root directory inode */
+	umode_t root_mode;
+	/* device name given to mount; stored in ffs->dev_name */
+	const char *dev_name;
+};
+
+/*
+ * fill_super callback: allocates the ffs_data for this mount, sets up
+ * the super block and creates the root directory and the "ep0" file.
+ *
+ * @_data points to the struct ffs_sb_fill_data prepared by
+ * ffs_fs_get_sb().  Returns 0 on success or -ENOMEM.
+ *
+ * Error handling: once something has been attached to the super block
+ * (sb->s_fs_info, sb->s_root) it must NOT be released here.  When this
+ * function fails get_sb_single() deactivates the super block, which
+ * ends up in ffs_fs_kill_sb(); that drops sb->s_root (and with it the
+ * root inode) and puts the ffs_data found in sb->s_fs_info.  Releasing
+ * them here as well would free them twice.
+ */
+static int ffs_sb_fill(struct super_block *sb, void *_data, int silent)
+{
+	struct ffs_sb_fill_data *data = _data;
+	struct inode	*inode;
+	struct dentry	*d;
+	struct ffs_data	*ffs;
+
+	ENTER();
+
+	/* Initialize data */
+	ffs = ffs_data_new();
+	if (unlikely(!ffs))
+		return -ENOMEM;
+
+	ffs->sb              = sb;
+	ffs->dev_name        = data->dev_name;
+	ffs->file_perms      = data->perms;
+
+	/* From now on ffs is owned by the super block. */
+	sb->s_fs_info        = ffs;
+	sb->s_blocksize      = PAGE_CACHE_SIZE;
+	sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+	sb->s_magic          = FUNCTIONFS_MAGIC;
+	sb->s_op             = &ffs_sb_operations;
+	sb->s_time_gran      = 1;
+
+	/* Root inode */
+	data->perms.mode = data->root_mode;
+	inode = ffs_sb_make_inode(sb, NULL,
+				  &simple_dir_operations,
+				  &simple_dir_inode_operations,
+				  &data->perms);
+	if (unlikely(!inode))
+		return -ENOMEM;
+
+	d = d_alloc_root(inode);
+	if (unlikely(!d)) {
+		/* The inode is not attached to anything yet. */
+		iput(inode);
+		return -ENOMEM;
+	}
+	/* From now on the root dentry and inode are owned by sb. */
+	sb->s_root = d;
+
+	/* EP0 file */
+	if (unlikely(!ffs_sb_create_file(sb, "ep0", ffs,
+					 &ffs_ep0_operations, NULL)))
+		return -ENOMEM;
+
+	return 0;
+}
+
+
+/*
+ * Parses the comma separated list of "name=value" mount options in
+ * @opts (modified in place) into @data.  Recognised names are "rmode",
+ * "fmode", "mode", "uid" and "gid"; values are numbers in any base
+ * understood by simple_strtoul().
+ *
+ * Returns 0 on success or -EINVAL on a malformed or unknown option.
+ */
+static int ffs_fs_parse_opts(struct ffs_sb_fill_data *data, char *opts)
+{
+	char *next;
+
+	ENTER();
+
+	if (!opts || !*opts)
+		return 0;
+
+	do {
+		unsigned long value;
+		char *val, *end;
+
+		/* Cut the current item off the rest of the list */
+		next = strchr(opts, ',');
+		if (next)
+			*next++ = 0;
+
+		/* Split the item into name (opts) and value (val) */
+		val = strchr(opts, '=');
+		if (unlikely(!val)) {
+			FERR("'=' missing in %s", opts);
+			return -EINVAL;
+		}
+		*val++ = 0;
+
+		/* Parse value */
+		value = simple_strtoul(val, &end, 0);
+		if (unlikely(*end != ',' && *end != 0)) {
+			FERR("%s: invalid value: %s", opts, val);
+			return -EINVAL;
+		}
+
+		/* Interpret option; the name is NUL terminated by now */
+		if (!strcmp(opts, "rmode")) {
+			data->root_mode  = (value & 0555) | S_IFDIR;
+		} else if (!strcmp(opts, "fmode")) {
+			data->perms.mode = (value & 0666) | S_IFREG;
+		} else if (!strcmp(opts, "mode")) {
+			data->root_mode  = (value & 0555) | S_IFDIR;
+			data->perms.mode = (value & 0666) | S_IFREG;
+		} else if (!strcmp(opts, "uid")) {
+			data->perms.uid = value;
+		} else if (!strcmp(opts, "gid")) {
+			data->perms.gid = value;
+		} else {
+			FERR("%s: invalid option", opts);
+			return -EINVAL;
+		}
+
+		/* Next iteration */
+		opts = next;
+	} while (opts);
+
+	return 0;
+}
+
+
+/* "mount -t functionfs dev_name /dev/function" ends up here */
+
+/*
+ * get_sb callback.  Lets functionfs_check_dev_callback() accept or
+ * reject the device name, parses the mount options on top of the
+ * defaults (files 0600, root directory 0500, owner 0:0) and creates
+ * the single super block.
+ */
+static int
+ffs_fs_get_sb(struct file_system_type *t, int flags,
+	      const char *dev_name, void *opts, struct vfsmount *mnt)
+{
+	struct ffs_sb_fill_data data = {
+		.root_mode = S_IFDIR | 0500,
+		.perms = {
+			.gid = 0,
+			.uid = 0,
+			.mode = S_IFREG | 0600,
+		},
+		.dev_name = dev_name,
+	};
+	int ret;
+
+	ENTER();
+
+	ret = functionfs_check_dev_callback(dev_name);
+	if (likely(ret >= 0))
+		ret = ffs_fs_parse_opts(&data, opts);
+	if (unlikely(ret < 0))
+		return ret;
+
+	return get_sb_single(t, flags, &data, ffs_sb_fill, mnt);
+}
+
+/*
+ * kill_sb callback: tears the super block down and then drops the
+ * reference to the ffs_data stored in sb->s_fs_info, if any.
+ */
+static void
+ffs_fs_kill_sb(struct super_block *sb)
+{
+	struct ffs_data *ffs;
+
+	ENTER();
+
+	kill_litter_super(sb);
+
+	/* Detach the pointer first so it can only be put once. */
+	ffs = xchg(&sb->s_fs_info, NULL);
+	if (ffs)
+		ffs_data_put(ffs);
+}
+
+/* The "functionfs" file system type registered by functionfs_init(). */
+static struct file_system_type ffs_fs_type = {
+	.name		= "functionfs",
+	.owner		= THIS_MODULE,
+	.kill_sb	= ffs_fs_kill_sb,
+	.get_sb		= ffs_fs_get_sb,
+};
+
+
+
+/* Driver's main init/cleanup functions *************************************/
+
+
+/*
+ * Registers the functionfs file system type.  Returns the result of
+ * register_filesystem().
+ */
+static int functionfs_init(void)
+{
+	int ret;
+
+	ENTER();
+
+	ret = register_filesystem(&ffs_fs_type);
+	if (unlikely(ret)) {
+		FERR("failed registering file system (%d)", ret);
+		return ret;
+	}
+
+	FINFO("file system registered");
+	return ret;
+}
+
+/* Counterpart of functionfs_init(): unregisters the file system type. */
+static void functionfs_cleanup(void)
+{
+	ENTER();
+	FINFO("unloading");
+
+	unregister_filesystem(&ffs_fs_type);
+}
+
+
+
+/* ffs_data and ffs_function construction and destruction code **************/
+
+static void ffs_data_clear(struct ffs_data *ffs);
+static void ffs_data_reset(struct ffs_data *ffs);
+
+
+/* Takes an additional reference to @ffs; release with ffs_data_put(). */
+static void ffs_data_get(struct ffs_data *ffs)
+{
+	ENTER();
+	atomic_inc(&ffs->ref);
+}
+
+/*
+ * Accounts for a newly opened file: takes a reference to @ffs and
+ * bumps the number of openers.  Undone by ffs_data_closed().
+ */
+static void ffs_data_opened(struct ffs_data *ffs)
+{
+	ENTER();
+	atomic_inc(&ffs->ref);
+	atomic_inc(&ffs->opened);
+}
+
+/*
+ * Drops one reference to @ffs.  When the last reference goes away the
+ * structure is cleared and freed; at that point nobody may hold its
+ * mutex or event lock, or wait on its wait queues.
+ */
+static void ffs_data_put(struct ffs_data *ffs)
+{
+	ENTER();
+
+	if (likely(!atomic_dec_and_test(&ffs->ref)))
+		return;
+
+	FINFO("%s(): freeing", __func__);
+	ffs_data_clear(ffs);
+	BUG_ON(mutex_is_locked(&ffs->mutex) ||
+	       spin_is_locked(&ffs->ev.waitq.lock) ||
+	       waitqueue_active(&ffs->ev.waitq) ||
+	       waitqueue_active(&ffs->ep0req_completion.wait));
+	kfree(ffs);
+}
+
+
+
+/*
+ * Accounts for a closed file.  When the last opener is gone the state
+ * is set to FFS_CLOSING and @ffs is reset to its initial state.  In
+ * either case the reference taken by ffs_data_opened() is dropped.
+ */
+static void ffs_data_closed(struct ffs_data *ffs)
+{
+	int last_opener;
+
+	ENTER();
+
+	last_opener = atomic_dec_and_test(&ffs->opened);
+	if (last_opener) {
+		ffs->state = FFS_CLOSING;
+		ffs_data_reset(ffs);
+	}
+
+	ffs_data_put(ffs);
+}
+
+
+/*
+ * Allocates and initialises a new ffs_data with a reference count of
+ * one, no openers and the FFS_READ_DESCRIPTORS state.
+ *
+ * Returns the new structure or NULL if the allocation failed.
+ */
+static struct ffs_data *ffs_data_new(void)
+{
+	struct ffs_data *ffs = kzalloc(sizeof *ffs, GFP_KERNEL);
+	if (unlikely(!ffs))
+		return NULL;
+
+	ENTER();
+
+	atomic_set(&ffs->ref, 1);
+	atomic_set(&ffs->opened, 0);
+	ffs->state = FFS_READ_DESCRIPTORS;
+	mutex_init(&ffs->mutex);
+	spin_lock_init(&ffs->eps_lock);
+	init_waitqueue_head(&ffs->ev.waitq);
+	init_completion(&ffs->ep0req_completion);
+
+	/* XXX REVISIT need to update it in some places, or do we? */
+	ffs->ev.can_stall = 1;
+
+	return ffs;
+}
+
+
+/*
+ * Releases everything @ffs owns: invokes the "closed" callback if it
+ * is still pending, destroys the endpoint files and frees the raw
+ * descriptors, raw strings and string tables.  The pointers are left
+ * untouched.  @ffs must not be bound to a gadget any more.
+ */
+static void ffs_data_clear(struct ffs_data *ffs)
+{
+	ENTER();
+
+	if (test_and_clear_bit(FFS_FL_CALL_CLOSED_CALLBACK, &ffs->flags))
+		functionfs_closed_callback(ffs);
+
+	BUG_ON(ffs->gadget);
+
+	if (ffs->epfiles)
+		ffs_epfiles_destroy(ffs->epfiles, ffs->eps_count);
+
+	kfree(ffs->stringtabs);
+	kfree(ffs->raw_strings);
+	kfree(ffs->raw_descs);
+}
+
+
+/*
+ * Brings @ffs back to the state it had right after ffs_data_new():
+ * everything it owns is released, all counters are zeroed and the
+ * state machine restarts at FFS_READ_DESCRIPTORS.
+ */
+static void ffs_data_reset(struct ffs_data *ffs)
+{
+	ENTER();
+
+	ffs_data_clear(ffs);
+
+	/* Endpoint files */
+	ffs->epfiles = NULL;
+	ffs->eps_count = 0;
+
+	/* Descriptors */
+	ffs->raw_descs = NULL;
+	ffs->raw_descs_length = 0;
+	ffs->raw_fs_descs_length = 0;
+	ffs->fs_descs_count = 0;
+	ffs->hs_descs_count = 0;
+	ffs->interfaces_count = 0;
+
+	/* Strings */
+	ffs->raw_strings = NULL;
+	ffs->stringtabs = NULL;
+	ffs->strings_count = 0;
+
+	/* Pending events */
+	ffs->ev.count = 0;
+
+	ffs->state = FFS_READ_DESCRIPTORS;
+	ffs->setup_state = FFS_NO_SETUP;
+	ffs->flags = 0;
+}
+
+
+/*
+ * Binds @ffs to the composite device @cdev: allocates the ep0 request
+ * and assigns a string ID to every string of every language table.
+ *
+ * On success the gadget is remembered in ffs->gadget and a reference
+ * to @ffs is held until functionfs_unbind().  Returns 0 on success,
+ * -EBADFD if @ffs is not active or already bound, -ENOMEM if the
+ * request cannot be allocated, or the error from usb_string_id().
+ */
+static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev)
+{
+	unsigned i, count;
+
+	ENTER();
+
+	if (WARN_ON(ffs->state != FFS_ACTIVE
+		 || test_and_set_bit(FFS_FL_BOUND, &ffs->flags)))
+		return -EBADFD;
+
+	ffs->ep0req = usb_ep_alloc_request(cdev->gadget->ep0, GFP_KERNEL);
+	if (unlikely(!ffs->ep0req))
+		return -ENOMEM;
+	ffs->ep0req->complete = ffs_ep0_complete;
+	ffs->ep0req->context = ffs;
+
+	/* Get strings identifiers */
+	for (count = ffs->strings_count, i = 0; i < count; ++i) {
+		struct usb_gadget_strings **lang;
+
+		int id = usb_string_id(cdev);
+		if (unlikely(id < 0)) {
+			usb_ep_free_request(cdev->gadget->ep0, ffs->ep0req);
+			ffs->ep0req = NULL;
+			return id;
+		}
+
+		/* The i-th string gets the same ID in every language */
+		lang = ffs->stringtabs;
+		do {
+			(*lang)->strings[i].id = id;
+			++lang;
+		} while (*lang);
+	}
+
+	/* Take the reference only once nothing can fail any more: it is
+	 * dropped by functionfs_unbind(), which does nothing unless
+	 * ffs->gadget is set, so taking it earlier would leak it on the
+	 * error returns above. */
+	ffs->gadget = cdev->gadget;
+	ffs_data_get(ffs);
+	return 0;
+}
+
+
+/*
+ * Undoes functionfs_bind(): frees the ep0 request, forgets the gadget
+ * and drops one reference to @ffs.  Warns and does nothing if @ffs is
+ * not bound to a gadget.
+ */
+static void functionfs_unbind(struct ffs_data *ffs)
+{
+	ENTER();
+
+	if (WARN_ON(!ffs->gadget))
+		return;
+
+	usb_ep_free_request(ffs->gadget->ep0, ffs->ep0req);
+	ffs->ep0req = NULL;
+	ffs->gadget = NULL;
+	ffs_data_put(ffs);
+}
+
+
+/*
+ * Creates one "epN" file (N = 1 .. ffs->eps_count) in the root of the
+ * file system and stores the resulting array in ffs->epfiles.
+ *
+ * Returns 0 on success or -ENOMEM; on failure the files created so
+ * far are destroyed again and ffs->epfiles is left untouched.
+ */
+static int ffs_epfiles_create(struct ffs_data *ffs)
+{
+	struct ffs_epfile *epfile, *epfiles;
+	unsigned i, count;
+
+	ENTER();
+
+	count = ffs->eps_count;
+	epfiles = kcalloc(count, sizeof *epfiles, GFP_KERNEL);
+	if (!epfiles)
+		return -ENOMEM;
+
+	epfile = epfiles;
+	for (i = 1; i <= count; ++i, ++epfile) {
+		epfile->ffs = ffs;
+		mutex_init(&epfile->mutex);
+		init_waitqueue_head(&epfile->wait);
+		/* Each file keeps its own name ("ep1", "ep2", ...) */
+		sprintf(epfile->name, "ep%u",  i);
+		if (unlikely(!ffs_sb_create_file(ffs->sb, epfile->name, epfile,
+						 &ffs_epfile_operations,
+						 &epfile->dentry))) {
+			/* Only the i - 1 earlier entries have a dentry */
+			ffs_epfiles_destroy(epfiles, i - 1);
+			return -ENOMEM;
+		}
+	}
+
+	ffs->epfiles = epfiles;
+	return 0;
+}
+
+
+/*
+ * Removes the dentries of the first @count entries of @epfiles and
+ * frees the array.  None of the files may be in use: their mutex must
+ * be unlocked and nobody may wait on their wait queue.
+ */
+static void ffs_epfiles_destroy(struct ffs_epfile *epfiles, unsigned count)
+{
+	unsigned i;
+
+	ENTER();
+
+	for (i = 0; i < count; ++i) {
+		struct ffs_epfile *epfile = &epfiles[i];
+
+		BUG_ON(mutex_is_locked(&epfile->mutex) ||
+		       waitqueue_active(&epfile->wait));
+
+		if (!epfile->dentry)
+			continue;
+
+		d_delete(epfile->dentry);
+		dput(epfile->dentry);
+		epfile->dentry = NULL;
+	}
+
+	kfree(epfiles);
+}
+
+
+/*
+ * Allocates a new ffs_function for @ffs and adds it to configuration
+ * @c.  The function holds a reference to @ffs which is released by
+ * ffs_func_free().
+ *
+ * Returns 0 on success, -ENOMEM if the function cannot be allocated
+ * or the error returned by usb_add_function().
+ */
+static int functionfs_add(struct usb_composite_dev *cdev,
+			  struct usb_configuration *c,
+			  struct ffs_data *ffs)
+{
+	struct ffs_function *func;
+	struct usb_function *f;
+	int ret;
+
+	ENTER();
+
+	func = kzalloc(sizeof *func, GFP_KERNEL);
+	if (unlikely(!func))
+		return -ENOMEM;
+
+	func->ffs    = ffs;
+	func->conf   = c;
+	func->gadget = cdev->gadget;
+	ffs_data_get(ffs);
+
+	f = &func->function;
+	f->name    = "Function FS Gadget";
+	f->strings = ffs->stringtabs;
+
+	f->bind    = ffs_func_bind;
+	f->unbind  = ffs_func_unbind;
+	f->set_alt = ffs_func_set_alt;
+	/*f->get_alt = ffs_func_get_alt;*/
+	f->disable = ffs_func_disable;
+	f->setup   = ffs_func_setup;
+	f->suspend = ffs_func_suspend;
+	f->resume  = ffs_func_resume;
+
+	ret = usb_add_function(c, f);
+	if (unlikely(ret))
+		ffs_func_free(func);
+
+	return ret;
+}
+
+/*
+ * Frees @func together with its per-bind data and drops the reference
+ * to the ffs_data it was created for.
+ */
+static void ffs_func_free(struct ffs_function *func)
+{
+	ENTER();
+
+	ffs_data_put(func->ffs);
+
+	/* The endpoints, the interface numbers and the descriptors all
+	 * live in one allocation that starts with the endpoints, hence
+	 * a single kfree() releases them all. */
+	kfree(func->eps);
+
+	kfree(func);
+}
+
+
+/*
+ * Disables every endpoint of @func and detaches it from its endpoint
+ * file so that file operations see the endpoint as not enabled.  Does
+ * nothing when the function has no endpoints.
+ */
+static void ffs_func_eps_disable(struct ffs_function *func)
+{
+	struct ffs_ep *ep         = func->eps;
+	struct ffs_epfile *epfile = func->ffs->epfiles;
+	unsigned count            = func->ffs->eps_count;
+	unsigned long flags;
+
+	spin_lock_irqsave(&func->ffs->eps_lock, flags);
+	/* Test before the first iteration: a do-while would wrap the
+	 * counter around when the function has no endpoints at all. */
+	while (count--) {
+		/* pending requests get nuked */
+		if (likely(ep->ep))
+			usb_ep_disable(ep->ep);
+		epfile->ep = NULL;
+
+		++ep;
+		++epfile;
+	}
+	spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+}
+
+/*
+ * Enables every endpoint of @func, attaches it to its endpoint file
+ * and wakes up anybody waiting for the endpoint to become available.
+ * The high speed descriptor is used when there is one, the full speed
+ * descriptor otherwise.
+ *
+ * Returns 0 on success (also when there are no endpoints) or the
+ * error of the first usb_ep_enable() that failed; endpoints enabled
+ * before the failure are left enabled.
+ */
+static int ffs_func_eps_enable(struct ffs_function *func)
+{
+	struct ffs_data *ffs      = func->ffs;
+	struct ffs_ep *ep         = func->eps;
+	struct ffs_epfile *epfile = ffs->epfiles;
+	unsigned count            = ffs->eps_count;
+	unsigned long flags;
+	int ret = 0;
+
+	spin_lock_irqsave(&func->ffs->eps_lock, flags);
+	/* Test before the first iteration: a do-while would wrap the
+	 * counter around when the function has no endpoints at all. */
+	while (count--) {
+		struct usb_endpoint_descriptor *ds;
+		ds = ep->descs[ep->descs[1] ? 1 : 0];
+
+		ep->ep->driver_data = ep;
+		ret = usb_ep_enable(ep->ep, ds);
+		if (unlikely(ret))
+			break;
+
+		epfile->ep = ep;
+		epfile->in = usb_endpoint_dir_in(ds);
+		epfile->isoc = usb_endpoint_xfer_isoc(ds);
+
+		wake_up(&epfile->wait);
+
+		++ep;
+		++epfile;
+	}
+	spin_unlock_irqrestore(&func->ffs->eps_lock, flags);
+
+	return ret;
+}
+
+
+/* Parsing and building descriptors and strings *****************************/
+
+
+/* This validates whether the data pointed to by data is a valid USB
+ * descriptor and reports every interface, endpoint and string it
+ * references through the entity callback.  Returns the length of the
+ * descriptor or a negative error code if data is invalid. */
+
+/* Kinds of entities reported to an ffs_entity_callback. */
+enum ffs_entity_type {
+	FFS_DESCRIPTOR,		/* a whole descriptor */
+	FFS_INTERFACE,		/* an interface number */
+	FFS_STRING,		/* a string index */
+	FFS_ENDPOINT		/* an endpoint address */
+};
+
+/*
+ * Callback invoked by ffs_do_descs() and ffs_do_desc() for every entity
+ * found while walking descriptors.  For FFS_DESCRIPTOR @valuep is not a
+ * pointer but the index of the descriptor cast to one, and @desc is NULL
+ * for the call that follows the last descriptor; for the other types
+ * @valuep points at the field inside @desc holding the value.  @priv is
+ * passed through unchanged.  A negative return value aborts the walk.
+ */
+typedef int (*ffs_entity_callback)(enum ffs_entity_type entity,
+				   u8 *valuep,
+				   struct usb_descriptor_header *desc,
+				   void *priv);
+
+/*
+ * Validates the single descriptor at @data (at most @len bytes are
+ * available) and calls @entity with @priv for every interface number,
+ * string index and endpoint address stored in it.
+ *
+ * Interface, endpoint, OTG and interface association descriptors are
+ * accepted when their bLength has the expected size; every other
+ * descriptor type is rejected.
+ *
+ * Returns the length of the descriptor, -EINVAL if it is malformed or
+ * not allowed, or the negative value returned by @entity.
+ */
+static int __must_check ffs_do_desc(char *data, unsigned len,
+				    ffs_entity_callback entity, void *priv)
+{
+	struct usb_descriptor_header *_ds = (void *)data;
+	u8 length;
+	int ret;
+
+	ENTER();
+
+	/* At least two bytes are required: length and type */
+	if (len < 2) {
+		FVDBG("descriptor too short");
+		return -EINVAL;
+	}
+
+	/* Do we have at least as many bytes as the descriptor takes? */
+	length = _ds->bLength;
+	if (len < length) {
+		FVDBG("descriptor longer then available data");
+		return -EINVAL;
+	}
+
+/* Per-type validity checks of an entity's value: any interface number
+ * is fine, string indices and endpoint numbers must be non-zero. */
+#define __entity_check_INTERFACE(val)  1
+#define __entity_check_STRING(val)     (val)
+#define __entity_check_ENDPOINT(val)   ((val) & USB_ENDPOINT_NUMBER_MASK)
+/* Validates val and passes a pointer to it to the callback; returns
+ * from ffs_do_desc() on failure.  val must be an lvalue. */
+#define __entity(type, val) do {					\
+		FVDBG("entity " #type "(%02x)", (val));			\
+		if (unlikely(!__entity_check_ ##type(val))) {		\
+			FVDBG("invalid entity's value");		\
+			return -EINVAL;					\
+		}							\
+		ret = entity(FFS_ ##type, &val, _ds, priv);		\
+		if (unlikely(ret < 0)) {				\
+			FDBG("entity " #type "(%02x); ret = %d",	\
+			     (val), ret);				\
+			return ret;					\
+		}							\
+	} while (0)
+
+	/* Parse descriptor depending on type. */
+	switch (_ds->bDescriptorType) {
+	case USB_DT_DEVICE:
+	case USB_DT_CONFIG:
+	case USB_DT_STRING:
+	case USB_DT_DEVICE_QUALIFIER:
+		/* function can't have any of those */
+		FVDBG("descriptor reserved for gadget: %d", _ds->bDescriptorType);
+		return -EINVAL;
+
+	case USB_DT_INTERFACE: {
+		struct usb_interface_descriptor *ds = (void *)_ds;
+		FVDBG("interface descriptor");
+		if (length != sizeof *ds)
+			goto inv_length;
+
+		__entity(INTERFACE, ds->bInterfaceNumber);
+		/* A zero index means "no string" and is not reported */
+		if (ds->iInterface)
+			__entity(STRING, ds->iInterface);
+	}
+		break;
+
+	case USB_DT_ENDPOINT: {
+		struct usb_endpoint_descriptor *ds = (void *)_ds;
+		FVDBG("endpoint descriptor");
+		if (length != USB_DT_ENDPOINT_SIZE &&
+		    length != USB_DT_ENDPOINT_AUDIO_SIZE)
+			goto inv_length;
+		__entity(ENDPOINT, ds->bEndpointAddress);
+	}
+		break;
+
+	case USB_DT_OTG:
+		if (length != sizeof(struct usb_otg_descriptor))
+			goto inv_length;
+		break;
+
+	case USB_DT_INTERFACE_ASSOCIATION: {
+		struct usb_interface_assoc_descriptor *ds = (void *)_ds;
+		FVDBG("interface association descriptor");
+		if (length != sizeof *ds)
+			goto inv_length;
+		/* A zero index means "no string" and is not reported */
+		if (ds->iFunction)
+			__entity(STRING, ds->iFunction);
+	}
+		break;
+
+	case USB_DT_OTHER_SPEED_CONFIG:
+	case USB_DT_INTERFACE_POWER:
+	case USB_DT_DEBUG:
+	case USB_DT_SECURITY:
+	case USB_DT_CS_RADIO_CONTROL:
+		/* TODO */
+		FVDBG("unimplemented descriptor: %d", _ds->bDescriptorType);
+		return -EINVAL;
+
+	default:
+		/* We should never be here */
+		FVDBG("unknown descriptor: %d", _ds->bDescriptorType);
+		return -EINVAL;
+
+	/* Shared exit of the size checks above */
+	inv_length:
+		FVDBG("invalid length: %d (descriptor %d)",
+		      _ds->bLength, _ds->bDescriptorType);
+		return -EINVAL;
+	}
+
+/* NOTE(review): __entity_check_DESCRIPTOR is never defined in this
+ * function, so its #undef below has no effect. */
+#undef __entity
+#undef __entity_check_DESCRIPTOR
+#undef __entity_check_INTERFACE
+#undef __entity_check_STRING
+#undef __entity_check_ENDPOINT
+
+	return length;
+}
+
+
+/*
+ * Walks @count consecutive descriptors stored in the @len bytes at
+ * @data.  Before each descriptor @entity is called with FFS_DESCRIPTOR,
+ * the index of the descriptor (cast to a pointer) and the descriptor
+ * itself; after the last one it is called once more with a NULL
+ * descriptor.  The contents of each descriptor are then processed by
+ * ffs_do_desc().
+ *
+ * Returns the number of bytes the descriptors occupy or the first
+ * negative value returned by @entity or ffs_do_desc().
+ */
+static int __must_check ffs_do_descs(unsigned count, char *data, unsigned len,
+				     ffs_entity_callback entity, void *priv)
+{
+	unsigned consumed = 0;
+	unsigned long idx;
+
+	ENTER();
+
+	for (idx = 0; ; ++idx) {
+		char *cur = idx == count ? NULL : data + consumed;
+		int ret;
+
+		/* Record "descriptor" entity */
+		ret = entity(FFS_DESCRIPTOR, (u8 *)idx, (void *)cur, priv);
+		if (unlikely(ret < 0)) {
+			FDBG("entity DESCRIPTOR(%02lx); ret = %d", idx, ret);
+			return ret;
+		}
+
+		/* That was the terminating call, we are done */
+		if (!cur)
+			return consumed;
+
+		ret = ffs_do_desc(cur, len - consumed, entity, priv);
+		if (unlikely(ret < 0)) {
+			FDBG("%s returns %d", __func__, ret);
+			return ret;
+		}
+
+		consumed += ret;
+	}
+}
+
+
+/*
+ * Entity callback used while validating descriptors received from
+ * user space.  Keeps track, in the ffs_data passed as @priv, of how
+ * many interfaces, strings and endpoints the descriptors refer to.
+ * Always returns 0.
+ */
+static int __ffs_data_do_entity(enum ffs_entity_type type,
+				u8 *valuep, struct usb_descriptor_header *desc,
+				void *priv)
+{
+	struct ffs_data *ffs = priv;
+
+	ENTER();
+
+	/* Note: valuep is only a real pointer for the types handled
+	 * below, so it must not be dereferenced outside the cases. */
+	switch (type) {
+	case FFS_INTERFACE: {
+		/* Numbering starts at zero: seeing interface "n" means
+		 * there are at least "n+1" of them. */
+		const u8 intf = *valuep;
+
+		if (intf >= ffs->interfaces_count)
+			ffs->interfaces_count = intf + 1;
+		break;
+	}
+
+	case FFS_STRING: {
+		/* Numbering starts at one (index 0 is reserved), so the
+		 * highest index seen is the number of strings needed. */
+		const u8 str = *valuep;
+
+		if (str > ffs->strings_count)
+			ffs->strings_count = str;
+		break;
+	}
+
+	case FFS_ENDPOINT: {
+		/* Endpoints are numbered from one as well. */
+		const u8 epnum = *valuep & USB_ENDPOINT_NUMBER_MASK;
+
+		if (epnum > ffs->eps_count)
+			ffs->eps_count = epnum;
+		break;
+	}
+
+	case FFS_DESCRIPTOR:
+		break;
+	}
+
+	return 0;
+}
+
+
+/*
+ * Validates the descriptors blob of @len bytes at @_data and, on
+ * success, stores it in @ffs.
+ *
+ * The blob starts with a 16 byte header of four little endian 32 bit
+ * words: magic, total length, number of full speed descriptors and
+ * number of high speed descriptors.  The full speed descriptors follow,
+ * then the high speed ones; nothing may be left over after them.
+ *
+ * Takes ownership of @_data: it is kept in ffs->raw_descs on success
+ * and freed on failure.  Returns 0 or a negative error code.
+ *
+ * NOTE(review): the header is read without checking len >= 16 here;
+ * presumably the caller guarantees that -- confirm.
+ */
+static int __ffs_data_got_descs(struct ffs_data *ffs,
+				char *const _data, size_t len)
+{
+	unsigned fs_count, hs_count;
+	int fs_len, ret = -EINVAL;
+	char *data = _data;
+
+	ENTER();
+
+	if (unlikely(get_unaligned_le32(data) != FUNCTIONFS_DESCRIPTORS_MAGIC ||
+		     get_unaligned_le32(data + 4) != len))
+		goto error;
+	fs_count = get_unaligned_le32(data +  8);
+	hs_count = get_unaligned_le32(data + 12);
+
+	/* At least one of the two descriptor sets must be present */
+	if (!fs_count && !hs_count)
+		goto einval;
+
+	/* Skip the header */
+	data += 16;
+	len  -= 16;
+
+	if (likely(fs_count)) {
+		fs_len = ffs_do_descs(fs_count, data, len,
+				      __ffs_data_do_entity, ffs);
+		if (unlikely(fs_len < 0)) {
+			ret = fs_len;
+			goto error;
+		}
+
+		data += fs_len;
+		len  -= fs_len;
+	} else {
+		fs_len = 0;
+	}
+
+	/* ret becomes the length of the high speed descriptors */
+	if (likely(hs_count)) {
+		ret = ffs_do_descs(hs_count, data, len,
+				   __ffs_data_do_entity, ffs);
+		if (unlikely(ret < 0))
+			goto error;
+	} else {
+		ret = 0;
+	}
+
+	/* Trailing garbage after the last descriptor is not allowed */
+	if (unlikely(len != ret))
+		goto einval;
+
+	ffs->raw_fs_descs_length = fs_len;
+	ffs->raw_descs_length    = fs_len + ret;
+	ffs->raw_descs           = _data;
+	ffs->fs_descs_count      = fs_count;
+	ffs->hs_descs_count      = hs_count;
+
+	return 0;
+
+einval:
+	ret = -EINVAL;
+error:
+	kfree(_data);
+	return ret;
+}
+
+
+
+/*
+ * Validates the strings blob of @len bytes at @_data and builds the
+ * NULL terminated array of per-language string tables out of it.
+ *
+ * The blob starts with a 16 byte header of four little endian 32 bit
+ * words: magic, total length, number of strings per language and
+ * number of languages.  For every language a 16 bit language code is
+ * followed by that many NUL terminated strings.  Each language must
+ * provide at least as many strings as the descriptors refer to
+ * (ffs->strings_count); additional ones are ignored.
+ *
+ * Takes ownership of @_data: it is kept in ffs->raw_strings when
+ * tables have been built and freed otherwise (on error and when the
+ * descriptors need no strings at all).  Returns 0, -EINVAL for a
+ * malformed blob or -ENOMEM.
+ */
+static int __ffs_data_got_strings(struct ffs_data *ffs,
+				  char *const _data, size_t len)
+{
+	u32 str_count, needed_count, lang_count;
+	struct usb_gadget_strings **stringtabs, *t;
+	struct usb_string *s;
+	const char *data = _data;
+
+	ENTER();
+
+	if (unlikely(len < 16 ||
+		     get_unaligned_le32(data) != FUNCTIONFS_STRINGS_MAGIC ||
+		     get_unaligned_le32(data + 4) != len))
+		goto error;
+	str_count  = get_unaligned_le32(data + 8);
+	lang_count = get_unaligned_le32(data + 12);
+
+	/* if one is zero the other must be zero */
+	if (unlikely(!str_count != !lang_count))
+		goto error;
+
+	/* Do we have at least as many strings as descriptors need? */
+	needed_count = ffs->strings_count;
+	if (unlikely(str_count < needed_count))
+		goto error;
+
+	/* If we don't need any strings just return and free all
+	 * memory */
+	if (!needed_count) {
+		kfree(_data);
+		return 0;
+	}
+
+	/* Every language takes at least three bytes (the language code
+	 * and one empty string).  Reject impossible counts before they
+	 * are used to size the allocation below, where a huge value
+	 * could overflow the size computation. */
+	if (unlikely(lang_count > (len - 16) / 3))
+		goto error;
+
+	/* Allocate */
+	{
+		/* Allocate everything in one chunk so there's less
+		 * maintenance. */
+		struct {
+			struct usb_gadget_strings *stringtabs[lang_count + 1];
+			struct usb_gadget_strings stringtab[lang_count];
+			struct usb_string strings[lang_count*(needed_count+1)];
+		} *d;
+		unsigned i = 0;
+
+		d = kmalloc(sizeof *d, GFP_KERNEL);
+		if (unlikely(!d)) {
+			kfree(_data);
+			return -ENOMEM;
+		}
+
+		/* Point each table slot at its table, NULL terminated */
+		stringtabs = d->stringtabs;
+		t = d->stringtab;
+		i = lang_count;
+		do {
+			*stringtabs++ = t++;
+		} while (--i);
+		*stringtabs = NULL;
+
+		/* stringtabs is the first member of d, so freeing it
+		 * later releases the whole chunk */
+		stringtabs = d->stringtabs;
+		t = d->stringtab;
+		s = d->strings;
+	}
+
+	/* For each language */
+	data += 16;
+	len -= 16;
+
+	do { /* lang_count > 0 so we can use do-while */
+		unsigned needed = needed_count;
+		/* Every language has str_count strings, so count them
+		 * with a fresh counter; counting down str_count itself
+		 * would leave it at zero for the second language. */
+		u32 str_left = str_count;
+
+		if (unlikely(len < 3))
+			goto error_free;
+		t->language = get_unaligned_le16(data);
+		t->strings  = s;
+		++t;
+
+		data += 2;
+		len -= 2;
+
+		/* For each string */
+		do { /* str_count > 0 so we can use do-while */
+			size_t length = strnlen(data, len);
+
+			/* No terminating NUL within the data left */
+			if (unlikely(length == len))
+				goto error_free;
+
+			/* user may provide more strings than we need,
+			 * if that's the case we simply ignore the
+			 * rest */
+			if (likely(needed)) {
+				/* s->id will be set while adding
+				 * function to configuration so for
+				 * now just leave garbage here. */
+				s->s = data;
+				--needed;
+				++s;
+			}
+
+			data += length + 1;
+			len -= length + 1;
+		} while (--str_left);
+
+		s->id = 0;   /* terminator */
+		s->s = NULL;
+		++s;
+
+	} while (--lang_count);
+
+	/* Some garbage left? */
+	if (unlikely(len))
+		goto error_free;
+
+	/* Done! */
+	ffs->stringtabs = stringtabs;
+	ffs->raw_strings = _data;
+
+	return 0;
+
+error_free:
+	kfree(stringtabs);
+error:
+	kfree(_data);
+	return -EINVAL;
+}
+
+
+
+
+/* Events handling and management *******************************************/
+
+/*
+ * Appends an event of the given @type to the queue of @ffs after
+ * purging queued events made obsolete by it, then wakes up waiters
+ * with wake_up_locked(); ffs_event_add() calls this with
+ * ffs->ev.waitq.lock held.
+ *
+ * Purging rules:
+ *  - SUSPEND and SETUP discard queued events of the same type,
+ *  - RESUME discards queued RESUME and SUSPEND events,
+ *  - BIND, UNBIND, DISABLE and ENABLE discard everything except
+ *    SUSPEND and RESUME.
+ */
+static void __ffs_event_add(struct ffs_data *ffs,
+			    enum usb_functionfs_event_type type)
+{
+	enum usb_functionfs_event_type rem_type1, rem_type2 = type;
+	int neg = 0;
+
+	/* Abort any unhandled setup */
+	/* We do not need to worry about some cmpxchg() changing value
+	 * of ffs->setup_state without holding the lock because when
+	 * state is FFS_SETUP_PENDING cmpxchg() in several places in
+	 * the source does nothing. */
+	if (ffs->setup_state == FFS_SETUP_PENDING)
+		ffs->setup_state = FFS_SETUP_CANCELED;
+
+	/* Pick the two event types the filter below matches on; with
+	 * neg set the match is inverted, i.e. only those two survive. */
+	switch (type) {
+	case FUNCTIONFS_RESUME:
+		rem_type2 = FUNCTIONFS_SUSPEND;
+		/* FALL THROUGH */
+	case FUNCTIONFS_SUSPEND:
+	case FUNCTIONFS_SETUP:
+		rem_type1 = type;
+		/* discard all similar events */
+		break;
+
+	case FUNCTIONFS_BIND:
+	case FUNCTIONFS_UNBIND:
+	case FUNCTIONFS_DISABLE:
+	case FUNCTIONFS_ENABLE:
+		/* discard everything other than power management. */
+		rem_type1 = FUNCTIONFS_SUSPEND;
+		rem_type2 = FUNCTIONFS_RESUME;
+		neg = 1;
+		break;
+
+	default:
+		BUG();
+	}
+
+	/* Compact the queue in place, keeping only surviving events */
+	{
+		u8 *ev  = ffs->ev.types, *out = ev;
+		unsigned n = ffs->ev.count;
+		for (; n; --n, ++ev)
+			if ((*ev == rem_type1 || *ev == rem_type2) == neg)
+				*out++ = *ev;
+			else
+				FVDBG("purging event %d", *ev);
+		ffs->ev.count = out - ffs->ev.types;
+	}
+
+	FVDBG("adding event %d", type);
+	ffs->ev.types[ffs->ev.count++] = type;
+	wake_up_locked(&ffs->ev.waitq);
+}
+
+/*
+ * Locked wrapper around __ffs_event_add(): queues an event of @type
+ * while holding the lock of the event wait queue.
+ */
+static void ffs_event_add(struct ffs_data *ffs,
+			  enum usb_functionfs_event_type type)
+{
+	spinlock_t *lock = &ffs->ev.waitq.lock;
+	unsigned long irq_flags;
+
+	spin_lock_irqsave(lock, irq_flags);
+	__ffs_event_add(ffs, type);
+	spin_unlock_irqrestore(lock, irq_flags);
+}
+
+
+/* Bind/unbind USB function hooks *******************************************/
+
+/*
+ * Entity callback of the first bind pass; @priv is the ffs_function.
+ * Only FFS_DESCRIPTOR entities are handled, for which @valuep is the
+ * index of the descriptor (not a pointer) and @desc the descriptor
+ * itself, or NULL for the terminating call.
+ *
+ * Stores @desc in the full or high speed descriptor table of the
+ * function.  For an endpoint descriptor it additionally records the
+ * descriptor for that endpoint and, the first time the endpoint is
+ * seen, claims a hardware endpoint through usb_ep_autoconfig() and
+ * allocates its request; later descriptors of the same endpoint get
+ * the address (and, if unset, the max packet size) copied from the
+ * first one.
+ *
+ * Returns 0 on success, -EINVAL if an endpoint has two descriptors for
+ * one speed, -ENOTSUPP if no endpoint could be claimed or -ENOMEM.
+ */
+static int __ffs_func_bind_do_descs(enum ffs_entity_type type, u8 *valuep,
+				    struct usb_descriptor_header *desc,
+				    void *priv)
+{
+	struct usb_endpoint_descriptor *ds = (void *)desc;
+	struct ffs_function *func = priv;
+	struct ffs_ep *ffs_ep;
+
+	/* If hs_descriptors is not NULL then we are reading hs
+	 * descriptors now */
+	const int isHS = func->function.hs_descriptors != NULL;
+	unsigned idx;
+
+	if (type != FFS_DESCRIPTOR)
+		return 0;
+
+	/* desc is NULL on the last call, which terminates the table */
+	if (isHS)
+		func->function.hs_descriptors[(long)valuep] = desc;
+	else
+		func->function.descriptors[(long)valuep]    = desc;
+
+	if (!desc || desc->bDescriptorType != USB_DT_ENDPOINT)
+		return 0;
+
+	/* Endpoint numbers start at one, func->eps is indexed from zero */
+	idx = (ds->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK) - 1;
+	ffs_ep = func->eps + idx;
+
+	if (unlikely(ffs_ep->descs[isHS])) {
+		FVDBG("two %sspeed descriptors for EP %d",
+		      isHS ? "high" : "full",
+		      ds->bEndpointAddress & USB_ENDPOINT_NUMBER_MASK);
+		return -EINVAL;
+	}
+	ffs_ep->descs[isHS] = ds;
+
+	ffs_dump_mem(": Original  ep desc", ds, ds->bLength);
+	if (ffs_ep->ep) {
+		/* Endpoint already claimed: reuse what was set up then */
+		ds->bEndpointAddress = ffs_ep->descs[0]->bEndpointAddress;
+		if (!ds->wMaxPacketSize)
+			ds->wMaxPacketSize = ffs_ep->descs[0]->wMaxPacketSize;
+	} else {
+		struct usb_request *req;
+		struct usb_ep *ep;
+
+		FVDBG("autoconfig");
+		ep = usb_ep_autoconfig(func->gadget, ds);
+		if (unlikely(!ep))
+			return -ENOTSUPP;
+		ep->driver_data = func->eps + idx;;
+
+		req = usb_ep_alloc_request(ep, GFP_KERNEL);
+		if (unlikely(!req))
+			return -ENOMEM;
+
+		ffs_ep->ep  = ep;
+		ffs_ep->req = req;
+		/* Remember which endpoint file the endpoint address (as
+		 * it reads after autoconfig) belongs to */
+		func->eps_revmap[ds->bEndpointAddress &
+				 USB_ENDPOINT_NUMBER_MASK] = idx + 1;
+	}
+	ffs_dump_mem(": Rewritten ep desc", ds, ds->bLength);
+
+	return 0;
+}
+
+
+/*
+ * Entity callback of the second bind pass; @priv is the ffs_function.
+ * Rewrites, in place, the interface numbers, string indices and
+ * endpoint addresses found in the descriptors with the values
+ * allocated for this function.  Interface IDs are allocated on first
+ * use.
+ *
+ * Returns 0 on success, the error of usb_interface_id(), or -EINVAL
+ * if a descriptor refers to an endpoint that has not been claimed.
+ */
+static int __ffs_func_bind_do_nums(enum ffs_entity_type type, u8 *valuep,
+				   struct usb_descriptor_header *desc,
+				   void *priv)
+{
+	struct ffs_function *func = priv;
+	struct usb_endpoint_descriptor **descs;
+	unsigned idx;
+	u8 renumbered;
+	int id;
+
+	/* valuep is only dereferenced inside the cases: for
+	 * FFS_DESCRIPTOR it is an index rather than a pointer. */
+	switch (type) {
+	case FFS_INTERFACE:
+		idx = *valuep;
+		if (func->interfaces_nums[idx] < 0) {
+			id = usb_interface_id(func->conf, &func->function);
+			if (unlikely(id < 0))
+				return id;
+			func->interfaces_nums[idx] = id;
+		}
+		renumbered = func->interfaces_nums[idx];
+		break;
+
+	case FFS_STRING:
+		/* String IDs are allocated when ffs_data is bound to cdev */
+		renumbered = func->ffs->stringtabs[0]->strings[*valuep - 1].id;
+		break;
+
+	case FFS_ENDPOINT:
+		/* Endpoint descriptors themselves have already been
+		 * rewritten by __ffs_func_bind_do_descs(). */
+		if (desc->bDescriptorType == USB_DT_ENDPOINT)
+			return 0;
+
+		idx = (*valuep & USB_ENDPOINT_NUMBER_MASK) - 1;
+		if (unlikely(!func->eps[idx].ep))
+			return -EINVAL;
+
+		descs = func->eps[idx].descs;
+		renumbered = descs[descs[0] ? 0 : 1]->bEndpointAddress;
+		break;
+
+	case FFS_DESCRIPTOR:
+	default:
+		/* Handled in previous pass by __ffs_func_bind_do_descs() */
+		return 0;
+	}
+
+	FVDBG("%02x -> %02x", *valuep, renumbered);
+	*valuep = renumbered;
+	return 0;
+}
+
+/*
+ * bind() hook of the USB function.
+ *
+ * Allocates, in a single chunk, the per-endpoint state, the descriptor
+ * pointer tables, the interface number map and a private copy of the
+ * raw descriptors.  The copy is then walked twice: first to fill the
+ * descriptor tables and claim endpoints, then to rewrite interface
+ * numbers, string IDs and endpoint addresses.  Finally a
+ * FUNCTIONFS_BIND event is queued.
+ *
+ * The chunk starts with the endpoint array, so it is released through
+ * func->eps by ffs_func_free().
+ *
+ * Returns 0 on success, -ENOTSUPP if only high speed descriptors were
+ * provided but the gadget is not dual speed, -ENOMEM, or the error of
+ * one of the descriptor passes.
+ */
+static int ffs_func_bind(struct usb_configuration *c,
+			 struct usb_function *f)
+{
+	struct ffs_function *func = ffs_func_from_usb(f);
+	struct ffs_data *ffs = func->ffs;
+
+	const int full = !!func->ffs->fs_descs_count;
+	const int high = gadget_is_dualspeed(func->gadget) &&
+		func->ffs->hs_descs_count;
+
+	int ret;
+
+	/* Make it a single chunk, less management later on */
+	struct {
+		struct ffs_ep eps[ffs->eps_count];
+		struct usb_descriptor_header
+			*fs_descs[full ? ffs->fs_descs_count + 1 : 0];
+		struct usb_descriptor_header
+			*hs_descs[high ? ffs->hs_descs_count + 1 : 0];
+		short inums[ffs->interfaces_count];
+		char raw_descs[high ? ffs->raw_descs_length
+				    : ffs->raw_fs_descs_length];
+	} *data;
+
+	ENTER();
+
+	/* Only high speed but not supported by gadget? */
+	if (unlikely(!(full | high)))
+		return -ENOTSUPP;
+
+	/* Allocate */
+	data = kmalloc(sizeof *data, GFP_KERNEL);
+	if (unlikely(!data))
+		return -ENOMEM;
+
+	/* Zero */
+	memset(data->eps, 0, sizeof data->eps);
+	/* Skip the 16 byte header of the raw descriptors */
+	memcpy(data->raw_descs, ffs->raw_descs + 16, sizeof data->raw_descs);
+	/* All bits set makes every interface number -1: not allocated */
+	memset(data->inums, 0xff, sizeof data->inums);
+	/* ret counts down from eps_count to 1, the array is 0-based */
+	for (ret = ffs->eps_count; ret; --ret)
+		data->eps[ret - 1].num = -1;
+
+	/* Save pointers */
+	func->eps             = data->eps;
+	func->interfaces_nums = data->inums;
+
+	/* Go through all the endpoint descriptors and allocate
+	 * endpoints first, so that later we can rewrite the endpoint
+	 * numbers without worrying that it may be described later on. */
+	if (likely(full)) {
+		func->function.descriptors = data->fs_descs;
+		ret = ffs_do_descs(ffs->fs_descs_count,
+				   data->raw_descs,
+				   sizeof data->raw_descs,
+				   __ffs_func_bind_do_descs, func);
+		if (unlikely(ret < 0))
+			goto error;
+	} else {
+		ret = 0;
+	}
+
+	/* ret is the length of the full speed descriptors, which is
+	 * where the high speed ones start */
+	if (likely(high)) {
+		func->function.hs_descriptors = data->hs_descs;
+		ret = ffs_do_descs(ffs->hs_descs_count,
+				   data->raw_descs + ret,
+				   (sizeof data->raw_descs) - ret,
+				   __ffs_func_bind_do_descs, func);
+		/* Do not carry on with a half filled table */
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
+	/* Now handle interface numbers allocation and interface and
+	 * endpoint numbers rewriting.  We can do that in one go
+	 * now. */
+	ret = ffs_do_descs(ffs->fs_descs_count +
+			   (high ? ffs->hs_descs_count : 0),
+			   data->raw_descs, sizeof data->raw_descs,
+			   __ffs_func_bind_do_nums, func);
+	if (unlikely(ret < 0))
+		goto error;
+
+	/* And we're done */
+	ffs_event_add(ffs, FUNCTIONFS_BIND);
+	return 0;
+
+error:
+	/* XXX Do we need to release all claimed endpoints here? */
+	return ret;
+}
+
+
+/* Other USB function hooks *************************************************/
+
+/* Unbind hook: if ffs->func still points at this function, disable its
+ * endpoints and clear it; then queue FUNCTIONFS_UNBIND and free the function. */
+static void ffs_func_unbind(struct usb_configuration *c,
+			    struct usb_function *f)
+{
+	struct ffs_function *self = ffs_func_from_usb(f);
+	struct ffs_data *data = self->ffs;
+
+	ENTER();
+	if (data->func == self) {
+		ffs_func_eps_disable(self);
+		data->func = NULL;
+	}
+
+	ffs_event_add(data, FUNCTIONFS_UNBIND);
+	ffs_func_free(self);
+}
+
+
+/* set_alt hook (ffs_func_disable() calls it with alt == (unsigned)-1): disable
+ * the eps of ffs->func, then clear ffs->func or enable this function's eps. */
+static int ffs_func_set_alt(struct usb_function *f,
+			    unsigned interface, unsigned alt)
+{
+	struct ffs_function *func = ffs_func_from_usb(f);
+	struct ffs_data *ffs = func->ffs;
+	const int disable = alt == (unsigned)-1;
+	int ret;
+
+	ret = disable ? 0 : ffs_func_revmap_intf(func, interface);
+	if (unlikely(ret < 0))
+		return ret;
+
+	if (ffs->func)
+		ffs_func_eps_disable(ffs->func);
+	if (ffs->state != FFS_ACTIVE)
+		return -ENODEV;
+
+	if (disable) {
+		ffs->func = NULL;
+		ffs_event_add(ffs, FUNCTIONFS_DISABLE);
+		return 0;
+	}
+
+	ffs->func = func;
+	ret = ffs_func_eps_enable(func);
+	if (likely(ret >= 0))
+		ffs_event_add(ffs, FUNCTIONFS_ENABLE);
+	return ret;
+}
+
+static void ffs_func_disable(struct usb_function *f)
+{
+	ffs_func_set_alt(f, 0, (unsigned)-1);	/* alt of -1 requests disable */
+}
+
+/* Setup hook for requests addressed to an interface or endpoint.  Stores
+ * the request in ffs->ev.setup with wIndex replaced by the reverse-mapped
+ * number and queues FUNCTIONFS_SETUP.  Returns 0, -ENODEV unless the state
+ * is FFS_ACTIVE, -EOPNOTSUPP for other recipients, or the revmap error. */
+static int ffs_func_setup(struct usb_function *f,
+			  const struct usb_ctrlrequest *creq)
+{
+	struct ffs_function *func = ffs_func_from_usb(f);
+	struct ffs_data *ffs = func->ffs;
+	unsigned long flags;
+	int num;
+
+	ENTER();
+
+	FVDBG("creq->bRequestType = %02x", creq->bRequestType);
+	FVDBG("creq->bRequest     = %02x", creq->bRequest);
+	FVDBG("creq->wValue       = %04x", le16_to_cpu(creq->wValue));
+	FVDBG("creq->wIndex       = %04x", le16_to_cpu(creq->wIndex));
+	FVDBG("creq->wLength      = %04x", le16_to_cpu(creq->wLength));
+
+	/* Most requests directed to interface go through here
+	 * (notable exceptions are set/get interface) so we need to
+	 * handle them.  All other either handled by composite or
+	 * passed to usb_configuration->setup() (if one is set).  No
+	 * matter, we will handle requests directed to endpoint here
+	 * as well (as it's straightforward) but what to do with any
+	 * other request? */
+
+	if (ffs->state != FFS_ACTIVE)
+		return -ENODEV;
+
+	switch (creq->bRequestType & USB_RECIP_MASK) {
+	case USB_RECIP_INTERFACE:
+		num = ffs_func_revmap_intf(func, le16_to_cpu(creq->wIndex));
+		break;
+	case USB_RECIP_ENDPOINT:
+		num = ffs_func_revmap_ep(func, le16_to_cpu(creq->wIndex));
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	if (unlikely(num < 0))
+		return num;
+
+	spin_lock_irqsave(&ffs->ev.waitq.lock, flags);
+	ffs->ev.setup = *creq;
+	ffs->ev.setup.wIndex = cpu_to_le16(num);
+	__ffs_event_add(ffs, FUNCTIONFS_SETUP);
+	spin_unlock_irqrestore(&ffs->ev.waitq.lock, flags);
+
+	return 0;
+}
+
+static void ffs_func_suspend(struct usb_function *f)
+{
+	ENTER();	/* suspend hook: only queues the event below */
+	ffs_event_add(ffs_func_from_usb(f)->ffs, FUNCTIONFS_SUSPEND);
+}
+
+static void ffs_func_resume(struct usb_function *f)
+{
+	ENTER();	/* resume hook: only queues the event below */
+	ffs_event_add(ffs_func_from_usb(f)->ffs, FUNCTIONFS_RESUME);
+}
+
+
+
+/* Endpoint and interface numbers reverse mapping ***************************/
+
+static int ffs_func_revmap_ep(struct ffs_function *func, u8 num)
+{
+	const u8 mapped = func->eps_revmap[num & USB_ENDPOINT_NUMBER_MASK];
+	return mapped ? mapped : -EDOM;	/* a zero entry yields -EDOM */
+}
+
+/* Return the index in interfaces_nums[] whose value equals intf, or -EDOM.
+ * Negative slots can never equal the u8 argument, so they are skipped. */
+static int ffs_func_revmap_intf(struct ffs_function *func, u8 intf)
+{
+	const short *map = func->interfaces_nums;
+	unsigned i, total = func->ffs->interfaces_count;
+
+	for (i = 0; i < total; ++i)
+		if (map[i] == intf)
+			return i;
+	return -EDOM;
+}
+
+
+/* Misc helper functions ****************************************************/
+
+static int ffs_mutex_lock(struct mutex *mutex, unsigned nonblock)
+{
+	if (!nonblock)		/* otherwise only try the lock once */
+		return mutex_lock_interruptible(mutex);
+	return likely(mutex_trylock(mutex)) ? 0 : -EAGAIN;
+}
+
+
+/* Copy len user bytes into a new kmalloc() buffer.  Returns NULL if len is
+ * 0, ERR_PTR(-ENOMEM) or ERR_PTR(-EFAULT) on failure, else the buffer. */
+static char *ffs_prepare_buffer(const char * __user buf, size_t len)
+{
+	char *kbuf;
+
+	if (unlikely(!len))
+		return NULL;
+
+	kbuf = kmalloc(len, GFP_KERNEL);
+	if (unlikely(!kbuf))
+		return ERR_PTR(-ENOMEM);
+
+	if (likely(!__copy_from_user(kbuf, buf, len))) {
+		FVDBG("Buffer from user space:");
+		ffs_dump_mem("", kbuf, len);
+		return kbuf;
+	}
+	kfree(kbuf);
+	return ERR_PTR(-EFAULT);
+}
diff --git a/include/linux/usb/functionfs.h b/include/linux/usb/functionfs.h
new file mode 100644
index 000000000000..a34a2a043b21
--- /dev/null
+++ b/include/linux/usb/functionfs.h
@@ -0,0 +1,199 @@
+#ifndef __LINUX_FUNCTIONFS_H__
+#define __LINUX_FUNCTIONFS_H__ 1
+
+
+#include <linux/types.h>
+#include <linux/ioctl.h>
+
+#include <linux/usb/ch9.h>
+
+
+enum {
+	FUNCTIONFS_DESCRIPTORS_MAGIC = 1,	/* magic of the descriptors blob */
+	FUNCTIONFS_STRINGS_MAGIC     = 2	/* magic of the strings blob */
+};
+
+
+#ifndef __KERNEL__
+
+/* Descriptor of a non-audio endpoint (the layout ends at bInterval) */
+struct usb_endpoint_descriptor_no_audio {
+	__u8  bLength;
+	__u8  bDescriptorType;
+
+	__u8  bEndpointAddress;
+	__u8  bmAttributes;
+	__le16 wMaxPacketSize;
+	__u8  bInterval;
+} __attribute__((packed));
+
+
+/*
+ * All numbers must be in little endian order.
+ */
+
+struct usb_functionfs_descs_head {
+	__le32 magic;		/* FUNCTIONFS_DESCRIPTORS_MAGIC */
+	__le32 length;		/* length of the whole data chunk */
+	__le32 fs_count;	/* number of full-speed descriptors */
+	__le32 hs_count;	/* number of high-speed descriptors */
+} __attribute__((packed));
+
+/*
+ * Descriptors format:
+ *
+ * | off | name      | type         | description                          |
+ * |-----+-----------+--------------+--------------------------------------|
+ * |   0 | magic     | LE32         | FUNCTIONFS_DESCRIPTORS_MAGIC         |
+ * |   4 | length    | LE32         | length of the whole data chunk       |
+ * |   8 | fs_count  | LE32         | number of full-speed descriptors     |
+ * |  12 | hs_count  | LE32         | number of high-speed descriptors     |
+ * |  16 | fs_descrs | Descriptor[] | list of full-speed descriptors       |
+ * |     | hs_descrs | Descriptor[] | list of high-speed descriptors       |
+ *
+ * descs are just valid USB descriptors and have the following format:
+ *
+ * | off | name            | type | description              |
+ * |-----+-----------------+------+--------------------------|
+ * |   0 | bLength         | U8   | length of the descriptor |
+ * |   1 | bDescriptorType | U8   | descriptor type          |
+ * |   2 | payload         |      | descriptor's payload     |
+ */
+
+struct usb_functionfs_strings_head {
+	__le32 magic;		/* FUNCTIONFS_STRINGS_MAGIC */
+	__le32 length;		/* length of the data chunk */
+	__le32 str_count;	/* number of strings */
+	__le32 lang_count;	/* number of languages */
+} __attribute__((packed));
+
+/*
+ * Strings format:
+ *
+ * | off | name       | type                  | description                |
+ * |-----+------------+-----------------------+----------------------------|
+ * |   0 | magic      | LE32                  | FUNCTIONFS_STRINGS_MAGIC   |
+ * |   4 | length     | LE32                  | length of the data chunk   |
+ * |   8 | str_count  | LE32                  | number of strings          |
+ * |  12 | lang_count | LE32                  | number of languages        |
+ * |  16 | stringtab  | StringTab[lang_count] | table of strings per lang  |
+ *
+ * For each language there is one stringtab entry (i.e. there are lang_count
+ * stringtab entries).  Each StringTab has the following format:
+ *
+ * | off | name    | type              | description                        |
+ * |-----+---------+-------------------+------------------------------------|
+ * |   0 | lang    | LE16              | language code                      |
+ * |   2 | strings | String[str_count] | array of strings in given language |
+ *
+ * For each string there is one strings entry (i.e. there are str_count
+ * string entries).  Each String is a NUL terminated string encoded in
+ * UTF-8.
+ */
+
+#endif
+
+
+/*
+ * Events are delivered on the ep0 file descriptor, when the user mode driver
+ * reads from this file descriptor after writing the descriptors.  Don't
+ * stop polling this descriptor.
+ */
+
+enum usb_functionfs_event_type {
+	FUNCTIONFS_BIND,	/* function bound to a configuration */
+	FUNCTIONFS_UNBIND,	/* function unbound */
+
+	FUNCTIONFS_ENABLE,	/* alt setting selected, endpoints enabled */
+	FUNCTIONFS_DISABLE,	/* function disabled */
+
+	FUNCTIONFS_SETUP,	/* control request; see the event's u.setup */
+
+	FUNCTIONFS_SUSPEND,	/* suspend callback ran */
+	FUNCTIONFS_RESUME	/* resume callback ran */
+};
+
+/* NOTE:  this structure must stay the same size and layout on
+ * both 32-bit and 64-bit kernels.
+ */
+struct usb_functionfs_event {
+	union {
+		/* SETUP: the control request; DATA phase i/o precedes the
+		 * next event, (setup.bRequestType & USB_DIR_IN) is direction */
+		struct usb_ctrlrequest	setup;
+	} __attribute__((packed)) u;
+
+	/* one of enum usb_functionfs_event_type */
+	__u8				type;
+	__u8				_pad[3];	/* explicit padding */
+} __attribute__((packed));
+
+
+/* Endpoint ioctls */
+/* The same as in gadgetfs */
+
+/* IN transfers may be reported to the gadget driver as complete
+ *	when the fifo is loaded, before the host reads the data;
+ * OUT transfers may be reported to the host's "client" driver as
+ *	complete when they're sitting in the FIFO unread.
+ * THIS returns how many bytes are "unclaimed" in the endpoint fifo
+ * (needed for precise fault handling, when the hardware allows it)
+ */
+#define	FUNCTIONFS_FIFO_STATUS	_IO('g', 1)
+
+/* discards any unclaimed data in the fifo. */
+#define	FUNCTIONFS_FIFO_FLUSH	_IO('g', 2)
+
+/* resets endpoint halt+toggle; used to implement set_interface.
+ * some hardware (like pxa2xx) can't support this.
+ */
+#define	FUNCTIONFS_CLEAR_HALT	_IO('g', 3)
+
+/* Specific for functionfs */
+
+/*
+ * Returns reverse mapping of an interface.  Called on EP0.  If there
+ * is no such interface returns -EDOM.  If function is not active
+ * returns -ENODEV.
+ */
+#define	FUNCTIONFS_INTERFACE_REVMAP	_IO('g', 128)
+
+/*
+ * Returns real bEndpointAddress of an endpoint.  If function is not
+ * active returns -ENODEV.
+ */
+#define	FUNCTIONFS_ENDPOINT_REVMAP	_IO('g', 129)
+
+
+#ifdef __KERNEL__
+
+struct ffs_data;
+struct usb_composite_dev;
+struct usb_configuration;
+
+
+static int  functionfs_init(void) __attribute__((warn_unused_result));
+static void functionfs_cleanup(void);
+
+static int functionfs_bind(struct ffs_data *ffs, struct usb_composite_dev *cdev)
+	__attribute__((warn_unused_result, nonnull));
+static void functionfs_unbind(struct ffs_data *ffs)
+	__attribute__((nonnull));
+
+static int functionfs_add(struct usb_composite_dev *cdev,
+			  struct usb_configuration *c,
+			  struct ffs_data *ffs)
+	__attribute__((warn_unused_result, nonnull));
+
+
+static int functionfs_ready_callback(struct ffs_data *ffs)
+	__attribute__((warn_unused_result, nonnull));
+static void functionfs_closed_callback(struct ffs_data *ffs)
+	__attribute__((nonnull));
+static int functionfs_check_dev_callback(const char *dev_name)
+	__attribute__((warn_unused_result, nonnull));
+
+
+#endif
+
+#endif
-- 
cgit v1.2.3


From 22b4b6113ec5d0ff2a9175bc611f3f3f61e4e172 Mon Sep 17 00:00:00 2001
From: Ming Lei <tom.leiming@gmail.com>
Date: Wed, 12 May 2010 23:38:46 +0800
Subject: USB: remove usb_find_device

Now on one uses this function and it seems useless,
so remove usb_find_device.

[tom@tom linux-2.6-next]$ grep -r -n -I usb_find_device ./
drivers/media/dvb/dvb-usb/dvb-usb-init.c:160:static struct
dvb_usb_device_description * dvb_usb_find_device(struct usb_device
*udev,struct dvb_usb_device_properties *props, int *cold)

drivers/media/dvb/dvb-usb/dvb-usb-init.c:230:   if ((desc =
dvb_usb_find_device(udev,props,&cold)) == NULL) {

drivers/usb/core/usb.c:630: * usb_find_device - find a specific usb device in the system
drivers/usb/core/usb.c:642:struct usb_device *usb_find_device(u16 vendor_id, u16 product_id)

Signed-off-by: Ming Lei <tom.leiming@gmail.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/usb/core/usb.c | 37 -------------------------------------
 include/linux/usb.h    |  1 -
 2 files changed, 38 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 8180ce533ebf..902f821d8d14 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -626,43 +626,6 @@ exit:
 	return ret_dev;
 }
 
-/**
- * usb_find_device - find a specific usb device in the system
- * @vendor_id: the vendor id of the device to find
- * @product_id: the product id of the device to find
- *
- * Returns a pointer to a struct usb_device if such a specified usb
- * device is present in the system currently.  The usage count of the
- * device will be incremented if a device is found.  Make sure to call
- * usb_put_dev() when the caller is finished with the device.
- *
- * If a device with the specified vendor and product id is not found,
- * NULL is returned.
- */
-struct usb_device *usb_find_device(u16 vendor_id, u16 product_id)
-{
-	struct list_head *buslist;
-	struct usb_bus *bus;
-	struct usb_device *dev = NULL;
-
-	mutex_lock(&usb_bus_list_lock);
-	for (buslist = usb_bus_list.next;
-	     buslist != &usb_bus_list;
-	     buslist = buslist->next) {
-		bus = container_of(buslist, struct usb_bus, bus_list);
-		if (!bus->root_hub)
-			continue;
-		usb_lock_device(bus->root_hub);
-		dev = match_device(bus->root_hub, vendor_id, product_id);
-		usb_unlock_device(bus->root_hub);
-		if (dev)
-			goto exit;
-	}
-exit:
-	mutex_unlock(&usb_bus_list_lock);
-	return dev;
-}
-
 /**
  * usb_get_current_frame_number - return current bus frame number
  * @dev: the device whose bus is being queried
diff --git a/include/linux/usb.h b/include/linux/usb.h
index ce07062ebc28..3185eb46d85a 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -510,7 +510,6 @@ extern int usb_lock_device_for_reset(struct usb_device *udev,
 extern int usb_reset_device(struct usb_device *dev);
 extern void usb_queue_reset_device(struct usb_interface *dev);
 
-extern struct usb_device *usb_find_device(u16 vendor_id, u16 product_id);
 
 /* USB autosuspend and autoresume */
 #ifdef CONFIG_USB_SUSPEND
-- 
cgit v1.2.3


From 812219ab8facf07b94e4b3fe81e9cd3fe4129777 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 17 May 2010 10:40:55 -0700
Subject: USB: usb.h: checkpatch cleanups

Minor formatting changes to clean up the file.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 3185eb46d85a..88e854cd6b32 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -65,7 +65,7 @@ struct usb_host_endpoint {
 	struct usb_ss_ep_comp_descriptor	ss_ep_comp;
 	struct list_head		urb_list;
 	void				*hcpriv;
-	struct ep_device 		*ep_dev;	/* For sysfs info */
+	struct ep_device		*ep_dev;	/* For sysfs info */
 
 	unsigned char *extra;   /* Extra descriptors */
 	int extralen;
@@ -96,8 +96,8 @@ enum usb_interface_condition {
 /**
  * struct usb_interface - what usb device drivers talk to
  * @altsetting: array of interface structures, one for each alternate
- * 	setting that may be selected.  Each one includes a set of
- * 	endpoint configurations.  They will be in no particular order.
+ *	setting that may be selected.  Each one includes a set of
+ *	endpoint configurations.  They will be in no particular order.
  * @cur_altsetting: the current altsetting.
  * @num_altsetting: number of altsettings defined.
  * @intf_assoc: interface association descriptor
@@ -200,7 +200,7 @@ void usb_put_intf(struct usb_interface *intf);
 
 /* this maximum is arbitrary */
 #define USB_MAXINTERFACES	32
-#define USB_MAXIADS		USB_MAXINTERFACES/2
+#define USB_MAXIADS		(USB_MAXINTERFACES/2)
 
 /**
  * struct usb_interface_cache - long-term representation of a device interface
@@ -420,7 +420,7 @@ struct usb_tt;
  */
 struct usb_device {
 	int		devnum;
-	char		devpath [16];
+	char		devpath[16];
 	u32		route;
 	enum usb_device_state	state;
 	enum usb_device_speed	speed;
@@ -453,7 +453,7 @@ struct usb_device {
 	unsigned persist_enabled:1;
 	unsigned have_langid:1;
 	unsigned authorized:1;
- 	unsigned authenticated:1;
+	unsigned authenticated:1;
 	unsigned wusb:1;
 	int string_langid;
 
@@ -664,7 +664,7 @@ static inline int usb_make_path(struct usb_device *dev, char *buf, size_t size)
  * This macro is used to create a struct usb_device_id that matches a
  * specific device.
  */
-#define USB_DEVICE(vend,prod) \
+#define USB_DEVICE(vend, prod) \
 	.match_flags = USB_DEVICE_ID_MATCH_DEVICE, \
 	.idVendor = (vend), \
 	.idProduct = (prod)
@@ -1186,7 +1186,7 @@ struct urb {
 					 * current owner */
 	struct list_head anchor_list;	/* the URB may be anchored */
 	struct usb_anchor *anchor;
-	struct usb_device *dev; 	/* (in) pointer to associated device */
+	struct usb_device *dev;		/* (in) pointer to associated device */
 	struct usb_host_endpoint *ep;	/* (internal) pointer to endpoint */
 	unsigned int pipe;		/* (in) pipe information */
 	unsigned int stream_id;		/* (in) stream ID */
@@ -1535,21 +1535,21 @@ static inline unsigned int __create_pipe(struct usb_device *dev,
 }
 
 /* Create various pipes... */
-#define usb_sndctrlpipe(dev,endpoint)	\
+#define usb_sndctrlpipe(dev, endpoint)	\
 	((PIPE_CONTROL << 30) | __create_pipe(dev, endpoint))
-#define usb_rcvctrlpipe(dev,endpoint)	\
+#define usb_rcvctrlpipe(dev, endpoint)	\
 	((PIPE_CONTROL << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN)
-#define usb_sndisocpipe(dev,endpoint)	\
+#define usb_sndisocpipe(dev, endpoint)	\
 	((PIPE_ISOCHRONOUS << 30) | __create_pipe(dev, endpoint))
-#define usb_rcvisocpipe(dev,endpoint)	\
+#define usb_rcvisocpipe(dev, endpoint)	\
 	((PIPE_ISOCHRONOUS << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN)
-#define usb_sndbulkpipe(dev,endpoint)	\
+#define usb_sndbulkpipe(dev, endpoint)	\
 	((PIPE_BULK << 30) | __create_pipe(dev, endpoint))
-#define usb_rcvbulkpipe(dev,endpoint)	\
+#define usb_rcvbulkpipe(dev, endpoint)	\
 	((PIPE_BULK << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN)
-#define usb_sndintpipe(dev,endpoint)	\
+#define usb_sndintpipe(dev, endpoint)	\
 	((PIPE_INTERRUPT << 30) | __create_pipe(dev, endpoint))
-#define usb_rcvintpipe(dev,endpoint)	\
+#define usb_rcvintpipe(dev, endpoint)	\
 	((PIPE_INTERRUPT << 30) | __create_pipe(dev, endpoint) | USB_DIR_IN)
 
 static inline struct usb_host_endpoint *
-- 
cgit v1.2.3


From 0858a3a52f659dabf2860f350e5a6a61f069e851 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Mon, 17 May 2010 10:58:12 -0700
Subject: USB: include/usb/*.h checkpatch cleanup

Lots of minor formatting cleanups in includes/usb/ to make checkpatch
happier.

Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb/atmel_usba_udc.h |  2 +-
 include/linux/usb/audio.h          |  6 ++--
 include/linux/usb/ch9.h            | 12 +++----
 include/linux/usb/ehci_def.h       |  6 ++--
 include/linux/usb/gadget.h         |  2 +-
 include/linux/usb/gadgetfs.h       |  2 +-
 include/linux/usb/hcd.h            | 28 ++++++++--------
 include/linux/usb/langwell_udc.h   |  2 +-
 include/linux/usb/musb.h           |  4 +--
 include/linux/usb/net2280.h        |  6 ++--
 include/linux/usb/rndis_host.h     | 66 +++++++++++++++++++-------------------
 include/linux/usb/serial.h         |  2 +-
 include/linux/usb/usbnet.h         | 40 ++++++++++++-----------
 include/linux/usb/wusb-wa.h        |  2 +-
 14 files changed, 92 insertions(+), 88 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb/atmel_usba_udc.h b/include/linux/usb/atmel_usba_udc.h
index baf41c8616e9..ba99af275a31 100644
--- a/include/linux/usb/atmel_usba_udc.h
+++ b/include/linux/usb/atmel_usba_udc.h
@@ -15,7 +15,7 @@ struct usba_ep_data {
 
 struct usba_platform_data {
 	int			vbus_pin;
-	int		 	vbus_pin_inverted;
+	int			vbus_pin_inverted;
 	int			num_ep;
 	struct usba_ep_data	ep[0];
 };
diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h
index 4d3e450e2b03..6b892aeadf50 100644
--- a/include/linux/usb/audio.h
+++ b/include/linux/usb/audio.h
@@ -110,7 +110,7 @@ struct uac_ac_header_descriptor_v1 {
 #define UAC_DT_AC_HEADER_SIZE(n)	(8 + (n))
 
 /* As above, but more useful for defining your own descriptors: */
-#define DECLARE_UAC_AC_HEADER_DESCRIPTOR(n) 			\
+#define DECLARE_UAC_AC_HEADER_DESCRIPTOR(n)			\
 struct uac_ac_header_descriptor_v1_##n {			\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
@@ -178,7 +178,7 @@ struct uac_output_terminal_descriptor_v1 {
 #define UAC_DT_FEATURE_UNIT_SIZE(ch)		(7 + ((ch) + 1) * 2)
 
 /* As above, but more useful for defining your own descriptors: */
-#define DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(ch) 		\
+#define DECLARE_UAC_FEATURE_UNIT_DESCRIPTOR(ch)			\
 struct uac_feature_unit_descriptor_##ch {			\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
@@ -250,7 +250,7 @@ struct uac_format_type_i_discrete_descriptor {
 	__u8  tSamFreq[][3];
 } __attribute__ ((packed));
 
-#define DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(n) 		\
+#define DECLARE_UAC_FORMAT_TYPE_I_DISCRETE_DESC(n)		\
 struct uac_format_type_i_discrete_descriptor_##n {		\
 	__u8  bLength;						\
 	__u8  bDescriptorType;					\
diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h
index e779af5c800b..da2ed77d3e8d 100644
--- a/include/linux/usb/ch9.h
+++ b/include/linux/usb/ch9.h
@@ -477,7 +477,7 @@ static inline int usb_endpoint_xfer_isoc(
 static inline int usb_endpoint_is_bulk_in(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd));
+	return usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_in(epd);
 }
 
 /**
@@ -490,7 +490,7 @@ static inline int usb_endpoint_is_bulk_in(
 static inline int usb_endpoint_is_bulk_out(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_out(epd));
+	return usb_endpoint_xfer_bulk(epd) && usb_endpoint_dir_out(epd);
 }
 
 /**
@@ -503,7 +503,7 @@ static inline int usb_endpoint_is_bulk_out(
 static inline int usb_endpoint_is_int_in(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_in(epd));
+	return usb_endpoint_xfer_int(epd) && usb_endpoint_dir_in(epd);
 }
 
 /**
@@ -516,7 +516,7 @@ static inline int usb_endpoint_is_int_in(
 static inline int usb_endpoint_is_int_out(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_int(epd) && usb_endpoint_dir_out(epd));
+	return usb_endpoint_xfer_int(epd) && usb_endpoint_dir_out(epd);
 }
 
 /**
@@ -529,7 +529,7 @@ static inline int usb_endpoint_is_int_out(
 static inline int usb_endpoint_is_isoc_in(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_in(epd));
+	return usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_in(epd);
 }
 
 /**
@@ -542,7 +542,7 @@ static inline int usb_endpoint_is_isoc_in(
 static inline int usb_endpoint_is_isoc_out(
 				const struct usb_endpoint_descriptor *epd)
 {
-	return (usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_out(epd));
+	return usb_endpoint_xfer_isoc(epd) && usb_endpoint_dir_out(epd);
 }
 
 /*-------------------------------------------------------------------------*/
diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h
index af4b86f3aca3..80287af2a738 100644
--- a/include/linux/usb/ehci_def.h
+++ b/include/linux/usb/ehci_def.h
@@ -45,7 +45,7 @@ struct ehci_caps {
 #define HCC_CANPARK(p)		((p)&(1 << 2))  /* true: can park on async qh */
 #define HCC_PGM_FRAMELISTLEN(p) ((p)&(1 << 1))  /* true: periodic_size changes*/
 #define HCC_64BIT_ADDR(p)       ((p)&(1))       /* true: can use 64-bit addr */
-	u8		portroute [8];	 /* nibbles for routing - offset 0xC */
+	u8		portroute[8];	 /* nibbles for routing - offset 0xC */
 } __attribute__ ((packed));
 
 
@@ -92,14 +92,14 @@ struct ehci_regs {
 	/* ASYNCLISTADDR: offset 0x18 */
 	u32		async_next;	/* address of next async queue head */
 
-	u32		reserved [9];
+	u32		reserved[9];
 
 	/* CONFIGFLAG: offset 0x40 */
 	u32		configured_flag;
 #define FLAG_CF		(1<<0)		/* true: we'll support "high speed" */
 
 	/* PORTSC: offset 0x44 */
-	u32		port_status [0];	/* up to N_PORTS */
+	u32		port_status[0];	/* up to N_PORTS */
 /* 31:23 reserved */
 #define PORT_WKOC_E	(1<<22)		/* wake on overcurrent (enable) */
 #define PORT_WKDISC_E	(1<<21)		/* wake on disconnect (enable) */
diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h
index db6141cdb77b..d3ef42d7d2f0 100644
--- a/include/linux/usb/gadget.h
+++ b/include/linux/usb/gadget.h
@@ -500,7 +500,7 @@ static inline struct usb_gadget *dev_to_usb_gadget(struct device *dev)
 }
 
 /* iterates the non-control endpoints; 'tmp' is a struct usb_ep pointer */
-#define gadget_for_each_ep(tmp,gadget) \
+#define gadget_for_each_ep(tmp, gadget) \
 	list_for_each_entry(tmp, &(gadget)->ep_list, ep_list)
 
 
diff --git a/include/linux/usb/gadgetfs.h b/include/linux/usb/gadgetfs.h
index 612102e4d75e..0bb12e0d4f8f 100644
--- a/include/linux/usb/gadgetfs.h
+++ b/include/linux/usb/gadgetfs.h
@@ -19,7 +19,7 @@
 #define __LINUX_USB_GADGETFS_H
 
 #include <linux/types.h>
-#include <asm/ioctl.h>
+#include <linux/ioctl.h>
 
 #include <linux/usb/ch9.h>
 
diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
index aca73a5c3af7..2e3a4ea1a3da 100644
--- a/include/linux/usb/hcd.h
+++ b/include/linux/usb/hcd.h
@@ -126,7 +126,7 @@ struct usb_hcd {
 
 
 #define HCD_BUFFER_POOLS	4
-	struct dma_pool		*pool [HCD_BUFFER_POOLS];
+	struct dma_pool		*pool[HCD_BUFFER_POOLS];
 
 	int			state;
 #	define	__ACTIVE		0x01
@@ -219,12 +219,12 @@ struct hc_driver {
 				struct urb *urb, int status);
 
 	/* hw synch, freeing endpoint resources that urb_dequeue can't */
-	void 	(*endpoint_disable)(struct usb_hcd *hcd,
+	void	(*endpoint_disable)(struct usb_hcd *hcd,
 			struct usb_host_endpoint *ep);
 
 	/* (optional) reset any endpoint state such as sequence number
 	   and current window */
-	void 	(*endpoint_reset)(struct usb_hcd *hcd,
+	void	(*endpoint_reset)(struct usb_hcd *hcd,
 			struct usb_host_endpoint *ep);
 
 	/* root hub support */
@@ -265,16 +265,18 @@ struct hc_driver {
 	/* Note that add_endpoint() can only be called once per endpoint before
 	 * check_bandwidth() or reset_bandwidth() must be called.
 	 * drop_endpoint() can only be called once per endpoint also.
-	 * A call to xhci_drop_endpoint() followed by a call to xhci_add_endpoint() will
-	 * add the endpoint to the schedule with possibly new parameters denoted by a
-	 * different endpoint descriptor in usb_host_endpoint.
-	 * A call to xhci_add_endpoint() followed by a call to xhci_drop_endpoint() is
-	 * not allowed.
+	 * A call to xhci_drop_endpoint() followed by a call to
+	 * xhci_add_endpoint() will add the endpoint to the schedule with
+	 * possibly new parameters denoted by a different endpoint descriptor
+	 * in usb_host_endpoint.  A call to xhci_add_endpoint() followed by a
+	 * call to xhci_drop_endpoint() is not allowed.
 	 */
 		/* Allocate endpoint resources and add them to a new schedule */
-	int 	(*add_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
+	int	(*add_endpoint)(struct usb_hcd *, struct usb_device *,
+				struct usb_host_endpoint *);
 		/* Drop an endpoint from a new schedule */
-	int 	(*drop_endpoint)(struct usb_hcd *, struct usb_device *, struct usb_host_endpoint *);
+	int	(*drop_endpoint)(struct usb_hcd *, struct usb_device *,
+				 struct usb_host_endpoint *);
 		/* Check that a new hardware configuration, set using
 		 * endpoint_enable and endpoint_disable, does not exceed bus
 		 * bandwidth.  This must be called before any set configuration
@@ -484,8 +486,8 @@ extern void usb_ep0_reinit(struct usb_device *);
 #define HS_NSECS_ISO(bytes) (((38 * 8 * 2083) \
 	+ (2083UL * (3 + BitTime(bytes))))/1000 \
 	+ USB2_HOST_DELAY)
-#define HS_USECS(bytes) NS_TO_US (HS_NSECS(bytes))
-#define HS_USECS_ISO(bytes) NS_TO_US (HS_NSECS_ISO(bytes))
+#define HS_USECS(bytes)		NS_TO_US(HS_NSECS(bytes))
+#define HS_USECS_ISO(bytes)	NS_TO_US(HS_NSECS_ISO(bytes))
 
 extern long usb_calc_bus_time(int speed, int is_input,
 			int isoc, int bytecount);
@@ -596,7 +598,7 @@ static inline void usbmon_urb_complete(struct usb_bus *bus, struct urb *urb,
 
 /* hub.h ... DeviceRemovable in 2.4.2-ac11, gone in 2.4.10 */
 /* bleech -- resurfaced in 2.4.11 or 2.4.12 */
-#define bitmap 	DeviceRemovable
+#define bitmap	DeviceRemovable
 
 
 /*-------------------------------------------------------------------------*/
diff --git a/include/linux/usb/langwell_udc.h b/include/linux/usb/langwell_udc.h
index c949178a6530..2d2d1bbad9d2 100644
--- a/include/linux/usb/langwell_udc.h
+++ b/include/linux/usb/langwell_udc.h
@@ -181,7 +181,7 @@ struct langwell_op_regs {
 #define	PORTS_PIC	(BIT(15) | BIT(14))	/* port indicator control */
 #define	PORTS_PO	BIT(13)	/* port owner */
 #define	PORTS_PP	BIT(12)	/* port power */
-#define	PORTS_LS	(BIT(11) | BIT(10)) 	/* line status */
+#define	PORTS_LS	(BIT(11) | BIT(10))	/* line status */
 #define	PORTS_SLP	BIT(9)	/* suspend using L1 */
 #define	PORTS_PR	BIT(8)	/* port reset */
 #define	PORTS_SUSP	BIT(7)	/* suspend */
diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h
index fbb83fe21851..ee2dd1d506ed 100644
--- a/include/linux/usb/musb.h
+++ b/include/linux/usb/musb.h
@@ -86,8 +86,8 @@ struct musb_hdrc_config {
 
 	struct musb_hdrc_eps_bits *eps_bits __deprecated;
 #ifdef CONFIG_BLACKFIN
-        /* A GPIO controlling VRSEL in Blackfin */
-        unsigned int    gpio_vrsel;
+	/* A GPIO controlling VRSEL in Blackfin */
+	unsigned int	gpio_vrsel;
 	unsigned int	gpio_vrsel_active;
 #endif
 
diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h
index 96ca549a778d..148b8fa5b1a2 100644
--- a/include/linux/usb/net2280.h
+++ b/include/linux/usb/net2280.h
@@ -353,7 +353,7 @@ struct net2280_dma_regs {	/* [11.7] */
 #define     DMA_TRANSACTION_DONE_INTERRUPT                      24
 #define     DMA_ABORT                                           1
 #define     DMA_START                                           0
-	u32		_unused0 [2];
+	u32		_unused0[2];
 	/* offset 0x0190, 0x01b0, 0x01d0, 0x01f0, */
 	u32		dmacount;
 #define     VALID_BIT                                           31
@@ -374,7 +374,7 @@ struct net2280_dep_regs {	/* [11.8] */
 	u32		dep_cfg;
 	/* offset 0x0204, 0x0214, 0x224, 0x234, 0x244 */
 	u32		dep_rsp;
-	u32		_unused [2];
+	u32		_unused[2];
 } __attribute__ ((packed));
 
 /* configurable endpoint registers, BAR0 + 0x0300 ... array of seven structs
@@ -437,7 +437,7 @@ struct net2280_ep_regs {	/* [11.9] */
 	/* offset 0x0310, 0x0330, 0x0350, 0x0370, 0x0390, 0x03b0, 0x03d0 */
 	u32		ep_avail;
 	u32		ep_data;
-	u32		_unused0 [2];
+	u32		_unused0[2];
 } __attribute__ ((packed));
 
 #endif /* __LINUX_USB_NET2280_H */
diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h
index 1ef1ebc2b04f..05ef52861988 100644
--- a/include/linux/usb/rndis_host.h
+++ b/include/linux/usb/rndis_host.h
@@ -34,10 +34,10 @@
 struct rndis_msg_hdr {
 	__le32	msg_type;			/* RNDIS_MSG_* */
 	__le32	msg_len;
-	// followed by data that varies between messages
+	/* followed by data that varies between messages */
 	__le32	request_id;
 	__le32	status;
-	// ... and more
+	/* ... and more */
 } __attribute__ ((packed));
 
 /* MS-Windows uses this strange size, but RNDIS spec says 1024 minimum */
@@ -92,67 +92,67 @@ struct rndis_msg_hdr {
 
 struct rndis_data_hdr {
 	__le32	msg_type;		/* RNDIS_MSG_PACKET */
-	__le32	msg_len;		// rndis_data_hdr + data_len + pad
-	__le32	data_offset;		// 36 -- right after header
-	__le32	data_len;		// ... real packet size
+	__le32	msg_len;		/* rndis_data_hdr + data_len + pad */
+	__le32	data_offset;		/* 36 -- right after header */
+	__le32	data_len;		/* ... real packet size */
 
-	__le32	oob_data_offset;	// zero
-	__le32	oob_data_len;		// zero
-	__le32	num_oob;		// zero
-	__le32	packet_data_offset;	// zero
+	__le32	oob_data_offset;	/* zero */
+	__le32	oob_data_len;		/* zero */
+	__le32	num_oob;		/* zero */
+	__le32	packet_data_offset;	/* zero */
 
-	__le32	packet_data_len;	// zero
-	__le32	vc_handle;		// zero
-	__le32	reserved;		// zero
+	__le32	packet_data_len;	/* zero */
+	__le32	vc_handle;		/* zero */
+	__le32	reserved;		/* zero */
 } __attribute__ ((packed));
 
 struct rndis_init {		/* OUT */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_INIT */
-	__le32	msg_len;			// 24
+	__le32	msg_len;			/* 24 */
 	__le32	request_id;
-	__le32	major_version;			// of rndis (1.0)
+	__le32	major_version;			/* of rndis (1.0) */
 	__le32	minor_version;
 	__le32	max_transfer_size;
 } __attribute__ ((packed));
 
 struct rndis_init_c {		/* IN */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_INIT_C */
 	__le32	msg_len;
 	__le32	request_id;
 	__le32	status;
-	__le32	major_version;			// of rndis (1.0)
+	__le32	major_version;			/* of rndis (1.0) */
 	__le32	minor_version;
 	__le32	device_flags;
-	__le32	medium;				// zero == 802.3
+	__le32	medium;				/* zero == 802.3 */
 	__le32	max_packets_per_message;
 	__le32	max_transfer_size;
-	__le32	packet_alignment;		// max 7; (1<<n) bytes
-	__le32	af_list_offset;			// zero
-	__le32	af_list_size;			// zero
+	__le32	packet_alignment;		/* max 7; (1<<n) bytes */
+	__le32	af_list_offset;			/* zero */
+	__le32	af_list_size;			/* zero */
 } __attribute__ ((packed));
 
 struct rndis_halt {		/* OUT (no reply) */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_HALT */
 	__le32	msg_len;
 	__le32	request_id;
 } __attribute__ ((packed));
 
 struct rndis_query {		/* OUT */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_QUERY */
 	__le32	msg_len;
 	__le32	request_id;
 	__le32	oid;
 	__le32	len;
 	__le32	offset;
-/*?*/	__le32	handle;				// zero
+/*?*/	__le32	handle;				/* zero */
 } __attribute__ ((packed));
 
 struct rndis_query_c {		/* IN */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_QUERY_C */
 	__le32	msg_len;
 	__le32	request_id;
@@ -162,18 +162,18 @@ struct rndis_query_c {		/* IN */
 } __attribute__ ((packed));
 
 struct rndis_set {		/* OUT */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_SET */
 	__le32	msg_len;
 	__le32	request_id;
 	__le32	oid;
 	__le32	len;
 	__le32	offset;
-/*?*/	__le32	handle;				// zero
+/*?*/	__le32	handle;				/* zero */
 } __attribute__ ((packed));
 
 struct rndis_set_c {		/* IN */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_SET_C */
 	__le32	msg_len;
 	__le32	request_id;
@@ -181,14 +181,14 @@ struct rndis_set_c {		/* IN */
 } __attribute__ ((packed));
 
 struct rndis_reset {		/* IN */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_RESET */
 	__le32	msg_len;
 	__le32	reserved;
 } __attribute__ ((packed));
 
 struct rndis_reset_c {		/* OUT */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_RESET_C */
 	__le32	msg_len;
 	__le32	status;
@@ -196,7 +196,7 @@ struct rndis_reset_c {		/* OUT */
 } __attribute__ ((packed));
 
 struct rndis_indicate {		/* IN (unrequested) */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_INDICATE */
 	__le32	msg_len;
 	__le32	status;
@@ -208,14 +208,14 @@ struct rndis_indicate {		/* IN (unrequested) */
 } __attribute__ ((packed));
 
 struct rndis_keepalive {	/* OUT (optionally IN) */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_KEEPALIVE */
 	__le32	msg_len;
 	__le32	request_id;
 } __attribute__ ((packed));
 
 struct rndis_keepalive_c {	/* IN (optionally OUT) */
-	// header and:
+	/* header and: */
 	__le32	msg_type;			/* RNDIS_MSG_KEEPALIVE_C */
 	__le32	msg_len;
 	__le32	request_id;
diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h
index 061c997ae0cf..84a4c44c208b 100644
--- a/include/linux/usb/serial.h
+++ b/include/linux/usb/serial.h
@@ -52,7 +52,7 @@ enum port_dev_state {
  * @interrupt_out_size: the size of the interrupt_out_buffer, in bytes.
  * @interrupt_out_urb: pointer to the interrupt out struct urb for this port.
  * @interrupt_out_endpointAddress: endpoint address for the interrupt out pipe
- * 	for this port.
+ *	for this port.
  * @bulk_in_buffer: pointer to the bulk in buffer for this port.
  * @bulk_in_size: the size of the bulk_in_buffer, in bytes.
  * @read_urb: pointer to the bulk in struct urb for this port.
diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h
index df1e83dd9a54..7ae27a473818 100644
--- a/include/linux/usb/usbnet.h
+++ b/include/linux/usb/usbnet.h
@@ -43,7 +43,7 @@ struct usbnet {
 	/* protocol/interface state */
 	struct net_device	*net;
 	int			msg_enable;
-	unsigned long		data [5];
+	unsigned long		data[5];
 	u32			xid;
 	u32			hard_mtu;	/* count any extra framing */
 	size_t			rx_urb_size;	/* size for rx urbs */
@@ -148,8 +148,8 @@ struct driver_info {
  * much everything except custom framing and chip-specific stuff.
  */
 extern int usbnet_probe(struct usb_interface *, const struct usb_device_id *);
-extern int usbnet_suspend (struct usb_interface *, pm_message_t );
-extern int usbnet_resume (struct usb_interface *);
+extern int usbnet_suspend(struct usb_interface *, pm_message_t);
+extern int usbnet_resume(struct usb_interface *);
 extern void usbnet_disconnect(struct usb_interface *);
 
 
@@ -165,8 +165,8 @@ struct cdc_state {
 	struct usb_interface		*data;
 };
 
-extern int usbnet_generic_cdc_bind (struct usbnet *, struct usb_interface *);
-extern void usbnet_cdc_unbind (struct usbnet *, struct usb_interface *);
+extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *);
+extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *);
 
 /* CDC and RNDIS support the same host-chosen packet filters for IN transfers */
 #define	DEFAULT_FILTER	(USB_CDC_PACKET_TYPE_BROADCAST \
@@ -189,29 +189,31 @@ struct skb_data {	/* skb->cb is one of these */
 	size_t			length;
 };
 
-extern int usbnet_open (struct net_device *net);
-extern int usbnet_stop (struct net_device *net);
-extern netdev_tx_t usbnet_start_xmit (struct sk_buff *skb,
-				      struct net_device *net);
-extern void usbnet_tx_timeout (struct net_device *net);
-extern int usbnet_change_mtu (struct net_device *net, int new_mtu);
+extern int usbnet_open(struct net_device *net);
+extern int usbnet_stop(struct net_device *net);
+extern netdev_tx_t usbnet_start_xmit(struct sk_buff *skb,
+				     struct net_device *net);
+extern void usbnet_tx_timeout(struct net_device *net);
+extern int usbnet_change_mtu(struct net_device *net, int new_mtu);
 
 extern int usbnet_get_endpoints(struct usbnet *, struct usb_interface *);
 extern int usbnet_get_ethernet_addr(struct usbnet *, int);
-extern void usbnet_defer_kevent (struct usbnet *, int);
-extern void usbnet_skb_return (struct usbnet *, struct sk_buff *);
+extern void usbnet_defer_kevent(struct usbnet *, int);
+extern void usbnet_skb_return(struct usbnet *, struct sk_buff *);
 extern void usbnet_unlink_rx_urbs(struct usbnet *);
 
 extern void usbnet_pause_rx(struct usbnet *);
 extern void usbnet_resume_rx(struct usbnet *);
 extern void usbnet_purge_paused_rxq(struct usbnet *);
 
-extern int usbnet_get_settings (struct net_device *net, struct ethtool_cmd *cmd);
-extern int usbnet_set_settings (struct net_device *net, struct ethtool_cmd *cmd);
-extern u32 usbnet_get_link (struct net_device *net);
-extern u32 usbnet_get_msglevel (struct net_device *);
-extern void usbnet_set_msglevel (struct net_device *, u32);
-extern void usbnet_get_drvinfo (struct net_device *, struct ethtool_drvinfo *);
+extern int usbnet_get_settings(struct net_device *net,
+			       struct ethtool_cmd *cmd);
+extern int usbnet_set_settings(struct net_device *net,
+			       struct ethtool_cmd *cmd);
+extern u32 usbnet_get_link(struct net_device *net);
+extern u32 usbnet_get_msglevel(struct net_device *);
+extern void usbnet_set_msglevel(struct net_device *, u32);
+extern void usbnet_get_drvinfo(struct net_device *, struct ethtool_drvinfo *);
 extern int usbnet_nway_reset(struct net_device *net);
 
 #endif /* __LINUX_USB_USBNET_H */
diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h
index fb7c359bdfba..f9dec37f617b 100644
--- a/include/linux/usb/wusb-wa.h
+++ b/include/linux/usb/wusb-wa.h
@@ -87,7 +87,7 @@ enum rpipe_crs {
  * FIXME: explain rpipes
  */
 struct usb_rpipe_descriptor {
-	u8 	bLength;
+	u8	bLength;
 	u8	bDescriptorType;
 	__le16  wRPipeIndex;
 	__le16	wRequests;
-- 
cgit v1.2.3


From e26bcf37234c67624f62d9fc95f922b8dbda1363 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@suse.de>
Date: Thu, 29 Apr 2010 15:46:07 -0700
Subject: USB: remove unused usb_buffer_alloc and usb_buffer_free macros

Now that all callers are converted over, remove the compatibility
functions and all is good.

Cc: Daniel Mack <daniel@caiaq.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/usb.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/usb.h b/include/linux/usb.h
index 88e854cd6b32..d5922a877994 100644
--- a/include/linux/usb.h
+++ b/include/linux/usb.h
@@ -1372,18 +1372,6 @@ void *usb_alloc_coherent(struct usb_device *dev, size_t size,
 void usb_free_coherent(struct usb_device *dev, size_t size,
 	void *addr, dma_addr_t dma);
 
-/* Compatible macros while we switch over */
-static inline void *usb_buffer_alloc(struct usb_device *dev, size_t size,
-				     gfp_t mem_flags, dma_addr_t *dma)
-{
-	return usb_alloc_coherent(dev, size, mem_flags, dma);
-}
-static inline void usb_buffer_free(struct usb_device *dev, size_t size,
-				   void *addr, dma_addr_t dma)
-{
-	return usb_free_coherent(dev, size, addr, dma);
-}
-
 #if 0
 struct urb *usb_buffer_map(struct urb *urb);
 void usb_buffer_dmasync(struct urb *urb);
-- 
cgit v1.2.3


From 53197fc49549240f6c6a963b2713a4cd9517964b Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Fri, 2 Apr 2010 11:48:03 -0500
Subject: Separate the gdbstub from the debug core

Split the former kernel/kgdb.c into debug_core.c, which contains the
kernel debugger exception logic, and gdbstub.c, which contains the
logic for allowing gdb to talk to the debug core.

This also created a private include file called debug_core.h, which
contains all the definitions needed to glue the debug core to any
other debugger connections.

CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h      |   1 +
 kernel/debug/Makefile     |   3 +-
 kernel/debug/debug_core.c | 994 ++--------------------------------------------
 kernel/debug/debug_core.h |  55 +++
 kernel/debug/gdbstub.c    | 934 +++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 1030 insertions(+), 957 deletions(-)
 create mode 100644 kernel/debug/debug_core.h
 create mode 100644 kernel/debug/gdbstub.c

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 19ec41a183f5..4830142ec339 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -264,6 +264,7 @@ extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
 
 extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
+extern struct kgdb_io *dbg_io_ops;
 
 extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
 extern int kgdb_mem2hex(char *mem, char *buf, int count);
diff --git a/kernel/debug/Makefile b/kernel/debug/Makefile
index 5d7850415266..fd4dc6e7782c 100644
--- a/kernel/debug/Makefile
+++ b/kernel/debug/Makefile
@@ -2,5 +2,4 @@
 # Makefile for the linux kernel debugger
 #
 
-obj-$(CONFIG_KGDB) += debug_core.o
-
+obj-$(CONFIG_KGDB) += debug_core.o gdbstub.o
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 11f3515ca83f..7e03969330bc 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -1,5 +1,5 @@
 /*
- * KGDB stub.
+ * Kernel Debug Core
  *
  * Maintainer: Jason Wessel <jason.wessel@windriver.com>
  *
@@ -9,7 +9,7 @@
  * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
  * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
  * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
- * Copyright (C) 2005-2008 Wind River Systems, Inc.
+ * Copyright (C) 2005-2009 Wind River Systems, Inc.
  * Copyright (C) 2007 MontaVista Software, Inc.
  * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
  *
@@ -37,7 +37,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
-#include <linux/reboot.h>
 #include <linux/string.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
@@ -52,34 +51,12 @@
 #include <asm/byteorder.h>
 #include <asm/atomic.h>
 #include <asm/system.h>
-#include <asm/unaligned.h>
 
-static int kgdb_break_asap;
-
-#define KGDB_MAX_THREAD_QUERY 17
-struct kgdb_state {
-	int			ex_vector;
-	int			signo;
-	int			err_code;
-	int			cpu;
-	int			pass_exception;
-	unsigned long		thr_query;
-	unsigned long		threadid;
-	long			kgdb_usethreadid;
-	struct pt_regs		*linux_regs;
-};
+#include "debug_core.h"
 
-/* Exception state values */
-#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
-#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
-#define DCPU_IS_SLAVE    0x4 /* Slave cpu enter exception */
-#define DCPU_SSTEP       0x8 /* CPU is single stepping */
+static int kgdb_break_asap;
 
-static struct debuggerinfo_struct {
-	void			*debuggerinfo;
-	struct task_struct	*task;
-	int 			exception_state;
-} kgdb_info[NR_CPUS];
+struct debuggerinfo_struct kgdb_info[NR_CPUS];
 
 /**
  * kgdb_connected - Is a host GDB connected to us?
@@ -93,7 +70,7 @@ static int			kgdb_io_module_registered;
 /* Guard for recursive entry */
 static int			exception_level;
 
-static struct kgdb_io		*kgdb_io_ops;
+struct kgdb_io		*dbg_io_ops;
 static DEFINE_SPINLOCK(kgdb_registration_lock);
 
 /* kgdb console driver is loaded */
@@ -136,16 +113,7 @@ struct task_struct		*kgdb_usethread;
 struct task_struct		*kgdb_contthread;
 
 int				kgdb_single_step;
-pid_t				kgdb_sstep_pid;
-
-/* Our I/O buffers. */
-static char			remcom_in_buffer[BUFMAX];
-static char			remcom_out_buffer[BUFMAX];
-
-/* Storage for the registers, in GDB format. */
-static unsigned long		gdb_regs[(NUMREGBYTES +
-					sizeof(unsigned long) - 1) /
-					sizeof(unsigned long)];
+static pid_t			kgdb_sstep_pid;
 
 /* to keep track of the CPU which is doing the single stepping*/
 atomic_t			kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
@@ -246,324 +214,6 @@ void __weak kgdb_disable_hw_debug(struct pt_regs *regs)
 {
 }
 
-/*
- * GDB remote protocol parser:
- */
-
-static int hex(char ch)
-{
-	if ((ch >= 'a') && (ch <= 'f'))
-		return ch - 'a' + 10;
-	if ((ch >= '0') && (ch <= '9'))
-		return ch - '0';
-	if ((ch >= 'A') && (ch <= 'F'))
-		return ch - 'A' + 10;
-	return -1;
-}
-
-/* scan for the sequence $<data>#<checksum> */
-static void get_packet(char *buffer)
-{
-	unsigned char checksum;
-	unsigned char xmitcsum;
-	int count;
-	char ch;
-
-	do {
-		/*
-		 * Spin and wait around for the start character, ignore all
-		 * other characters:
-		 */
-		while ((ch = (kgdb_io_ops->read_char())) != '$')
-			/* nothing */;
-
-		kgdb_connected = 1;
-		checksum = 0;
-		xmitcsum = -1;
-
-		count = 0;
-
-		/*
-		 * now, read until a # or end of buffer is found:
-		 */
-		while (count < (BUFMAX - 1)) {
-			ch = kgdb_io_ops->read_char();
-			if (ch == '#')
-				break;
-			checksum = checksum + ch;
-			buffer[count] = ch;
-			count = count + 1;
-		}
-		buffer[count] = 0;
-
-		if (ch == '#') {
-			xmitcsum = hex(kgdb_io_ops->read_char()) << 4;
-			xmitcsum += hex(kgdb_io_ops->read_char());
-
-			if (checksum != xmitcsum)
-				/* failed checksum */
-				kgdb_io_ops->write_char('-');
-			else
-				/* successful transfer */
-				kgdb_io_ops->write_char('+');
-			if (kgdb_io_ops->flush)
-				kgdb_io_ops->flush();
-		}
-	} while (checksum != xmitcsum);
-}
-
-/*
- * Send the packet in buffer.
- * Check for gdb connection if asked for.
- */
-static void put_packet(char *buffer)
-{
-	unsigned char checksum;
-	int count;
-	char ch;
-
-	/*
-	 * $<packet info>#<checksum>.
-	 */
-	while (1) {
-		kgdb_io_ops->write_char('$');
-		checksum = 0;
-		count = 0;
-
-		while ((ch = buffer[count])) {
-			kgdb_io_ops->write_char(ch);
-			checksum += ch;
-			count++;
-		}
-
-		kgdb_io_ops->write_char('#');
-		kgdb_io_ops->write_char(hex_asc_hi(checksum));
-		kgdb_io_ops->write_char(hex_asc_lo(checksum));
-		if (kgdb_io_ops->flush)
-			kgdb_io_ops->flush();
-
-		/* Now see what we get in reply. */
-		ch = kgdb_io_ops->read_char();
-
-		if (ch == 3)
-			ch = kgdb_io_ops->read_char();
-
-		/* If we get an ACK, we are done. */
-		if (ch == '+')
-			return;
-
-		/*
-		 * If we get the start of another packet, this means
-		 * that GDB is attempting to reconnect.  We will NAK
-		 * the packet being sent, and stop trying to send this
-		 * packet.
-		 */
-		if (ch == '$') {
-			kgdb_io_ops->write_char('-');
-			if (kgdb_io_ops->flush)
-				kgdb_io_ops->flush();
-			return;
-		}
-	}
-}
-
-/*
- * Convert the memory pointed to by mem into hex, placing result in buf.
- * Return a pointer to the last char put in buf (null). May return an error.
- */
-int kgdb_mem2hex(char *mem, char *buf, int count)
-{
-	char *tmp;
-	int err;
-
-	/*
-	 * We use the upper half of buf as an intermediate buffer for the
-	 * raw memory copy.  Hex conversion will work against this one.
-	 */
-	tmp = buf + count;
-
-	err = probe_kernel_read(tmp, mem, count);
-	if (!err) {
-		while (count > 0) {
-			buf = pack_hex_byte(buf, *tmp);
-			tmp++;
-			count--;
-		}
-
-		*buf = 0;
-	}
-
-	return err;
-}
-
-/*
- * Copy the binary array pointed to by buf into mem.  Fix $, #, and
- * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
- * The input buf is overwitten with the result to write to mem.
- */
-static int kgdb_ebin2mem(char *buf, char *mem, int count)
-{
-	int size = 0;
-	char *c = buf;
-
-	while (count-- > 0) {
-		c[size] = *buf++;
-		if (c[size] == 0x7d)
-			c[size] = *buf++ ^ 0x20;
-		size++;
-	}
-
-	return probe_kernel_write(mem, c, size);
-}
-
-/*
- * Convert the hex array pointed to by buf into binary to be placed in mem.
- * Return a pointer to the character AFTER the last byte written.
- * May return an error.
- */
-int kgdb_hex2mem(char *buf, char *mem, int count)
-{
-	char *tmp_raw;
-	char *tmp_hex;
-
-	/*
-	 * We use the upper half of buf as an intermediate buffer for the
-	 * raw memory that is converted from hex.
-	 */
-	tmp_raw = buf + count * 2;
-
-	tmp_hex = tmp_raw - 1;
-	while (tmp_hex >= buf) {
-		tmp_raw--;
-		*tmp_raw = hex(*tmp_hex--);
-		*tmp_raw |= hex(*tmp_hex--) << 4;
-	}
-
-	return probe_kernel_write(mem, tmp_raw, count);
-}
-
-/*
- * While we find nice hex chars, build a long_val.
- * Return number of chars processed.
- */
-int kgdb_hex2long(char **ptr, unsigned long *long_val)
-{
-	int hex_val;
-	int num = 0;
-	int negate = 0;
-
-	*long_val = 0;
-
-	if (**ptr == '-') {
-		negate = 1;
-		(*ptr)++;
-	}
-	while (**ptr) {
-		hex_val = hex(**ptr);
-		if (hex_val < 0)
-			break;
-
-		*long_val = (*long_val << 4) | hex_val;
-		num++;
-		(*ptr)++;
-	}
-
-	if (negate)
-		*long_val = -*long_val;
-
-	return num;
-}
-
-/* Write memory due to an 'M' or 'X' packet. */
-static int write_mem_msg(int binary)
-{
-	char *ptr = &remcom_in_buffer[1];
-	unsigned long addr;
-	unsigned long length;
-	int err;
-
-	if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
-	    kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
-		if (binary)
-			err = kgdb_ebin2mem(ptr, (char *)addr, length);
-		else
-			err = kgdb_hex2mem(ptr, (char *)addr, length);
-		if (err)
-			return err;
-		if (CACHE_FLUSH_IS_SAFE)
-			flush_icache_range(addr, addr + length);
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static void error_packet(char *pkt, int error)
-{
-	error = -error;
-	pkt[0] = 'E';
-	pkt[1] = hex_asc[(error / 10)];
-	pkt[2] = hex_asc[(error % 10)];
-	pkt[3] = '\0';
-}
-
-/*
- * Thread ID accessors. We represent a flat TID space to GDB, where
- * the per CPU idle threads (which under Linux all have PID 0) are
- * remapped to negative TIDs.
- */
-
-#define BUF_THREAD_ID_SIZE	16
-
-static char *pack_threadid(char *pkt, unsigned char *id)
-{
-	char *limit;
-
-	limit = pkt + BUF_THREAD_ID_SIZE;
-	while (pkt < limit)
-		pkt = pack_hex_byte(pkt, *id++);
-
-	return pkt;
-}
-
-static void int_to_threadref(unsigned char *id, int value)
-{
-	unsigned char *scan;
-	int i = 4;
-
-	scan = (unsigned char *)id;
-	while (i--)
-		*scan++ = 0;
-	put_unaligned_be32(value, scan);
-}
-
-static struct task_struct *getthread(struct pt_regs *regs, int tid)
-{
-	/*
-	 * Non-positive TIDs are remapped to the cpu shadow information
-	 */
-	if (tid == 0 || tid == -1)
-		tid = -atomic_read(&kgdb_active) - 2;
-	if (tid < -1 && tid > -NR_CPUS - 2) {
-		if (kgdb_info[-tid - 2].task)
-			return kgdb_info[-tid - 2].task;
-		else
-			return idle_task(-tid - 2);
-	}
-	if (tid <= 0) {
-		printk(KERN_ERR "KGDB: Internal thread select error\n");
-		dump_stack();
-		return NULL;
-	}
-
-	/*
-	 * find_task_by_pid_ns() does not take the tasklist lock anymore
-	 * but is nicely RCU locked - hence is a pretty resilient
-	 * thing to use:
-	 */
-	return find_task_by_pid_ns(tid, &init_pid_ns);
-}
-
 /*
  * Some architectures need cache flushes when we set/clear a
  * breakpoint:
@@ -584,7 +234,7 @@ static void kgdb_flush_swbreak_addr(unsigned long addr)
 /*
  * SW breakpoint management:
  */
-static int kgdb_activate_sw_breakpoints(void)
+int dbg_activate_sw_breakpoints(void)
 {
 	unsigned long addr;
 	int error;
@@ -610,7 +260,7 @@ static int kgdb_activate_sw_breakpoints(void)
 	return ret;
 }
 
-static int kgdb_set_sw_break(unsigned long addr)
+int dbg_set_sw_break(unsigned long addr)
 {
 	int err = kgdb_validate_break_address(addr);
 	int breakno = -1;
@@ -675,7 +325,7 @@ static int kgdb_deactivate_sw_breakpoints(void)
 	return ret;
 }
 
-static int kgdb_remove_sw_break(unsigned long addr)
+int dbg_remove_sw_break(unsigned long addr)
 {
 	int i;
 
@@ -701,7 +351,7 @@ int kgdb_isremovedbreak(unsigned long addr)
 	return 0;
 }
 
-static int remove_all_break(void)
+int dbg_remove_all_break(void)
 {
 	unsigned long addr;
 	int error;
@@ -728,53 +378,6 @@ setundefined:
 	return 0;
 }
 
-/*
- * Remap normal tasks to their real PID,
- * CPU shadow threads are mapped to -CPU - 2
- */
-static inline int shadow_pid(int realpid)
-{
-	if (realpid)
-		return realpid;
-
-	return -raw_smp_processor_id() - 2;
-}
-
-static char gdbmsgbuf[BUFMAX + 1];
-
-static void kgdb_msg_write(const char *s, int len)
-{
-	char *bufptr;
-	int wcount;
-	int i;
-
-	/* 'O'utput */
-	gdbmsgbuf[0] = 'O';
-
-	/* Fill and send buffers... */
-	while (len > 0) {
-		bufptr = gdbmsgbuf + 1;
-
-		/* Calculate how many this time */
-		if ((len << 1) > (BUFMAX - 2))
-			wcount = (BUFMAX - 2) >> 1;
-		else
-			wcount = len;
-
-		/* Pack in hex chars */
-		for (i = 0; i < wcount; i++)
-			bufptr = pack_hex_byte(bufptr, s[i]);
-		*bufptr = '\0';
-
-		/* Move up */
-		s += wcount;
-		len -= wcount;
-
-		/* Write packet */
-		put_packet(gdbmsgbuf);
-	}
-}
-
 /*
  * Return true if there is a valid kgdb I/O module.  Also if no
  * debugger is attached a message can be printed to the console about
@@ -786,7 +389,7 @@ static void kgdb_msg_write(const char *s, int len)
  */
 static int kgdb_io_ready(int print_wait)
 {
-	if (!kgdb_io_ops)
+	if (!dbg_io_ops)
 		return 0;
 	if (kgdb_connected)
 		return 1;
@@ -797,525 +400,6 @@ static int kgdb_io_ready(int print_wait)
 	return 1;
 }
 
-/*
- * All the functions that start with gdb_cmd are the various
- * operations to implement the handlers for the gdbserial protocol
- * where KGDB is communicating with an external debugger
- */
-
-/* Handle the '?' status packets */
-static void gdb_cmd_status(struct kgdb_state *ks)
-{
-	/*
-	 * We know that this packet is only sent
-	 * during initial connect.  So to be safe,
-	 * we clear out our breakpoints now in case
-	 * GDB is reconnecting.
-	 */
-	remove_all_break();
-
-	remcom_out_buffer[0] = 'S';
-	pack_hex_byte(&remcom_out_buffer[1], ks->signo);
-}
-
-/* Handle the 'g' get registers request */
-static void gdb_cmd_getregs(struct kgdb_state *ks)
-{
-	struct task_struct *thread;
-	void *local_debuggerinfo;
-	int i;
-
-	thread = kgdb_usethread;
-	if (!thread) {
-		thread = kgdb_info[ks->cpu].task;
-		local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
-	} else {
-		local_debuggerinfo = NULL;
-		for_each_online_cpu(i) {
-			/*
-			 * Try to find the task on some other
-			 * or possibly this node if we do not
-			 * find the matching task then we try
-			 * to approximate the results.
-			 */
-			if (thread == kgdb_info[i].task)
-				local_debuggerinfo = kgdb_info[i].debuggerinfo;
-		}
-	}
-
-	/*
-	 * All threads that don't have debuggerinfo should be
-	 * in schedule() sleeping, since all other CPUs
-	 * are in kgdb_wait, and thus have debuggerinfo.
-	 */
-	if (local_debuggerinfo) {
-		pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
-	} else {
-		/*
-		 * Pull stuff saved during switch_to; nothing
-		 * else is accessible (or even particularly
-		 * relevant).
-		 *
-		 * This should be enough for a stack trace.
-		 */
-		sleeping_thread_to_gdb_regs(gdb_regs, thread);
-	}
-	kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
-}
-
-/* Handle the 'G' set registers request */
-static void gdb_cmd_setregs(struct kgdb_state *ks)
-{
-	kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
-
-	if (kgdb_usethread && kgdb_usethread != current) {
-		error_packet(remcom_out_buffer, -EINVAL);
-	} else {
-		gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
-		strcpy(remcom_out_buffer, "OK");
-	}
-}
-
-/* Handle the 'm' memory read bytes */
-static void gdb_cmd_memread(struct kgdb_state *ks)
-{
-	char *ptr = &remcom_in_buffer[1];
-	unsigned long length;
-	unsigned long addr;
-	int err;
-
-	if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
-					kgdb_hex2long(&ptr, &length) > 0) {
-		err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
-		if (err)
-			error_packet(remcom_out_buffer, err);
-	} else {
-		error_packet(remcom_out_buffer, -EINVAL);
-	}
-}
-
-/* Handle the 'M' memory write bytes */
-static void gdb_cmd_memwrite(struct kgdb_state *ks)
-{
-	int err = write_mem_msg(0);
-
-	if (err)
-		error_packet(remcom_out_buffer, err);
-	else
-		strcpy(remcom_out_buffer, "OK");
-}
-
-/* Handle the 'X' memory binary write bytes */
-static void gdb_cmd_binwrite(struct kgdb_state *ks)
-{
-	int err = write_mem_msg(1);
-
-	if (err)
-		error_packet(remcom_out_buffer, err);
-	else
-		strcpy(remcom_out_buffer, "OK");
-}
-
-/* Handle the 'D' or 'k', detach or kill packets */
-static void gdb_cmd_detachkill(struct kgdb_state *ks)
-{
-	int error;
-
-	/* The detach case */
-	if (remcom_in_buffer[0] == 'D') {
-		error = remove_all_break();
-		if (error < 0) {
-			error_packet(remcom_out_buffer, error);
-		} else {
-			strcpy(remcom_out_buffer, "OK");
-			kgdb_connected = 0;
-		}
-		put_packet(remcom_out_buffer);
-	} else {
-		/*
-		 * Assume the kill case, with no exit code checking,
-		 * trying to force detach the debugger:
-		 */
-		remove_all_break();
-		kgdb_connected = 0;
-	}
-}
-
-/* Handle the 'R' reboot packets */
-static int gdb_cmd_reboot(struct kgdb_state *ks)
-{
-	/* For now, only honor R0 */
-	if (strcmp(remcom_in_buffer, "R0") == 0) {
-		printk(KERN_CRIT "Executing emergency reboot\n");
-		strcpy(remcom_out_buffer, "OK");
-		put_packet(remcom_out_buffer);
-
-		/*
-		 * Execution should not return from
-		 * machine_emergency_restart()
-		 */
-		machine_emergency_restart();
-		kgdb_connected = 0;
-
-		return 1;
-	}
-	return 0;
-}
-
-/* Handle the 'q' query packets */
-static void gdb_cmd_query(struct kgdb_state *ks)
-{
-	struct task_struct *g;
-	struct task_struct *p;
-	unsigned char thref[8];
-	char *ptr;
-	int i;
-	int cpu;
-	int finished = 0;
-
-	switch (remcom_in_buffer[1]) {
-	case 's':
-	case 'f':
-		if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
-			error_packet(remcom_out_buffer, -EINVAL);
-			break;
-		}
-
-		i = 0;
-		remcom_out_buffer[0] = 'm';
-		ptr = remcom_out_buffer + 1;
-		if (remcom_in_buffer[1] == 'f') {
-			/* Each cpu is a shadow thread */
-			for_each_online_cpu(cpu) {
-				ks->thr_query = 0;
-				int_to_threadref(thref, -cpu - 2);
-				pack_threadid(ptr, thref);
-				ptr += BUF_THREAD_ID_SIZE;
-				*(ptr++) = ',';
-				i++;
-			}
-		}
-
-		do_each_thread(g, p) {
-			if (i >= ks->thr_query && !finished) {
-				int_to_threadref(thref, p->pid);
-				pack_threadid(ptr, thref);
-				ptr += BUF_THREAD_ID_SIZE;
-				*(ptr++) = ',';
-				ks->thr_query++;
-				if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
-					finished = 1;
-			}
-			i++;
-		} while_each_thread(g, p);
-
-		*(--ptr) = '\0';
-		break;
-
-	case 'C':
-		/* Current thread id */
-		strcpy(remcom_out_buffer, "QC");
-		ks->threadid = shadow_pid(current->pid);
-		int_to_threadref(thref, ks->threadid);
-		pack_threadid(remcom_out_buffer + 2, thref);
-		break;
-	case 'T':
-		if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
-			error_packet(remcom_out_buffer, -EINVAL);
-			break;
-		}
-		ks->threadid = 0;
-		ptr = remcom_in_buffer + 17;
-		kgdb_hex2long(&ptr, &ks->threadid);
-		if (!getthread(ks->linux_regs, ks->threadid)) {
-			error_packet(remcom_out_buffer, -EINVAL);
-			break;
-		}
-		if ((int)ks->threadid > 0) {
-			kgdb_mem2hex(getthread(ks->linux_regs,
-					ks->threadid)->comm,
-					remcom_out_buffer, 16);
-		} else {
-			static char tmpstr[23 + BUF_THREAD_ID_SIZE];
-
-			sprintf(tmpstr, "shadowCPU%d",
-					(int)(-ks->threadid - 2));
-			kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
-		}
-		break;
-	}
-}
-
-/* Handle the 'H' task query packets */
-static void gdb_cmd_task(struct kgdb_state *ks)
-{
-	struct task_struct *thread;
-	char *ptr;
-
-	switch (remcom_in_buffer[1]) {
-	case 'g':
-		ptr = &remcom_in_buffer[2];
-		kgdb_hex2long(&ptr, &ks->threadid);
-		thread = getthread(ks->linux_regs, ks->threadid);
-		if (!thread && ks->threadid > 0) {
-			error_packet(remcom_out_buffer, -EINVAL);
-			break;
-		}
-		kgdb_usethread = thread;
-		ks->kgdb_usethreadid = ks->threadid;
-		strcpy(remcom_out_buffer, "OK");
-		break;
-	case 'c':
-		ptr = &remcom_in_buffer[2];
-		kgdb_hex2long(&ptr, &ks->threadid);
-		if (!ks->threadid) {
-			kgdb_contthread = NULL;
-		} else {
-			thread = getthread(ks->linux_regs, ks->threadid);
-			if (!thread && ks->threadid > 0) {
-				error_packet(remcom_out_buffer, -EINVAL);
-				break;
-			}
-			kgdb_contthread = thread;
-		}
-		strcpy(remcom_out_buffer, "OK");
-		break;
-	}
-}
-
-/* Handle the 'T' thread query packets */
-static void gdb_cmd_thread(struct kgdb_state *ks)
-{
-	char *ptr = &remcom_in_buffer[1];
-	struct task_struct *thread;
-
-	kgdb_hex2long(&ptr, &ks->threadid);
-	thread = getthread(ks->linux_regs, ks->threadid);
-	if (thread)
-		strcpy(remcom_out_buffer, "OK");
-	else
-		error_packet(remcom_out_buffer, -EINVAL);
-}
-
-/* Handle the 'z' or 'Z' breakpoint remove or set packets */
-static void gdb_cmd_break(struct kgdb_state *ks)
-{
-	/*
-	 * Since GDB-5.3, it's been drafted that '0' is a software
-	 * breakpoint, '1' is a hardware breakpoint, so let's do that.
-	 */
-	char *bpt_type = &remcom_in_buffer[1];
-	char *ptr = &remcom_in_buffer[2];
-	unsigned long addr;
-	unsigned long length;
-	int error = 0;
-
-	if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
-		/* Unsupported */
-		if (*bpt_type > '4')
-			return;
-	} else {
-		if (*bpt_type != '0' && *bpt_type != '1')
-			/* Unsupported. */
-			return;
-	}
-
-	/*
-	 * Test if this is a hardware breakpoint, and
-	 * if we support it:
-	 */
-	if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
-		/* Unsupported. */
-		return;
-
-	if (*(ptr++) != ',') {
-		error_packet(remcom_out_buffer, -EINVAL);
-		return;
-	}
-	if (!kgdb_hex2long(&ptr, &addr)) {
-		error_packet(remcom_out_buffer, -EINVAL);
-		return;
-	}
-	if (*(ptr++) != ',' ||
-		!kgdb_hex2long(&ptr, &length)) {
-		error_packet(remcom_out_buffer, -EINVAL);
-		return;
-	}
-
-	if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
-		error = kgdb_set_sw_break(addr);
-	else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
-		error = kgdb_remove_sw_break(addr);
-	else if (remcom_in_buffer[0] == 'Z')
-		error = arch_kgdb_ops.set_hw_breakpoint(addr,
-			(int)length, *bpt_type - '0');
-	else if (remcom_in_buffer[0] == 'z')
-		error = arch_kgdb_ops.remove_hw_breakpoint(addr,
-			(int) length, *bpt_type - '0');
-
-	if (error == 0)
-		strcpy(remcom_out_buffer, "OK");
-	else
-		error_packet(remcom_out_buffer, error);
-}
-
-/* Handle the 'C' signal / exception passing packets */
-static int gdb_cmd_exception_pass(struct kgdb_state *ks)
-{
-	/* C09 == pass exception
-	 * C15 == detach kgdb, pass exception
-	 */
-	if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
-
-		ks->pass_exception = 1;
-		remcom_in_buffer[0] = 'c';
-
-	} else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
-
-		ks->pass_exception = 1;
-		remcom_in_buffer[0] = 'D';
-		remove_all_break();
-		kgdb_connected = 0;
-		return 1;
-
-	} else {
-		kgdb_msg_write("KGDB only knows signal 9 (pass)"
-			" and 15 (pass and disconnect)\n"
-			"Executing a continue without signal passing\n", 0);
-		remcom_in_buffer[0] = 'c';
-	}
-
-	/* Indicate fall through */
-	return -1;
-}
-
-/*
- * This function performs all gdbserial command procesing
- */
-static int gdb_serial_stub(struct kgdb_state *ks)
-{
-	int error = 0;
-	int tmp;
-
-	/* Clear the out buffer. */
-	memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
-
-	if (kgdb_connected) {
-		unsigned char thref[8];
-		char *ptr;
-
-		/* Reply to host that an exception has occurred */
-		ptr = remcom_out_buffer;
-		*ptr++ = 'T';
-		ptr = pack_hex_byte(ptr, ks->signo);
-		ptr += strlen(strcpy(ptr, "thread:"));
-		int_to_threadref(thref, shadow_pid(current->pid));
-		ptr = pack_threadid(ptr, thref);
-		*ptr++ = ';';
-		put_packet(remcom_out_buffer);
-	}
-
-	kgdb_usethread = kgdb_info[ks->cpu].task;
-	ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
-	ks->pass_exception = 0;
-
-	while (1) {
-		error = 0;
-
-		/* Clear the out buffer. */
-		memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
-
-		get_packet(remcom_in_buffer);
-
-		switch (remcom_in_buffer[0]) {
-		case '?': /* gdbserial status */
-			gdb_cmd_status(ks);
-			break;
-		case 'g': /* return the value of the CPU registers */
-			gdb_cmd_getregs(ks);
-			break;
-		case 'G': /* set the value of the CPU registers - return OK */
-			gdb_cmd_setregs(ks);
-			break;
-		case 'm': /* mAA..AA,LLLL  Read LLLL bytes at address AA..AA */
-			gdb_cmd_memread(ks);
-			break;
-		case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
-			gdb_cmd_memwrite(ks);
-			break;
-		case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
-			gdb_cmd_binwrite(ks);
-			break;
-			/* kill or detach. KGDB should treat this like a
-			 * continue.
-			 */
-		case 'D': /* Debugger detach */
-		case 'k': /* Debugger detach via kill */
-			gdb_cmd_detachkill(ks);
-			goto default_handle;
-		case 'R': /* Reboot */
-			if (gdb_cmd_reboot(ks))
-				goto default_handle;
-			break;
-		case 'q': /* query command */
-			gdb_cmd_query(ks);
-			break;
-		case 'H': /* task related */
-			gdb_cmd_task(ks);
-			break;
-		case 'T': /* Query thread status */
-			gdb_cmd_thread(ks);
-			break;
-		case 'z': /* Break point remove */
-		case 'Z': /* Break point set */
-			gdb_cmd_break(ks);
-			break;
-		case 'C': /* Exception passing */
-			tmp = gdb_cmd_exception_pass(ks);
-			if (tmp > 0)
-				goto default_handle;
-			if (tmp == 0)
-				break;
-			/* Fall through on tmp < 0 */
-		case 'c': /* Continue packet */
-		case 's': /* Single step packet */
-			if (kgdb_contthread && kgdb_contthread != current) {
-				/* Can't switch threads in kgdb */
-				error_packet(remcom_out_buffer, -EINVAL);
-				break;
-			}
-			kgdb_activate_sw_breakpoints();
-			/* Fall through to default processing */
-		default:
-default_handle:
-			error = kgdb_arch_handle_exception(ks->ex_vector,
-						ks->signo,
-						ks->err_code,
-						remcom_in_buffer,
-						remcom_out_buffer,
-						ks->linux_regs);
-			/*
-			 * Leave cmd processing on error, detach,
-			 * kill, continue, or single step.
-			 */
-			if (error >= 0 || remcom_in_buffer[0] == 'D' ||
-			    remcom_in_buffer[0] == 'k') {
-				error = 0;
-				goto kgdb_exit;
-			}
-
-		}
-
-		/* reply to the request */
-		put_packet(remcom_out_buffer);
-	}
-
-kgdb_exit:
-	if (ks->pass_exception)
-		error = 1;
-	return error;
-}
-
 static int kgdb_reenter_check(struct kgdb_state *ks)
 {
 	unsigned long addr;
@@ -1334,17 +418,17 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
 	 * user because the user planted a breakpoint in a place that
 	 * KGDB needs in order to function.
 	 */
-	if (kgdb_remove_sw_break(addr) == 0) {
+	if (dbg_remove_sw_break(addr) == 0) {
 		exception_level = 0;
 		kgdb_skipexception(ks->ex_vector, ks->linux_regs);
-		kgdb_activate_sw_breakpoints();
+		dbg_activate_sw_breakpoints();
 		printk(KERN_CRIT "KGDB: re-enter error: breakpoint removed %lx\n",
 			addr);
 		WARN_ON_ONCE(1);
 
 		return 1;
 	}
-	remove_all_break();
+	dbg_remove_all_break();
 	kgdb_skipexception(ks->ex_vector, ks->linux_regs);
 
 	if (exception_level > 1) {
@@ -1430,7 +514,7 @@ return_normal:
 
 	if (!kgdb_io_ready(1)) {
 		error = 1;
-		goto kgdb_restore; /* No I/O connection, so resume the system */
+		goto kgdb_restore; /* No I/O connection, resume the system */
 	}
 
 	/*
@@ -1440,8 +524,8 @@ return_normal:
 		goto kgdb_restore;
 
 	/* Call the I/O driver's pre_exception routine */
-	if (kgdb_io_ops->pre_exception)
-		kgdb_io_ops->pre_exception();
+	if (dbg_io_ops->pre_exception)
+		dbg_io_ops->pre_exception();
 
 	kgdb_disable_hw_debug(ks->linux_regs);
 
@@ -1485,8 +569,8 @@ return_normal:
 	error = gdb_serial_stub(ks);
 
 	/* Call the I/O driver's post_exception routine */
-	if (kgdb_io_ops->post_exception)
-		kgdb_io_ops->post_exception();
+	if (dbg_io_ops->post_exception)
+		dbg_io_ops->post_exception();
 
 	atomic_dec(&cpu_in_kgdb[ks->cpu]);
 
@@ -1585,7 +669,7 @@ static void kgdb_console_write(struct console *co, const char *s,
 		return;
 
 	local_irq_save(flags);
-	kgdb_msg_write(s, count);
+	gdbstub_msg_write(s, count);
 	local_irq_restore(flags);
 }
 
@@ -1597,9 +681,9 @@ static struct console kgdbcons = {
 };
 
 #ifdef CONFIG_MAGIC_SYSRQ
-static void sysrq_handle_gdb(int key, struct tty_struct *tty)
+static void sysrq_handle_dbg(int key, struct tty_struct *tty)
 {
-	if (!kgdb_io_ops) {
+	if (!dbg_io_ops) {
 		printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
 		return;
 	}
@@ -1609,8 +693,8 @@ static void sysrq_handle_gdb(int key, struct tty_struct *tty)
 	kgdb_breakpoint();
 }
 
-static struct sysrq_key_op sysrq_gdb_op = {
-	.handler	= sysrq_handle_gdb,
+static struct sysrq_key_op sysrq_dbg_op = {
+	.handler	= sysrq_handle_dbg,
 	.help_msg	= "debug(G)",
 	.action_msg	= "DEBUG",
 };
@@ -1622,7 +706,7 @@ static void kgdb_register_callbacks(void)
 		kgdb_io_module_registered = 1;
 		kgdb_arch_init();
 #ifdef CONFIG_MAGIC_SYSRQ
-		register_sysrq_key('g', &sysrq_gdb_op);
+		register_sysrq_key('g', &sysrq_dbg_op);
 #endif
 		if (kgdb_use_con && !kgdb_con_registered) {
 			register_console(&kgdbcons);
@@ -1642,7 +726,7 @@ static void kgdb_unregister_callbacks(void)
 		kgdb_io_module_registered = 0;
 		kgdb_arch_exit();
 #ifdef CONFIG_MAGIC_SYSRQ
-		unregister_sysrq_key('g', &sysrq_gdb_op);
+		unregister_sysrq_key('g', &sysrq_dbg_op);
 #endif
 		if (kgdb_con_registered) {
 			unregister_console(&kgdbcons);
@@ -1661,17 +745,17 @@ static void kgdb_initial_breakpoint(void)
 
 /**
  *	kgdb_register_io_module - register KGDB IO module
- *	@new_kgdb_io_ops: the io ops vector
+ *	@new_dbg_io_ops: the io ops vector
  *
  *	Register it with the KGDB core.
  */
-int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
+int kgdb_register_io_module(struct kgdb_io *new_dbg_io_ops)
 {
 	int err;
 
 	spin_lock(&kgdb_registration_lock);
 
-	if (kgdb_io_ops) {
+	if (dbg_io_ops) {
 		spin_unlock(&kgdb_registration_lock);
 
 		printk(KERN_ERR "kgdb: Another I/O driver is already "
@@ -1679,20 +763,20 @@ int kgdb_register_io_module(struct kgdb_io *new_kgdb_io_ops)
 		return -EBUSY;
 	}
 
-	if (new_kgdb_io_ops->init) {
-		err = new_kgdb_io_ops->init();
+	if (new_dbg_io_ops->init) {
+		err = new_dbg_io_ops->init();
 		if (err) {
 			spin_unlock(&kgdb_registration_lock);
 			return err;
 		}
 	}
 
-	kgdb_io_ops = new_kgdb_io_ops;
+	dbg_io_ops = new_dbg_io_ops;
 
 	spin_unlock(&kgdb_registration_lock);
 
 	printk(KERN_INFO "kgdb: Registered I/O driver %s.\n",
-	       new_kgdb_io_ops->name);
+	       new_dbg_io_ops->name);
 
 	/* Arm KGDB now. */
 	kgdb_register_callbacks();
@@ -1706,11 +790,11 @@ EXPORT_SYMBOL_GPL(kgdb_register_io_module);
 
 /**
  *	kkgdb_unregister_io_module - unregister KGDB IO module
- *	@old_kgdb_io_ops: the io ops vector
+ *	@old_dbg_io_ops: the io ops vector
  *
  *	Unregister it with the KGDB core.
  */
-void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops)
+void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
 {
 	BUG_ON(kgdb_connected);
 
@@ -1722,14 +806,14 @@ void kgdb_unregister_io_module(struct kgdb_io *old_kgdb_io_ops)
 
 	spin_lock(&kgdb_registration_lock);
 
-	WARN_ON_ONCE(kgdb_io_ops != old_kgdb_io_ops);
-	kgdb_io_ops = NULL;
+	WARN_ON_ONCE(dbg_io_ops != old_dbg_io_ops);
+	dbg_io_ops = NULL;
 
 	spin_unlock(&kgdb_registration_lock);
 
 	printk(KERN_INFO
 		"kgdb: Unregistered I/O driver %s, debugger disabled.\n",
-		old_kgdb_io_ops->name);
+		old_dbg_io_ops->name);
 }
 EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
 
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
new file mode 100644
index 000000000000..db554f9be51d
--- /dev/null
+++ b/kernel/debug/debug_core.h
@@ -0,0 +1,55 @@
+/*
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#ifndef _DEBUG_CORE_H_
+#define _DEBUG_CORE_H_
+/*
+ * These are the private implementation headers between the kernel
+ * debugger core and the debugger front end code.
+ */
+
+/* kernel debug core data structures */
+struct kgdb_state {
+	int			ex_vector;
+	int			signo;
+	int			err_code;
+	int			cpu;
+	int			pass_exception;
+	unsigned long		thr_query;
+	unsigned long		threadid;
+	long			kgdb_usethreadid;
+	struct pt_regs		*linux_regs;
+};
+
+/* Exception state values */
+#define DCPU_WANT_MASTER 0x1 /* Waiting to become a master kgdb cpu */
+#define DCPU_NEXT_MASTER 0x2 /* Transition from one master cpu to another */
+#define DCPU_IS_SLAVE    0x4 /* Slave cpu enter exception */
+#define DCPU_SSTEP       0x8 /* CPU is single stepping */
+
+struct debuggerinfo_struct {
+	void			*debuggerinfo;
+	struct task_struct	*task;
+	int			exception_state;
+};
+
+extern struct debuggerinfo_struct kgdb_info[];
+
+/* kernel debug core break point routines */
+extern int dbg_remove_all_break(void);
+extern int dbg_set_sw_break(unsigned long addr);
+extern int dbg_remove_sw_break(unsigned long addr);
+extern int dbg_activate_sw_breakpoints(void);
+
+/* gdbstub interface functions */
+extern int gdb_serial_stub(struct kgdb_state *ks);
+extern void gdbstub_msg_write(const char *s, int len);
+
+#endif /* _DEBUG_CORE_H_ */
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
new file mode 100644
index 000000000000..ccdf0929f12d
--- /dev/null
+++ b/kernel/debug/gdbstub.c
@@ -0,0 +1,934 @@
+/*
+ * Kernel Debug Core
+ *
+ * Maintainer: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (C) 2000-2001 VERITAS Software Corporation.
+ * Copyright (C) 2002-2004 Timesys Corporation
+ * Copyright (C) 2003-2004 Amit S. Kale <amitkale@linsyssoft.com>
+ * Copyright (C) 2004 Pavel Machek <pavel@suse.cz>
+ * Copyright (C) 2004-2006 Tom Rini <trini@kernel.crashing.org>
+ * Copyright (C) 2004-2006 LinSysSoft Technologies Pvt. Ltd.
+ * Copyright (C) 2005-2009 Wind River Systems, Inc.
+ * Copyright (C) 2007 MontaVista Software, Inc.
+ * Copyright (C) 2008 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
+ *
+ * Contributors at various stages not listed above:
+ *  Jason Wessel ( jason.wessel@windriver.com )
+ *  George Anzinger <george@mvista.com>
+ *  Anurekh Saxena (anurekh.saxena@timesys.com)
+ *  Lake Stevens Instrument Division (Glenn Engel)
+ *  Jim Kingdon, Cygnus Support.
+ *
+ * Original KGDB stub: David Grothe <dave@gcom.com>,
+ * Tigran Aivazian <tigran@sco.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kgdb.h>
+#include <linux/reboot.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+#include <asm/unaligned.h>
+#include "debug_core.h"
+
+#define KGDB_MAX_THREAD_QUERY 17
+
+/* Our I/O buffers. */
+static char			remcom_in_buffer[BUFMAX];
+static char			remcom_out_buffer[BUFMAX];
+
+/* Storage for the registers, in GDB format. */
+static unsigned long		gdb_regs[(NUMREGBYTES +
+					sizeof(unsigned long) - 1) /
+					sizeof(unsigned long)];
+
+/*
+ * GDB remote protocol parser:
+ */
+
+static int hex(char ch)
+{
+	if ((ch >= 'a') && (ch <= 'f'))
+		return ch - 'a' + 10;
+	if ((ch >= '0') && (ch <= '9'))
+		return ch - '0';
+	if ((ch >= 'A') && (ch <= 'F'))
+		return ch - 'A' + 10;
+	return -1;
+}
+
+/* scan for the sequence $<data>#<checksum> */
+static void get_packet(char *buffer)
+{
+	unsigned char checksum;
+	unsigned char xmitcsum;
+	int count;
+	char ch;
+
+	do {
+		/*
+		 * Spin and wait around for the start character, ignore all
+		 * other characters:
+		 */
+		while ((ch = (dbg_io_ops->read_char())) != '$')
+			/* nothing */;
+
+		kgdb_connected = 1;
+		checksum = 0;
+		xmitcsum = -1;
+
+		count = 0;
+
+		/*
+		 * now, read until a # or end of buffer is found:
+		 */
+		while (count < (BUFMAX - 1)) {
+			ch = dbg_io_ops->read_char();
+			if (ch == '#')
+				break;
+			checksum = checksum + ch;
+			buffer[count] = ch;
+			count = count + 1;
+		}
+		buffer[count] = 0;
+
+		if (ch == '#') {
+			xmitcsum = hex(dbg_io_ops->read_char()) << 4;
+			xmitcsum += hex(dbg_io_ops->read_char());
+
+			if (checksum != xmitcsum)
+				/* failed checksum */
+				dbg_io_ops->write_char('-');
+			else
+				/* successful transfer */
+				dbg_io_ops->write_char('+');
+			if (dbg_io_ops->flush)
+				dbg_io_ops->flush();
+		}
+	} while (checksum != xmitcsum);
+}
+
+/*
+ * Send the packet in buffer, retransmitting until the host ACKs it
+ * with '+' or starts a new packet with '$' (which is NAKed).
+ */
+static void put_packet(char *buffer)
+{
+	unsigned char checksum;
+	int count;
+	char ch;
+
+	/*
+	 * $<packet info>#<checksum>.
+	 */
+	while (1) {
+		dbg_io_ops->write_char('$');
+		checksum = 0;
+		count = 0;
+
+		while ((ch = buffer[count])) {
+			dbg_io_ops->write_char(ch);
+			checksum += ch;
+			count++;
+		}
+
+		dbg_io_ops->write_char('#');
+		dbg_io_ops->write_char(hex_asc_hi(checksum));
+		dbg_io_ops->write_char(hex_asc_lo(checksum));
+		if (dbg_io_ops->flush)
+			dbg_io_ops->flush();
+
+		/* Now see what we get in reply. */
+		ch = dbg_io_ops->read_char();
+
+		if (ch == 3)
+			ch = dbg_io_ops->read_char();
+
+		/* If we get an ACK, we are done. */
+		if (ch == '+')
+			return;
+
+		/*
+		 * If we get the start of another packet, this means
+		 * that GDB is attempting to reconnect.  We will NAK
+		 * the packet being sent, and stop trying to send this
+		 * packet.
+		 */
+		if (ch == '$') {
+			dbg_io_ops->write_char('-');
+			if (dbg_io_ops->flush)
+				dbg_io_ops->flush();
+			return;
+		}
+	}
+}
+
+static char gdbmsgbuf[BUFMAX + 1];
+
+void gdbstub_msg_write(const char *s, int len)
+{
+	char *bufptr;
+	int wcount;
+	int i;
+
+	/* 'O'utput */
+	gdbmsgbuf[0] = 'O';
+
+	/* Fill and send buffers... */
+	while (len > 0) {
+		bufptr = gdbmsgbuf + 1;
+
+		/* Calculate how many this time */
+		if ((len << 1) > (BUFMAX - 2))
+			wcount = (BUFMAX - 2) >> 1;
+		else
+			wcount = len;
+
+		/* Pack in hex chars */
+		for (i = 0; i < wcount; i++)
+			bufptr = pack_hex_byte(bufptr, s[i]);
+		*bufptr = '\0';
+
+		/* Move up */
+		s += wcount;
+		len -= wcount;
+
+		/* Write packet */
+		put_packet(gdbmsgbuf);
+	}
+}
+
+/*
+ * Convert the memory pointed to by mem into hex, placing result in
+ * buf (NUL-terminated).  Return 0 on success, or the error from
+ * probe_kernel_read() if mem could not be read.
+ */
+int kgdb_mem2hex(char *mem, char *buf, int count)
+{
+	char *tmp;
+	int err;
+
+	/*
+	 * We use the upper half of buf as an intermediate buffer for the
+	 * raw memory copy.  Hex conversion will work against this one.
+	 */
+	tmp = buf + count;
+
+	err = probe_kernel_read(tmp, mem, count);
+	if (!err) {
+		while (count > 0) {
+			buf = pack_hex_byte(buf, *tmp);
+			tmp++;
+			count--;
+		}
+
+		*buf = 0;
+	}
+
+	return err;
+}
+
+/*
+ * Convert the hex array pointed to by buf into binary to be placed in
+ * mem.  Return the status of probe_kernel_write(), which
+ * write_mem_msg() treats as an error when it is non-zero.
+ */
+int kgdb_hex2mem(char *buf, char *mem, int count)
+{
+	char *tmp_raw;
+	char *tmp_hex;
+
+	/*
+	 * We use the upper half of buf as an intermediate buffer for the
+	 * raw memory that is converted from hex.
+	 */
+	tmp_raw = buf + count * 2;
+
+	tmp_hex = tmp_raw - 1;
+	while (tmp_hex >= buf) {
+		tmp_raw--;
+		*tmp_raw = hex(*tmp_hex--);
+		*tmp_raw |= hex(*tmp_hex--) << 4;
+	}
+
+	return probe_kernel_write(mem, tmp_raw, count);
+}
+
+/*
+ * While we find nice hex chars, build a long_val.
+ * Return number of chars processed.
+ */
+int kgdb_hex2long(char **ptr, unsigned long *long_val)
+{
+	int hex_val;
+	int num = 0;
+	int negate = 0;
+
+	*long_val = 0;
+
+	if (**ptr == '-') {
+		negate = 1;
+		(*ptr)++;
+	}
+	while (**ptr) {
+		hex_val = hex(**ptr);
+		if (hex_val < 0)
+			break;
+
+		*long_val = (*long_val << 4) | hex_val;
+		num++;
+		(*ptr)++;
+	}
+
+	if (negate)
+		*long_val = -*long_val;
+
+	return num;
+}
+
+/*
+ * Copy the binary array pointed to by buf into mem.  Fix $, #, and
+ * 0x7d escaped with 0x7d. Return -EFAULT on failure or 0 on success.
+ * The input buf is overwritten with the result to write to mem.
+ */
+static int kgdb_ebin2mem(char *buf, char *mem, int count)
+{
+	int size = 0;
+	char *c = buf;
+
+	while (count-- > 0) {
+		c[size] = *buf++;
+		if (c[size] == 0x7d)
+			c[size] = *buf++ ^ 0x20;
+		size++;
+	}
+
+	return probe_kernel_write(mem, c, size);
+}
+
+/* Write memory due to an 'M' or 'X' packet. */
+static int write_mem_msg(int binary)
+{
+	char *ptr = &remcom_in_buffer[1];
+	unsigned long addr;
+	unsigned long length;
+	int err;
+
+	if (kgdb_hex2long(&ptr, &addr) > 0 && *(ptr++) == ',' &&
+	    kgdb_hex2long(&ptr, &length) > 0 && *(ptr++) == ':') {
+		if (binary)
+			err = kgdb_ebin2mem(ptr, (char *)addr, length);
+		else
+			err = kgdb_hex2mem(ptr, (char *)addr, length);
+		if (err)
+			return err;
+		if (CACHE_FLUSH_IS_SAFE)
+			flush_icache_range(addr, addr + length);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+
+static void error_packet(char *pkt, int error)
+{
+	error = -error;
+	pkt[0] = 'E';
+	pkt[1] = hex_asc[(error / 10)];
+	pkt[2] = hex_asc[(error % 10)];
+	pkt[3] = '\0';
+}
+
+/*
+ * Thread ID accessors. We represent a flat TID space to GDB, where
+ * the per CPU idle threads (which under Linux all have PID 0) are
+ * remapped to negative TIDs.
+ */
+
+#define BUF_THREAD_ID_SIZE	16
+
+static char *pack_threadid(char *pkt, unsigned char *id)
+{
+	char *limit;
+
+	limit = pkt + BUF_THREAD_ID_SIZE;
+	while (pkt < limit)
+		pkt = pack_hex_byte(pkt, *id++);
+
+	return pkt;
+}
+
+static void int_to_threadref(unsigned char *id, int value)
+{
+	unsigned char *scan;
+	int i = 4;
+
+	scan = (unsigned char *)id;
+	while (i--)
+		*scan++ = 0;
+	put_unaligned_be32(value, scan);
+}
+
+static struct task_struct *getthread(struct pt_regs *regs, int tid)
+{
+	/*
+	 * Non-positive TIDs are remapped to the cpu shadow information
+	 */
+	if (tid == 0 || tid == -1)
+		tid = -atomic_read(&kgdb_active) - 2;
+	if (tid < -1 && tid > -NR_CPUS - 2) {
+		if (kgdb_info[-tid - 2].task)
+			return kgdb_info[-tid - 2].task;
+		else
+			return idle_task(-tid - 2);
+	}
+	if (tid <= 0) {
+		printk(KERN_ERR "KGDB: Internal thread select error\n");
+		dump_stack();
+		return NULL;
+	}
+
+	/*
+	 * find_task_by_pid_ns() does not take the tasklist lock anymore
+	 * but is nicely RCU locked - hence is a pretty resilient
+	 * thing to use:
+	 */
+	return find_task_by_pid_ns(tid, &init_pid_ns);
+}
+
+
+/*
+ * Remap normal tasks to their real PID,
+ * CPU shadow threads are mapped to -CPU - 2
+ */
+static inline int shadow_pid(int realpid)
+{
+	if (realpid)
+		return realpid;
+
+	return -raw_smp_processor_id() - 2;
+}
+
+/*
+ * All the functions that start with gdb_cmd are the various
+ * operations to implement the handlers for the gdbserial protocol
+ * where KGDB is communicating with an external debugger
+ */
+
+/* Handle the '?' status packets */
+static void gdb_cmd_status(struct kgdb_state *ks)
+{
+	/*
+	 * We know that this packet is only sent
+	 * during initial connect.  So to be safe,
+	 * we clear out our breakpoints now in case
+	 * GDB is reconnecting.
+	 */
+	dbg_remove_all_break();
+
+	remcom_out_buffer[0] = 'S';
+	pack_hex_byte(&remcom_out_buffer[1], ks->signo);
+}
+
+/* Handle the 'g' get registers request */
+static void gdb_cmd_getregs(struct kgdb_state *ks)
+{
+	struct task_struct *thread;
+	void *local_debuggerinfo;
+	int i;
+
+	thread = kgdb_usethread;
+	if (!thread) {
+		thread = kgdb_info[ks->cpu].task;
+		local_debuggerinfo = kgdb_info[ks->cpu].debuggerinfo;
+	} else {
+		local_debuggerinfo = NULL;
+		for_each_online_cpu(i) {
+			/*
+			 * Try to find the task on some other
+			 * or possibly this node if we do not
+			 * find the matching task then we try
+			 * to approximate the results.
+			 */
+			if (thread == kgdb_info[i].task)
+				local_debuggerinfo = kgdb_info[i].debuggerinfo;
+		}
+	}
+
+	/*
+	 * All threads that don't have debuggerinfo should be
+	 * in schedule() sleeping, since all other CPUs
+	 * are in kgdb_wait, and thus have debuggerinfo.
+	 */
+	if (local_debuggerinfo) {
+		pt_regs_to_gdb_regs(gdb_regs, local_debuggerinfo);
+	} else {
+		/*
+		 * Pull stuff saved during switch_to; nothing
+		 * else is accessible (or even particularly
+		 * relevant).
+		 *
+		 * This should be enough for a stack trace.
+		 */
+		sleeping_thread_to_gdb_regs(gdb_regs, thread);
+	}
+	kgdb_mem2hex((char *)gdb_regs, remcom_out_buffer, NUMREGBYTES);
+}
+
+/* Handle the 'G' set registers request */
+static void gdb_cmd_setregs(struct kgdb_state *ks)
+{
+	kgdb_hex2mem(&remcom_in_buffer[1], (char *)gdb_regs, NUMREGBYTES);
+
+	if (kgdb_usethread && kgdb_usethread != current) {
+		error_packet(remcom_out_buffer, -EINVAL);
+	} else {
+		gdb_regs_to_pt_regs(gdb_regs, ks->linux_regs);
+		strcpy(remcom_out_buffer, "OK");
+	}
+}
+
+/* Handle the 'm' memory read bytes */
+static void gdb_cmd_memread(struct kgdb_state *ks)
+{
+	char *ptr = &remcom_in_buffer[1];
+	unsigned long length;
+	unsigned long addr;
+	int err;
+
+	if (kgdb_hex2long(&ptr, &addr) > 0 && *ptr++ == ',' &&
+					kgdb_hex2long(&ptr, &length) > 0) {
+		err = kgdb_mem2hex((char *)addr, remcom_out_buffer, length);
+		if (err)
+			error_packet(remcom_out_buffer, err);
+	} else {
+		error_packet(remcom_out_buffer, -EINVAL);
+	}
+}
+
+/* Handle the 'M' memory write bytes */
+static void gdb_cmd_memwrite(struct kgdb_state *ks)
+{
+	int err = write_mem_msg(0);
+
+	if (err)
+		error_packet(remcom_out_buffer, err);
+	else
+		strcpy(remcom_out_buffer, "OK");
+}
+
+/* Handle the 'X' memory binary write bytes */
+static void gdb_cmd_binwrite(struct kgdb_state *ks)
+{
+	int err = write_mem_msg(1);
+
+	if (err)
+		error_packet(remcom_out_buffer, err);
+	else
+		strcpy(remcom_out_buffer, "OK");
+}
+
+/* Handle the 'D' or 'k', detach or kill packets */
+static void gdb_cmd_detachkill(struct kgdb_state *ks)
+{
+	int error;
+
+	/* The detach case */
+	if (remcom_in_buffer[0] == 'D') {
+		error = dbg_remove_all_break();
+		if (error < 0) {
+			error_packet(remcom_out_buffer, error);
+		} else {
+			strcpy(remcom_out_buffer, "OK");
+			kgdb_connected = 0;
+		}
+		put_packet(remcom_out_buffer);
+	} else {
+		/*
+		 * Assume the kill case, with no exit code checking,
+		 * trying to force detach the debugger:
+		 */
+		dbg_remove_all_break();
+		kgdb_connected = 0;
+	}
+}
+
+/* Handle the 'R' reboot packets */
+static int gdb_cmd_reboot(struct kgdb_state *ks)
+{
+	/* For now, only honor R0 */
+	if (strcmp(remcom_in_buffer, "R0") == 0) {
+		printk(KERN_CRIT "Executing emergency reboot\n");
+		strcpy(remcom_out_buffer, "OK");
+		put_packet(remcom_out_buffer);
+
+		/*
+		 * Execution should not return from
+		 * machine_emergency_restart()
+		 */
+		machine_emergency_restart();
+		kgdb_connected = 0;
+
+		return 1;
+	}
+	return 0;
+}
+
+/* Handle the 'q' query packets */
+static void gdb_cmd_query(struct kgdb_state *ks)
+{
+	struct task_struct *g;
+	struct task_struct *p;
+	unsigned char thref[8];
+	char *ptr;
+	int i;
+	int cpu;
+	int finished = 0;
+
+	switch (remcom_in_buffer[1]) {
+	case 's':
+	case 'f':
+		if (memcmp(remcom_in_buffer + 2, "ThreadInfo", 10)) {
+			error_packet(remcom_out_buffer, -EINVAL);
+			break;
+		}
+
+		i = 0;
+		remcom_out_buffer[0] = 'm';
+		ptr = remcom_out_buffer + 1;
+		if (remcom_in_buffer[1] == 'f') {
+			/* Each cpu is a shadow thread */
+			for_each_online_cpu(cpu) {
+				ks->thr_query = 0;
+				int_to_threadref(thref, -cpu - 2);
+				pack_threadid(ptr, thref);
+				ptr += BUF_THREAD_ID_SIZE;
+				*(ptr++) = ',';
+				i++;
+			}
+		}
+
+		do_each_thread(g, p) {
+			if (i >= ks->thr_query && !finished) {
+				int_to_threadref(thref, p->pid);
+				pack_threadid(ptr, thref);
+				ptr += BUF_THREAD_ID_SIZE;
+				*(ptr++) = ',';
+				ks->thr_query++;
+				if (ks->thr_query % KGDB_MAX_THREAD_QUERY == 0)
+					finished = 1;
+			}
+			i++;
+		} while_each_thread(g, p);
+
+		*(--ptr) = '\0';
+		break;
+
+	case 'C':
+		/* Current thread id */
+		strcpy(remcom_out_buffer, "QC");
+		ks->threadid = shadow_pid(current->pid);
+		int_to_threadref(thref, ks->threadid);
+		pack_threadid(remcom_out_buffer + 2, thref);
+		break;
+	case 'T':
+		if (memcmp(remcom_in_buffer + 1, "ThreadExtraInfo,", 16)) {
+			error_packet(remcom_out_buffer, -EINVAL);
+			break;
+		}
+		ks->threadid = 0;
+		ptr = remcom_in_buffer + 17;
+		kgdb_hex2long(&ptr, &ks->threadid);
+		if (!getthread(ks->linux_regs, ks->threadid)) {
+			error_packet(remcom_out_buffer, -EINVAL);
+			break;
+		}
+		if ((int)ks->threadid > 0) {
+			kgdb_mem2hex(getthread(ks->linux_regs,
+					ks->threadid)->comm,
+					remcom_out_buffer, 16);
+		} else {
+			static char tmpstr[23 + BUF_THREAD_ID_SIZE];
+
+			sprintf(tmpstr, "shadowCPU%d",
+					(int)(-ks->threadid - 2));
+			kgdb_mem2hex(tmpstr, remcom_out_buffer, strlen(tmpstr));
+		}
+		break;
+	}
+}
+
+/* Handle the 'H' task query packets */
+static void gdb_cmd_task(struct kgdb_state *ks)
+{
+	struct task_struct *thread;
+	char *ptr;
+
+	switch (remcom_in_buffer[1]) {
+	case 'g':
+		ptr = &remcom_in_buffer[2];
+		kgdb_hex2long(&ptr, &ks->threadid);
+		thread = getthread(ks->linux_regs, ks->threadid);
+		if (!thread && ks->threadid > 0) {
+			error_packet(remcom_out_buffer, -EINVAL);
+			break;
+		}
+		kgdb_usethread = thread;
+		ks->kgdb_usethreadid = ks->threadid;
+		strcpy(remcom_out_buffer, "OK");
+		break;
+	case 'c':
+		ptr = &remcom_in_buffer[2];
+		kgdb_hex2long(&ptr, &ks->threadid);
+		if (!ks->threadid) {
+			kgdb_contthread = NULL;
+		} else {
+			thread = getthread(ks->linux_regs, ks->threadid);
+			if (!thread && ks->threadid > 0) {
+				error_packet(remcom_out_buffer, -EINVAL);
+				break;
+			}
+			kgdb_contthread = thread;
+		}
+		strcpy(remcom_out_buffer, "OK");
+		break;
+	}
+}
+
+/* Handle the 'T' thread query packets */
+static void gdb_cmd_thread(struct kgdb_state *ks)
+{
+	char *ptr = &remcom_in_buffer[1];
+	struct task_struct *thread;
+
+	kgdb_hex2long(&ptr, &ks->threadid);
+	thread = getthread(ks->linux_regs, ks->threadid);
+	if (thread)
+		strcpy(remcom_out_buffer, "OK");
+	else
+		error_packet(remcom_out_buffer, -EINVAL);
+}
+
+/* Handle the 'z' or 'Z' breakpoint remove or set packets */
+static void gdb_cmd_break(struct kgdb_state *ks)
+{
+	/*
+	 * Since GDB-5.3, it's been drafted that '0' is a software
+	 * breakpoint, '1' is a hardware breakpoint, so let's do that.
+	 */
+	char *bpt_type = &remcom_in_buffer[1];
+	char *ptr = &remcom_in_buffer[2];
+	unsigned long addr;
+	unsigned long length;
+	int error = 0;
+
+	if (arch_kgdb_ops.set_hw_breakpoint && *bpt_type >= '1') {
+		/* Unsupported */
+		if (*bpt_type > '4')
+			return;
+	} else {
+		if (*bpt_type != '0' && *bpt_type != '1')
+			/* Unsupported. */
+			return;
+	}
+
+	/*
+	 * Test if this is a hardware breakpoint, and
+	 * if we support it:
+	 */
+	if (*bpt_type == '1' && !(arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT))
+		/* Unsupported. */
+		return;
+
+	if (*(ptr++) != ',') {
+		error_packet(remcom_out_buffer, -EINVAL);
+		return;
+	}
+	if (!kgdb_hex2long(&ptr, &addr)) {
+		error_packet(remcom_out_buffer, -EINVAL);
+		return;
+	}
+	if (*(ptr++) != ',' ||
+		!kgdb_hex2long(&ptr, &length)) {
+		error_packet(remcom_out_buffer, -EINVAL);
+		return;
+	}
+
+	if (remcom_in_buffer[0] == 'Z' && *bpt_type == '0')
+		error = dbg_set_sw_break(addr);
+	else if (remcom_in_buffer[0] == 'z' && *bpt_type == '0')
+		error = dbg_remove_sw_break(addr);
+	else if (remcom_in_buffer[0] == 'Z')
+		error = arch_kgdb_ops.set_hw_breakpoint(addr,
+			(int)length, *bpt_type - '0');
+	else if (remcom_in_buffer[0] == 'z')
+		error = arch_kgdb_ops.remove_hw_breakpoint(addr,
+			(int) length, *bpt_type - '0');
+
+	if (error == 0)
+		strcpy(remcom_out_buffer, "OK");
+	else
+		error_packet(remcom_out_buffer, error);
+}
+
+/* Handle the 'C' signal / exception passing packets */
+static int gdb_cmd_exception_pass(struct kgdb_state *ks)
+{
+	/* C09 == pass exception
+	 * C15 == detach kgdb, pass exception
+	 */
+	if (remcom_in_buffer[1] == '0' && remcom_in_buffer[2] == '9') {
+
+		ks->pass_exception = 1;
+		remcom_in_buffer[0] = 'c';
+
+	} else if (remcom_in_buffer[1] == '1' && remcom_in_buffer[2] == '5') {
+
+		ks->pass_exception = 1;
+		remcom_in_buffer[0] = 'D';
+		dbg_remove_all_break();
+		kgdb_connected = 0;
+		return 1;
+
+	} else {
+		gdbstub_msg_write("KGDB only knows signal 9 (pass)"
+			" and 15 (pass and disconnect)\n"
+			"Executing a continue without signal passing\n", 0);
+		remcom_in_buffer[0] = 'c';
+	}
+
+	/* Indicate fall through */
+	return -1;
+}
+
+/*
+ * This function performs all gdbserial command processing.
+ *
+ * When a debugger is already connected, a 'T' stop reply carrying
+ * ks->signo and the id of the current thread is sent first.  Packets
+ * are then read and dispatched one at a time until
+ * kgdb_arch_handle_exception() returns a non-negative value or a
+ * detach ('D') / kill ('k') packet has been handed to it.
+ *
+ * Returns 1 if ks->pass_exception is set when the loop is left,
+ * otherwise 0.
+ */
+int gdb_serial_stub(struct kgdb_state *ks)
+{
+	int error = 0;
+	int tmp;
+
+	/* Clear the out buffer. */
+	memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
+
+	if (kgdb_connected) {
+		unsigned char thref[8];
+		char *ptr;
+
+		/* Reply to host that an exception has occurred */
+		ptr = remcom_out_buffer;
+		*ptr++ = 'T';
+		ptr = pack_hex_byte(ptr, ks->signo);
+		ptr += strlen(strcpy(ptr, "thread:"));
+		int_to_threadref(thref, shadow_pid(current->pid));
+		ptr = pack_threadid(ptr, thref);
+		*ptr++ = ';';
+		put_packet(remcom_out_buffer);
+	}
+
+	kgdb_usethread = kgdb_info[ks->cpu].task;
+	ks->kgdb_usethreadid = shadow_pid(kgdb_info[ks->cpu].task->pid);
+	ks->pass_exception = 0;
+
+	while (1) {
+		error = 0;
+
+		/* Clear the out buffer. */
+		memset(remcom_out_buffer, 0, sizeof(remcom_out_buffer));
+
+		get_packet(remcom_in_buffer);
+
+		switch (remcom_in_buffer[0]) {
+		case '?': /* gdbserial status */
+			gdb_cmd_status(ks);
+			break;
+		case 'g': /* return the value of the CPU registers */
+			gdb_cmd_getregs(ks);
+			break;
+		case 'G': /* set the value of the CPU registers - return OK */
+			gdb_cmd_setregs(ks);
+			break;
+		case 'm': /* mAA..AA,LLLL  Read LLLL bytes at address AA..AA */
+			gdb_cmd_memread(ks);
+			break;
+		case 'M': /* MAA..AA,LLLL: Write LLLL bytes at address AA..AA */
+			gdb_cmd_memwrite(ks);
+			break;
+		case 'X': /* XAA..AA,LLLL: Write LLLL bytes at address AA..AA */
+			gdb_cmd_binwrite(ks);
+			break;
+			/* kill or detach. KGDB should treat this like a
+			 * continue.
+			 */
+		case 'D': /* Debugger detach */
+		case 'k': /* Debugger detach via kill */
+			gdb_cmd_detachkill(ks);
+			goto default_handle;
+		case 'R': /* Reboot */
+			if (gdb_cmd_reboot(ks))
+				goto default_handle;
+			break;
+		case 'q': /* query command */
+			gdb_cmd_query(ks);
+			break;
+		case 'H': /* task related */
+			gdb_cmd_task(ks);
+			break;
+		case 'T': /* Query thread status */
+			gdb_cmd_thread(ks);
+			break;
+		case 'z': /* Break point remove */
+		case 'Z': /* Break point set */
+			gdb_cmd_break(ks);
+			break;
+		case 'C': /* Exception passing */
+			tmp = gdb_cmd_exception_pass(ks);
+			if (tmp > 0)
+				goto default_handle; /* C15: packet is now 'D' */
+			if (tmp == 0)
+				break;
+			/* Fall through on tmp < 0 */
+		case 'c': /* Continue packet */
+		case 's': /* Single step packet */
+			if (kgdb_contthread && kgdb_contthread != current) {
+				/* Can't switch threads in kgdb */
+				error_packet(remcom_out_buffer, -EINVAL);
+				break;
+			}
+			dbg_activate_sw_breakpoints();
+			/* Fall through to default processing */
+		default:
+default_handle:
+			error = kgdb_arch_handle_exception(ks->ex_vector,
+						ks->signo,
+						ks->err_code,
+						remcom_in_buffer,
+						remcom_out_buffer,
+						ks->linux_regs);
+			/*
+			 * Leave cmd processing on error, detach,
+			 * kill, continue, or single step.
+			 *
+			 * A non-negative return from the arch handler,
+			 * or a 'D'/'k' packet, ends the loop; a negative
+			 * return sends the reply and reads the next
+			 * packet.
+			 */
+			if (error >= 0 || remcom_in_buffer[0] == 'D' ||
+			    remcom_in_buffer[0] == 'k') {
+				error = 0;
+				goto kgdb_exit;
+			}
+
+		}
+
+		/* reply to the request */
+		put_packet(remcom_out_buffer);
+	}
+
+kgdb_exit:
+	if (ks->pass_exception)
+		error = 1;
+	return error;
+}
-- 
cgit v1.2.3


From 5d5314d6795f3c1c0f415348ff8c51f7de042b77 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:20 -0500
Subject: kdb: core for kgdb back end (1 of 2)

This patch contains only the kdb core.  Because the change set was
large, it was split.  The next patch in the series includes the
instrumentation into the core kernel which are mainly helper functions
for kdb.

This work is directly derived from kdb v4.4 found at:

ftp://oss.sgi.com/projects/kdb/download/v4.4/

The kdb internals have been re-organized to make them mostly platform
independent and to connect everything to the debug core which is used by
gdbstub (which has long been known as kgdb).

The original version of kdb was 58,000 lines worth of changes to
support x86.  From that implementation only the kdb shell, and basic
commands for memory access, runcontrol, lsmod, and dmesg were carried
forward.

This is a generic implementation which aims to cover all the current
architectures using the kgdb core: ppc, arm, x86, mips, sparc, sh and
blackfin.  More architectures can be added by implementing the
architecture specific kgdb functions.

[mort@sgi.com: Compile fix with hugepages enabled]
[mort@sgi.com: Clean breakpoint code renaming kdba_ -> kdb_]
[mort@sgi.com: fix new line after printing registers]
[mort@sgi.com: Remove the concept of global vs. local breakpoints]
[mort@sgi.com: Rework kdb_si_swapinfo to use more generic name]
[mort@sgi.com: fix the information dump macros, remove 'arch' from the names]
[sfr@canb.auug.org.au: include fixup to include linux/slab.h]

CC: linux-arch@vger.kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Signed-off-by: Martin Hicks <mort@sgi.com>
---
 include/linux/kdb.h             |  113 ++
 kernel/debug/Makefile           |    1 +
 kernel/debug/kdb/.gitignore     |    1 +
 kernel/debug/kdb/Makefile       |   24 +
 kernel/debug/kdb/kdb_bp.c       |  564 ++++++++
 kernel/debug/kdb/kdb_bt.c       |  208 +++
 kernel/debug/kdb/kdb_cmds       |   35 +
 kernel/debug/kdb/kdb_debugger.c |  159 +++
 kernel/debug/kdb/kdb_io.c       |  789 +++++++++++
 kernel/debug/kdb/kdb_main.c     | 2845 +++++++++++++++++++++++++++++++++++++++
 kernel/debug/kdb/kdb_private.h  |  301 +++++
 kernel/debug/kdb/kdb_support.c  |  927 +++++++++++++
 12 files changed, 5967 insertions(+)
 create mode 100644 include/linux/kdb.h
 create mode 100644 kernel/debug/kdb/.gitignore
 create mode 100644 kernel/debug/kdb/Makefile
 create mode 100644 kernel/debug/kdb/kdb_bp.c
 create mode 100644 kernel/debug/kdb/kdb_bt.c
 create mode 100644 kernel/debug/kdb/kdb_cmds
 create mode 100644 kernel/debug/kdb/kdb_debugger.c
 create mode 100644 kernel/debug/kdb/kdb_io.c
 create mode 100644 kernel/debug/kdb/kdb_main.c
 create mode 100644 kernel/debug/kdb/kdb_private.h
 create mode 100644 kernel/debug/kdb/kdb_support.c

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
new file mode 100644
index 000000000000..4d93790faec3
--- /dev/null
+++ b/include/linux/kdb.h
@@ -0,0 +1,113 @@
+#ifndef _KDB_H
+#define _KDB_H
+
+/*
+ * Kernel Debugger Architecture Independent Global Headers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2007 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Copyright (C) 2009 Jason Wessel <jason.wessel@windriver.com>
+ */
+
+#ifdef	CONFIG_KGDB_KDB
+#include <linux/init.h>
+#include <linux/sched.h>
+#include <asm/atomic.h>
+
+#define KDB_POLL_FUNC_MAX	5
+
+/*
+ * kdb_initial_cpu is initialized to -1, and is set to the cpu
+ * number whenever the kernel debugger is entered.
+ */
+extern int kdb_initial_cpu;
+extern atomic_t kdb_event;
+
+/*
+ * kdb_diemsg
+ *
+ *	Contains a pointer to the last string supplied to the
+ *	kernel 'die' panic function.
+ */
+extern const char *kdb_diemsg;
+
+#define KDB_FLAG_EARLYKDB	(1 << 0) /* set from boot parameter kdb=early */
+#define KDB_FLAG_CATASTROPHIC	(1 << 1) /* A catastrophic event has occurred */
+#define KDB_FLAG_CMD_INTERRUPT	(1 << 2) /* Previous command was interrupted */
+#define KDB_FLAG_NOIPI		(1 << 3) /* Do not send IPIs */
+#define KDB_FLAG_ONLY_DO_DUMP	(1 << 4) /* Only do a dump, used when
+					  * kdb is off */
+#define KDB_FLAG_NO_CONSOLE	(1 << 5) /* No console is available,
+					  * kdb is disabled */
+#define KDB_FLAG_NO_VT_CONSOLE	(1 << 6) /* No VT console is available, do
+					  * not use keyboard */
+#define KDB_FLAG_NO_I8042	(1 << 7) /* No i8042 chip is available, do
+					  * not use keyboard */
+
+extern int kdb_flags;	/* Global flags, see kdb_state for per cpu state */
+
+extern void kdb_save_flags(void);
+extern void kdb_restore_flags(void);
+
+#define KDB_FLAG(flag)		(kdb_flags & KDB_FLAG_##flag)
+#define KDB_FLAG_SET(flag)	((void)(kdb_flags |= KDB_FLAG_##flag))
+#define KDB_FLAG_CLEAR(flag)	((void)(kdb_flags &= ~KDB_FLAG_##flag))
+
+/*
+ * External entry point for the kernel debugger.  The pt_regs
+ * at the time of entry are supplied along with the reason for
+ * entry to the kernel debugger.
+ */
+
+typedef enum {
+	KDB_REASON_ENTER = 1,	/* KDB_ENTER() trap/fault - regs valid */
+	KDB_REASON_ENTER_SLAVE,	/* KDB_ENTER_SLAVE() trap/fault - regs valid */
+	KDB_REASON_BREAK,	/* Breakpoint inst. - regs valid */
+	KDB_REASON_DEBUG,	/* Debug Fault - regs valid */
+	KDB_REASON_OOPS,	/* Kernel Oops - regs valid */
+	KDB_REASON_SWITCH,	/* CPU switch - regs valid*/
+	KDB_REASON_KEYBOARD,	/* Keyboard entry - regs valid */
+	KDB_REASON_NMI,		/* Non-maskable interrupt; regs valid */
+	KDB_REASON_RECURSE,	/* Recursive entry to kdb;
+				 * regs probably valid */
+	KDB_REASON_SSTEP,	/* Single Step trap. - regs valid */
+} kdb_reason_t;
+
+extern int kdb_printf(const char *, ...)
+	    __attribute__ ((format (printf, 1, 2)));
+typedef int (*kdb_printf_t)(const char *, ...)
+	     __attribute__ ((format (printf, 1, 2)));
+
+extern void kdb_init(int level);
+
+/* Access to kdb specific polling devices */
+typedef int (*get_char_func)(void);
+extern get_char_func kdb_poll_funcs[];
+extern int kdb_get_kbd_char(void);
+
+/*
+ * Return the cpu recorded in the thread_info of task p, or 0 when that
+ * value is larger than num_possible_cpus().
+ *
+ * NOTE(review): the bound is compared with '>' so a value equal to
+ * num_possible_cpus() is passed through unchanged - confirm that this
+ * is intended.
+ */
+static inline
+int kdb_process_cpu(const struct task_struct *p)
+{
+	unsigned int cpu = task_thread_info(p)->cpu;
+
+	return (cpu > num_possible_cpus()) ? 0 : cpu;
+}
+
+/* kdb access to register set for stack dumping */
+extern struct pt_regs *kdb_current_regs;
+
+#else /* ! CONFIG_KGDB_KDB */
+#define kdb_printf(...)
+#define kdb_init(x)
+#endif	/* CONFIG_KGDB_KDB */
+enum {
+	KDB_NOT_INITIALIZED,
+	KDB_INIT_EARLY,
+	KDB_INIT_FULL,
+};
+#endif	/* !_KDB_H */
diff --git a/kernel/debug/Makefile b/kernel/debug/Makefile
index fd4dc6e7782c..a85edc339985 100644
--- a/kernel/debug/Makefile
+++ b/kernel/debug/Makefile
@@ -3,3 +3,4 @@
 #
 
 obj-$(CONFIG_KGDB) += debug_core.o gdbstub.o
+obj-$(CONFIG_KGDB_KDB) += kdb/
diff --git a/kernel/debug/kdb/.gitignore b/kernel/debug/kdb/.gitignore
new file mode 100644
index 000000000000..396d12eda9e8
--- /dev/null
+++ b/kernel/debug/kdb/.gitignore
@@ -0,0 +1 @@
+gen-kdb_cmds.c
diff --git a/kernel/debug/kdb/Makefile b/kernel/debug/kdb/Makefile
new file mode 100644
index 000000000000..d1e925eddbcd
--- /dev/null
+++ b/kernel/debug/kdb/Makefile
@@ -0,0 +1,24 @@
+# This file is subject to the terms and conditions of the GNU General Public
+# License.  See the file "COPYING" in the main directory of this archive
+# for more details.
+#
+# Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
+# Copyright (c) 2009 Wind River Systems, Inc. All Rights Reserved.
+#
+
+CCVERSION	:= $(shell $(CC) -v 2>&1 | sed -ne '$$p')
+obj-y := kdb_io.o kdb_main.o kdb_support.o kdb_bt.o gen-kdb_cmds.o kdb_bp.o kdb_debugger.o
+
+clean-files := gen-kdb_cmds.c
+
+quiet_cmd_gen-kdb = GENKDB  $@
+      cmd_gen-kdb = $(AWK) 'BEGIN {print "\#include <linux/stddef.h>"; print "\#include <linux/init.h>"} \
+		/^\#/{next} \
+		/^[ \t]*$$/{next} \
+		{gsub(/"/, "\\\"", $$0); \
+		  print "static __initdata char kdb_cmd" cmds++ "[] = \"" $$0 "\\n\";"} \
+		END {print "extern char *kdb_cmds[]; char __initdata *kdb_cmds[] = {"; for (i = 0; i < cmds; ++i) {print "  kdb_cmd" i ","}; print("  NULL\n};");}' \
+		$(filter-out %/Makefile,$^) > $@#
+
+$(obj)/gen-kdb_cmds.c:	$(src)/kdb_cmds $(src)/Makefile
+	$(call cmd,gen-kdb)
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
new file mode 100644
index 000000000000..75bd9b3ebbb7
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -0,0 +1,564 @@
+/*
+ * Kernel Debugger Architecture Independent Breakpoint Handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ */
+
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kdb.h>
+#include <linux/kgdb.h>
+#include <linux/smp.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include "kdb_private.h"
+
+/*
+ * Table of kdb_breakpoints
+ */
+kdb_bp_t kdb_breakpoints[KDB_MAXBPT];
+
+/*
+ * Record that kdb is single stepping by setting the DOING_SS state
+ * flag.  The regs argument is currently unused.
+ */
+static void kdb_setsinglestep(struct pt_regs *regs)
+{
+	KDB_STATE_SET(DOING_SS);
+}
+
+/*
+ * Display names for breakpoint types, indexed by bp_type.
+ */
+static char *kdb_rwtypes[] = {
+	"Instruction(i)",
+	"Instruction(Register)",
+	"Data Write",
+	"I/O",
+	"Data Access"
+};
+
+/*
+ * Return the display name for the type of breakpoint bp, or an empty
+ * string when bp_type does not index into kdb_rwtypes[].
+ *
+ * The upper bound is derived from the table itself so that it cannot
+ * drift out of step when an entry is added or removed.
+ */
+static char *kdb_bptype(kdb_bp_t *bp)
+{
+	if (bp->bp_type < 0 || bp->bp_type >= (int)ARRAY_SIZE(kdb_rwtypes))
+		return "";
+
+	return kdb_rwtypes[bp->bp_type];
+}
+
+/*
+ * Parse the optional trailing arguments of a hardware breakpoint
+ * command: a type keyword ("datar", "dataw" or "inst", compared
+ * case-insensitively) optionally followed by a length of at most 8.
+ *
+ * bp->bph_length is set to 1 unless a length is given.  bp->bp_type is
+ * only written when a type keyword is present.  *nextargp is advanced
+ * past the consumed arguments on success and left alone on failure.
+ *
+ * Returns 0 on success, KDB_ARGCOUNT for an unknown keyword or surplus
+ * arguments, KDB_BADLENGTH for a length above 8, or the diagnostic
+ * from kdbgetularg().
+ */
+static int kdb_parsebp(int argc, const char **argv, int *nextargp, kdb_bp_t *bp)
+{
+	const int end = argc + 1;	/* index one past the last argument */
+	int pos = *nextargp;
+	const char *kind;
+	unsigned long len;
+	int diag;
+
+	bp->bph_length = 1;
+
+	/* Nothing left to parse: keep the type chosen by the caller */
+	if (pos == end)
+		goto out;
+
+	kind = argv[pos++];
+	if (strnicmp(kind, "datar", sizeof("datar")) == 0)
+		bp->bp_type = BP_ACCESS_WATCHPOINT;
+	else if (strnicmp(kind, "dataw", sizeof("dataw")) == 0)
+		bp->bp_type = BP_WRITE_WATCHPOINT;
+	else if (strnicmp(kind, "inst", sizeof("inst")) == 0)
+		bp->bp_type = BP_HARDWARE_BREAKPOINT;
+	else
+		return KDB_ARGCOUNT;
+
+	/* Optional length */
+	if (pos != end) {
+		diag = kdbgetularg((char *)argv[pos++], &len);
+		if (diag)
+			return diag;
+
+		if (len > 8)
+			return KDB_BADLENGTH;
+
+		bp->bph_length = len;
+	}
+
+	/* Anything still unparsed is one argument too many */
+	if (pos != end)
+		return KDB_ARGCOUNT;
+
+out:
+	*nextargp = pos;
+	return 0;
+}
+
+/*
+ * Remove one installed breakpoint.  A bp_type of zero goes through
+ * dbg_remove_sw_break(), any other type through the arch
+ * remove_hw_breakpoint hook.
+ *
+ * Returns 1 if the breakpoint was not installed, otherwise the result
+ * of the removal; bp_installed is cleared only when that result is 0.
+ */
+static int _kdb_bp_remove(kdb_bp_t *bp)
+{
+	int rc;
+
+	if (!bp->bp_installed)
+		return 1;
+
+	if (bp->bp_type)
+		rc = arch_kgdb_ops.remove_hw_breakpoint(bp->bp_addr,
+			 bp->bph_length,
+			 bp->bp_type);
+	else
+		rc = dbg_remove_sw_break(bp->bp_addr);
+
+	if (rc == 0)
+		bp->bp_installed = 0;
+
+	return rc;
+}
+
+/*
+ * Called from _kdb_bp_install() for a breakpoint whose installation is
+ * being delayed: request a single step, clear bp_delay and set
+ * bp_delayed.
+ */
+static void kdb_handle_bp(struct pt_regs *regs, kdb_bp_t *bp)
+{
+	if (KDB_DEBUG(BP)) {
+		kdb_printf("regs->ip = 0x%lx\n", instruction_pointer(regs));
+	}
+
+	/* Setup single step */
+	kdb_setsinglestep(regs);
+
+	/* Reset delay attribute */
+	bp->bp_delayed = 1;
+	bp->bp_delay = 0;
+}
+
+/*
+ * Install one breakpoint, if it is not already installed.
+ *
+ * bp_delay is dropped first unless the SSBPT state is set.  A
+ * breakpoint that is still marked delayed is handed to kdb_handle_bp()
+ * instead of being installed.
+ *
+ * Returns 0 when the breakpoint was installed or deferred, 1 when it
+ * was already installed or the installation failed.
+ */
+static int _kdb_bp_install(struct pt_regs *regs, kdb_bp_t *bp)
+{
+	int rc;
+
+	if (KDB_DEBUG(BP))
+		kdb_printf("%s: bp_installed %d\n",
+			   __func__, bp->bp_installed);
+
+	if (!KDB_STATE(SSBPT))
+		bp->bp_delay = 0;
+
+	if (bp->bp_installed)
+		return 1;
+
+	if (bp->bp_delay || (bp->bp_delayed && KDB_STATE(DOING_SS))) {
+		if (KDB_DEBUG(BP))
+			kdb_printf("%s: delayed bp\n", __func__);
+		kdb_handle_bp(regs, bp);
+		return 0;
+	}
+
+	/* Type zero is a software breakpoint, anything else is hardware */
+	if (bp->bp_type)
+		rc = arch_kgdb_ops.set_hw_breakpoint(bp->bp_addr,
+			 bp->bph_length,
+			 bp->bp_type);
+	else
+		rc = dbg_set_sw_break(bp->bp_addr);
+
+	if (rc != 0) {
+		kdb_printf("%s: failed to set breakpoint at 0x%lx\n",
+			   __func__, bp->bp_addr);
+		return 1;
+	}
+
+	bp->bp_installed = 1;
+	return 0;
+}
+
+/*
+ * kdb_bp_install
+ *
+ *	Install every enabled entry of kdb_breakpoints prior to
+ *	returning from the kernel debugger.  Installing this late allows
+ *	breakpoints to be set upon functions that are used internally
+ *	by kdb, such as printk().  This function is only called once
+ *	per kdb session.
+ */
+void kdb_bp_install(struct pt_regs *regs)
+{
+	int bpno;
+
+	for (bpno = 0; bpno < KDB_MAXBPT; bpno++) {
+		kdb_bp_t *bp = kdb_breakpoints + bpno;
+
+		if (KDB_DEBUG(BP))
+			kdb_printf("%s: bp %d bp_enabled %d\n",
+				   __func__, bpno, bp->bp_enabled);
+
+		if (!bp->bp_enabled)
+			continue;
+
+		_kdb_bp_install(regs, bp);
+	}
+}
+
+/*
+ * kdb_bp_remove
+ *
+ *	Remove kdb_breakpoints upon entry to the kernel debugger.
+ *	The table is walked from the last slot down to the first and
+ *	only enabled entries are passed to _kdb_bp_remove().
+ *
+ * Parameters:
+ *	None.
+ * Returns:
+ *	None.
+ */
+void kdb_bp_remove(void)
+{
+	int bpno = KDB_MAXBPT;
+
+	while (--bpno >= 0) {
+		kdb_bp_t *bp = kdb_breakpoints + bpno;
+
+		if (KDB_DEBUG(BP))
+			kdb_printf("%s: bp %d bp_enabled %d\n",
+				   __func__, bpno, bp->bp_enabled);
+
+		if (!bp->bp_enabled)
+			continue;
+
+		_kdb_bp_remove(bp);
+	}
+}
+
+
+/*
+ * kdb_printbp
+ *
+ *	Internal function to format and print a breakpoint entry:
+ *	its type name, number, symbolic address, enabled state, raw
+ *	address, type value and installed flag.
+ *
+ * Parameters:
+ *	bp	Breakpoint table entry to print
+ *	i	Breakpoint number shown in the output
+ * Returns:
+ *	None.
+ */
+
+static void kdb_printbp(kdb_bp_t *bp, int i)
+{
+	kdb_printf("%s ", kdb_bptype(bp));
+	kdb_printf("BP #%d at ", i);
+	kdb_symbol_print(bp->bp_addr, NULL, KDB_SP_DEFAULT);
+
+	if (!bp->bp_enabled)
+		kdb_printf("\n    is disabled");
+	else
+		kdb_printf("\n    is enabled");
+
+	kdb_printf("\taddr at %016lx, hardtype=%d installed=%d\n",
+		   bp->bp_addr, bp->bp_type, bp->bp_installed);
+
+	kdb_printf("\n");
+}
+
+/*
+ * kdb_bp
+ *
+ *	Handle the bp commands.
+ *
+ *	bp  <addr-expression>
+ *	bph <addr-expression> [datar|dataw|inst [length]]
+ *
+ *	With no arguments the table of allocated breakpoints is printed.
+ *
+ * Parameters:
+ *	argc	Count of arguments in argv
+ *	argv	Space delimited command line arguments
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Zero for success, a kdb diagnostic if failure:
+ *	KDB_BADINT if the address evaluates to zero, KDB_TOOMANYBPT if
+ *	no table slot is free, KDB_DUPBPT if an allocated breakpoint
+ *	already uses the address, or whatever kdbgetaddrarg() or
+ *	kdb_parsebp() report.
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ *	bp	Set breakpoint on all cpus.  Only use hardware assist if need.
+ *	bph	Set breakpoint on all cpus.  Force hardware register
+ */
+
+static int kdb_bp(int argc, const char **argv)
+{
+	int i, bpno;
+	kdb_bp_t *bp, *bp_check;
+	int diag;
+	char *symname = NULL;
+	long offset = 0;
+	int nextarg;
+	kdb_bp_t template = {0};
+
+	if (argc == 0) {
+		/*
+		 * Display breakpoint table
+		 */
+		for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT;
+		     bpno++, bp++) {
+			if (bp->bp_free)
+				continue;
+			kdb_printbp(bp, bpno);
+		}
+
+		return 0;
+	}
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &template.bp_addr,
+			     &offset, &symname);
+	if (diag)
+		return diag;
+	if (!template.bp_addr)
+		return KDB_BADINT;
+
+	/*
+	 * Find an empty bp structure to allocate; bp and bpno are left
+	 * pointing at it for the allocation further down.
+	 */
+	for (bpno = 0, bp = kdb_breakpoints; bpno < KDB_MAXBPT; bpno++, bp++) {
+		if (bp->bp_free)
+			break;
+	}
+
+	if (bpno == KDB_MAXBPT)
+		return KDB_TOOMANYBPT;
+
+	if (strcmp(argv[0], "bph") == 0) {
+		template.bp_type = BP_HARDWARE_BREAKPOINT;
+		diag = kdb_parsebp(argc, argv, &nextarg, &template);
+		if (diag)
+			return diag;
+	} else {
+		template.bp_type = BP_BREAKPOINT;
+	}
+
+	/*
+	 * Check for clashing breakpoints.
+	 *
+	 * Note, in this design we can't have hardware breakpoints
+	 * enabled for both read and write on the same address.
+	 */
+	for (i = 0, bp_check = kdb_breakpoints; i < KDB_MAXBPT;
+	     i++, bp_check++) {
+		if (!bp_check->bp_free &&
+		    bp_check->bp_addr == template.bp_addr) {
+			kdb_printf("You already have a breakpoint at "
+				   kdb_bfd_vma_fmt0 "\n", template.bp_addr);
+			return KDB_DUPBPT;
+		}
+	}
+
+	template.bp_enabled = 1;
+
+	/*
+	 * Actually allocate the breakpoint found earlier
+	 */
+	*bp = template;
+	bp->bp_free = 0;
+
+	kdb_printbp(bp, bpno);
+
+	return 0;
+}
+
+/*
+ * kdb_bc
+ *
+ *	Handles the 'bc', 'be', and 'bd' commands
+ *
+ *	[bd|bc|be] <breakpoint-number>
+ *	[bd|bc|be] *
+ *
+ * Parameters:
+ *	argc	Count of arguments in argv
+ *	argv	Space delimited command line arguments
+ * Outputs:
+ *	None.
+ * Returns:
+ *	Zero for success, a kdb diagnostic for failure:
+ *	KDB_ARGCOUNT unless exactly one argument is given,
+ *	KDB_BPTNOTFOUND if no allocated table slot was selected, or
+ *	the diagnostic from kdbgetularg().
+ * Locking:
+ *	None.
+ * Remarks:
+ *	The argument is '*' for every slot, a value below KDB_MAXBPT
+ *	for a breakpoint number, or otherwise an address that is
+ *	matched against bp_addr of each slot.  Free slots are skipped.
+ *
+ *	bc	clears bp_enabled, zeroes bp_addr and frees the slot
+ *	be	sets bp_enabled
+ *	bd	clears bp_enabled if it was set
+ *
+ *	bc and bd also clear bp_delay and the SSBPT state when the
+ *	breakpoint had bp_delay set.
+ */
+static int kdb_bc(int argc, const char **argv)
+{
+	unsigned long addr;
+	kdb_bp_t *bp = NULL;
+	int lowbp = KDB_MAXBPT;
+	int highbp = 0;
+	int done = 0;
+	int i;
+	int diag = 0;
+
+	int cmd;			/* KDBCMD_B? */
+#define KDBCMD_BC	0
+#define KDBCMD_BE	1
+#define KDBCMD_BD	2
+
+	if (strcmp(argv[0], "be") == 0)
+		cmd = KDBCMD_BE;
+	else if (strcmp(argv[0], "bd") == 0)
+		cmd = KDBCMD_BD;
+	else
+		cmd = KDBCMD_BC;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	if (strcmp(argv[1], "*") == 0) {
+		lowbp = 0;
+		highbp = KDB_MAXBPT;
+	} else {
+		diag = kdbgetularg(argv[1], &addr);
+		if (diag)
+			return diag;
+
+		/*
+		 * For addresses less than the maximum breakpoint number,
+		 * assume that the breakpoint number is desired.
+		 */
+		if (addr < KDB_MAXBPT) {
+			bp = &kdb_breakpoints[addr];
+			lowbp = highbp = addr;
+			highbp++;
+		} else {
+			for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT;
+			    i++, bp++) {
+				if (bp->bp_addr == addr) {
+					lowbp = highbp = i;
+					highbp++;
+					break;
+				}
+			}
+		}
+	}
+
+	/*
+	 * Now operate on the set of breakpoints matching the input
+	 * criteria (either '*' for all, or an individual breakpoint).
+	 *
+	 * If nothing matched above, lowbp (KDB_MAXBPT) is not below
+	 * highbp (0), so the loop body never runs.
+	 */
+	for (bp = &kdb_breakpoints[lowbp], i = lowbp;
+	    i < highbp;
+	    i++, bp++) {
+		if (bp->bp_free)
+			continue;
+
+		done++;
+
+		switch (cmd) {
+		case KDBCMD_BC:
+			bp->bp_enabled = 0;
+
+			kdb_printf("Breakpoint %d at "
+				   kdb_bfd_vma_fmt " cleared\n",
+				   i, bp->bp_addr);
+
+			bp->bp_addr = 0;
+			bp->bp_free = 1;
+
+			break;
+		case KDBCMD_BE:
+			bp->bp_enabled = 1;
+
+			kdb_printf("Breakpoint %d at "
+				   kdb_bfd_vma_fmt " enabled",
+				   i, bp->bp_addr);
+
+			kdb_printf("\n");
+			break;
+		case KDBCMD_BD:
+			if (!bp->bp_enabled)
+				break;
+
+			bp->bp_enabled = 0;
+
+			kdb_printf("Breakpoint %d at "
+				   kdb_bfd_vma_fmt " disabled\n",
+				   i, bp->bp_addr);
+
+			break;
+		}
+		if (bp->bp_delay && (cmd == KDBCMD_BC || cmd == KDBCMD_BD)) {
+			bp->bp_delay = 0;
+			KDB_STATE_CLEAR(SSBPT);
+		}
+	}
+
+	return (!done) ? KDB_BPTNOTFOUND : 0;
+}
+
+/*
+ * kdb_ss
+ *
+ *	Process the 'ss' (Single Step) and 'ssb' (Single Step to Branch)
+ *	commands.
+ *
+ *	ss
+ *	ssb
+ *
+ * Parameters:
+ *	argc	Argument count
+ *	argv	Argument vector; argv[0] selects between ss and ssb
+ * Outputs:
+ *	None.
+ * Returns:
+ *	KDB_CMD_SS[B] for success, KDB_ARGCOUNT if any argument is given.
+ * Locking:
+ *	None.
+ * Remarks:
+ *
+ *	Set the arch specific option to trigger a debug trap after the next
+ *	instruction.
+ *
+ *	For 'ssb', set the trace flag in the debug trap handler
+ *	after printing the current insn and return directly without
+ *	invoking the kdb command processor, until a branch instruction
+ *	is encountered.
+ */
+
+static int kdb_ss(int argc, const char **argv)
+{
+	const int to_branch = (strcmp(argv[0], "ssb") == 0);
+
+	if (argc != 0)
+		return KDB_ARGCOUNT;
+
+	/*
+	 * Set trace flag and go.
+	 */
+	KDB_STATE_SET(DOING_SS);
+	if (!to_branch)
+		return KDB_CMD_SS;
+
+	KDB_STATE_SET(DOING_SSB);
+	return KDB_CMD_SSB;
+}
+
+/*
+ * Initialize the breakpoint table and register breakpoint commands.
+ *
+ * Every slot of kdb_breakpoints is zeroed and marked free.  'bp' and
+ * 'bl' share the kdb_bp handler, and 'bc', 'be' and 'bd' share kdb_bc.
+ * 'bph' is only registered when arch_kgdb_ops advertises
+ * KGDB_HW_BREAKPOINT.  'ss' and 'ssb' share kdb_ss.
+ */
+
+void __init kdb_initbptab(void)
+{
+	int i;
+	kdb_bp_t *bp;
+
+	/*
+	 * First time initialization.
+	 */
+	memset(&kdb_breakpoints, '\0', sizeof(kdb_breakpoints));
+
+	for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++)
+		bp->bp_free = 1;
+
+	kdb_register_repeat("bp", kdb_bp, "[<vaddr>]",
+		"Set/Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("bl", kdb_bp, "[<vaddr>]",
+		"Display breakpoints", 0, KDB_REPEAT_NO_ARGS);
+	if (arch_kgdb_ops.flags & KGDB_HW_BREAKPOINT)
+		kdb_register_repeat("bph", kdb_bp, "[<vaddr>]",
+		"[datar [length]|dataw [length]]   Set hw brk", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("bc", kdb_bc, "<bpnum>",
+		"Clear Breakpoint", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("be", kdb_bc, "<bpnum>",
+		"Enable Breakpoint", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("bd", kdb_bc, "<bpnum>",
+		"Disable Breakpoint", 0, KDB_REPEAT_NONE);
+
+	kdb_register_repeat("ss", kdb_ss, "",
+		"Single Step", 1, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("ssb", kdb_ss, "",
+		"Single step to branch/call", 0, KDB_REPEAT_NO_ARGS);
+	/*
+	 * Architecture dependent initialization.
+	 * (Nothing is done here at present.)
+	 */
+}
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
new file mode 100644
index 000000000000..483fa4e7aaac
--- /dev/null
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -0,0 +1,208 @@
+/*
+ * Kernel Debugger Architecture Independent Stack Traceback
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/kdb.h>
+#include <linux/nmi.h>
+#include <asm/system.h>
+#include "kdb_private.h"
+
+
+/*
+ * Print the stack of task p with console_loglevel temporarily raised
+ * to 15.  p also becomes the kdb current task.
+ *
+ * A non-NULL addr is passed to show_stack() as the starting point.
+ * Without one, x86 starts from the sp of kdb_current_regs when those
+ * are available; every other case passes NULL.  kdb_current_regs is
+ * deliberately read only after kdb_set_current_task() has run, as in
+ * the original statement order.
+ */
+static void kdb_show_stack(struct task_struct *p, void *addr)
+{
+	const int saved_loglevel = console_loglevel;
+	void *start = addr;
+
+	console_loglevel = 15;
+	kdb_set_current_task(p);
+#ifdef CONFIG_X86
+	if (!start && kdb_current_regs)
+		start = &kdb_current_regs->sp;
+#endif
+	show_stack(p, start);
+	console_loglevel = saved_loglevel;
+}
+
+/*
+ * kdb_bt
+ *
+ *	This function implements the 'bt' command.  Print a stack
+ *	traceback.
+ *
+ *	bt [<address-expression>]	(addr-exp is for alternate stacks)
+ *	btp <pid>			Kernel stack for <pid>
+ *	btt <address-expression>	Kernel stack for task structure at
+ *					<address-expression>
+ *	bta [DRSTCZEUIMA]		All useful processes, optionally
+ *					filtered by state
+ *	btc [<cpu>]			The current process on one cpu,
+ *					default is all cpus
+ *
+ *	bt <address-expression> refers to a address on the stack, that location
+ *	is assumed to contain a return address.
+ *
+ *	btt <address-expression> refers to the address of a struct task.
+ *
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ * Outputs:
+ *	None.
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ * Locking:
+ *	none.
+ * Remarks:
+ *	Backtrack works best when the code uses frame pointers.  But even
+ *	without frame pointers we should get a reasonable trace.
+ *
+ *	mds comes in handy when examining the stack to do a manual traceback or
+ *	to get a starting point for bt <address-expression>.
+ */
+
+/*
+ * Print the stack traceback of one task for the bt family of commands.
+ *
+ * Returns KDB_BADADDR if the first or last byte of *p cannot be read
+ * through kdb_getarea(), 1 if the user answered 'q' at the prompt
+ * (shown only when btaprompt is non-zero), and 0 otherwise - including
+ * when kdb_task_state() rejects the task for the given mask.
+ * argcount is currently unused.
+ */
+static int
+kdb_bt1(struct task_struct *p, unsigned long mask,
+	int argcount, int btaprompt)
+{
+	char reply[2];
+
+	/* Probe both ends of the task structure before using it */
+	if (kdb_getarea(reply[0], (unsigned long)p))
+		return KDB_BADADDR;
+	if (kdb_getarea(reply[0], (unsigned long)(p+1)-1))
+		return KDB_BADADDR;
+
+	if (!kdb_task_state(p, mask))
+		return 0;
+
+	kdb_printf("Stack traceback for pid %d\n", p->pid);
+	kdb_ps1(p);
+	kdb_show_stack(p, NULL);
+
+	if (btaprompt) {
+		kdb_getstr(reply, sizeof(reply),
+			   "Enter <q> to end, <cr> to continue:");
+		if (reply[0] == 'q') {
+			kdb_printf("\n");
+			return 1;
+		}
+	}
+
+	touch_nmi_watchdog();
+	return 0;
+}
+
+int
+kdb_bt(int argc, const char **argv)
+{
+	int diag;
+	int argcount = 5;	/* default, BTARGS is looked up below */
+	int btaprompt = 1;	/* default, BTAPROMPT is looked up below */
+	int nextarg;
+	unsigned long addr;
+	long offset;
+
+	kdbgetintenv("BTARGS", &argcount);	/* Arguments to print */
+	kdbgetintenv("BTAPROMPT", &btaprompt);	/* Prompt after each
+						 * proc in bta */
+
+	if (strcmp(argv[0], "bta") == 0) {
+		struct task_struct *g, *p;
+		unsigned long cpu;
+		unsigned long mask = kdb_task_state_string(argc ? argv[1] :
+							   NULL);
+		if (argc == 0)
+			kdb_ps_suppressed();
+		/* Run the active tasks first; a non-zero return from
+		 * kdb_bt1 (bad task address or 'q' at the prompt) ends
+		 * the whole listing and still reports success */
+		for_each_online_cpu(cpu) {
+			p = kdb_curr_task(cpu);
+			if (kdb_bt1(p, mask, argcount, btaprompt))
+				return 0;
+		}
+		/* Now the inactive tasks; tasks for which task_curr()
+		 * is true were covered by the loop above */
+		kdb_do_each_thread(g, p) {
+			if (task_curr(p))
+				continue;
+			if (kdb_bt1(p, mask, argcount, btaprompt))
+				return 0;
+		} kdb_while_each_thread(g, p);
+	} else if (strcmp(argv[0], "btp") == 0) {
+		struct task_struct *p;
+		unsigned long pid;
+		if (argc != 1)
+			return KDB_ARGCOUNT;
+		diag = kdbgetularg((char *)argv[1], &pid);
+		if (diag)
+			return diag;
+		p = find_task_by_pid_ns(pid, &init_pid_ns);
+		if (p) {
+			kdb_set_current_task(p);
+			return kdb_bt1(p, ~0UL, argcount, 0);
+		}
+		kdb_printf("No process with pid == %ld found\n", pid);
+		return 0;
+	} else if (strcmp(argv[0], "btt") == 0) {
+		if (argc != 1)
+			return KDB_ARGCOUNT;
+		diag = kdbgetularg((char *)argv[1], &addr);
+		if (diag)
+			return diag;
+		kdb_set_current_task((struct task_struct *)addr);
+		return kdb_bt1((struct task_struct *)addr, ~0UL, argcount, 0);
+	} else if (strcmp(argv[0], "btc") == 0) {
+		unsigned long cpu = ~0;	/* ~0 means no cpu was given */
+		struct task_struct *save_current_task = kdb_current_task;
+		char buf[80];
+		if (argc > 1)
+			return KDB_ARGCOUNT;
+		if (argc == 1) {
+			diag = kdbgetularg((char *)argv[1], &cpu);
+			if (diag)
+				return diag;
+		}
+		/* Recursive use of kdb_parse, do not use argv after
+		 * this point */
+		argv = NULL;
+		if (cpu != ~0) {
+			if (cpu >= num_possible_cpus() || !cpu_online(cpu)) {
+				kdb_printf("no process for cpu %ld\n", cpu);
+				return 0;
+			}
+			sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+			kdb_parse(buf);
+			return 0; /* NOTE(review): unlike the all-cpus path
+				   * below, save_current_task is not
+				   * restored here - confirm intended */
+		}
+		kdb_printf("btc: cpu status: ");
+		kdb_parse("cpu\n");
+		for_each_online_cpu(cpu) {
+			sprintf(buf, "btt 0x%p\n", KDB_TSK(cpu));
+			kdb_parse(buf);
+			touch_nmi_watchdog();
+		}
+		kdb_set_current_task(save_current_task);
+		return 0;
+	} else {
+		if (argc) {
+			nextarg = 1;
+			diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
+					     &offset, NULL);
+			if (diag)
+				return diag;
+			kdb_show_stack(kdb_current_task, (void *)addr);
+			return 0;
+		} else {
+			return kdb_bt1(kdb_current_task, ~0UL, argcount, 0);
+		}
+	}
+
+	/* NOTREACHED except from the bta branch, which falls out of
+	 * its loops to here */
+	return 0;
+}
diff --git a/kernel/debug/kdb/kdb_cmds b/kernel/debug/kdb/kdb_cmds
new file mode 100644
index 000000000000..56c88e4db309
--- /dev/null
+++ b/kernel/debug/kdb/kdb_cmds
@@ -0,0 +1,35 @@
+# Initial commands for kdb, alter to suit your needs.
+# These commands are executed in kdb_init() context, no SMP, no
+# processes.  Commands that require process data (including stack or
+# registers) are not reliable this early.  set and bp commands should
+# be safe.  Global breakpoint commands affect each cpu as it is booted.
+
+# Standard debugging information for first level support, just type
+# dumpcommon, dumpall or dumpcpu at the kdb prompt.
+
+defcmd dumpcommon "" "Common kdb debugging"
+  set BTAPROMPT 0
+  set LINES 10000
+  -summary
+  -cpu
+  -ps
+  -dmesg 600
+  -bt
+endefcmd
+
+defcmd dumpall "" "First line debugging"
+  set BTSYMARG 1
+  set BTARGS 9
+  pid R
+  -dumpcommon
+  -bta
+endefcmd
+
+defcmd dumpcpu "" "Same as dumpall but only tasks on cpus"
+  set BTSYMARG 1
+  set BTARGS 9
+  pid R
+  -dumpcommon
+  -btc
+endefcmd
+
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
new file mode 100644
index 000000000000..f024c0c4b8c4
--- /dev/null
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -0,0 +1,159 @@
+/*
+ * Created by: Jason Wessel <jason.wessel@windriver.com>
+ *
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/kdebug.h>
+#include "kdb_private.h"
+#include "../debug_core.h"
+
+/*
+ * KDB interface to KGDB internals: NULL-terminated list of input pollers
+ */
+get_char_func kdb_poll_funcs[] = {
+	dbg_io_get_char,
+	NULL,
+};
+
+int kdb_stub(struct kgdb_state *ks)
+{
+	int error = 0;
+	kdb_bp_t *bp;
+	unsigned long addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
+	kdb_reason_t reason = KDB_REASON_OOPS;
+	kdb_dbtrap_t db_result = KDB_DB_NOBPT;
+	int i;
+
+	if (KDB_STATE(REENTRY)) {
+		reason = KDB_REASON_SWITCH;
+		KDB_STATE_CLEAR(REENTRY);
+		addr = instruction_pointer(ks->linux_regs);
+	}
+	ks->pass_exception = 0;
+	if (atomic_read(&kgdb_setting_breakpoint))
+		reason = KDB_REASON_KEYBOARD;
+
+	for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
+		if ((bp->bp_enabled) && (bp->bp_addr == addr)) {
+			reason = KDB_REASON_BREAK;
+			db_result = KDB_DB_BPT;
+			if (addr != instruction_pointer(ks->linux_regs))
+				kgdb_arch_set_pc(ks->linux_regs, addr);
+			break;
+		}
+	}
+	if (reason == KDB_REASON_BREAK || reason == KDB_REASON_SWITCH) {
+		for (i = 0, bp = kdb_breakpoints; i < KDB_MAXBPT; i++, bp++) {
+			if (bp->bp_free)
+				continue;
+			if (bp->bp_addr == addr) {
+				bp->bp_delay = 1;
+				bp->bp_delayed = 1;
+	/*
+	 * SSBPT is set when the kernel debugger must single step a
+	 * task in order to re-establish an instruction breakpoint
+	 * which uses the instruction replacement mechanism.  It is
+	 * cleared by any action that removes the need to single-step
+	 * the breakpoint.
+	 */
+				reason = KDB_REASON_BREAK;
+				db_result = KDB_DB_BPT;
+				KDB_STATE_SET(SSBPT);
+				break;
+			}
+		}
+	}
+
+	if (reason != KDB_REASON_BREAK && ks->ex_vector == 0 &&
+		ks->signo == SIGTRAP) {
+		reason = KDB_REASON_SSTEP;
+		db_result = KDB_DB_BPT;
+	}
+	/* Set initial kdb state variables */
+	KDB_STATE_CLEAR(KGDB_TRANS);
+	kdb_initial_cpu = ks->cpu;
+	kdb_current_task = kgdb_info[ks->cpu].task;
+	kdb_current_regs = kgdb_info[ks->cpu].debuggerinfo;
+	/* Remove any breakpoints as needed by kdb and clear single step */
+	kdb_bp_remove();
+	KDB_STATE_CLEAR(DOING_SS);
+	KDB_STATE_CLEAR(DOING_SSB);
+	/* zero out any offline cpu data */
+	for_each_present_cpu(i) {
+		if (!cpu_online(i)) {
+			kgdb_info[i].debuggerinfo = NULL;
+			kgdb_info[i].task = NULL;
+		}
+	}
+	if (ks->err_code == DIE_OOPS || reason == KDB_REASON_OOPS) {
+		ks->pass_exception = 1;
+		KDB_FLAG_SET(CATASTROPHIC);
+	}
+	kdb_initial_cpu = ks->cpu;
+	if (KDB_STATE(SSBPT) && reason == KDB_REASON_SSTEP) {
+		KDB_STATE_CLEAR(SSBPT);
+		KDB_STATE_CLEAR(DOING_SS);
+	} else {
+		/* Start kdb main loop */
+		error = kdb_main_loop(KDB_REASON_ENTER, reason,
+				      ks->err_code, db_result, ks->linux_regs);
+	}
+	/*
+	 * Upon exit from the kdb main loop setup break points and restart
+	 * the system based on the requested continue state
+	 */
+	kdb_initial_cpu = -1;
+	kdb_current_task = NULL;
+	kdb_current_regs = NULL;
+	kdbnearsym_cleanup();
+	if (error == KDB_CMD_KGDB) {
+		if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
+	/*
+	 * This is the interface glue which allows kdb to transition
+	 * into the gdb stub.  In order to do this the '?' or '' gdb
+	 * serial packet response is processed here, and then control
+	 * is passed to the gdbstub.
+	 */
+			if (KDB_STATE(DOING_KGDB))
+				gdbstub_state(ks, "?");
+			else
+				gdbstub_state(ks, "");
+			KDB_STATE_CLEAR(DOING_KGDB);
+			KDB_STATE_CLEAR(DOING_KGDB2);
+		}
+		return DBG_PASS_EVENT;
+	}
+	kdb_bp_install(ks->linux_regs);
+	dbg_activate_sw_breakpoints();
+	/* Set the exit state to a single step or a continue */
+	if (KDB_STATE(DOING_SS))
+		gdbstub_state(ks, "s");
+	else
+		gdbstub_state(ks, "c");
+
+	KDB_FLAG_CLEAR(CATASTROPHIC);
+
+	/* Invoke arch specific exception handling prior to system resume */
+	kgdb_info[ks->cpu].ret_state = gdbstub_state(ks, "e");
+	if (ks->pass_exception)
+		kgdb_info[ks->cpu].ret_state = 1;
+	if (error == KDB_CMD_CPU) {
+		KDB_STATE_SET(REENTRY);
+		/*
+		 * Force clear the single step bit because kdb emulates this
+		 * differently vs the gdbstub
+		 */
+		kgdb_single_step = 0;
+		dbg_deactivate_sw_breakpoints();
+		return DBG_SWITCH_CPU_EVENT;
+	}
+	return kgdb_info[ks->cpu].ret_state;
+}
+
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
new file mode 100644
index 000000000000..9e3cec7a925c
--- /dev/null
+++ b/kernel/debug/kdb/kdb_io.c
@@ -0,0 +1,789 @@
+/*
+ * Kernel Debugger Architecture Independent Console I/O handler
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2006 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/ctype.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/kdev_t.h>
+#include <linux/console.h>
+#include <linux/string.h>
+#include <linux/sched.h>
+#include <linux/smp.h>
+#include <linux/nmi.h>
+#include <linux/delay.h>
+#include <linux/kdb.h>
+#include <linux/kallsyms.h>
+#include "kdb_private.h"
+
+#define CMD_BUFLEN 256
+char kdb_prompt_str[CMD_BUFLEN];
+
+
+static void kgdb_transition_check(char *buffer)
+{
+	size_t typed = strlen(buffer);
+	if (!strncmp(buffer, "$?#3f", typed) ||
+	    !strncmp(buffer, "$qSupported#37", typed) ||
+	    !strncmp(buffer, "+$qSupported#37", typed))
+		return;	/* still a prefix of a gdb packet: keep it hidden */
+	KDB_STATE_SET(KGDB_TRANS);
+	kdb_printf("%s", buffer);
+}
+
+static int kdb_read_get_key(char *buffer, size_t bufsize)
+{
+#define ESCAPE_UDELAY 1000
+#define ESCAPE_DELAY (2*1000000/ESCAPE_UDELAY) /* 2 seconds worth of udelays */
+	char escape_data[5];	/* longest vt100 escape sequence is 4 bytes */
+	char *ped = escape_data;
+	int escape_delay = 0;	/* 0: idle, 2: start replay, 1: replaying */
+	get_char_func *f, *f_escape = NULL;
+	int key;
+
+	for (f = &kdb_poll_funcs[0]; ; ++f) {
+		if (*f == NULL) {
+			/* Reset NMI watchdog once per poll loop */
+			touch_nmi_watchdog();
+			f = &kdb_poll_funcs[0];
+		}
+		if (escape_delay == 2) {
+			/* terminate the collected bytes and rewind for replay */
+			*ped = '\0';
+			ped = escape_data;
+			--escape_delay;
+		}
+		if (escape_delay == 1) {
+			/* hand back the buffered bytes one per call */
+			key = *ped++;
+			if (!*ped)
+				--escape_delay;
+			break;
+		}
+		key = (*f)();
+		if (key == -1) {
+			/* nothing to read; count down while inside an escape */
+			if (escape_delay) {
+				udelay(ESCAPE_UDELAY);
+				--escape_delay;
+			}
+			continue;
+		}
+		if (bufsize <= 2) {
+			/* single key mode: store the key and return at once */
+			if (key == '\r')
+				key = '\n';
+			*buffer++ = key;
+			*buffer = '\0';
+			return -1;
+		}
+		if (escape_delay == 0 && key == '\e') {
+			escape_delay = ESCAPE_DELAY;
+			ped = escape_data;
+			f_escape = f;
+		}
+		if (escape_delay) {
+			*ped++ = key;
+			if (f_escape != f) {
+				/* key came from another poll function */
+				escape_delay = 2;
+				continue;
+			}
+			if (ped - escape_data == 1) {
+				/* \e */
+				continue;
+			} else if (ped - escape_data == 2) {
+				/* \e<something> */
+				if (key != '[')
+					escape_delay = 2;
+				continue;
+			} else if (ped - escape_data == 3) {
+				/* \e[<something> */
+				int mapkey = 0;	/* 0: unknown, -1: need more */
+				switch (key) {
+				case 'A': /* \e[A, up arrow */
+					mapkey = 16;
+					break;
+				case 'B': /* \e[B, down arrow */
+					mapkey = 14;
+					break;
+				case 'C': /* \e[C, right arrow */
+					mapkey = 6;
+					break;
+				case 'D': /* \e[D, left arrow */
+					mapkey = 2;
+					break;
+				case '1': /* dropthrough */
+				case '3': /* dropthrough */
+				/* \e[<1,3,4>], may be home, del, end */
+				case '4':
+					mapkey = -1;
+					break;
+				}
+				if (mapkey != -1) {
+					if (mapkey > 0) {
+						escape_data[0] = mapkey;
+						escape_data[1] = '\0';
+					}
+					escape_delay = 2;
+				}
+				continue;
+			} else if (ped - escape_data == 4) {
+				/* \e[<1,3,4><something> */
+				int mapkey = 0;	/* stays 0 for unknown input */
+				if (key == '~') {
+					switch (escape_data[2]) {
+					case '1': /* \e[1~, home */
+						mapkey = 1;
+						break;
+					case '3': /* \e[3~, del */
+						mapkey = 4;
+						break;
+					case '4': /* \e[4~, end */
+						mapkey = 5;
+						break;
+					}
+				}
+				if (mapkey > 0) {
+					escape_data[0] = mapkey;
+					escape_data[1] = '\0';
+				}
+				escape_delay = 2;
+				continue;
+			}
+		}
+		break;	/* A key to process */
+	}
+	return key;
+}
+
+/*
+ * kdb_read
+ *
+ *	This function reads a string of characters, terminated by
+ *	a newline, or by reaching the end of the supplied buffer,
+ *	from the current kernel debugger console device.
+ * Parameters:
+ *	buffer	- Address of character buffer to receive input characters.
+ *	bufsize - size, in bytes, of the character buffer
+ * Returns:
+ *	Returns a pointer to the received string (newline terminated),
+ *	or, for the Up/Down keys, a pointer to that key code stored
+ *	at the end of the line.
+ * Locking:
+ *	No locks are required to be held upon entry to this
+ *	function.  It is not reentrant - it relies on the fact
+ *	that kdb is running on only one "master debug" cpu.
+ * Remarks:
+ *
+ * The buffer size must be >= 2.  A buffer size of 2 means that the caller only
+ * wants a single key.
+ *
+ * An escape key could be the start of a vt100 control sequence such as \e[D
+ * (left arrow) or it could be a character in its own right.  The standard
+ * method for detecting the difference is to wait for 2 seconds to see if there
+ * are any other characters.  kdb is complicated by the lack of a timer service
+ * (interrupts are off), by multiple input sources and by the need to sometimes
+ * return after just one key.  Escape sequence processing has to be done as
+ * states in the polling loop.
+ */
+
+static char *kdb_read(char *buffer, size_t bufsize)
+{
+	char *cp = buffer;
+	char *bufend = buffer+bufsize-2;	/* Reserve space for newline
+						 * and null byte */
+	char *lastchar;
+	char *p_tmp;
+	char tmp;
+	static char tmpbuffer[CMD_BUFLEN];
+	int len = strlen(buffer);
+	int len_tmp;
+	int tab = 0;
+	int count;
+	int i;
+	int diag, dtab_count;
+	int key;
+
+
+	diag = kdbgetintenv("DTABCOUNT", &dtab_count);
+	if (diag)
+		dtab_count = 30;
+
+	if (len > 0) {
+		cp += len;
+		if (*(buffer+len-1) == '\n')
+			cp--;
+	}
+
+	lastchar = cp;
+	*cp = '\0';
+	kdb_printf("%s", buffer);
+poll_again:
+	key = kdb_read_get_key(buffer, bufsize);
+	if (key == -1)
+		return buffer;
+	if (key != 9)
+		tab = 0;
+	switch (key) {
+	case 8: /* backspace */
+		if (cp > buffer) {
+			if (cp < lastchar) {
+				memcpy(tmpbuffer, cp, lastchar - cp);
+				memcpy(cp-1, tmpbuffer, lastchar - cp);
+			}
+			*(--lastchar) = '\0';
+			--cp;
+			kdb_printf("\b%s \r", cp);
+			tmp = *cp;
+			*cp = '\0';
+			kdb_printf(kdb_prompt_str);
+			kdb_printf("%s", buffer);
+			*cp = tmp;
+		}
+		break;
+	case 13: /* enter */
+		*lastchar++ = '\n';
+		*lastchar++ = '\0';
+		kdb_printf("\n");
+		return buffer;
+	case 4: /* Del */
+		if (cp < lastchar) {
+			memcpy(tmpbuffer, cp+1, lastchar - cp - 1);
+			memcpy(cp, tmpbuffer, lastchar - cp - 1);
+			*(--lastchar) = '\0';
+			kdb_printf("%s \r", cp);
+			tmp = *cp;
+			*cp = '\0';
+			kdb_printf(kdb_prompt_str);
+			kdb_printf("%s", buffer);
+			*cp = tmp;
+		}
+		break;
+	case 1: /* Home */
+		if (cp > buffer) {
+			kdb_printf("\r");
+			kdb_printf(kdb_prompt_str);
+			cp = buffer;
+		}
+		break;
+	case 5: /* End */
+		if (cp < lastchar) {
+			kdb_printf("%s", cp);
+			cp = lastchar;
+		}
+		break;
+	case 2: /* Left */
+		if (cp > buffer) {
+			kdb_printf("\b");
+			--cp;
+		}
+		break;
+	case 14: /* Down */
+		len_tmp = min_t(size_t, sizeof(tmpbuffer) - 1,
+				strlen(kdb_prompt_str) + (lastchar-buffer));
+		memset(tmpbuffer, ' ', len_tmp);  /* clamped: no overflow */
+		*(tmpbuffer + len_tmp) = '\0';
+		kdb_printf("\r%s\r", tmpbuffer);
+		*lastchar = (char)key;
+		*(lastchar+1) = '\0';
+		return lastchar;
+	case 6: /* Right */
+		if (cp < lastchar) {
+			kdb_printf("%c", *cp);
+			++cp;
+		}
+		break;
+	case 16: /* Up */
+		len_tmp = min_t(size_t, sizeof(tmpbuffer) - 1,
+				strlen(kdb_prompt_str) + (lastchar-buffer));
+		memset(tmpbuffer, ' ', len_tmp);  /* clamped: no overflow */
+		*(tmpbuffer + len_tmp) = '\0';
+		kdb_printf("\r%s\r", tmpbuffer);
+		*lastchar = (char)key;
+		*(lastchar+1) = '\0';
+		return lastchar;
+	case 9: /* Tab */
+		if (tab < 2)
+			++tab;
+		p_tmp = buffer;
+		while (*p_tmp == ' ')
+			p_tmp++;
+		if (p_tmp > cp)
+			break;
+		memcpy(tmpbuffer, p_tmp, cp-p_tmp);
+		*(tmpbuffer + (cp-p_tmp)) = '\0';
+		p_tmp = strrchr(tmpbuffer, ' ');
+		if (p_tmp)
+			++p_tmp;
+		else
+			p_tmp = tmpbuffer;
+		len = strlen(p_tmp);
+		count = kallsyms_symbol_complete(p_tmp,
+						 sizeof(tmpbuffer) -
+						 (p_tmp - tmpbuffer));
+		if (tab == 2 && count > 0) {
+			kdb_printf("\n%d symbols are found.", count);
+			if (count > dtab_count) {
+				count = dtab_count;
+				kdb_printf(" But only first %d symbols will"
+					   " be printed.\nYou can change the"
+					   " environment variable DTABCOUNT.",
+					   count);
+			}
+			kdb_printf("\n");
+			for (i = 0; i < count; i++) {
+				if (kallsyms_symbol_next(p_tmp, i) < 0)
+					break;
+				kdb_printf("%s ", p_tmp);
+				*(p_tmp + len) = '\0';
+			}
+			if (i >= dtab_count)
+				kdb_printf("...");
+			kdb_printf("\n");
+			kdb_printf(kdb_prompt_str);
+			kdb_printf("%s", buffer);
+		} else if (tab != 2 && count > 0) {
+			len_tmp = strlen(p_tmp);
+			strncpy(p_tmp+len_tmp, cp, lastchar-cp+1);
+			len_tmp = strlen(p_tmp);
+			strncpy(cp, p_tmp+len, len_tmp-len + 1);
+			len = len_tmp - len;
+			kdb_printf("%s", cp);
+			cp += len;
+			lastchar += len;
+		}
+		kdb_nextline = 1; /* reset output line number */
+		break;
+	default:
+		if (key >= 32 && lastchar < bufend) {
+			if (cp < lastchar) {
+				memcpy(tmpbuffer, cp, lastchar - cp);
+				memcpy(cp+1, tmpbuffer, lastchar - cp);
+				*++lastchar = '\0';
+				*cp = key;
+				kdb_printf("%s\r", cp);
+				++cp;
+				tmp = *cp;
+				*cp = '\0';
+				kdb_printf(kdb_prompt_str);
+				kdb_printf("%s", buffer);
+				*cp = tmp;
+			} else {
+				*++lastchar = '\0';
+				*cp++ = key;
+				/* The kgdb transition check will hide
+				 * printed characters if we think that
+				 * kgdb is connecting, until the check
+				 * fails */
+				if (!KDB_STATE(KGDB_TRANS))
+					kgdb_transition_check(buffer);
+				else
+					kdb_printf("%c", key);
+			}
+			/* Special escape to kgdb */
+			if (lastchar - buffer >= 5 &&
+			    strcmp(lastchar - 5, "$?#3f") == 0) {
+				strcpy(buffer, "kgdb");
+				KDB_STATE_SET(DOING_KGDB);
+				return buffer;
+			}
+			if (lastchar - buffer >= 14 &&
+			    strcmp(lastchar - 14, "$qSupported#37") == 0) {
+				strcpy(buffer, "kgdb");
+				KDB_STATE_SET(DOING_KGDB2);
+				return buffer;
+			}
+		}
+		break;
+	}
+	goto poll_again;
+}
+
+/*
+ * kdb_getstr
+ *
+ *	Print the prompt string and read a command from the
+ *	input device.
+ *
+ * Parameters:
+ *	buffer	Address of buffer to receive command
+ *	bufsize Size of buffer in bytes
+ *	prompt	Prompt string; copied, truncated to fit, to kdb_prompt_str
+ * Returns:
+ *	Pointer to command buffer.
+ * Locking:
+ *	None.
+ * Remarks:
+ *	For SMP kernels, the processor number will be
+ *	substituted for %d, %x or %o in the prompt.
+ */
+
+char *kdb_getstr(char *buffer, size_t bufsize, char *prompt)
+{
+	if (prompt && kdb_prompt_str != prompt)
+		strlcpy(kdb_prompt_str, prompt, CMD_BUFLEN); /* NUL-terminates */
+	kdb_printf(kdb_prompt_str);
+	kdb_nextline = 1;	/* Prompt and input resets line number */
+	return kdb_read(buffer, bufsize);
+}
+
+/*
+ * kdb_input_flush
+ *
+ *	Get rid of any buffered console input.
+ *
+ * Parameters:
+ *	none
+ * Returns:
+ *	nothing
+ * Locking:
+ *	none
+ * Remarks:
+ *	Call this function whenever you want to flush input.  If there is any
+ *	outstanding input, it ignores all characters until there has been no
+ *	data for approximately 1ms.
+ */
+
+static void kdb_input_flush(void)
+{
+	get_char_func *f;
+	int res;
+	int flush_delay = 1;
+	while (flush_delay) {
+		flush_delay--;
+empty:
+		touch_nmi_watchdog();
+		for (f = &kdb_poll_funcs[0]; *f; ++f) {
+			res = (*f)();
+			if (res != -1) {
+				flush_delay = 1;	/* got input: poll again */
+				goto empty;
+			}
+		}
+		if (flush_delay)
+			mdelay(1);	/* input was seen: wait, then re-check */
+	}
+}
+
+/*
+ * kdb_printf
+ *
+ *	Print a string to the output device(s).
+ *
+ * Parameters:
+ *	printf-like format and optional args.
+ * Returns:
+ *	Number of characters written to the consoles, 0 if none.
+ * Locking:
+ *	None.
+ * Remarks:
+ *	use 'kdbcons->write()' to avoid polluting 'log_buf' with
+ *	kdb output.
+ *
+ *  If the user is doing a cmd args | grep srch
+ *  then kdb_grepping_flag is set.
+ *  In that case we need to accumulate full lines (ending in \n) before
+ *  searching for the pattern.
+ */
+
+static char kdb_buffer[256];	/* A bit too big to go on stack */
+static char *next_avail = kdb_buffer;	/* where the next vsnprintf goes */
+static int  size_avail;			/* space left at next_avail */
+static int  suspend_grep;		/* print unconditionally while set */
+
+/*
+ * Check whether the line "searched" contains the pattern "searchfor"
+ * (kdb_main.c provides flags for ^pat and pat$)
+ *
+ * return 1 for found, 0 for not found
+ */
+static int kdb_search_string(char *searched, char *searchfor)
+{
+	int line_len, pat_len;
+
+	/* the newline that ends "searched" is not counted */
+	line_len = strlen(searched)-1;
+	pat_len = strlen(searchfor);
+	if (line_len < pat_len)
+		return 0;
+
+	/* both ^ and $ given: the pattern must be the whole line */
+	if (kdb_grep_leading && kdb_grep_trailing && line_len != pat_len)
+		return 0;
+
+	/* ^pat: the pattern must start the line */
+	if (kdb_grep_leading)
+		return strncmp(searched, searchfor, pat_len) == 0;
+
+	/* pat$: the pattern must end the line, just before the newline */
+	if (kdb_grep_trailing)
+		return strncmp(searched + line_len - pat_len,
+			       searchfor, pat_len) == 0;
+
+	/* plain pattern: it may appear anywhere in the line */
+	if (strstr(searched, searchfor))
+		return 1;
+
+	return 0;
+}
+
+int kdb_printf(const char *fmt, ...)
+{
+	va_list ap;
+	int diag;
+	int linecount;
+	int logging, saved_loglevel = 0;
+	int got_printf_lock = 0;
+	int retlen = 0;
+	int fnd, len;
+	char *cp, *cp2, *cphold = NULL, replaced_byte = ' ';
+	char *moreprompt = "more> ";
+	struct console *c = console_drivers;
+	static DEFINE_SPINLOCK(kdb_printf_lock);
+	unsigned long uninitialized_var(flags);
+
+	preempt_disable();
+	/* Serialize kdb_printf if multiple cpus try to write at once.
+	 * But if any cpu goes recursive in kdb, just print the output,
+	 * even if it is interleaved with any other text.
+	 */
+	if (!KDB_STATE(PRINTF_LOCK)) {
+		KDB_STATE_SET(PRINTF_LOCK);
+		spin_lock_irqsave(&kdb_printf_lock, flags);
+		got_printf_lock = 1;
+		atomic_inc(&kdb_event);
+	} else {
+		__acquire(kdb_printf_lock);
+	}
+
+	diag = kdbgetintenv("LINES", &linecount);
+	if (diag || linecount <= 1)
+		linecount = 24;
+
+	diag = kdbgetintenv("LOGGING", &logging);
+	if (diag)
+		logging = 0;
+
+	if (!kdb_grepping_flag || suspend_grep) {
+		/* normally, every vsnprintf starts a new buffer */
+		next_avail = kdb_buffer;
+		size_avail = sizeof(kdb_buffer);
+	}
+	va_start(ap, fmt);
+	vsnprintf(next_avail, size_avail, fmt, ap);
+	va_end(ap);
+
+	/*
+	 * If kdb_parse() found that the command was cmd xxx | grep yyy
+	 * then kdb_grepping_flag is set, and kdb_grep_string contains yyy
+	 *
+	 * Accumulate the print data up to a newline before searching it.
+	 * (vsnprintf does null-terminate the string that it generates)
+	 */
+
+	/* skip the search if prints are temporarily unconditional */
+	if (!suspend_grep && kdb_grepping_flag) {
+		cp = strchr(kdb_buffer, '\n');
+		if (!cp) {
+			/*
+			 * Special cases that don't end with newlines
+			 * but should be written without one:
+			 *   The "[nn]kdb> " prompt should
+			 *   appear at the front of the buffer.
+			 *
+			 *   The "[nn]more> " prompt (MOREPROMPT ->
+			 *   moreprompt) should also be written,
+			 *   but we print that ourselves and
+			 *   set the suspend_grep flag to make
+			 *   what follows unconditional.
+			 *
+			 */
+			if (next_avail == kdb_buffer) {
+				/*
+				 * these should occur after a newline,
+				 * so they will be at the front of the
+				 * buffer
+				 */
+				cp2 = kdb_buffer;
+				len = strlen(kdb_prompt_str);
+				if (!strncmp(cp2, kdb_prompt_str, len)) {
+					/*
+					 * We're about to start a new
+					 * command, so we can go back
+					 * to normal mode.
+					 */
+					kdb_grepping_flag = 0;
+					goto kdb_printit;
+				}
+			}
+			/* no newline; don't search/write the buffer
+			   until one is there */
+			len = strlen(kdb_buffer);
+			next_avail = kdb_buffer + len;
+			size_avail = sizeof(kdb_buffer) - len;
+			goto kdb_print_out;
+		}
+
+		/*
+		 * The newline is present; print through it or discard
+		 * it, depending on the results of the search.
+		 */
+		cp++;		     /* to byte after the newline */
+		replaced_byte = *cp; /* remember what/where it was */
+		cphold = cp;
+		*cp = '\0';	     /* end the string for our search */
+
+		/*
+		 * We now have a newline at the end of the string
+		 * Only continue with this output if it contains the
+		 * search string.
+		 */
+		fnd = kdb_search_string(kdb_buffer, kdb_grep_string);
+		if (!fnd) {
+			/*
+			 * At this point the complete line at the start
+			 * of kdb_buffer can be discarded, as it does
+			 * not contain what the user is looking for.
+			 * Shift the buffer left (memmove: regions overlap).
+			 */
+			*cphold = replaced_byte;
+			len = strlen(cphold);
+			memmove(kdb_buffer, cphold, len + 1);
+			next_avail = kdb_buffer + len;
+			size_avail = sizeof(kdb_buffer) - len;
+			goto kdb_print_out;
+		}
+		/*
+		 * at this point the string is a full line and
+		 * should be printed, up to the null.
+		 */
+	}
+kdb_printit:
+
+	/*
+	 * Write to all consoles.
+	 */
+	retlen = strlen(kdb_buffer);
+	while (c) {
+		c->write(c, kdb_buffer, retlen);
+		touch_nmi_watchdog();
+		c = c->next;
+	}
+	if (logging) {
+		saved_loglevel = console_loglevel;
+		console_loglevel = 0;
+		printk(KERN_INFO "%s", kdb_buffer);
+	}
+
+	if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n'))
+		kdb_nextline++;
+
+	/* check for having reached the LINES number of printed lines */
+	if (kdb_nextline == linecount) {
+		char buf1[16] = "";
+#if defined(CONFIG_SMP)
+		char buf2[32];
+#endif
+
+		/* Watch out for recursion here.  Any routine that calls
+		 * kdb_printf will come back through here.  And kdb_read
+		 * uses kdb_printf to echo on serial consoles ...
+		 */
+		kdb_nextline = 1;	/* In case of recursion */
+
+		/*
+		 * Pause until cr.
+		 */
+		moreprompt = kdbgetenv("MOREPROMPT");
+		if (moreprompt == NULL)
+			moreprompt = "more> ";
+
+#if defined(CONFIG_SMP)
+		if (strchr(moreprompt, '%')) {
+			snprintf(buf2, sizeof(buf2), moreprompt, get_cpu());
+			put_cpu();
+			moreprompt = buf2;
+		}
+#endif
+
+		kdb_input_flush();
+		c = console_drivers;
+
+		while (c) {
+			c->write(c, moreprompt, strlen(moreprompt));
+			touch_nmi_watchdog();
+			c = c->next;
+		}
+
+		if (logging)
+			printk("%s", moreprompt);
+
+		kdb_read(buf1, 2); /* '2' indicates to return
+				    * immediately after getting one key. */
+		kdb_nextline = 1;	/* Really set output line 1 */
+
+		/* empty and reset the buffer: */
+		kdb_buffer[0] = '\0';
+		next_avail = kdb_buffer;
+		size_avail = sizeof(kdb_buffer);
+		if ((buf1[0] == 'q') || (buf1[0] == 'Q')) {
+			/* user hit q or Q */
+			KDB_FLAG_SET(CMD_INTERRUPT); /* command interrupted */
+			KDB_STATE_CLEAR(PAGER);
+			/* end of command output; back to normal mode */
+			kdb_grepping_flag = 0;
+			kdb_printf("\n");
+		} else if (buf1[0] == ' ') {
+			kdb_printf("\n");
+			suspend_grep = 1; /* for this recursion */
+		} else if (buf1[0] == '\n') {
+			kdb_nextline = linecount - 1;
+			kdb_printf("\r");
+			suspend_grep = 1; /* for this recursion */
+		} else if (buf1[0] && buf1[0] != '\n') {
+			/* user hit something other than enter */
+			suspend_grep = 1; /* for this recursion */
+			kdb_printf("\nOnly 'q' or 'Q' are processed at more "
+				   "prompt, input ignored\n");
+		} else if (kdb_grepping_flag) {
+			/* user hit enter */
+			suspend_grep = 1; /* for this recursion */
+			kdb_printf("\n");
+		}
+		kdb_input_flush();
+	}
+
+	/*
+	 * For grep searches, shift the printed string left.
+	 *  replaced_byte contains the character that was overwritten with
+	 *  the terminating null, and cphold points to the null.
+	 * Then adjust the notion of available space in the buffer.
+	 */
+	if (kdb_grepping_flag && !suspend_grep) {
+		*cphold = replaced_byte;
+		len = strlen(cphold);
+		memmove(kdb_buffer, cphold, len + 1);	/* regions overlap */
+		next_avail = kdb_buffer + len;
+		size_avail = sizeof(kdb_buffer) - len;
+	}
+
+kdb_print_out:
+	suspend_grep = 0; /* end of what may have been a recursive call */
+	if (logging)
+		console_loglevel = saved_loglevel;
+	if (KDB_STATE(PRINTF_LOCK) && got_printf_lock) {
+		got_printf_lock = 0;
+		spin_unlock_irqrestore(&kdb_printf_lock, flags);
+		KDB_STATE_CLEAR(PRINTF_LOCK);
+		atomic_dec(&kdb_event);
+	} else {
+		__release(kdb_printf_lock);
+	}
+	preempt_enable();
+	return retlen;
+}
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
new file mode 100644
index 000000000000..64ef9ac14ba9
--- /dev/null
+++ b/kernel/debug/kdb/kdb_main.c
@@ -0,0 +1,2845 @@
+/*
+ * Kernel Debugger Architecture Independent Main Code
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (C) 2000 Stephane Eranian <eranian@hpl.hp.com>
+ * Xscale (R) modifications copyright (C) 2003 Intel Corporation.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ */
+
+#include <linux/ctype.h>
+#include <linux/string.h>
+#include <linux/kernel.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/sysrq.h>
+#include <linux/smp.h>
+#include <linux/utsname.h>
+#include <linux/vmalloc.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/init.h>
+#include <linux/kallsyms.h>
+#include <linux/kgdb.h>
+#include <linux/kdb.h>
+#include <linux/notifier.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/nmi.h>
+#include <linux/time.h>
+#include <linux/ptrace.h>
+#include <linux/sysctl.h>
+#include <linux/cpu.h>
+#include <linux/kdebug.h>
+#include <linux/proc_fs.h>
+#include <linux/uaccess.h>
+#include <linux/slab.h>
+#include "kdb_private.h"
+
+#define GREP_LEN 256
+char kdb_grep_string[GREP_LEN];
+int kdb_grepping_flag;
+EXPORT_SYMBOL(kdb_grepping_flag);
+int kdb_grep_leading;
+int kdb_grep_trailing;
+
+/*
+ * Kernel debugger state flags
+ */
+int kdb_flags;
+atomic_t kdb_event;
+
+/*
+ * kdb_lock protects updates to kdb_initial_cpu.  Used to
+ * single thread processors through the kernel debugger.
+ */
+int kdb_initial_cpu = -1;	/* cpu number that owns kdb */
+int kdb_nextline = 1;
+int kdb_state;			/* General KDB state */
+
+struct task_struct *kdb_current_task;
+EXPORT_SYMBOL(kdb_current_task);
+struct pt_regs *kdb_current_regs;
+
+const char *kdb_diemsg;
+static int kdb_go_count;
+#ifdef CONFIG_KDB_CONTINUE_CATASTROPHIC
+static unsigned int kdb_continue_catastrophic =
+	CONFIG_KDB_CONTINUE_CATASTROPHIC;
+#else
+static unsigned int kdb_continue_catastrophic;
+#endif
+
+/* Commands: kdb_base_commands[] first, then the kdb_commands[] extension. */
+static kdbtab_t *kdb_commands;
+#define KDB_BASE_CMD_MAX 50
+static int kdb_max_commands = KDB_BASE_CMD_MAX;
+static kdbtab_t kdb_base_commands[KDB_BASE_CMD_MAX];
+#define for_each_kdbcmd(cmd, num)					\
+	for ((cmd) = kdb_base_commands, (num) = 0;			\
+	     num < kdb_max_commands;					\
+	     num++, num == KDB_BASE_CMD_MAX ? cmd = kdb_commands : cmd++)
+
+typedef struct _kdbmsg {
+	int	km_diag;	/* kdb diagnostic */
+	char	*km_msg;	/* Corresponding message text */
+} kdbmsg_t;
+
+#define KDBMSG(msgnum, text) \
+	{ KDB_##msgnum, text }
+
+static kdbmsg_t kdbmsgs[] = {
+	KDBMSG(NOTFOUND, "Command Not Found"),
+	KDBMSG(ARGCOUNT, "Improper argument count, see usage."),
+	KDBMSG(BADWIDTH, "Illegal value for BYTESPERWORD use 1, 2, 4 or 8, "
+	       "8 is only allowed on 64 bit systems"),
+	KDBMSG(BADRADIX, "Illegal value for RADIX use 8, 10 or 16"),
+	KDBMSG(NOTENV, "Cannot find environment variable"),
+	KDBMSG(NOENVVALUE, "Environment variable should have value"),
+	KDBMSG(NOTIMP, "Command not implemented"),
+	KDBMSG(ENVFULL, "Environment full"),
+	KDBMSG(ENVBUFFULL, "Environment buffer full"),
+	KDBMSG(TOOMANYBPT, "Too many breakpoints defined"),
+#ifdef CONFIG_CPU_XSCALE
+	KDBMSG(TOOMANYDBREGS, "More breakpoints than ibcr registers defined"),
+#else
+	KDBMSG(TOOMANYDBREGS, "More breakpoints than db registers defined"),
+#endif
+	KDBMSG(DUPBPT, "Duplicate breakpoint address"),
+	KDBMSG(BPTNOTFOUND, "Breakpoint not found"),
+	KDBMSG(BADMODE, "Invalid IDMODE"),
+	KDBMSG(BADINT, "Illegal numeric value"),
+	KDBMSG(INVADDRFMT, "Invalid symbolic address format"),
+	KDBMSG(BADREG, "Invalid register name"),
+	KDBMSG(BADCPUNUM, "Invalid cpu number"),
+	KDBMSG(BADLENGTH, "Invalid length field"),
+	KDBMSG(NOBP, "No Breakpoint exists"),
+	KDBMSG(BADADDR, "Invalid address"),
+};
+#undef KDBMSG
+
+static const int __nkdb_err = sizeof(kdbmsgs) / sizeof(kdbmsg_t);
+
+
+/*
+ * Initial environment.   This is all kept static and local to
+ * this file.   We don't want to rely on the memory allocation
+ * mechanisms in the kernel, so we use a very limited allocate-only
+ * heap for new and altered environment variables.  The entire
+ * environment is limited to a fixed number of entries (add more
+ * to __env[] if required) and a fixed amount of heap (add more to
+ * KDB_ENVBUFSIZE if required).
+ */
+
+static char *__env[] = {
+#if defined(CONFIG_SMP)
+ "PROMPT=[%d]kdb> ",
+ "MOREPROMPT=[%d]more> ",
+#else
+ "PROMPT=kdb> ",
+ "MOREPROMPT=more> ",
+#endif
+ "RADIX=16",
+ "MDCOUNT=8",			/* lines of md output */
+ "BTARGS=9",			/* 9 possible args in bt */
+ KDB_PLATFORM_ENV,
+ "DTABCOUNT=30",
+ "NOSECT=1",
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+ (char *)0,
+};
+
+static const int __nenv = (sizeof(__env) / sizeof(char *));
+
+struct task_struct *kdb_curr_task(int cpu)
+{
+	struct task_struct *p = curr_task(cpu);
+#ifdef	_TIF_MCA_INIT
+	if ((task_thread_info(p)->flags & _TIF_MCA_INIT) && KDB_TSK(cpu))
+		p = krp->p;	/* NOTE(review): krp is not declared here */
+#endif
+	return p;
+}
+
+/*
+ * kdbgetenv - Look up an environment variable and return its
+ *	character string value.
+ * Parameters:
+ *	match	The name of the environment variable to look up.
+ * Returns:
+ *	NULL	No environment variable matches 'match'
+ *	char*	Pointer to the value ("" when the entry has no '=').
+ */
+char *kdbgetenv(const char *match)
+{
+	size_t namelen = strlen(match);
+	char **slot;
+
+	for (slot = __env; slot < __env + __nenv; slot++) {
+		char *entry = *slot;
+		char *eq;
+
+		/* unused slot, or the name differs */
+		if (!entry || strncmp(match, entry, namelen) != 0)
+			continue;
+		/* "match" must be the whole name, not just a prefix of it */
+		if (entry[namelen] != '\0' && entry[namelen] != '=')
+			continue;
+
+		eq = strchr(entry, '=');
+		return eq ? eq + 1 : "";
+	}
+	return NULL;
+}
+
+/*
+ * kdballocenv - This function is used to allocate bytes for
+ *	environment entries.
+ * Parameters:
+ *	bytes	The number of bytes to take from the static buffer
+ * Outputs:
+ *	None.
+ * Returns:
+ *	A pointer to the allocated bytes, or NULL if too little space is left.
+ * Remarks:
+ *	We use a static environment buffer (envbuffer) to hold the values
+ *	of dynamically generated environment variables (see kdb_set).  Buffer
+ *	space once allocated is never free'd, so over time, the amount of space
+ *	(currently 512 bytes) will be exhausted if env variables are changed
+ *	frequently.
+ */
+static char *kdballocenv(size_t bytes)
+{
+#define	KDB_ENVBUFSIZE	512
+	static char envbuffer[KDB_ENVBUFSIZE];
+	static int envbufsize;	/* bytes of envbuffer handed out so far */
+	char *ep = NULL;
+
+	if ((KDB_ENVBUFSIZE - envbufsize) >= bytes) {
+		ep = &envbuffer[envbufsize];
+		envbufsize += bytes;
+	}
+	return ep;
+}
+
+/*
+ * kdbgetulenv - Fetch an environment variable and convert its value
+ *	to an unsigned long.
+ * Parameters:
+ *	match	Name of the environment variable.
+ * Outputs:
+ *	*value  the unsigned long representation of the variable's value;
+ *		only written on success.
+ * Returns:
+ *	Zero on success, KDB_NOTENV when the variable does not exist,
+ *	KDB_NOENVVALUE when it exists but has an empty value.
+ */
+static int kdbgetulenv(const char *match, unsigned long *value)
+{
+	char *text = kdbgetenv(match);
+
+	if (text == NULL)
+		return KDB_NOTENV;
+	if (*text == '\0')
+		return KDB_NOENVVALUE;
+
+	*value = simple_strtoul(text, NULL, 0);
+	return 0;
+}
+
+/*
+ * kdbgetintenv - Fetch an environment variable and convert its value
+ *	to an int.
+ * Parameters:
+ *	match	Name of the environment variable.
+ * Outputs:
+ *	*value  the value of the variable, truncated to int; only written
+ *		on success.
+ * Returns:
+ *	Zero on success, otherwise the diagnostic from kdbgetulenv().
+ */
+int kdbgetintenv(const char *match, int *value)
+{
+	unsigned long ulval;
+	int rc = kdbgetulenv(match, &ulval);
+
+	if (rc)
+		return rc;
+
+	*value = (int) ulval;
+	return 0;
+}
+
+/*
+ * kdbgetularg - Convert a numeric string into an unsigned long.
+ * Parameters:
+ *	arg	A character string representing a numeric value
+ * Outputs:
+ *	*value  the unsigned long representation of arg; only written on
+ *		success.
+ * Returns:
+ *	Zero on success, KDB_BADINT when no digits could be consumed.
+ */
+int kdbgetularg(const char *arg, unsigned long *value)
+{
+	char *end;
+	unsigned long parsed = simple_strtoul(arg, &end, 0);
+
+	if (end == arg) {
+		/*
+		 * Nothing was consumed with automatic base detection.
+		 * Retry as plain hex so that users may omit the leading
+		 * 0x.
+		 */
+		parsed = simple_strtoul(arg, &end, 16);
+		if (end == arg)
+			return KDB_BADINT;
+	}
+
+	*value = parsed;
+	return 0;
+}
+
+/*
+ * kdb_set - This function implements the 'set' command.  Alter an
+ *	existing environment variable or create a new one.
+ * Inputs:
+ *	argc	argument count (2, or 3 for the "var = value" form)
+ *	argv	argument vector; argv[2] is overwritten when argc == 3
+ * Returns:
+ *	Zero on success (also after reporting illegal KDBDEBUG flags),
+ *	KDB_ARGCOUNT for a wrong number of arguments, KDB_ENVFULL when no
+ *	slot is available in __env[], KDB_ENVBUFFULL when the environment
+ *	buffer cannot hold the new "name=value" string.
+ * Remarks:
+ *	The target slot is located before any buffer space is claimed.
+ *	Space obtained from kdballocenv() can never be given back, so
+ *	claiming it first would waste it every time the table is full.
+ */
+int kdb_set(int argc, const char **argv)
+{
+	int i;
+	int slot = -1;
+	char *ep;
+	size_t varlen, vallen;
+
+	/*
+	 * we can be invoked two ways:
+	 *   set var=value    argv[1]="var", argv[2]="value"
+	 *   set var = value  argv[1]="var", argv[2]="=", argv[3]="value"
+	 * - if the latter, shift 'em down.
+	 */
+	if (argc == 3) {
+		argv[2] = argv[3];
+		argc--;
+	}
+
+	if (argc != 2)
+		return KDB_ARGCOUNT;
+
+	/*
+	 * Check for internal variables
+	 */
+	if (strcmp(argv[1], "KDBDEBUG") == 0) {
+		unsigned int debugflags;
+		char *cp;
+
+		debugflags = simple_strtoul(argv[2], &cp, 0);
+		if (cp == argv[2] || debugflags & ~KDB_DEBUG_FLAG_MASK) {
+			kdb_printf("kdb: illegal debug flags '%s'\n",
+				    argv[2]);
+			return 0;
+		}
+		kdb_flags = (kdb_flags &
+			     ~(KDB_DEBUG_FLAG_MASK << KDB_DEBUG_FLAG_SHIFT))
+			| (debugflags << KDB_DEBUG_FLAG_SHIFT);
+
+		return 0;
+	}
+
+	/*
+	 * Tokenizer squashed the '=' sign.  argv[1] is variable
+	 * name, argv[2] = value.
+	 */
+	varlen = strlen(argv[1]);
+	vallen = strlen(argv[2]);
+
+	/*
+	 * Prefer the slot of an existing variable with the same name.
+	 */
+	for (i = 0; i < __nenv; i++) {
+		if (__env[i]
+		 && (strncmp(__env[i], argv[1], varlen) == 0)
+		 && ((__env[i][varlen] == '\0')
+		  || (__env[i][varlen] == '='))) {
+			slot = i;
+			break;
+		}
+	}
+
+	/*
+	 * Wasn't existing variable.  Fit into the first empty slot; the
+	 * last slot of the table is never used for this.
+	 */
+	if (slot < 0) {
+		for (i = 0; i < __nenv-1; i++) {
+			if (__env[i] == NULL) {
+				slot = i;
+				break;
+			}
+		}
+	}
+
+	if (slot < 0)
+		return KDB_ENVFULL;
+
+	/* name + '=' + value + terminating NUL */
+	ep = kdballocenv(varlen + vallen + 2);
+	if (ep == NULL)
+		return KDB_ENVBUFFULL;
+
+	sprintf(ep, "%s=%s", argv[1], argv[2]);
+	__env[slot] = ep;
+
+	return 0;
+}
+
+/*
+ * kdb_check_regs - Verify that a register set is available.
+ * Returns:
+ *	Zero when kdb_current_regs is set, otherwise prints a hint and
+ *	returns KDB_BADREG.
+ */
+static int kdb_check_regs(void)
+{
+	if (kdb_current_regs)
+		return 0;
+
+	kdb_printf("No current kdb registers."
+		   "  You may need to select another task\n");
+	return KDB_BADREG;
+}
+
+/*
+ * kdbgetaddrarg - This function is responsible for parsing an
+ *	address-expression and returning the value of the expression,
+ *	symbol name, and offset to the caller.
+ *
+ *	The argument may consist of a numeric value (decimal or
+ *	hexadecimal), a symbol name, a register name (preceded by the
+ *	percent sign), an environment variable with a numeric value
+ *	(preceded by a dollar sign) or a simple arithmetic expression
+ *	consisting of a symbol name, +/-, and a numeric constant value
+ *	(offset).
+ *
+ *	Register names are recognised but not implemented: a '%'
+ *	argument yields the kdb_check_regs() diagnostic or KDB_NOTIMP.
+ * Parameters:
+ *	argc	- count of arguments in argv
+ *	argv	- argument vector
+ *	*nextarg - index to next unparsed argument in argv[]
+ * Outputs:
+ *	*value	- receives the value of the address-expression
+ *	*offset - receives the offset specified, if any
+ *	*name   - receives the symbol name, if any
+ *	*nextarg - index to next unparsed argument in argv[]
+ * Returns:
+ *	zero is returned on success, a kdb diagnostic code is
+ *      returned on error.
+ * Remarks:
+ *	When the argument contains an embedded '+' or '-', that
+ *	character is overwritten with a NUL in the argv string itself.
+ *	*offset is only assigned here when both 'offset' and 'name' are
+ *	supplied; with a NULL 'name' the later "+=" adds to whatever the
+ *	caller stored in *offset beforehand.
+ */
+int kdbgetaddrarg(int argc, const char **argv, int *nextarg,
+		  unsigned long *value,  long *offset,
+		  char **name)
+{
+	unsigned long addr;
+	unsigned long off = 0;
+	int positive;
+	int diag;
+	int found = 0;
+	char *symname;
+	char symbol = '\0';	/* embedded '+' or '-', '\0' if none */
+	char *cp;
+	kdb_symtab_t symtab;
+
+	/*
+	 * Process arguments which follow the following syntax:
+	 *
+	 *  symbol | numeric-address [+/- numeric-offset]
+	 *  %register
+	 *  $environment-variable
+	 */
+
+	if (*nextarg > argc)
+		return KDB_ARGCOUNT;
+
+	symname = (char *)argv[*nextarg];
+
+	/*
+	 * If there is no whitespace between the symbol
+	 * or address and the '+' or '-' symbols, we
+	 * remember the character and replace it with a
+	 * null so the symbol/value can be properly parsed
+	 */
+	cp = strpbrk(symname, "+-");
+	if (cp != NULL) {
+		symbol = *cp;
+		*cp++ = '\0';
+	}
+
+	if (symname[0] == '$') {
+		diag = kdbgetulenv(&symname[1], &addr);
+		if (diag)
+			return diag;
+	} else if (symname[0] == '%') {
+		diag = kdb_check_regs();
+		if (diag)
+			return diag;
+		/* Implement register values with % at a later time as it is
+		 * arch optional.
+		 */
+		return KDB_NOTIMP;
+	} else {
+		/* Try a symbol name first, then fall back to a number. */
+		found = kdbgetsymval(symname, &symtab);
+		if (found) {
+			addr = symtab.sym_start;
+		} else {
+			diag = kdbgetularg(argv[*nextarg], &addr);
+			if (diag)
+				return diag;
+		}
+	}
+
+	if (!found)
+		found = kdbnearsym(addr, &symtab);
+
+	(*nextarg)++;
+
+	if (name)
+		*name = symname;
+	if (value)
+		*value = addr;
+	if (offset && name && *name)
+		*offset = addr - symtab.sym_start;
+
+	/* No more arguments and no embedded operator: we are done. */
+	if ((*nextarg > argc)
+	 && (symbol == '\0'))
+		return 0;
+
+	/*
+	 * check for +/- and offset
+	 */
+
+	if (symbol == '\0') {
+		if ((argv[*nextarg][0] != '+')
+		 && (argv[*nextarg][0] != '-')) {
+			/*
+			 * Not our argument.  Return.
+			 */
+			return 0;
+		} else {
+			positive = (argv[*nextarg][0] == '+');
+			(*nextarg)++;
+		}
+	} else
+		positive = (symbol == '+');
+
+	/*
+	 * Now there must be an offset!
+	 */
+	if ((*nextarg > argc)
+	 && (symbol == '\0')) {
+		return KDB_INVADDRFMT;
+	}
+
+	/*
+	 * With an embedded operator 'cp' already points just past it;
+	 * otherwise the offset is the next argument.
+	 */
+	if (!symbol) {
+		cp = (char *)argv[*nextarg];
+		(*nextarg)++;
+	}
+
+	diag = kdbgetularg(cp, &off);
+	if (diag)
+		return diag;
+
+	if (!positive)
+		off = -off;
+
+	if (offset)
+		*offset += off;
+
+	if (value)
+		*value += off;
+
+	return 0;
+}
+
+/*
+ * kdb_cmderror - Print the message that belongs to a kdb diagnostic.
+ * Inputs:
+ *	diag	diagnostic code; values >= 0 are reported as "no error",
+ *		negative values are looked up in kdbmsgs[].
+ */
+static void kdb_cmderror(int diag)
+{
+	int idx = 0;
+
+	if (diag >= 0) {
+		kdb_printf("no error detected (diagnostic is %d)\n", diag);
+		return;
+	}
+
+	while (idx < __nkdb_err) {
+		if (kdbmsgs[idx].km_diag == diag) {
+			kdb_printf("diag: %d: %s\n", diag,
+				   kdbmsgs[idx].km_msg);
+			return;
+		}
+		idx++;
+	}
+
+	/* Not in the table; the code is shown with its sign flipped. */
+	kdb_printf("Unknown diag %d\n", -diag);
+}
+
+/*
+ * kdb_defcmd, kdb_defcmd2 - This function implements the 'defcmd'
+ *	command which defines one command as a set of other commands,
+ *	terminated by endefcmd.  kdb_defcmd processes the initial
+ *	'defcmd' command, kdb_defcmd2 is invoked from kdb_parse for
+ *	the following commands until 'endefcmd'.
+ * Inputs:
+ *	argc	argument count
+ *	argv	argument vector
+ * Returns:
+ *	zero for success, a kdb diagnostic if error
+ */
+/* One user-defined command, as built up by defcmd ... endefcmd. */
+struct defcmd_set {
+	int count;	/* number of entries in command[] */
+	int usable;	/* cleared when the definition cannot be used */
+	char *name;	/* copy of the defcmd name argument */
+	char *usage;	/* copy of the defcmd usage argument */
+	char *help;	/* copy of the defcmd help argument */
+	char **command;	/* the command lines recorded for this set */
+};
+/* Array of all definitions, its length, and "inside a defcmd" flag. */
+static struct defcmd_set *defcmd_set;
+static int defcmd_set_count;
+static int defcmd_in_progress;
+
+/* Forward references */
+static int kdb_exec_defcmd(int argc, const char **argv);
+
+/*
+ * kdb_defcmd2 - Record one line of a defcmd body, or finish the
+ *	definition when the line is 'endefcmd'.
+ * Inputs:
+ *	cmdstr	the complete command line as typed
+ *	argv0	first token of that line
+ * Returns:
+ *	zero for success, KDB_NOTIMP when the set is (or becomes)
+ *	unusable, e.g. because memory could not be allocated.
+ * Remarks:
+ *	Operates on the last entry of defcmd_set[].  The command table
+ *	is only replaced once both the larger table and the copy of the
+ *	line have been allocated, so on failure the set keeps its
+ *	previous, still valid, table and count.
+ */
+static int kdb_defcmd2(const char *cmdstr, const char *argv0)
+{
+	struct defcmd_set *s = defcmd_set + defcmd_set_count - 1;
+	char **new_command;
+	char *line;
+
+	if (strcmp(argv0, "endefcmd") == 0) {
+		defcmd_in_progress = 0;
+		/* An empty definition is never registered. */
+		if (!s->count)
+			s->usable = 0;
+		if (s->usable)
+			kdb_register(s->name, kdb_exec_defcmd,
+				     s->usage, s->help, 0);
+		return 0;
+	}
+	if (!s->usable)
+		return KDB_NOTIMP;
+
+	new_command = kmalloc((s->count + 1) * sizeof(*new_command), GFP_KDB);
+	if (!new_command)
+		goto fail;
+
+	line = kdb_strdup(cmdstr, GFP_KDB);
+	if (!line) {
+		kfree(new_command);
+		goto fail;
+	}
+
+	/* The table is NULL while the set is still empty. */
+	if (s->count)
+		memcpy(new_command, s->command,
+		       s->count * sizeof(*new_command));
+	new_command[s->count] = line;
+
+	kfree(s->command);
+	s->command = new_command;
+	s->count++;
+	return 0;
+
+fail:
+	kdb_printf("Could not allocate new kdb_defcmd table for %s\n",
+		   cmdstr);
+	s->usable = 0;
+	return KDB_NOTIMP;
+}
+
+/*
+ * kdb_defcmd_unquote - Drop the surrounding double quotes of a defcmd
+ *	argument.  'dst' must already hold a private copy of 'src'.
+ *	Nothing happens unless 'src' starts with '"'.  Otherwise the
+ *	leading quote is removed and the final character is cut off (it
+ *	is not checked to be a quote).  Copying from 'src' rather than
+ *	from 'dst' itself avoids an overlapping strcpy(), and the length
+ *	check avoids writing before the buffer for a lone '"'.
+ */
+static void kdb_defcmd_unquote(char *dst, const char *src)
+{
+	size_t len;
+
+	if (src[0] != '"')
+		return;
+	strcpy(dst, src + 1);
+	len = strlen(dst);
+	if (len)
+		dst[len - 1] = '\0';
+}
+
+/*
+ * kdb_defcmd - Start the definition of a new command, or list all
+ *	definitions when called without arguments.
+ * Inputs:
+ *	argc	argument count: 0 to list, 3 to define
+ *	argv	argument vector: name, usage, help
+ * Returns:
+ *	zero for success, KDB_ARGCOUNT for a wrong argument count,
+ *	KDB_NOTIMP when memory could not be allocated.
+ * Remarks:
+ *	The enlarged defcmd_set[] only replaces the old array once every
+ *	allocation has succeeded; on failure the previous array is left
+ *	untouched.  The string copies are released with kfree(), which
+ *	assumes kdb_strdup() allocates with kmalloc() - TODO confirm.
+ */
+static int kdb_defcmd(int argc, const char **argv)
+{
+	struct defcmd_set *new_set, *s;
+
+	if (defcmd_in_progress) {
+		kdb_printf("kdb: nested defcmd detected, assuming missing "
+			   "endefcmd\n");
+		kdb_defcmd2("endefcmd", "endefcmd");
+	}
+	if (argc == 0) {
+		int i;
+		for (s = defcmd_set; s < defcmd_set + defcmd_set_count; ++s) {
+			kdb_printf("defcmd %s \"%s\" \"%s\"\n", s->name,
+				   s->usage, s->help);
+			for (i = 0; i < s->count; ++i)
+				kdb_printf("%s", s->command[i]);
+			kdb_printf("endefcmd\n");
+		}
+		return 0;
+	}
+	if (argc != 3)
+		return KDB_ARGCOUNT;
+
+	new_set = kmalloc((defcmd_set_count + 1) * sizeof(*new_set),
+			  GFP_KDB);
+	if (!new_set)
+		goto fail_set;
+	/* defcmd_set is NULL until the first definition exists. */
+	if (defcmd_set_count)
+		memcpy(new_set, defcmd_set,
+		       defcmd_set_count * sizeof(*new_set));
+
+	s = new_set + defcmd_set_count;
+	memset(s, 0, sizeof(*s));
+	s->usable = 1;
+	s->name = kdb_strdup(argv[1], GFP_KDB);
+	if (!s->name)
+		goto fail_name;
+	s->usage = kdb_strdup(argv[2], GFP_KDB);
+	if (!s->usage)
+		goto fail_usage;
+	s->help = kdb_strdup(argv[3], GFP_KDB);
+	if (!s->help)
+		goto fail_help;
+
+	kdb_defcmd_unquote(s->usage, argv[2]);
+	kdb_defcmd_unquote(s->help, argv[3]);
+
+	/* Everything is in place; switch over to the new array. */
+	kfree(defcmd_set);
+	defcmd_set = new_set;
+	++defcmd_set_count;
+	defcmd_in_progress = 1;
+	return 0;
+
+fail_help:
+	kfree(s->usage);
+fail_usage:
+	kfree(s->name);
+fail_name:
+	kfree(new_set);
+fail_set:
+	kdb_printf("Could not allocate new defcmd_set entry for %s\n",
+		   argv[1]);
+	return KDB_NOTIMP;
+}
+
+/*
+ * kdb_exec_defcmd - Run every command line recorded for the defcmd
+ *	whose name is argv[0].
+ * Inputs:
+ *	argc	argument count, must be 0
+ *	argv	argument vector
+ * Returns:
+ *	zero for success, KDB_ARGCOUNT when arguments were given,
+ *	KDB_NOTIMP when no definition has that name, or the first
+ *	non-zero result of kdb_parse().
+ */
+static int kdb_exec_defcmd(int argc, const char **argv)
+{
+	struct defcmd_set *set = defcmd_set;
+	int idx = 0;
+	int cmd, diag;
+
+	if (argc != 0)
+		return KDB_ARGCOUNT;
+
+	/* Locate the definition by name. */
+	while (idx < defcmd_set_count && strcmp(set->name, argv[0]) != 0) {
+		++idx;
+		++set;
+	}
+	if (idx == defcmd_set_count) {
+		kdb_printf("kdb_exec_defcmd: could not find commands for %s\n",
+			   argv[0]);
+		return KDB_NOTIMP;
+	}
+
+	for (cmd = 0; cmd < set->count; ++cmd) {
+		/* kdb_parse is re-entered below and reuses its argv
+		 * storage, so argv must not be touched from here on */
+		argv = NULL;
+		kdb_printf("[%s]kdb> %s\n", set->name, set->command[cmd]);
+		diag = kdb_parse(set->command[cmd]);
+		if (diag)
+			return diag;
+	}
+	return 0;
+}
+
+/*
+ * Command history: cmd_hist[] is indexed modulo KDB_CMD_HISTORY_COUNT
+ * through cmd_head, cmd_tail and cmdptr; cmd_cur is the line currently
+ * being edited.
+ */
+#define KDB_CMD_HISTORY_COUNT	32
+#define CMD_BUFLEN		200	/* kdb_printf: max printline
+					 * size == 256 */
+static unsigned int cmd_head, cmd_tail;
+static unsigned int cmdptr;
+static char cmd_hist[KDB_CMD_HISTORY_COUNT][CMD_BUFLEN];
+static char cmd_cur[CMD_BUFLEN];
+
+/*
+ * parse_grep - Parse the "| grep pattern" tail of a command line.
+ *	The "str" argument may point to something like  | grep xyz
+ *
+ *	On success the pattern is copied to kdb_grep_string and
+ *	kdb_grepping_flag is incremented.  A leading '^' or a trailing
+ *	'$' is removed from the pattern and recorded in
+ *	kdb_grep_leading / kdb_grep_trailing.  The pattern may be
+ *	enclosed in double quotes.
+ *
+ *	Note that the string is edited in place even though it is passed
+ *	as const: the trailing newline, the closing quote and a trailing
+ *	'$' are overwritten with NUL bytes.
+ */
+static void parse_grep(const char *str)
+{
+	int	len;
+	char	*cp = (char *)str, *cp2;
+
+	/* sanity check: we should have been called with the | first */
+	if (*cp != '|')
+		return;
+	cp++;
+	while (isspace(*cp))
+		cp++;
+	if (strncmp(cp, "grep ", 5)) {
+		kdb_printf("invalid 'pipe', see grephelp\n");
+		return;
+	}
+	cp += 5;
+	while (isspace(*cp))
+		cp++;
+	cp2 = strchr(cp, '\n');
+	if (cp2)
+		*cp2 = '\0'; /* remove the trailing newline */
+	len = strlen(cp);
+	if (len == 0) {
+		kdb_printf("invalid 'pipe', see grephelp\n");
+		return;
+	}
+	/* now cp points to a nonzero length search string */
+	if (*cp == '"') {
+		/* allow it be "x y z" by removing the "'s - there must
+		   be two of them */
+		cp++;
+		cp2 = strchr(cp, '"');
+		if (!cp2) {
+			kdb_printf("invalid quoted string, see grephelp\n");
+			return;
+		}
+		*cp2 = '\0'; /* end the string where the 2nd " was */
+	}
+	kdb_grep_leading = 0;
+	if (*cp == '^') {
+		kdb_grep_leading = 1;
+		cp++;
+	}
+	len = strlen(cp);
+	kdb_grep_trailing = 0;
+	/* len may be 0 here; cp has advanced, so cp-1 is still in str */
+	if (*(cp+len-1) == '$') {
+		kdb_grep_trailing = 1;
+		*(cp+len-1) = '\0';
+	}
+	len = strlen(cp);
+	/* an empty pattern silently leaves grepping switched off */
+	if (!len)
+		return;
+	if (len >= GREP_LEN) {
+		kdb_printf("search string too long\n");
+		return;
+	}
+	strcpy(kdb_grep_string, cp);
+	kdb_grepping_flag++;
+	return;
+}
+
+/*
+ * kdb_parse - Parse the command line, search the command table for a
+ *	matching command and invoke the command function.  This
+ *	function may be called recursively, if it is, the second call
+ *	will overwrite argv and cbuf.  It is the caller's
+ *	responsibility to save their argv if they recursively call
+ *	kdb_parse().
+ * Parameters:
+ *      cmdstr	The input command line to be parsed.
+ * Returns:
+ *	Zero for success, a kdb diagnostic if failure.
+ * Remarks:
+ *	Limited to MAXARGC - 1 tokens, the command name included.
+ *
+ *	Real rudimentary tokenization. Basically only whitespace
+ *	is considered a token delimiter (but special consideration
+ *	is taken of the '=' sign as used by the 'set' command).
+ *
+ *	The algorithm used to tokenize the input string relies on
+ *	there being at least one whitespace (or otherwise useless)
+ *	character between tokens as the character immediately following
+ *	the token is altered in-place to a null-byte to terminate the
+ *	token string.
+ *
+ *	argv, argc and cbuf are static: an empty input line skips the
+ *	tokenizer and re-runs whatever the previous call left behind,
+ *	which is how command repetition is implemented.
+ */
+
+#define MAXARGC	20
+
+int kdb_parse(const char *cmdstr)
+{
+	static char *argv[MAXARGC];
+	static int argc;
+	static char cbuf[CMD_BUFLEN+2];
+	char *cp;
+	char *cpp, quoted;
+	kdbtab_t *tp;
+	int i, escaped, ignore_errors = 0, check_grep;
+
+	/*
+	 * First tokenize the command string.
+	 */
+	cp = (char *)cmdstr;
+	kdb_grepping_flag = check_grep = 0;
+
+	if (KDB_FLAG(CMD_INTERRUPT)) {
+		/* Previous command was interrupted, newline must not
+		 * repeat the command */
+		KDB_FLAG_CLEAR(CMD_INTERRUPT);
+		KDB_STATE_SET(PAGER);
+		argc = 0;	/* no repeat */
+	}
+
+	if (*cp != '\n' && *cp != '\0') {
+		argc = 0;
+		cpp = cbuf;
+		while (*cp) {
+			/* skip whitespace */
+			while (isspace(*cp))
+				cp++;
+			/* '#' starts a comment, except inside a defcmd */
+			if ((*cp == '\0') || (*cp == '\n') ||
+			    (*cp == '#' && !defcmd_in_progress))
+				break;
+			/* special case: check for | grep pattern */
+			if (*cp == '|') {
+				check_grep++;
+				break;
+			}
+			if (cpp >= cbuf + CMD_BUFLEN) {
+				kdb_printf("kdb_parse: command buffer "
+					   "overflow, command ignored\n%s\n",
+					   cmdstr);
+				return KDB_NOTFOUND;
+			}
+			if (argc >= MAXARGC - 1) {
+				kdb_printf("kdb_parse: too many arguments, "
+					   "command ignored\n%s\n", cmdstr);
+				return KDB_NOTFOUND;
+			}
+			argv[argc++] = cpp;
+			escaped = 0;
+			quoted = '\0';
+			/* Copy to next unquoted and unescaped
+			 * whitespace or '=' */
+			while (*cp && *cp != '\n' &&
+			       (escaped || quoted || !isspace(*cp))) {
+				if (cpp >= cbuf + CMD_BUFLEN)
+					break;
+				if (escaped) {
+					escaped = 0;
+					*cpp++ = *cp++;
+					continue;
+				}
+				if (*cp == '\\') {
+					escaped = 1;
+					++cp;
+					continue;
+				}
+				/* quote characters are copied, not removed */
+				if (*cp == quoted)
+					quoted = '\0';
+				else if (*cp == '\'' || *cp == '"')
+					quoted = *cp;
+				*cpp = *cp++;
+				if (*cpp == '=' && !quoted)
+					break;
+				++cpp;
+			}
+			*cpp++ = '\0';	/* Squash a ws or '=' character */
+		}
+	}
+	if (!argc)
+		return 0;
+	if (check_grep)
+		parse_grep(cp);
+	/* Inside defcmd ... endefcmd lines are recorded, not executed. */
+	if (defcmd_in_progress) {
+		int result = kdb_defcmd2(cmdstr, argv[0]);
+		if (!defcmd_in_progress) {
+			argc = 0;	/* avoid repeat on endefcmd */
+			*(argv[0]) = '\0';
+		}
+		return result;
+	}
+	/* A leading '-' (not followed by a digit) means ignore errors. */
+	if (argv[0][0] == '-' && argv[0][1] &&
+	    (argv[0][1] < '0' || argv[0][1] > '9')) {
+		ignore_errors = 1;
+		++argv[0];
+	}
+
+	for_each_kdbcmd(tp, i) {
+		if (tp->cmd_name) {
+			/*
+			 * If this command is allowed to be abbreviated,
+			 * check to see if this is it.
+			 */
+
+			if (tp->cmd_minlen
+			 && (strlen(argv[0]) <= tp->cmd_minlen)) {
+				if (strncmp(argv[0],
+					    tp->cmd_name,
+					    tp->cmd_minlen) == 0) {
+					break;
+				}
+			}
+
+			if (strcmp(argv[0], tp->cmd_name) == 0)
+				break;
+		}
+	}
+
+	/*
+	 * If we don't find a command by this name, see if the first
+	 * few characters of this match any of the known commands.
+	 * e.g., md1c20 should match md.
+	 */
+	if (i == kdb_max_commands) {
+		for_each_kdbcmd(tp, i) {
+			if (tp->cmd_name) {
+				if (strncmp(argv[0],
+					    tp->cmd_name,
+					    strlen(tp->cmd_name)) == 0) {
+					break;
+				}
+			}
+		}
+	}
+
+	if (i < kdb_max_commands) {
+		int result;
+		KDB_STATE_SET(CMD);
+		result = (*tp->cmd_func)(argc-1, (const char **)argv);
+		if (result && ignore_errors && result > KDB_CMD_GO)
+			result = 0;
+		KDB_STATE_CLEAR(CMD);
+		/* Decide what an empty next line will repeat. */
+		switch (tp->cmd_repeat) {
+		case KDB_REPEAT_NONE:
+			argc = 0;
+			if (argv[0])
+				*(argv[0]) = '\0';
+			break;
+		case KDB_REPEAT_NO_ARGS:
+			argc = 1;
+			/* NOTE(review): argv[1] may be left over from an
+			 * earlier, longer command line - confirm */
+			if (argv[1])
+				*(argv[1]) = '\0';
+			break;
+		case KDB_REPEAT_WITH_ARGS:
+			break;
+		}
+		return result;
+	}
+
+	/*
+	 * If the input with which we were presented does not
+	 * map to an existing command, attempt to parse it as an
+	 * address argument and display the result.   Useful for
+	 * obtaining the address of a variable, or the nearest symbol
+	 * to an address contained in a register.
+	 */
+	{
+		unsigned long value;
+		char *name = NULL;
+		long offset;
+		int nextarg = 0;
+
+		if (kdbgetaddrarg(0, (const char **)argv, &nextarg,
+				  &value, &offset, &name)) {
+			return KDB_NOTFOUND;
+		}
+
+		kdb_printf("%s = ", argv[0]);
+		kdb_symbol_print(value, NULL, KDB_SP_DEFAULT);
+		kdb_printf("\n");
+		return 0;
+	}
+}
+
+
+/*
+ * handle_ctrl_cmd - Step through the command history.
+ * Inputs:
+ *	cmd	input buffer; only its first character is examined
+ * Returns:
+ *	1 when the character was CTRL_P or CTRL_N and cmd_cur has been
+ *	loaded from the history, 0 when the history is empty or the
+ *	character is anything else.
+ */
+static int handle_ctrl_cmd(char *cmd)
+{
+#define CTRL_P	16
+#define CTRL_N	14
+	char key;
+
+	/* initial situation */
+	if (cmd_head == cmd_tail)
+		return 0;
+
+	key = *cmd;
+	if (key == CTRL_P) {
+		/* one entry back, unless already at the oldest one */
+		if (cmdptr != cmd_tail)
+			cmdptr = (cmdptr-1) % KDB_CMD_HISTORY_COUNT;
+	} else if (key == CTRL_N) {
+		/* one entry forward, unless already at the newest one */
+		if (cmdptr != cmd_head)
+			cmdptr = (cmdptr+1) % KDB_CMD_HISTORY_COUNT;
+	} else {
+		return 0;
+	}
+
+	strncpy(cmd_cur, cmd_hist[cmdptr], CMD_BUFLEN);
+	return 1;
+}
+
+/*
+ * kdb_reboot - Implementation of the 'reboot' command.  Calls
+ *	emergency_restart(); should that return, a message is printed
+ *	and the cpu spins forever.  Both arguments are unused.
+ */
+static int kdb_reboot(int argc, const char **argv)
+{
+	emergency_restart();
+	kdb_printf("Hmm, kdb_reboot did not reboot, spinning here\n");
+	for (;;)
+		cpu_relax();
+	/* NOTREACHED */
+	return 0;
+}
+
+/*
+ * kdb_dumpregs - Print 'regs' through show_regs() with
+ *	console_loglevel temporarily raised to 15, then restore the
+ *	previous level.
+ */
+static void kdb_dumpregs(struct pt_regs *regs)
+{
+	int saved_loglevel;
+
+	saved_loglevel = console_loglevel;
+	console_loglevel = 15;
+
+	show_regs(regs);
+	kdb_printf("\n");
+
+	console_loglevel = saved_loglevel;
+}
+
+/*
+ * kdb_set_current_task - Make 'p' the task kdb operates on.
+ *	kdb_current_regs is pointed at the registers of the cpu running
+ *	'p', or reset to NULL when 'p' is not on a cpu.
+ */
+void kdb_set_current_task(struct task_struct *p)
+{
+	kdb_current_task = p;
+
+	if (!kdb_task_has_cpu(p)) {
+		kdb_current_regs = NULL;
+		return;
+	}
+
+	kdb_current_regs = KDB_TSKREGS(kdb_process_cpu(p));
+}
+
+/*
+ * kdb_local - The main code for kdb.  This routine is invoked on a
+ *	specific processor, it is not global.  The main kdb() routine
+ *	ensures that only one processor at a time is in this routine.
+ *	This code is called with the real reason code on the first
+ *	entry to a kdb session, thereafter it is called with reason
+ *	SWITCH, even if the user goes back to the original cpu.
+ * Inputs:
+ *	reason		The reason KDB was invoked
+ *	error		The hardware-defined error code
+ *	regs		The exception frame at time of fault/breakpoint.
+ *	db_result	Result code from the break or debug point.
+ * Returns:
+ *	0	KDB was invoked for an event which it wasn't responsible
+ *	1	KDB handled the event for which it was invoked.
+ *	KDB_CMD_GO	User typed 'go'.
+ *	KDB_CMD_CPU	User switched to another cpu.
+ *	KDB_CMD_SS	Single step.
+ *	KDB_CMD_SSB	Single step until branch.
+ *	KDB_CMD_KGDB	A command returned KDB_CMD_KGDB.
+ * Remarks:
+ *	After the entry banner has been printed the function loops
+ *	reading a line, recording it in the command history and handing
+ *	it to kdb_parse(), until one of the KDB_CMD_* codes above comes
+ *	back.
+ */
+static int kdb_local(kdb_reason_t reason, int error, struct pt_regs *regs,
+		     kdb_dbtrap_t db_result)
+{
+	char *cmdbuf;
+	int diag;
+	struct task_struct *kdb_current =
+		kdb_curr_task(raw_smp_processor_id());
+
+	KDB_DEBUG_STATE("kdb_local 1", reason);
+	kdb_go_count = 0;
+	if (reason == KDB_REASON_DEBUG) {
+		/* special case below */
+	} else {
+		kdb_printf("\nEntering kdb (current=0x%p, pid %d) ",
+			   kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+		kdb_printf("on processor %d ", raw_smp_processor_id());
+#endif
+	}
+
+	switch (reason) {
+	case KDB_REASON_DEBUG:
+	{
+		/*
+		 * If re-entering kdb after a single step
+		 * command, don't print the message.
+		 */
+		switch (db_result) {
+		case KDB_DB_BPT:
+			kdb_printf("\nEntering kdb (0x%p, pid %d) ",
+				   kdb_current, kdb_current->pid);
+#if defined(CONFIG_SMP)
+			kdb_printf("on processor %d ", raw_smp_processor_id());
+#endif
+			kdb_printf("due to Debug @ " kdb_machreg_fmt "\n",
+				   instruction_pointer(regs));
+			break;
+		case KDB_DB_SSB:
+			/*
+			 * In the midst of ssb command. Just return.
+			 */
+			KDB_DEBUG_STATE("kdb_local 3", reason);
+			return KDB_CMD_SSB;	/* Continue with SSB command */
+
+			break;
+		case KDB_DB_SS:
+			break;
+		case KDB_DB_SSBPT:
+			KDB_DEBUG_STATE("kdb_local 4", reason);
+			return 1;	/* kdba_db_trap did the work */
+		default:
+			kdb_printf("kdb: Bad result from kdba_db_trap: %d\n",
+				   db_result);
+			break;
+		}
+
+	}
+		break;
+	case KDB_REASON_ENTER:
+		if (KDB_STATE(KEYBOARD))
+			kdb_printf("due to Keyboard Entry\n");
+		else
+			kdb_printf("due to KDB_ENTER()\n");
+		break;
+	case KDB_REASON_KEYBOARD:
+		KDB_STATE_SET(KEYBOARD);
+		kdb_printf("due to Keyboard Entry\n");
+		break;
+	case KDB_REASON_ENTER_SLAVE:
+		/* drop through, slaves only get released via cpu switch */
+	case KDB_REASON_SWITCH:
+		kdb_printf("due to cpu switch\n");
+		break;
+	case KDB_REASON_OOPS:
+		kdb_printf("Oops: %s\n", kdb_diemsg);
+		kdb_printf("due to oops @ " kdb_machreg_fmt "\n",
+			   instruction_pointer(regs));
+		kdb_dumpregs(regs);
+		break;
+	case KDB_REASON_NMI:
+		kdb_printf("due to NonMaskable Interrupt @ "
+			   kdb_machreg_fmt "\n",
+			   instruction_pointer(regs));
+		kdb_dumpregs(regs);
+		break;
+	case KDB_REASON_SSTEP:
+	case KDB_REASON_BREAK:
+		kdb_printf("due to %s @ " kdb_machreg_fmt "\n",
+			   reason == KDB_REASON_BREAK ?
+			   "Breakpoint" : "SS trap", instruction_pointer(regs));
+		/*
+		 * Determine if this breakpoint is one that we
+		 * are interested in.
+		 */
+		if (db_result != KDB_DB_BPT) {
+			kdb_printf("kdb: error return from kdba_bp_trap: %d\n",
+				   db_result);
+			KDB_DEBUG_STATE("kdb_local 6", reason);
+			return 0;	/* Not for us, dismiss it */
+		}
+		break;
+	case KDB_REASON_RECURSE:
+		kdb_printf("due to Recursion @ " kdb_machreg_fmt "\n",
+			   instruction_pointer(regs));
+		break;
+	default:
+		kdb_printf("kdb: unexpected reason code: %d\n", reason);
+		KDB_DEBUG_STATE("kdb_local 8", reason);
+		return 0;	/* Not for us, dismiss it */
+	}
+
+	while (1) {
+		/*
+		 * Initialize pager context.
+		 */
+		kdb_nextline = 1;
+		KDB_STATE_CLEAR(SUPPRESS);
+
+		cmdbuf = cmd_cur;
+		*cmdbuf = '\0';
+		*(cmd_hist[cmd_head]) = '\0';
+
+		if (KDB_FLAG(ONLY_DO_DUMP)) {
+			/* kdb is off but a catastrophic error requires a dump.
+			 * Take the dump and reboot.
+			 * Turn on logging so the kdb output appears in the log
+			 * buffer in the dump.
+			 */
+			const char *setargs[] = { "set", "LOGGING", "1" };
+			kdb_set(2, setargs);
+			kdb_reboot(0, NULL);
+			/*NOTREACHED*/
+		}
+
+do_full_getstr:
+		/* The PROMPT value is used as the format string. */
+#if defined(CONFIG_SMP)
+		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"),
+			 raw_smp_processor_id());
+#else
+		snprintf(kdb_prompt_str, CMD_BUFLEN, kdbgetenv("PROMPT"));
+#endif
+		/*
+		 * strncat() takes the maximum number of characters to
+		 * append, not the size of the destination, so pass what
+		 * is left of the CMD_BUFLEN bytes after the current
+		 * prompt and its terminating NUL.
+		 */
+		if (defcmd_in_progress)
+			strncat(kdb_prompt_str, "[defcmd]",
+				CMD_BUFLEN - strlen(kdb_prompt_str) - 1);
+
+		/*
+		 * Fetch command from keyboard
+		 */
+		cmdbuf = kdb_getstr(cmdbuf, CMD_BUFLEN, kdb_prompt_str);
+		if (*cmdbuf != '\n') {
+			if (*cmdbuf < 32) {
+				/* control character: history navigation */
+				if (cmdptr == cmd_head) {
+					strncpy(cmd_hist[cmd_head], cmd_cur,
+						CMD_BUFLEN);
+					*(cmd_hist[cmd_head] +
+					  strlen(cmd_hist[cmd_head])-1) = '\0';
+				}
+				if (!handle_ctrl_cmd(cmdbuf))
+					*(cmd_cur+strlen(cmd_cur)-1) = '\0';
+				cmdbuf = cmd_cur;
+				goto do_full_getstr;
+			} else {
+				strncpy(cmd_hist[cmd_head], cmd_cur,
+					CMD_BUFLEN);
+			}
+
+			/* advance the ring, dropping the oldest if full */
+			cmd_head = (cmd_head+1) % KDB_CMD_HISTORY_COUNT;
+			if (cmd_head == cmd_tail)
+				cmd_tail = (cmd_tail+1) % KDB_CMD_HISTORY_COUNT;
+		}
+
+		cmdptr = cmd_head;
+		diag = kdb_parse(cmdbuf);
+		if (diag == KDB_NOTFOUND) {
+			kdb_printf("Unknown kdb command: '%s'\n", cmdbuf);
+			diag = 0;
+		}
+		if (diag == KDB_CMD_GO
+		 || diag == KDB_CMD_CPU
+		 || diag == KDB_CMD_SS
+		 || diag == KDB_CMD_SSB
+		 || diag == KDB_CMD_KGDB)
+			break;
+
+		if (diag)
+			kdb_cmderror(diag);
+	}
+	KDB_DEBUG_STATE("kdb_local 9", diag);
+	return diag;
+}
+
+
+/*
+ * kdb_print_state - Debugging aid: print one line with the caller's
+ *	label and value together with the current processor id,
+ *	kdb_initial_cpu and kdb_state.
+ * Inputs:
+ *	text		Identifies the debug point
+ *	value		Any integer value to be printed, e.g. reason code.
+ */
+void kdb_print_state(const char *text, int value)
+{
+	kdb_printf("state: %s cpu %d value %d initial %d state %x\n",
+		   text,
+		   raw_smp_processor_id(),
+		   value,
+		   kdb_initial_cpu,
+		   kdb_state);
+}
+
+/*
+ * kdb_main_loop - After initial setup and assignment of the
+ *	controlling cpu, all cpus are in this loop.  One cpu is in
+ *	control and will issue the kdb prompt, the others will spin
+ *	until 'go' or cpu switch.
+ *
+ *	To get a consistent view of the kernel stacks for all
+ *	processes, this routine is invoked from the main kdb code via
+ *	an architecture specific routine.  kdba_main_loop is
+ *	responsible for making the kernel stacks consistent for all
+ *	processes, there should be no difference between a blocked
+ *	process and a running process as far as kdb is concerned.
+ * Inputs:
+ *	reason		The reason KDB was invoked; only used here for
+ *			the debug-state traces.
+ *	reason2		kdb's current reason code.
+ *			Initially error but can change
+ *			according to kdb state.  This is the reason
+ *			handed to kdb_local().
+ *	error		The hardware-defined error code
+ *	db_result	Result code from break or debug point.
+ *	regs		The exception frame at time of fault/breakpoint.
+ *			should always be valid.
+ * Returns:
+ *	0	KDB was invoked for an event which it wasn't responsible
+ *	1	KDB handled the event for which it was invoked.
+ *	Any other value is the kdb_local() result passed through
+ *	unchanged (e.g. KDB_CMD_GO, KDB_CMD_CPU, KDB_CMD_SS).
+ */
+int kdb_main_loop(kdb_reason_t reason, kdb_reason_t reason2, int error,
+	      kdb_dbtrap_t db_result, struct pt_regs *regs)
+{
+	int result = 1;
+	/* Stay in kdb() until 'go', 'ss[b]' or an error */
+	/* every path through the body ends in a break: a single pass */
+	while (1) {
+		/*
+		 * All processors except the one that is in control
+		 * will spin here.
+		 */
+		KDB_DEBUG_STATE("kdb_main_loop 1", reason);
+		while (KDB_STATE(HOLD_CPU)) {
+			/* state KDB is turned off by kdb_cpu to see if the
+			 * other cpus are still live, each cpu in this loop
+			 * turns it back on.
+			 */
+			if (!KDB_STATE(KDB))
+				KDB_STATE_SET(KDB);
+		}
+
+		KDB_STATE_CLEAR(SUPPRESS);
+		KDB_DEBUG_STATE("kdb_main_loop 2", reason);
+		if (KDB_STATE(LEAVING))
+			break;	/* Another cpu said 'go' */
+		/* Still using kdb, this processor is in control */
+		result = kdb_local(reason2, error, regs, db_result);
+		KDB_DEBUG_STATE("kdb_main_loop 3", result);
+
+		if (result == KDB_CMD_CPU)
+			break;
+
+		if (result == KDB_CMD_SS) {
+			KDB_STATE_SET(DOING_SS);
+			break;
+		}
+
+		if (result == KDB_CMD_SSB) {
+			KDB_STATE_SET(DOING_SS);
+			KDB_STATE_SET(DOING_SSB);
+			break;
+		}
+
+		if (result == KDB_CMD_KGDB) {
+			if (!(KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)))
+				kdb_printf("Entering please attach debugger "
+					   "or use $D#44+ or $3#33\n");
+			break;
+		}
+		/* 0, 1 and KDB_CMD_GO are the expected remaining results */
+		if (result && result != 1 && result != KDB_CMD_GO)
+			kdb_printf("\nUnexpected kdb_local return code %d\n",
+				   result);
+		KDB_DEBUG_STATE("kdb_main_loop 4", reason);
+		break;
+	}
+	if (KDB_STATE(DOING_SS))
+		KDB_STATE_CLEAR(SSBPT);
+
+	return result;
+}
+
+/*
+ * kdb_mdr - Worker for the 'mdr' (raw memory read) command:
+ *	mdr  <addr arg>,<byte count>
+ *	Prints 'count' bytes starting at 'addr' as two hex digits each,
+ *	followed by a newline.  Output stops, without the newline, at
+ *	the first byte kdb_getarea cannot read.
+ * Inputs:
+ *	addr	Start address
+ *	count	Number of bytes
+ * Returns:
+ *	Always 0.  Any errors are detected and printed by kdb_getarea.
+ */
+static int kdb_mdr(unsigned long addr, unsigned int count)
+{
+	unsigned char byte;
+
+	for (; count; count--, addr++) {
+		if (kdb_getarea(byte, addr))
+			return 0;
+		kdb_printf("%02x", byte);
+	}
+	kdb_printf("\n");
+	return 0;
+}
+
+/*
+ * kdb_md - This function implements the 'md', 'md1', 'md2', 'md4',
+ *	'md8' 'mdr' and 'mds' commands.
+ *
+ *	md|mds  [<addr arg> [<line count> [<radix>]]]
+ *	mdWcN	[<addr arg> [<line count> [<radix>]]]
+ *		where W = is the width (1, 2, 4 or 8) and N is the count.
+ *		for eg., md1c20 reads 20 bytes, 1 at a time.
+ *	mdr  <addr arg>,<byte count>
+ */
+/*
+ * kdb_md_line - Print one line of memory dump output.
+ * Inputs:
+ *	fmtstr		kdb_printf format used for each word
+ *	addr		address of the first word on the line
+ *	symbolic	non-zero: look up each word with kdbnearsym()
+ *	nosect		non-zero: omit the module/section detail line
+ *	bytesperword	width of each word in bytes
+ *	num		number of words that make up a full line
+ *	repeat		number of words still wanted
+ *	phys		non-zero: 'addr' is read with kdb_getphysword()
+ * Remarks:
+ *	Words without a symbol name are also rendered as characters
+ *	(non-printable ones as '.') into cbuf, which is printed at the
+ *	end of the line.
+ *	NOTE(review): nothing here bounds the writes into cbuf[32];
+ *	callers presumably keep num * bytesperword small - confirm.
+ */
+static void kdb_md_line(const char *fmtstr, unsigned long addr,
+			int symbolic, int nosect, int bytesperword,
+			int num, int repeat, int phys)
+{
+	/* print just one line of data */
+	kdb_symtab_t symtab;
+	char cbuf[32];
+	char *c = cbuf;
+	int i;
+	unsigned long word;
+
+	memset(cbuf, '\0', sizeof(cbuf));
+	if (phys)
+		kdb_printf("phys " kdb_machreg_fmt0 " ", addr);
+	else
+		kdb_printf(kdb_machreg_fmt0 " ", addr);
+
+	for (i = 0; i < num && repeat--; i++) {
+		if (phys) {
+			if (kdb_getphysword(&word, addr, bytesperword))
+				break;
+		} else if (kdb_getword(&word, addr, bytesperword))
+			break;
+		kdb_printf(fmtstr, word);
+		if (symbolic)
+			kdbnearsym(word, &symtab);
+		else
+			memset(&symtab, 0, sizeof(symtab));
+		if (symtab.sym_name) {
+			kdb_symbol_print(word, &symtab, 0);
+			if (!nosect) {
+				kdb_printf("\n");
+				kdb_printf("                       %s %s "
+					   kdb_machreg_fmt " "
+					   kdb_machreg_fmt " "
+					   kdb_machreg_fmt, symtab.mod_name,
+					   symtab.sec_name, symtab.sec_start,
+					   symtab.sym_start, symtab.sym_end);
+			}
+			addr += bytesperword;
+		} else {
+			union {
+				u64 word;
+				unsigned char c[8];
+			} wc;
+			unsigned char *cp;
+			/* point cp at the first byte of the word's value */
+#ifdef	__BIG_ENDIAN
+			cp = wc.c + 8 - bytesperword;
+#else
+			cp = wc.c;
+#endif
+			wc.word = word;
+#define printable_char(c) \
+	({unsigned char __c = c; isascii(__c) && isprint(__c) ? __c : '.'; })
+			/*
+			 * Each case handles part of the word and falls
+			 * through, so that in total bytesperword
+			 * characters are emitted and addr advances by
+			 * bytesperword.
+			 */
+			switch (bytesperword) {
+			case 8:
+				*c++ = printable_char(*cp++);
+				*c++ = printable_char(*cp++);
+				*c++ = printable_char(*cp++);
+				*c++ = printable_char(*cp++);
+				addr += 4;
+				/* fall through */
+			case 4:
+				*c++ = printable_char(*cp++);
+				*c++ = printable_char(*cp++);
+				addr += 2;
+				/* fall through */
+			case 2:
+				*c++ = printable_char(*cp++);
+				addr++;
+				/* fall through */
+			case 1:
+				*c++ = printable_char(*cp++);
+				addr++;
+				break;
+			}
+#undef printable_char
+		}
+	}
+	/* pad for the words not printed, then the character rendering */
+	kdb_printf("%*s %s\n", (int)((num-i)*(2*bytesperword + 1)+1),
+		   " ", cbuf);
+}
+
+static int kdb_md(int argc, const char **argv)
+{
+	static unsigned long last_addr;
+	static int last_radix, last_bytesperword, last_repeat;
+	int radix = 16, mdcount = 8, bytesperword = KDB_WORD_SIZE, repeat;
+	int nosect = 0;
+	char fmtchar, fmtstr[64];
+	unsigned long addr;
+	unsigned long word;
+	long offset = 0;
+	int symbolic = 0;
+	int valid = 0;
+	int phys = 0;
+
+	kdbgetintenv("MDCOUNT", &mdcount);
+	kdbgetintenv("RADIX", &radix);
+	kdbgetintenv("BYTESPERWORD", &bytesperword);
+
+	/* Assume 'md <addr>' and start with environment values */
+	repeat = mdcount * 16 / bytesperword;
+
+	if (strcmp(argv[0], "mdr") == 0) {
+		if (argc != 2)
+			return KDB_ARGCOUNT;
+		valid = 1;
+	} else if (isdigit(argv[0][2])) {
+		bytesperword = (int)(argv[0][2] - '0');
+		if (bytesperword == 0) {
+			bytesperword = last_bytesperword;
+			if (bytesperword == 0)
+				bytesperword = 4;
+		}
+		last_bytesperword = bytesperword;
+		repeat = mdcount * 16 / bytesperword;
+		if (!argv[0][3])
+			valid = 1;
+		else if (argv[0][3] == 'c' && argv[0][4]) {
+			char *p;
+			repeat = simple_strtoul(argv[0] + 4, &p, 10);
+			mdcount = ((repeat * bytesperword) + 15) / 16;
+			valid = !*p;
+		}
+		last_repeat = repeat;
+	} else if (strcmp(argv[0], "md") == 0)
+		valid = 1;
+	else if (strcmp(argv[0], "mds") == 0)
+		valid = 1;
+	else if (strcmp(argv[0], "mdp") == 0) {
+		phys = valid = 1;
+	}
+	if (!valid)
+		return KDB_NOTFOUND;
+
+	if (argc == 0) {
+		if (last_addr == 0)
+			return KDB_ARGCOUNT;
+		addr = last_addr;
+		radix = last_radix;
+		bytesperword = last_bytesperword;
+		repeat = last_repeat;
+		mdcount = ((repeat * bytesperword) + 15) / 16;
+	}
+
+	if (argc) {
+		unsigned long val;
+		int diag, nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg, &addr,
+				     &offset, NULL);
+		if (diag)
+			return diag;
+		if (argc > nextarg+2)
+			return KDB_ARGCOUNT;
+
+		if (argc >= nextarg) {
+			diag = kdbgetularg(argv[nextarg], &val);
+			if (!diag) {
+				mdcount = (int) val;
+				repeat = mdcount * 16 / bytesperword;
+			}
+		}
+		if (argc >= nextarg+1) {
+			diag = kdbgetularg(argv[nextarg+1], &val);
+			if (!diag)
+				radix = (int) val;
+		}
+	}
+
+	if (strcmp(argv[0], "mdr") == 0)
+		return kdb_mdr(addr, mdcount);
+
+	switch (radix) {
+	case 10:
+		fmtchar = 'd';
+		break;
+	case 16:
+		fmtchar = 'x';
+		break;
+	case 8:
+		fmtchar = 'o';
+		break;
+	default:
+		return KDB_BADRADIX;
+	}
+
+	last_radix = radix;
+
+	if (bytesperword > KDB_WORD_SIZE)
+		return KDB_BADWIDTH;
+
+	switch (bytesperword) {
+	case 8:
+		sprintf(fmtstr, "%%16.16l%c ", fmtchar);
+		break;
+	case 4:
+		sprintf(fmtstr, "%%8.8l%c ", fmtchar);
+		break;
+	case 2:
+		sprintf(fmtstr, "%%4.4l%c ", fmtchar);
+		break;
+	case 1:
+		sprintf(fmtstr, "%%2.2l%c ", fmtchar);
+		break;
+	default:
+		return KDB_BADWIDTH;
+	}
+
+	last_repeat = repeat;
+	last_bytesperword = bytesperword;
+
+	if (strcmp(argv[0], "mds") == 0) {
+		symbolic = 1;
+		/* Do not save these changes as last_*, they are temporary mds
+		 * overrides.
+		 */
+		bytesperword = KDB_WORD_SIZE;
+		repeat = mdcount;
+		kdbgetintenv("NOSECT", &nosect);
+	}
+
+	/* Round address down modulo BYTESPERWORD */
+
+	addr &= ~(bytesperword-1);
+
+	while (repeat > 0) {
+		unsigned long a;
+		int n, z, num = (symbolic ? 1 : (16 / bytesperword));
+
+		if (KDB_FLAG(CMD_INTERRUPT))
+			return 0;
+		for (a = addr, z = 0; z < repeat; a += bytesperword, ++z) {
+			if (phys) {
+				if (kdb_getphysword(&word, a, bytesperword)
+						|| word)
+					break;
+			} else if (kdb_getword(&word, a, bytesperword) || word)
+				break;
+		}
+		n = min(num, repeat);
+		kdb_md_line(fmtstr, addr, symbolic, nosect, bytesperword,
+			    num, repeat, phys);
+		addr += bytesperword * n;
+		repeat -= n;
+		z = (z + num - 1) / num;
+		if (z > 2) {
+			int s = num * (z-2);
+			kdb_printf(kdb_machreg_fmt0 "-" kdb_machreg_fmt0
+				   " zero suppressed\n",
+				addr, addr + bytesperword * s - 1);
+			addr += bytesperword * s;
+			repeat -= s;
+		}
+	}
+	last_addr = addr;
+
+	return 0;
+}
+
+/*
+ * kdb_mm - This function implements the 'mm' command.
+ *	mm address-expression new-value
+ * Remarks:
+ *	mm writes one machine word, mmW writes W bytes (W is a single digit).
+ */
+static int kdb_mm(int argc, const char **argv)
+{
+	int diag;
+	unsigned long addr;
+	long offset = 0;
+	unsigned long contents;
+	int nextarg;
+	int width;
+
+	if (argv[0][2] && !isdigit(argv[0][2]))
+		return KDB_NOTFOUND;
+
+	if (argc < 2)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	if (nextarg > argc)
+		return KDB_ARGCOUNT;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &contents, NULL, NULL);
+	if (diag)
+		return diag;
+
+	if (nextarg != argc + 1)
+		return KDB_ARGCOUNT;
+
+	width = argv[0][2] ? (argv[0][2] - '0') : (KDB_WORD_SIZE);
+	diag = kdb_putword(addr, contents, width);
+	if (diag)
+		return diag;
+
+	kdb_printf(kdb_machreg_fmt " = " kdb_machreg_fmt "\n", addr, contents);
+
+	return 0;
+}
+
+/*
+ * kdb_go - This function implements the 'go' command.
+ *	go [address-expression]
+ */
+static int kdb_go(int argc, const char **argv)
+{
+	unsigned long addr;
+	int diag;
+	int nextarg;
+	long offset;
+
+	if (argc == 1) {
+		if (raw_smp_processor_id() != kdb_initial_cpu) {
+			kdb_printf("go <address> must be issued from the "
+				   "initial cpu, do cpu %d first\n",
+				   kdb_initial_cpu);
+			return KDB_ARGCOUNT;
+		}
+		nextarg = 1;
+		diag = kdbgetaddrarg(argc, argv, &nextarg,
+				     &addr, &offset, NULL);
+		if (diag)
+			return diag;
+	} else if (argc) {
+		return KDB_ARGCOUNT;
+	}
+
+	diag = KDB_CMD_GO;
+	if (KDB_FLAG(CATASTROPHIC)) {
+		kdb_printf("Catastrophic error detected\n");
+		kdb_printf("kdb_continue_catastrophic=%d, ",
+			kdb_continue_catastrophic);
+		if (kdb_continue_catastrophic == 0 && kdb_go_count++ == 0) {
+			kdb_printf("type go a second time if you really want "
+				   "to continue\n");
+			return 0;
+		}
+		if (kdb_continue_catastrophic == 2) {
+			kdb_printf("forcing reboot\n");
+			kdb_reboot(0, NULL);
+		}
+		kdb_printf("attempting to continue\n");
+	}
+	return diag;
+}
+
+/*
+ * kdb_rd - This function implements the 'rd' command.
+ */
+static int kdb_rd(int argc, const char **argv)
+{
+	int diag = kdb_check_regs();
+	if (diag)
+		return diag;
+
+	kdb_dumpregs(kdb_current_regs);
+	return 0;
+}
+
+/*
+ * kdb_rm - This function implements the 'rm' (register modify) command.
+ *	rm register-name new-contents
+ * Remarks:
+ *	The arguments are validated, but no register is modified yet:
+ *	the command only reports that register set is not implemented.
+ */
+static int kdb_rm(int argc, const char **argv)
+{
+	int diag;
+	int ind = 0;
+	unsigned long contents;
+
+	if (argc != 2)
+		return KDB_ARGCOUNT;
+	/*
+	 * Allow presence or absence of leading '%' symbol.
+	 */
+	if (argv[1][0] == '%')
+		ind = 1;
+
+	diag = kdbgetularg(argv[2], &contents);
+	if (diag)
+		return diag;
+
+	diag = kdb_check_regs();
+	if (diag)
+		return diag;
+	kdb_printf("ERROR: Register set currently not implemented\n");
+	return 0;
+}
+
+#if defined(CONFIG_MAGIC_SYSRQ)
+/*
+ * kdb_sr - This function implements the 'sr' (SYSRQ key) command
+ *	which interfaces to the soi-disant MAGIC SYSRQ functionality.
+ *		sr <magic-sysrq-code>
+ */
+static int kdb_sr(int argc, const char **argv)
+{
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+	sysrq_toggle_support(1);
+	handle_sysrq(*argv[1], NULL);
+
+	return 0;
+}
+#endif	/* CONFIG_MAGIC_SYSRQ */
+
+/*
+ * kdb_ef - This function implements the 'ef' (display exception
+ *	frame) command.  This command takes an address and expects to
+ *	find an exception frame at that address, formats and prints
+ *	it.
+ *		ef address-expression
+ * Remarks:
+ *	Not done yet.
+ */
+static int kdb_ef(int argc, const char **argv)
+{
+	int diag;
+	unsigned long addr;
+	long offset;
+	int nextarg;
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+	show_regs((struct pt_regs *)addr);
+	return 0;
+}
+
+#if defined(CONFIG_MODULES)
+/* modules using other modules */
+struct module_use {
+	struct list_head list;
+	struct module *module_which_uses;
+};
+
+/*
+ * kdb_lsmod - This function implements the 'lsmod' command.  Lists
+ *	currently loaded kernel modules, one line per module.
+ *	Mostly taken from userland lsmod.
+ */
+static int kdb_lsmod(int argc, const char **argv)
+{
+	struct module *mod;
+
+	if (argc != 0)
+		return KDB_ARGCOUNT;
+
+	kdb_printf("Module                  Size  modstruct     Used by\n");
+	list_for_each_entry(mod, kdb_modules, list) {
+		kdb_printf("%-20s%8u  0x%p ", mod->name,
+			   mod->core_size, (void *)mod);
+#ifdef CONFIG_MODULE_UNLOAD
+		kdb_printf("%4d ", module_refcount(mod));
+#endif
+		if (mod->state == MODULE_STATE_GOING)
+			kdb_printf(" (Unloading)");
+		else if (mod->state == MODULE_STATE_COMING)
+			kdb_printf(" (Loading)");
+		else
+			kdb_printf(" (Live)");
+
+#ifdef CONFIG_MODULE_UNLOAD
+		{
+			struct module_use *use;
+			kdb_printf(" [ ");
+			list_for_each_entry(use, &mod->modules_which_use_me,
+					    list)
+				kdb_printf("%s ", use->module_which_uses->name);
+			kdb_printf("]");
+		}
+#endif
+		kdb_printf("\n");	/* end the row in every configuration */
+	}
+
+	return 0;
+}
+
+#endif	/* CONFIG_MODULES */
+
+/*
+ * kdb_env - This function implements the 'env' command.  Display the
+ *	current environment variables.
+ */
+
+static int kdb_env(int argc, const char **argv)
+{
+	int i;
+
+	for (i = 0; i < __nenv; i++) {
+		if (__env[i])
+			kdb_printf("%s\n", __env[i]);
+	}
+
+	if (KDB_DEBUG(MASK))
+		kdb_printf("KDBFLAGS=0x%x\n", kdb_flags);
+
+	return 0;
+}
+
+#ifdef CONFIG_PRINTK
+/*
+ * kdb_dmesg - This function implements the 'dmesg' command to display
+ *	the contents of the syslog buffer.
+ *		dmesg [lines] [adjust]
+ */
+static int kdb_dmesg(int argc, const char **argv)
+{
+	char *syslog_data[4], *start, *end, c = '\0', *p;
+	int diag, logging, logsize, lines = 0, adjust = 0, n;
+
+	if (argc > 2)
+		return KDB_ARGCOUNT;
+	if (argc) {
+		char *cp;
+		lines = simple_strtol(argv[1], &cp, 0);
+		if (*cp)
+			lines = 0;
+		if (argc > 1) {
+			adjust = simple_strtoul(argv[2], &cp, 0);
+			if (*cp || adjust < 0)
+				adjust = 0;
+		}
+	}
+
+	/* disable LOGGING if set */
+	diag = kdbgetintenv("LOGGING", &logging);
+	if (!diag && logging) {
+		const char *setargs[] = { "set", "LOGGING", "0" };
+		kdb_set(2, setargs);
+	}
+
+	/* syslog_data[0,1] physical start, end+1.  syslog_data[2,3]
+	 * logical start, end+1. */
+	kdb_syslog_data(syslog_data);
+	if (syslog_data[2] == syslog_data[3])
+		return 0;
+	logsize = syslog_data[1] - syslog_data[0];
+	start = syslog_data[2];
+	end = syslog_data[3];
+#define KDB_WRAP(p) (((p - syslog_data[0]) % logsize) + syslog_data[0])
+	for (n = 0, p = start; p < end; ++p) {
+		c = *KDB_WRAP(p);
+		if (c == '\n')
+			++n;
+	}
+	if (c != '\n')
+		++n;
+	if (lines < 0) {
+		if (adjust >= n)
+			kdb_printf("buffer only contains %d lines, nothing "
+				   "printed\n", n);
+		else if (adjust - lines >= n)
+			kdb_printf("buffer only contains %d lines, last %d "
+				   "lines printed\n", n, n - adjust);
+		if (adjust) {
+			for (; start < end && adjust; ++start) {
+				if (*KDB_WRAP(start) == '\n')
+					--adjust;
+			}
+			if (start < end)
+				++start;
+		}
+		for (p = start; p < end && lines; ++p) {
+			if (*KDB_WRAP(p) == '\n')
+				++lines;
+		}
+		end = p;
+	} else if (lines > 0) {
+		int skip = n - (adjust + lines);
+		if (adjust >= n) {
+			kdb_printf("buffer only contains %d lines, "
+				   "nothing printed\n", n);
+			skip = n;
+		} else if (skip < 0) {
+			lines += skip;
+			skip = 0;
+			kdb_printf("buffer only contains %d lines, first "
+				   "%d lines printed\n", n, lines);
+		}
+		for (; start < end && skip; ++start) {
+			if (*KDB_WRAP(start) == '\n')
+				--skip;
+		}
+		for (p = start; p < end && lines; ++p) {
+			if (*KDB_WRAP(p) == '\n')
+				--lines;
+		}
+		end = p;
+	}
+	/* Do a line at a time (max 200 chars) to reduce protocol overhead */
+	c = '\n';
+	while (start != end) {
+		char buf[201];
+		p = buf;
+		if (KDB_FLAG(CMD_INTERRUPT))
+			return 0;
+		while (start < end && (c = *KDB_WRAP(start)) &&
+		       (p - buf) < sizeof(buf)-1) {
+			++start;
+			*p++ = c;
+			if (c == '\n')
+				break;
+		}
+		*p = '\0';
+		kdb_printf("%s", buf);
+	}
+	if (c != '\n')
+		kdb_printf("\n");
+
+	return 0;
+}
+#endif /* CONFIG_PRINTK */
+/*
+ * kdb_cpu - This function implements the 'cpu' command.
+ *	cpu	[<cpunum>]	(no argument: kdb_cpu_status lists the cpus)
+ * Returns:
+ *	KDB_CMD_CPU for a switch, 0 for the listing, a kdb diagnostic if error
+ */
+static void kdb_cpu_status(void)
+{
+	int i, start_cpu, first_print = 1;
+	char state, prev_state = '?';
+
+	kdb_printf("Currently on cpu %d\n", raw_smp_processor_id());
+	kdb_printf("Available cpus: ");
+	for (start_cpu = -1, i = 0; i < NR_CPUS; i++) {
+		if (!cpu_online(i)) {
+			state = 'F';	/* cpu is offline */
+		} else {
+			state = ' ';	/* cpu is responding to kdb */
+			if (kdb_task_state_char(KDB_TSK(i)) == 'I')
+				state = 'I';	/* idle task */
+		}
+		if (state != prev_state) {
+			if (prev_state != '?') {
+				if (!first_print)
+					kdb_printf(", ");
+				first_print = 0;
+				kdb_printf("%d", start_cpu);
+				if (start_cpu < i-1)
+					kdb_printf("-%d", i-1);
+				if (prev_state != ' ')
+					kdb_printf("(%c)", prev_state);
+			}
+			prev_state = state;
+			start_cpu = i;
+		}
+	}
+	/* print the trailing cpus, ignoring them if they are all offline */
+	if (prev_state != 'F') {
+		if (!first_print)
+			kdb_printf(", ");
+		kdb_printf("%d", start_cpu);
+		if (start_cpu < i-1)
+			kdb_printf("-%d", i-1);
+		if (prev_state != ' ')
+			kdb_printf("(%c)", prev_state);
+	}
+	kdb_printf("\n");
+}
+
+static int kdb_cpu(int argc, const char **argv)
+{
+	unsigned long cpunum;
+	int diag;
+
+	if (argc == 0) {
+		kdb_cpu_status();
+		return 0;
+	}
+
+	if (argc != 1)
+		return KDB_ARGCOUNT;
+
+	diag = kdbgetularg(argv[1], &cpunum);
+	if (diag)
+		return diag;
+
+	/*
+	 * Validate cpunum: ids run 0..NR_CPUS-1, so NR_CPUS itself is bad
+	 */
+	if ((cpunum >= NR_CPUS) || !cpu_online(cpunum))
+		return KDB_BADCPUNUM;
+
+	dbg_switch_cpu = cpunum;
+
+	/*
+	 * Switch to other cpu
+	 */
+	return KDB_CMD_CPU;
+}
+
+/* The user may not realize that ps/bta with no parameters does not print idle
+ * or sleeping system daemon processes, so tell them how many were suppressed.
+ */
+void kdb_ps_suppressed(void)
+{
+	int idle = 0, daemon = 0;
+	unsigned long mask_I = kdb_task_state_string("I"),
+		      mask_M = kdb_task_state_string("M");
+	unsigned long cpu;
+	const struct task_struct *p, *g;
+	for_each_online_cpu(cpu) {
+		p = kdb_curr_task(cpu);
+		if (kdb_task_state(p, mask_I))
+			++idle;
+	}
+	kdb_do_each_thread(g, p) {
+		if (kdb_task_state(p, mask_M))
+			++daemon;
+	} kdb_while_each_thread(g, p);
+	if (idle || daemon) {
+		if (idle)
+			kdb_printf("%d idle process%s (state I)%s\n",
+				   idle, idle == 1 ? "" : "es",
+				   daemon ? " and " : "");
+		if (daemon)
+			kdb_printf("%d sleeping system daemon (state M) "
+				   "process%s", daemon,
+				   daemon == 1 ? "" : "es");
+		kdb_printf(" suppressed,\nuse 'ps A' to see all.\n");
+	}
+}
+
+/*
+ * kdb_ps - This function implements the 'ps' command which shows a
+ *	list of the active processes; kdb_ps1 prints the line for one task.
+ *		ps [DRSTCZEUIMA]   All processes, optionally filtered by state
+ */
+void kdb_ps1(const struct task_struct *p)
+{
+	int cpu;
+	unsigned long tmp;
+
+	if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+		return;
+
+	cpu = kdb_process_cpu(p);
+	kdb_printf("0x%p %8d %8d  %d %4d   %c  0x%p %c%s\n",
+		   (void *)p, p->pid, p->parent->pid,
+		   kdb_task_has_cpu(p), kdb_process_cpu(p),
+		   kdb_task_state_char(p),
+		   (void *)(&p->thread),
+		   p == kdb_curr_task(raw_smp_processor_id()) ? '*' : ' ',
+		   p->comm);
+	if (kdb_task_has_cpu(p)) {
+		if (!KDB_TSK(cpu)) {
+			kdb_printf("  Error: no saved data for this cpu\n");
+		} else {
+			if (KDB_TSK(cpu) != p)
+				kdb_printf("  Error: does not match running "
+				   "process table (0x%p)\n", KDB_TSK(cpu));
+		}
+	}
+}
+
+static int kdb_ps(int argc, const char **argv)
+{
+	struct task_struct *g, *p;
+	unsigned long mask, cpu;
+
+	if (argc == 0)
+		kdb_ps_suppressed();
+	kdb_printf("%-*s      Pid   Parent [*] cpu State %-*s Command\n",
+		(int)(2*sizeof(void *))+2, "Task Addr",
+		(int)(2*sizeof(void *))+2, "Thread");
+	mask = kdb_task_state_string(argc ? argv[1] : NULL);
+	/* Run the active tasks first */
+	for_each_online_cpu(cpu) {
+		if (KDB_FLAG(CMD_INTERRUPT))
+			return 0;
+		p = kdb_curr_task(cpu);
+		if (kdb_task_state(p, mask))
+			kdb_ps1(p);
+	}
+	kdb_printf("\n");
+	/* Now the real tasks */
+	kdb_do_each_thread(g, p) {
+		if (KDB_FLAG(CMD_INTERRUPT))
+			return 0;
+		if (kdb_task_state(p, mask))
+			kdb_ps1(p);
+	} kdb_while_each_thread(g, p);
+
+	return 0;
+}
+
+/*
+ * kdb_pid - This function implements the 'pid' command which switches
+ *	the currently active process.
+ *		pid [<pid> | R]
+ */
+static int kdb_pid(int argc, const char **argv)
+{
+	struct task_struct *p;
+	unsigned long val;
+	int diag;
+
+	if (argc > 1)
+		return KDB_ARGCOUNT;
+
+	if (argc) {
+		if (strcmp(argv[1], "R") == 0) {
+			p = KDB_TSK(kdb_initial_cpu);
+		} else {
+			diag = kdbgetularg(argv[1], &val);
+			if (diag)
+				return KDB_BADINT;
+
+			p = find_task_by_pid_ns((pid_t)val,	&init_pid_ns);
+			if (!p) {
+				kdb_printf("No task with pid=%d\n", (pid_t)val);
+				return 0;
+			}
+		}
+		kdb_set_current_task(p);
+	}
+	kdb_printf("KDB current process is %s(pid=%d)\n",
+		   kdb_current_task->comm,
+		   kdb_current_task->pid);
+
+	return 0;
+}
+
+/*
+ * kdb_ll - This function implements the 'll' command which follows a
+ *	linked list and executes an arbitrary command for each
+ *	element:  ll <first-element> <linkoffset> <cmd>
+ */
+static int kdb_ll(int argc, const char **argv)
+{
+	int diag;
+	unsigned long addr;
+	long offset = 0;
+	unsigned long va;
+	unsigned long linkoffset;
+	int nextarg;
+	const char *command;
+
+	if (argc != 3)
+		return KDB_ARGCOUNT;
+
+	nextarg = 1;
+	diag = kdbgetaddrarg(argc, argv, &nextarg, &addr, &offset, NULL);
+	if (diag)
+		return diag;
+
+	diag = kdbgetularg(argv[2], &linkoffset);
+	if (diag)
+		return diag;
+
+	/*
+	 * Using the starting address as
+	 * the first element in the list, and assuming that
+	 * the list ends with a null pointer.
+	 */
+
+	va = addr;
+	command = kdb_strdup(argv[3], GFP_KDB);
+	if (!command) {
+		kdb_printf("%s: cannot duplicate command\n", __func__);
+		return 0;
+	}
+	/* Recursive use of kdb_parse, do not use argv after this point */
+	argv = NULL;
+
+	while (va) {
+		char buf[80];
+
+		sprintf(buf, "%s " kdb_machreg_fmt "\n", command, va);
+		diag = kdb_parse(buf);
+		if (diag)
+			break;	/* report the failing command's diagnostic */
+
+		addr = va + linkoffset;
+		if (kdb_getword(&va, addr, sizeof(va)))
+			break;	/* unreadable link ends the walk, diag is 0 */
+	}
+	kfree(command);		/* reached on every way out of the loop */
+
+	return diag;
+}
+
+static int kdb_kgdb(int argc, const char **argv)
+{
+	return KDB_CMD_KGDB;
+}
+
+/*
+ * kdb_help - This function implements the 'help' and '?' commands.
+ */
+static int kdb_help(int argc, const char **argv)
+{
+	kdbtab_t *kt;
+	int i;
+
+	kdb_printf("%-15.15s %-20.20s %s\n", "Command", "Usage", "Description");
+	kdb_printf("-----------------------------"
+		   "-----------------------------\n");
+	for_each_kdbcmd(kt, i) {
+		if (kt->cmd_name)
+			kdb_printf("%-15.15s %-20.20s %s\n", kt->cmd_name,
+				   kt->cmd_usage, kt->cmd_help);
+		if (KDB_FLAG(CMD_INTERRUPT))
+			return 0;
+	}
+	return 0;
+}
+
+/*
+ * kdb_kill - This function implements the 'kill' commands.
+ */
+static int kdb_kill(int argc, const char **argv)
+{
+	long sig, pid;
+	char *endp;
+	struct task_struct *p;
+	struct siginfo info;
+
+	if (argc != 2)
+		return KDB_ARGCOUNT;
+
+	sig = simple_strtol(argv[1], &endp, 0);
+	if (*endp)
+		return KDB_BADINT;
+	if (sig >= 0) {
+		kdb_printf("Invalid signal parameter.<-signal>\n");
+		return 0;
+	}
+	sig = -sig;
+
+	pid = simple_strtol(argv[2], &endp, 0);
+	if (*endp)
+		return KDB_BADINT;
+	if (pid <= 0) {
+		kdb_printf("Process ID must be large than 0.\n");
+		return 0;
+	}
+
+	/* Find the process. */
+	p = find_task_by_pid_ns(pid, &init_pid_ns);
+	if (!p) {
+		kdb_printf("The specified process isn't found.\n");
+		return 0;
+	}
+	p = p->group_leader;
+	info.si_signo = sig;
+	info.si_errno = 0;
+	info.si_code = SI_USER;
+	info.si_pid = pid;  /* same capabilities as process being signalled */
+	info.si_uid = 0;    /* kdb has root authority */
+	kdb_send_sig_info(p, &info);
+	return 0;
+}
+
+struct kdb_tm {
+	int tm_sec;	/* seconds */
+	int tm_min;	/* minutes */
+	int tm_hour;	/* hours */
+	int tm_mday;	/* day of the month */
+	int tm_mon;	/* month */
+	int tm_year;	/* year */
+};
+
+static void kdb_gmtime(struct timespec *tv, struct kdb_tm *tm)
+{
+	/* This will work from 1970-2099, 2100 is not a leap year */
+	static int mon_day[] = { 31, 29, 31, 30, 31, 30, 31,
+				 31, 30, 31, 30, 31 };
+	memset(tm, 0, sizeof(*tm));
+	tm->tm_sec  = tv->tv_sec % (24 * 60 * 60);
+	tm->tm_mday = tv->tv_sec / (24 * 60 * 60) +
+		(2 * 365 + 1); /* shift base from 1970 to 1968 */
+	tm->tm_min =  tm->tm_sec / 60 % 60;
+	tm->tm_hour = tm->tm_sec / 60 / 60;
+	tm->tm_sec =  tm->tm_sec % 60;
+	tm->tm_year = 68 + 4*(tm->tm_mday / (4*365+1));
+	tm->tm_mday %= (4*365+1);
+	mon_day[1] = 29;
+	while (tm->tm_mday >= mon_day[tm->tm_mon]) {
+		tm->tm_mday -= mon_day[tm->tm_mon];
+		if (++tm->tm_mon == 12) {
+			tm->tm_mon = 0;
+			++tm->tm_year;
+			mon_day[1] = 28;
+		}
+	}
+	++tm->tm_mday;
+}
+
+/*
+ * Most of this code has been lifted from kernel/timer.c::sys_sysinfo().
+ * I cannot call that code directly from kdb, it has an unconditional
+ * cli()/sti() and calls routines that take locks which can stop the debugger.
+ */
+static void kdb_sysinfo(struct sysinfo *val)
+{
+	struct timespec uptime;
+	do_posix_clock_monotonic_gettime(&uptime);
+	memset(val, 0, sizeof(*val));
+	val->uptime = uptime.tv_sec;
+	val->loads[0] = avenrun[0];
+	val->loads[1] = avenrun[1];
+	val->loads[2] = avenrun[2];
+	val->procs = nr_threads-1;
+	si_meminfo(val);
+
+	return;
+}
+
+/*
+ * kdb_summary - 'summary' command: uts names, date, uptime, load, memory.
+ */
+static int kdb_summary(int argc, const char **argv)
+{
+	struct kdb_tm tm;
+	struct sysinfo val;
+
+	if (argc)
+		return KDB_ARGCOUNT;
+
+	kdb_printf("sysname    %s\n", init_uts_ns.name.sysname);
+	kdb_printf("release    %s\n", init_uts_ns.name.release);
+	kdb_printf("version    %s\n", init_uts_ns.name.version);
+	kdb_printf("machine    %s\n", init_uts_ns.name.machine);
+	kdb_printf("nodename   %s\n", init_uts_ns.name.nodename);
+	kdb_printf("domainname %s\n", init_uts_ns.name.domainname);
+	kdb_printf("ccversion  %s\n", __stringify(CCVERSION));
+
+	kdb_gmtime(&xtime, &tm);
+	kdb_printf("date       %04d-%02d-%02d %02d:%02d:%02d "
+		   "tz_minuteswest %d\n",
+		1900+tm.tm_year, tm.tm_mon+1, tm.tm_mday,
+		tm.tm_hour, tm.tm_min, tm.tm_sec,
+		sys_tz.tz_minuteswest);
+
+	kdb_sysinfo(&val);
+	kdb_printf("uptime     ");
+	if (val.uptime > (24*60*60)) {
+		int days = val.uptime / (24*60*60);
+		val.uptime %= (24*60*60);
+		kdb_printf("%d day%s ", days, days == 1 ? "" : "s");
+	}
+	kdb_printf("%02ld:%02ld\n", val.uptime/(60*60), (val.uptime/60)%60);
+
+	/* lifted from fs/proc/proc_misc.c::loadavg_read_proc() */
+#define LOAD_INT(x) ((x) >> FSHIFT)
+#define LOAD_FRAC(x) LOAD_INT(((x) & (FIXED_1-1)) * 100)
+	kdb_printf("load avg   %ld.%02ld %ld.%02ld %ld.%02ld\n",
+		LOAD_INT(val.loads[0]), LOAD_FRAC(val.loads[0]),
+		LOAD_INT(val.loads[1]), LOAD_FRAC(val.loads[1]),
+		LOAD_INT(val.loads[2]), LOAD_FRAC(val.loads[2]));
+#undef LOAD_INT
+#undef LOAD_FRAC
+	/* Display in kilobytes: K() scales the page counts to kB */
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+	kdb_printf("\nMemTotal:       %8lu kB\nMemFree:        %8lu kB\n"
+		   "Buffers:        %8lu kB\n",
+		   K(val.totalram), K(val.freeram), K(val.bufferram));
+#undef K
+	return 0;
+}
+
+/*
+ * kdb_per_cpu - This function implements the 'per_cpu' command.
+ */
+static int kdb_per_cpu(int argc, const char **argv)
+{
+	char buf[256], fmtstr[64];
+	kdb_symtab_t symtab;
+	cpumask_t suppress = CPU_MASK_NONE;
+	int cpu, diag;
+	unsigned long addr, val, bytesperword = 0, whichcpu = ~0UL;
+
+	if (argc < 1 || argc > 3)
+		return KDB_ARGCOUNT;
+
+	snprintf(buf, sizeof(buf), "per_cpu__%s", argv[1]);
+	if (!kdbgetsymval(buf, &symtab)) {
+		kdb_printf("%s is not a per_cpu variable\n", argv[1]);
+		return KDB_BADADDR;
+	}
+	if (argc >= 2) {
+		diag = kdbgetularg(argv[2], &bytesperword);
+		if (diag)
+			return diag;
+	}
+	if (!bytesperword)
+		bytesperword = KDB_WORD_SIZE;
+	else if (bytesperword > KDB_WORD_SIZE)
+		return KDB_BADWIDTH;
+	sprintf(fmtstr, "%%0%dlx ", (int)(2*bytesperword));
+	if (argc >= 3) {
+		diag = kdbgetularg(argv[3], &whichcpu);
+		if (diag)
+			return diag;
+		if (!cpu_online(whichcpu)) {
+			kdb_printf("cpu %ld is not online\n", whichcpu);
+			return KDB_BADCPUNUM;
+		}
+	}
+
+	/* Most architectures use __per_cpu_offset[cpu], some use
+	 * __per_cpu_offset(cpu), smp has no __per_cpu_offset.
+	 */
+#ifdef	__per_cpu_offset
+#define KDB_PCU(cpu) __per_cpu_offset(cpu)
+#else
+#ifdef	CONFIG_SMP
+#define KDB_PCU(cpu) __per_cpu_offset[cpu]
+#else
+#define KDB_PCU(cpu) 0
+#endif
+#endif
+
+	for_each_online_cpu(cpu) {
+		if (whichcpu != ~0UL && whichcpu != cpu)
+			continue;
+		addr = symtab.sym_start + KDB_PCU(cpu);
+		diag = kdb_getword(&val, addr, bytesperword);
+		if (diag) {
+			kdb_printf("%5d " kdb_bfd_vma_fmt0 " - unable to "
+				   "read, diag=%d\n", cpu, addr, diag);
+			continue;
+		}
+#ifdef	CONFIG_SMP
+		if (!val) {
+			cpu_set(cpu, suppress);
+			continue;
+		}
+#endif	/* CONFIG_SMP */
+		kdb_printf("%5d ", cpu);
+		kdb_md_line(fmtstr, addr,
+			bytesperword == KDB_WORD_SIZE,
+			1, bytesperword, 1, 1, 0);
+	}
+	if (cpus_weight(suppress) == 0)
+		return 0;
+	kdb_printf("Zero suppressed cpu(s):");
+	for (cpu = first_cpu(suppress); cpu < num_possible_cpus();
+	     cpu = next_cpu(cpu, suppress)) {
+		kdb_printf(" %d", cpu);
+		if (cpu == num_possible_cpus() - 1 ||
+		    next_cpu(cpu, suppress) != cpu + 1)
+			continue;
+		while (cpu < num_possible_cpus() &&
+		       next_cpu(cpu, suppress) == cpu + 1)
+			++cpu;
+		kdb_printf("-%d", cpu);
+	}
+	kdb_printf("\n");
+
+#undef KDB_PCU
+
+	return 0;
+}
+
+/*
+ * display help for the use of cmd | grep pattern
+ */
+static int kdb_grep_help(int argc, const char **argv)
+{
+	kdb_printf("Usage of  cmd args | grep pattern:\n");
+	kdb_printf("  Any command's output may be filtered through an ");
+	kdb_printf("emulated 'pipe'.\n");
+	kdb_printf("  'grep' is just a key word.\n");
+	kdb_printf("  The pattern may include a very limited set of "
+		   "metacharacters:\n");
+	kdb_printf("   pattern or ^pattern or pattern$ or ^pattern$\n");
+	kdb_printf("  And if there are spaces in the pattern, you may "
+		   "quote it:\n");
+	kdb_printf("   \"pat tern\" or \"^pat tern\" or \"pat tern$\""
+		   " or \"^pat tern$\"\n");
+	return 0;
+}
+
+/*
+ * kdb_register_repeat - Register a kernel debugger command.
+ * Inputs:
+ *	cmd	Command name
+ *	func	Function to execute the command
+ *	usage	A simple usage string showing arguments
+ *	help	A simple help string describing command
+ *	minlen	Value stored in cmd_minlen for the command
+ *	repeat	Does the command auto repeat on enter?
+ * Returns:
+ *	zero for success, one if a duplicate command or out of memory.
+ */
+#define kdb_command_extend 50	/* arbitrary */
+int kdb_register_repeat(char *cmd,
+			kdb_func_t func,
+			char *usage,
+			char *help,
+			short minlen,
+			kdb_repeat_t repeat)
+{
+	int i;
+	kdbtab_t *kp;
+
+	/*
+	 *  Brute force method to determine duplicates
+	 */
+	for_each_kdbcmd(kp, i) {
+		if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
+			kdb_printf("Duplicate kdb command registered: "
+				"%s, func %p help %s\n", cmd, func, help);
+			return 1;
+		}
+	}
+
+	/*
+	 * Insert command into first available location in table
+	 */
+	for_each_kdbcmd(kp, i) {
+		if (kp->cmd_name == NULL)
+			break;
+	}
+	/* Table full: kdb_commands only holds slots past KDB_BASE_CMD_MAX */
+	if (i >= kdb_max_commands) {
+		kdbtab_t *new = kmalloc((kdb_max_commands - KDB_BASE_CMD_MAX +
+			 kdb_command_extend) * sizeof(*new), GFP_KDB);
+		if (!new) {
+			kdb_printf("Could not allocate new kdb_command "
+				   "table\n");
+			return 1;
+		}
+		if (kdb_commands) {
+			memcpy(new, kdb_commands,
+			  (kdb_max_commands - KDB_BASE_CMD_MAX) * sizeof(*new));
+			kfree(kdb_commands);
+		}
+		memset(new + kdb_max_commands - KDB_BASE_CMD_MAX, 0,
+		       kdb_command_extend * sizeof(*new));
+		kdb_commands = new;
+		kp = kdb_commands + kdb_max_commands - KDB_BASE_CMD_MAX;
+		kdb_max_commands += kdb_command_extend;
+	}
+
+	kp->cmd_name   = cmd;
+	kp->cmd_func   = func;
+	kp->cmd_usage  = usage;
+	kp->cmd_help   = help;
+	kp->cmd_flags  = 0;
+	kp->cmd_minlen = minlen;
+	kp->cmd_repeat = repeat;
+
+	return 0;
+}
+
+/*
+ * kdb_register - Compatibility register function for commands that do
+ *	not need to specify a repeat state.  Equivalent to
+ *	kdb_register_repeat with KDB_REPEAT_NONE.
+ * Inputs:
+ *	cmd	Command name
+ *	func	Function to execute the command
+ *	usage	A simple usage string showing arguments
+ *	help	A simple help string describing command
+ * Returns:
+ *	zero for success, one if a duplicate command.
+ */
+int kdb_register(char *cmd,
+	     kdb_func_t func,
+	     char *usage,
+	     char *help,
+	     short minlen)
+{
+	return kdb_register_repeat(cmd, func, usage, help, minlen,
+				   KDB_REPEAT_NONE);
+}
+
+/*
+ * kdb_unregister - This function is used to unregister a kernel
+ *	debugger command.  It is generally called when a module which
+ *	implements kdb commands is unloaded.
+ * Inputs:
+ *	cmd	Command name
+ * Returns:
+ *	zero for success, one if the command is not registered.
+ */
+int kdb_unregister(char *cmd)
+{
+	int i;
+	kdbtab_t *kp;
+
+	/*
+	 *  find the command, walking the table as registration does.
+	 */
+	for_each_kdbcmd(kp, i) {
+		if (kp->cmd_name && (strcmp(kp->cmd_name, cmd) == 0)) {
+			kp->cmd_name = NULL;
+			return 0;
+		}
+	}
+
+	/* Couldn't find it.  */
+	return 1;
+}
+
+/* Initialize the kdb command table. */
+static void __init kdb_inittab(void)
+{
+	int i;
+	kdbtab_t *kp;
+
+	for_each_kdbcmd(kp, i)
+		kp->cmd_name = NULL;
+
+	kdb_register_repeat("md", kdb_md, "<vaddr>",
+	  "Display Memory Contents, also mdWcN, e.g. md8c1", 1,
+			    KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("mdr", kdb_md, "<vaddr> <bytes>",
+	  "Display Raw Memory", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("mdp", kdb_md, "<paddr> <bytes>",
+	  "Display Physical Memory", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("mds", kdb_md, "<vaddr>",
+	  "Display Memory Symbolically", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("mm", kdb_mm, "<vaddr> <contents>",
+	  "Modify Memory Contents", 0, KDB_REPEAT_NO_ARGS);
+	kdb_register_repeat("go", kdb_go, "[<vaddr>]",
+	  "Continue Execution", 1, KDB_REPEAT_NONE);
+	kdb_register_repeat("rd", kdb_rd, "",
+	  "Display Registers", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("rm", kdb_rm, "<reg> <contents>",
+	  "Modify Registers", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("ef", kdb_ef, "<vaddr>",
+	  "Display exception frame", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("bt", kdb_bt, "[<vaddr>]",
+	  "Stack traceback", 1, KDB_REPEAT_NONE);
+	kdb_register_repeat("btp", kdb_bt, "<pid>",
+	  "Display stack for process <pid>", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("bta", kdb_bt, "[DRSTCZEUIMA]",
+	  "Display stack all processes", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("btc", kdb_bt, "",
+	  "Backtrace current process on each cpu", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("btt", kdb_bt, "<vaddr>",
+	  "Backtrace process given its struct task address", 0,
+			    KDB_REPEAT_NONE);
+	kdb_register_repeat("ll", kdb_ll, "<first-element> <linkoffset> <cmd>",
+	  "Execute cmd for each element in linked list", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("env", kdb_env, "",
+	  "Show environment variables", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("set", kdb_set, "",
+	  "Set environment variables", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("help", kdb_help, "",
+	  "Display Help Message", 1, KDB_REPEAT_NONE);
+	kdb_register_repeat("?", kdb_help, "",
+	  "Display Help Message", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("cpu", kdb_cpu, "<cpunum>",
+	  "Switch to new cpu", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("kgdb", kdb_kgdb, "",
+	  "Enter kgdb mode", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("ps", kdb_ps, "[<flags>|A]",
+	  "Display active task list", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("pid", kdb_pid, "<pidnum>",
+	  "Switch to another task", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("reboot", kdb_reboot, "",
+	  "Reboot the machine immediately", 0, KDB_REPEAT_NONE);
+#if defined(CONFIG_MODULES)
+	kdb_register_repeat("lsmod", kdb_lsmod, "",
+	  "List loaded kernel modules", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_MAGIC_SYSRQ)
+	kdb_register_repeat("sr", kdb_sr, "<key>",
+	  "Magic SysRq key", 0, KDB_REPEAT_NONE);
+#endif
+#if defined(CONFIG_PRINTK)
+	kdb_register_repeat("dmesg", kdb_dmesg, "[lines]",
+	  "Display syslog buffer", 0, KDB_REPEAT_NONE);
+#endif
+	kdb_register_repeat("defcmd", kdb_defcmd, "name \"usage\" \"help\"",
+	  "Define a set of commands, down to endefcmd", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("kill", kdb_kill, "<-signal> <pid>",
+	  "Send a signal to a process", 0, KDB_REPEAT_NONE);
+	kdb_register_repeat("summary", kdb_summary, "",
+	  "Summarize the system", 4, KDB_REPEAT_NONE);
+	kdb_register_repeat("per_cpu", kdb_per_cpu, "",
+	  "Display per_cpu variables", 3, KDB_REPEAT_NONE);
+	kdb_register_repeat("grephelp", kdb_grep_help, "",
+	  "Display help on | grep", 0, KDB_REPEAT_NONE);
+}
+
+/* Execute any commands defined in kdb_cmds.  */
+static void __init kdb_cmd_init(void)
+{
+	int i, diag;
+	for (i = 0; kdb_cmds[i]; ++i) {
+		diag = kdb_parse(kdb_cmds[i]);
+		if (diag)
+			kdb_printf("kdb command %s failed, kdb diag %d\n",
+				kdb_cmds[i], diag);
+	}
+	if (defcmd_in_progress) {
+		kdb_printf("Incomplete 'defcmd' set, forcing endefcmd\n");
+		kdb_parse("endefcmd");
+	}
+}
+
+/* Initialize kdb_printf, breakpoint tables and kdb state */
+void __init kdb_init(int lvl)
+{
+	static int kdb_init_lvl = KDB_NOT_INITIALIZED;
+	int i;
+
+	if (kdb_init_lvl == KDB_INIT_FULL || lvl <= kdb_init_lvl)
+		return;
+	for (i = kdb_init_lvl; i < lvl; i++) {
+		switch (i) {
+		case KDB_NOT_INITIALIZED:
+			kdb_inittab();		/* Initialize Command Table */
+			kdb_initbptab();	/* Initialize Breakpoints */
+			break;
+		case KDB_INIT_EARLY:
+			kdb_cmd_init();		/* Build kdb_cmds tables */
+			break;
+		}
+	}
+	kdb_init_lvl = lvl;
+}
diff --git a/kernel/debug/kdb/kdb_private.h b/kernel/debug/kdb/kdb_private.h
new file mode 100644
index 000000000000..69ed2eff3fea
--- /dev/null
+++ b/kernel/debug/kdb/kdb_private.h
@@ -0,0 +1,301 @@
+#ifndef _KDBPRIVATE_H
+#define _KDBPRIVATE_H
+
+/*
+ * Kernel Debugger Architecture Independent Private Headers
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 2000-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ */
+
+#include <linux/kgdb.h>
+#include "../debug_core.h"
+
+/* Kernel Debugger Error codes.  Must not overlap with command codes. */
+#define KDB_NOTFOUND	(-1)
+#define KDB_ARGCOUNT	(-2)
+#define KDB_BADWIDTH	(-3)
+#define KDB_BADRADIX	(-4)
+#define KDB_NOTENV	(-5)
+#define KDB_NOENVVALUE	(-6)
+#define KDB_NOTIMP	(-7)
+#define KDB_ENVFULL	(-8)
+#define KDB_ENVBUFFULL	(-9)
+#define KDB_TOOMANYBPT	(-10)
+#define KDB_TOOMANYDBREGS (-11)
+#define KDB_DUPBPT	(-12)
+#define KDB_BPTNOTFOUND	(-13)
+#define KDB_BADMODE	(-14)
+#define KDB_BADINT	(-15)
+#define KDB_INVADDRFMT  (-16)
+#define KDB_BADREG      (-17)
+#define KDB_BADCPUNUM   (-18)
+#define KDB_BADLENGTH	(-19)
+#define KDB_NOBP	(-20)
+#define KDB_BADADDR	(-21)
+
+/* Kernel Debugger Command codes.  Must not overlap with error codes. */
+#define KDB_CMD_GO	(-1001)
+#define KDB_CMD_CPU	(-1002)
+#define KDB_CMD_SS	(-1003)
+#define KDB_CMD_SSB	(-1004)
+#define KDB_CMD_KGDB (-1005)
+#define KDB_CMD_KGDB2 (-1006)
+
+/* Internal debug flags */
+#define KDB_DEBUG_FLAG_BP	0x0002	/* Breakpoint subsystem debug */
+#define KDB_DEBUG_FLAG_BB_SUMM	0x0004	/* Basic block analysis, summary only */
+#define KDB_DEBUG_FLAG_AR	0x0008	/* Activation record, generic */
+#define KDB_DEBUG_FLAG_ARA	0x0010	/* Activation record, arch specific */
+#define KDB_DEBUG_FLAG_BB	0x0020	/* All basic block analysis */
+#define KDB_DEBUG_FLAG_STATE	0x0040	/* State flags */
+#define KDB_DEBUG_FLAG_MASK	0xffff	/* All debug flags */
+#define KDB_DEBUG_FLAG_SHIFT	16	/* Shift factor for dbflags */
+
+#define KDB_DEBUG(flag)	(kdb_flags & \
+	(KDB_DEBUG_FLAG_##flag << KDB_DEBUG_FLAG_SHIFT))
+#define KDB_DEBUG_STATE(text, value) do { if (KDB_DEBUG(STATE)) \
+		kdb_print_state(text, value); } while (0) /* safe in if/else */
+
+#if BITS_PER_LONG == 32
+
+#define KDB_PLATFORM_ENV	"BYTESPERWORD=4"
+
+#define kdb_machreg_fmt		"0x%lx"
+#define kdb_machreg_fmt0	"0x%08lx"
+#define kdb_bfd_vma_fmt		"0x%lx"
+#define kdb_bfd_vma_fmt0	"0x%08lx"
+#define kdb_elfw_addr_fmt	"0x%x"
+#define kdb_elfw_addr_fmt0	"0x%08x"
+#define kdb_f_count_fmt		"%d"
+
+#elif BITS_PER_LONG == 64
+
+#define KDB_PLATFORM_ENV	"BYTESPERWORD=8"
+
+#define kdb_machreg_fmt		"0x%lx"
+#define kdb_machreg_fmt0	"0x%016lx"
+#define kdb_bfd_vma_fmt		"0x%lx"
+#define kdb_bfd_vma_fmt0	"0x%016lx"
+#define kdb_elfw_addr_fmt	"0x%x"
+#define kdb_elfw_addr_fmt0	"0x%016x"
+#define kdb_f_count_fmt		"%ld"
+
+#endif
+
+/*
+ * KDB_MAXBPT describes the total number of breakpoints
+ * supported by this architecture.
+ */
+#define KDB_MAXBPT	16
+
+/* Maximum number of arguments to a function  */
+#define KDB_MAXARGS    16
+
+typedef enum {
+	KDB_REPEAT_NONE = 0,	/* Do not repeat this command */
+	KDB_REPEAT_NO_ARGS,	/* Repeat the command without arguments */
+	KDB_REPEAT_WITH_ARGS,	/* Repeat the command including its arguments */
+} kdb_repeat_t;
+
+typedef int (*kdb_func_t)(int, const char **);
+
+/* Symbol table format returned by kallsyms. */
+typedef struct __ksymtab {
+		unsigned long value;	/* Address of symbol */
+		const char *mod_name;	/* Module containing symbol or
+					 * "kernel" */
+		unsigned long mod_start;
+		unsigned long mod_end;
+		const char *sec_name;	/* Section containing symbol */
+		unsigned long sec_start;
+		unsigned long sec_end;
+		const char *sym_name;	/* Full symbol name, including
+					 * any version */
+		unsigned long sym_start;
+		unsigned long sym_end;
+		} kdb_symtab_t;
+extern int kallsyms_symbol_next(char *prefix_name, int flag);
+extern int kallsyms_symbol_complete(char *prefix_name, int max_len);
+
+/* Exported Symbols for kernel loadable modules to use. */
+extern int kdb_register(char *, kdb_func_t, char *, char *, short);
+extern int kdb_register_repeat(char *, kdb_func_t, char *, char *,
+			       short, kdb_repeat_t);
+extern int kdb_unregister(char *);
+
+extern int kdb_getarea_size(void *, unsigned long, size_t);
+extern int kdb_putarea_size(unsigned long, void *, size_t);
+
+/*
+ * Like get_user and put_user, kdb_getarea and kdb_putarea take variable
+ * names, not pointers.  The underlying *_size functions take pointers.
+ */
+#define kdb_getarea(x, addr) kdb_getarea_size(&(x), addr, sizeof((x)))
+#define kdb_putarea(addr, x) kdb_putarea_size(addr, &(x), sizeof((x)))
+
+extern int kdb_getphysword(unsigned long *word,
+			unsigned long addr, size_t size);
+extern int kdb_getword(unsigned long *, unsigned long, size_t);
+extern int kdb_putword(unsigned long, unsigned long, size_t);
+
+extern int kdbgetularg(const char *, unsigned long *);
+extern int kdb_set(int, const char **);
+extern char *kdbgetenv(const char *);
+extern int kdbgetintenv(const char *, int *);
+extern int kdbgetaddrarg(int, const char **, int*, unsigned long *,
+			 long *, char **);
+extern int kdbgetsymval(const char *, kdb_symtab_t *);
+extern int kdbnearsym(unsigned long, kdb_symtab_t *);
+extern void kdbnearsym_cleanup(void);
+extern char *kdb_strdup(const char *str, gfp_t type);
+extern void kdb_symbol_print(unsigned long, const kdb_symtab_t *, unsigned int);
+
+/* Routine for debugging the debugger state. */
+extern void kdb_print_state(const char *, int);
+
+extern int kdb_state;
+#define KDB_STATE_KDB		0x00000001	/* Cpu is inside kdb */
+#define KDB_STATE_LEAVING	0x00000002	/* Cpu is leaving kdb */
+#define KDB_STATE_CMD		0x00000004	/* Running a kdb command */
+#define KDB_STATE_KDB_CONTROL	0x00000008	/* This cpu is under
+						 * kdb control */
+#define KDB_STATE_HOLD_CPU	0x00000010	/* Hold this cpu inside kdb */
+#define KDB_STATE_DOING_SS	0x00000020	/* Doing ss command */
+#define KDB_STATE_DOING_SSB	0x00000040	/* Doing ssb command,
+						 * DOING_SS is also set */
+#define KDB_STATE_SSBPT		0x00000080	/* Install breakpoint
+						 * after one ss, independent of
+						 * DOING_SS */
+#define KDB_STATE_REENTRY	0x00000100	/* Valid re-entry into kdb */
+#define KDB_STATE_SUPPRESS	0x00000200	/* Suppress error messages */
+#define KDB_STATE_PAGER		0x00000400	/* pager is available */
+#define KDB_STATE_GO_SWITCH	0x00000800	/* go is switching
+						 * back to initial cpu */
+#define KDB_STATE_PRINTF_LOCK	0x00001000	/* Holds kdb_printf lock */
+#define KDB_STATE_WAIT_IPI	0x00002000	/* Waiting for kdb_ipi() NMI */
+#define KDB_STATE_RECURSE	0x00004000	/* Recursive entry to kdb */
+#define KDB_STATE_IP_ADJUSTED	0x00008000	/* Restart IP has been
+						 * adjusted */
+#define KDB_STATE_GO1		0x00010000	/* go only releases one cpu */
+#define KDB_STATE_KEYBOARD	0x00020000	/* kdb entered via
+						 * keyboard on this cpu */
+#define KDB_STATE_KEXEC		0x00040000	/* kexec issued */
+#define KDB_STATE_DOING_KGDB	0x00080000	/* kgdb enter now issued */
+#define KDB_STATE_DOING_KGDB2	0x00100000	/* kgdb enter now issued */
+#define KDB_STATE_KGDB_TRANS	0x00200000	/* Transition to kgdb */
+#define KDB_STATE_ARCH		0xff000000	/* Reserved for arch
+						 * specific use */
+
+#define KDB_STATE(flag) (kdb_state & KDB_STATE_##flag)
+#define KDB_STATE_SET(flag) ((void)(kdb_state |= KDB_STATE_##flag))
+#define KDB_STATE_CLEAR(flag) ((void)(kdb_state &= ~KDB_STATE_##flag))
+
+extern int kdb_nextline; /* Current number of lines displayed */
+
+typedef struct _kdb_bp {
+	unsigned long	bp_addr;	/* Address breakpoint is present at */
+	unsigned int	bp_free:1;	/* This entry is available */
+	unsigned int	bp_enabled:1;	/* Breakpoint is active in register */
+	unsigned int	bp_type:4;	/* Uses hardware register */
+	unsigned int	bp_installed:1;	/* Breakpoint is installed */
+	unsigned int	bp_delay:1;	/* Do delayed bp handling */
+	unsigned int	bp_delayed:1;	/* Delayed breakpoint */
+	unsigned int	bph_length;	/* HW break length */
+} kdb_bp_t;
+
+#ifdef CONFIG_KGDB_KDB
+extern kdb_bp_t kdb_breakpoints[/* KDB_MAXBPT */];
+
+/* The KDB shell command table */
+typedef struct _kdbtab {
+	char    *cmd_name;		/* Command name */
+	kdb_func_t cmd_func;		/* Function to execute command */
+	char    *cmd_usage;		/* Usage String for this command */
+	char    *cmd_help;		/* Help message for this command */
+	short    cmd_flags;		/* Parsing flags */
+	short    cmd_minlen;		/* Minimum legal # command
+					 * chars required */
+	kdb_repeat_t cmd_repeat;	/* Does command auto repeat on enter? */
+} kdbtab_t;
+
+extern int kdb_bt(int, const char **);	/* KDB display back trace */
+
+/* KDB breakpoint management functions */
+extern void kdb_initbptab(void);
+extern void kdb_bp_install(struct pt_regs *);
+extern void kdb_bp_remove(void);
+
+typedef enum {
+	KDB_DB_BPT,	/* Breakpoint */
+	KDB_DB_SS,	/* Single-step trap */
+	KDB_DB_SSB,	/* Single step to branch */
+	KDB_DB_SSBPT,	/* Single step over breakpoint */
+	KDB_DB_NOBPT	/* Spurious breakpoint */
+} kdb_dbtrap_t;
+
+extern int kdb_main_loop(kdb_reason_t, kdb_reason_t,
+			 int, kdb_dbtrap_t, struct pt_regs *);
+
+/* Miscellaneous functions and data areas */
+extern int kdb_grepping_flag;
+extern char kdb_grep_string[];
+extern int kdb_grep_leading;
+extern int kdb_grep_trailing;
+extern char *kdb_cmds[];
+extern void kdb_syslog_data(char *syslog_data[]);
+extern unsigned long kdb_task_state_string(const char *);
+extern char kdb_task_state_char (const struct task_struct *);
+extern unsigned long kdb_task_state(const struct task_struct *p,
+				    unsigned long mask);
+extern void kdb_ps_suppressed(void);
+extern void kdb_ps1(const struct task_struct *p);
+extern int kdb_parse(const char *cmdstr);
+extern void kdb_print_nameval(const char *name, unsigned long val);
+extern void kdb_send_sig_info(struct task_struct *p, struct siginfo *info);
+extern void kdb_meminfo_proc_show(void);
+extern const char *kdb_walk_kallsyms(loff_t *pos);
+extern char *kdb_getstr(char *, size_t, char *);
+
+/* Defines for kdb_symbol_print */
+#define KDB_SP_SPACEB	0x0001		/* Space before string */
+#define KDB_SP_SPACEA	0x0002		/* Space after string */
+#define KDB_SP_PAREN	0x0004		/* Parenthesis around string */
+#define KDB_SP_VALUE	0x0008		/* Print the value of the address */
+#define KDB_SP_SYMSIZE	0x0010		/* Print the size of the symbol */
+#define KDB_SP_NEWLINE	0x0020		/* Newline after string */
+#define KDB_SP_DEFAULT (KDB_SP_VALUE|KDB_SP_PAREN)
+
+#define KDB_TSK(cpu) kgdb_info[cpu].task
+#define KDB_TSKREGS(cpu) kgdb_info[cpu].debuggerinfo
+
+extern struct task_struct *kdb_curr_task(int);
+
+#define kdb_task_has_cpu(p) (task_curr(p))
+
+/* Simplify coexistence with NPTL */
+#define	kdb_do_each_thread(g, p) do_each_thread(g, p)
+#define	kdb_while_each_thread(g, p) while_each_thread(g, p)
+
+#define GFP_KDB (in_interrupt() ? GFP_ATOMIC : GFP_KERNEL)
+
+extern void *debug_kmalloc(size_t size, gfp_t flags);
+extern void debug_kfree(void *);
+extern void debug_kusage(void);
+
+extern void kdb_set_current_task(struct task_struct *);
+extern struct task_struct *kdb_current_task;
+#ifdef CONFIG_MODULES
+extern struct list_head *kdb_modules;
+#endif /* CONFIG_MODULES */
+
+extern char kdb_prompt_str[];
+
+#define	KDB_WORD_SIZE	((int)sizeof(unsigned long))
+
+#endif /* CONFIG_KGDB_KDB */
+#endif	/* !_KDBPRIVATE_H */
diff --git a/kernel/debug/kdb/kdb_support.c b/kernel/debug/kdb/kdb_support.c
new file mode 100644
index 000000000000..45344d5c53dd
--- /dev/null
+++ b/kernel/debug/kdb/kdb_support.c
@@ -0,0 +1,927 @@
+/*
+ * Kernel Debugger Architecture Independent Support Functions
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (c) 1999-2004 Silicon Graphics, Inc.  All Rights Reserved.
+ * Copyright (c) 2009 Wind River Systems, Inc.  All Rights Reserved.
+ * 03/02/13    added new 2.5 kallsyms <xavier.bru@bull.net>
+ */
+
+#include <stdarg.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/kallsyms.h>
+#include <linux/stddef.h>
+#include <linux/vmalloc.h>
+#include <linux/ptrace.h>
+#include <linux/module.h>
+#include <linux/highmem.h>
+#include <linux/hardirq.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/kdb.h>
+#include <linux/slab.h>
+#include "kdb_private.h"
+
+/*
+ * kdbgetsymval - Return the address of the given symbol.
+ *
+ * Parameters:
+ *	symname	Character string containing symbol name
+ *      symtab  Structure to receive results
+ * Returns:
+ *	0	Symbol not found, symtab zero filled
+ *	1	Symbol mapped to module/symbol/section, data in symtab
+ */
+int kdbgetsymval(const char *symname, kdb_symtab_t *symtab)
+{
+	if (KDB_DEBUG(AR))
+		kdb_printf("kdbgetsymval: symname=%s, symtab=%p\n", symname,
+			   symtab);
+	memset(symtab, 0, sizeof(*symtab));
+	symtab->sym_start = kallsyms_lookup_name(symname);
+	if (symtab->sym_start) {
+		if (KDB_DEBUG(AR))
+			kdb_printf("kdbgetsymval: returns 1, "
+				   "symtab->sym_start=0x%lx\n",
+				   symtab->sym_start);
+		return 1;
+	}
+	if (KDB_DEBUG(AR))
+		kdb_printf("kdbgetsymval: returns 0\n");
+	return 0;
+}
+EXPORT_SYMBOL(kdbgetsymval);
+
+static char *kdb_name_table[100];	/* arbitrary size */
+
+/*
+ * kdbnearsym -	Return the name of the symbol with the nearest address
+ *	less than 'addr'.
+ *
+ * Parameters:
+ *	addr	Address to check for symbol near
+ *	symtab  Structure to receive results
+ * Returns:
+ *	0	No sections contain this address, symtab zero filled
+ *	1	Address mapped to module/symbol/section, data in symtab
+ * Remarks:
+ *	2.6 kallsyms has a "feature" where it unpacks the name into a
+ *	string.  If that string is reused before the caller expects it
+ *	then the caller sees its string change without warning.  To
+ *	avoid cluttering up the main kdb code with lots of kdb_strdup,
+ *	tests and kfree calls, kdbnearsym maintains an LRU list of the
+ *	last few unique strings.  The list is sized large enough to
+ *	hold active strings, no kdb caller of kdbnearsym makes more
+ *	than ~20 later calls before using a saved value.
+ */
+int kdbnearsym(unsigned long addr, kdb_symtab_t *symtab)
+{
+	int ret = 0;
+	unsigned long symbolsize = 0;	/* stay defined if lookup fails */
+	unsigned long offset = 0;	/* stay defined if lookup fails */
+#define knt1_size 128		/* must be >= kallsyms table size */
+	char *knt1 = NULL;
+
+	if (KDB_DEBUG(AR))
+		kdb_printf("kdbnearsym: addr=0x%lx, symtab=%p\n", addr, symtab);
+	memset(symtab, 0, sizeof(*symtab));
+
+	if (addr < 4096)
+		goto out;
+	knt1 = debug_kmalloc(knt1_size, GFP_ATOMIC);
+	if (!knt1) {
+		kdb_printf("kdbnearsym: addr=0x%lx cannot kmalloc knt1\n",
+			   addr);
+		goto out;
+	}
+	symtab->sym_name = kallsyms_lookup(addr, &symbolsize , &offset,
+				(char **)(&symtab->mod_name), knt1);
+	if (offset > 8*1024*1024) {
+		symtab->sym_name = NULL;
+		addr = offset = symbolsize = 0;
+	}
+	symtab->sym_start = addr - offset;
+	symtab->sym_end = symtab->sym_start + symbolsize;
+	ret = symtab->sym_name != NULL && *(symtab->sym_name) != '\0';
+
+	if (ret) {
+		int i;
+		/* Another 2.6 kallsyms "feature".  Sometimes the sym_name is
+		 * set but the buffer passed into kallsyms_lookup is not used,
+		 * so it contains garbage.  The caller has to work out which
+		 * buffer needs to be saved.
+		 *
+		 * What was Rusty smoking when he wrote that code?
+		 */
+		if (symtab->sym_name != knt1) {
+			strncpy(knt1, symtab->sym_name, knt1_size);
+			knt1[knt1_size-1] = '\0';
+		}
+		for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+			if (kdb_name_table[i] &&
+			    strcmp(kdb_name_table[i], knt1) == 0)
+				break;
+		}
+		if (i >= ARRAY_SIZE(kdb_name_table)) {
+			debug_kfree(kdb_name_table[0]);	/* evict oldest */
+			memmove(kdb_name_table, kdb_name_table+1,
+				sizeof(kdb_name_table[0]) *
+				(ARRAY_SIZE(kdb_name_table)-1)); /* overlaps */
+		} else {
+			debug_kfree(knt1);
+			knt1 = kdb_name_table[i];
+			memmove(kdb_name_table+i, kdb_name_table+i+1,
+				sizeof(kdb_name_table[0]) *
+				(ARRAY_SIZE(kdb_name_table)-i-1)); /* overlaps */
+		}
+		i = ARRAY_SIZE(kdb_name_table) - 1;
+		kdb_name_table[i] = knt1;
+		symtab->sym_name = kdb_name_table[i];
+		knt1 = NULL;
+	}
+
+	if (symtab->mod_name == NULL)
+		symtab->mod_name = "kernel";
+	if (KDB_DEBUG(AR))
+		kdb_printf("kdbnearsym: returns %d symtab->sym_start=0x%lx, "
+		   "symtab->mod_name=%p, symtab->sym_name=%p (%s)\n", ret,
+		   symtab->sym_start, symtab->mod_name, symtab->sym_name,
+		   symtab->sym_name);
+
+out:
+	debug_kfree(knt1);
+	return ret;
+}
+
+void kdbnearsym_cleanup(void)
+{
+	int i;
+	for (i = 0; i < ARRAY_SIZE(kdb_name_table); ++i) {
+		if (kdb_name_table[i]) {
+			debug_kfree(kdb_name_table[i]);
+			kdb_name_table[i] = NULL;
+		}
+	}
+}
+
+static char ks_namebuf[KSYM_NAME_LEN+1], ks_namebuf_prev[KSYM_NAME_LEN+1];
+
+/*
+ * kallsyms_symbol_complete
+ *
+ * Parameters:
+ *	prefix_name	prefix of a symbol name to lookup
+ *	max_len		maximum length that can be returned
+ * Returns:
+ *	Number of symbols which match the given prefix.
+ * Notes:
+ *	prefix_name is changed to contain the longest unique prefix that
+ *	starts with this prefix (tab completion).
+ */
+int kallsyms_symbol_complete(char *prefix_name, int max_len)
+{
+	loff_t pos = 0;
+	int prefix_len = strlen(prefix_name), prev_len = 0;
+	int i, number = 0;
+	const char *name;
+
+	while ((name = kdb_walk_kallsyms(&pos))) {
+		if (strncmp(name, prefix_name, prefix_len) == 0) {
+			strcpy(ks_namebuf, name);
+			/* Work out the longest name that matches the prefix */
+			if (++number == 1) {
+				prev_len = min_t(int, max_len-1,
+						 strlen(ks_namebuf));
+				memcpy(ks_namebuf_prev, ks_namebuf, prev_len);
+				ks_namebuf_prev[prev_len] = '\0';
+				continue;
+			}
+			for (i = 0; i < prev_len; i++) {
+				if (ks_namebuf[i] != ks_namebuf_prev[i]) {
+					prev_len = i;
+					ks_namebuf_prev[i] = '\0';
+					break;
+				}
+			}
+		}
+	}
+	if (prev_len > prefix_len)
+		memcpy(prefix_name, ks_namebuf_prev, prev_len+1);
+	return number;
+}
+
+/*
+ * kallsyms_symbol_next
+ *
+ * Parameters:
+ *	prefix_name	prefix of a symbol name to lookup
+ *	flag	0 means search from the head, 1 means continue search.
+ * Returns:
+ *	1 if a symbol matches the given prefix.
+ *	0 if no string found
+ */
+int kallsyms_symbol_next(char *prefix_name, int flag)
+{
+	int prefix_len = strlen(prefix_name);
+	static loff_t pos;
+	const char *name;
+
+	if (!flag)
+		pos = 0;
+
+	while ((name = kdb_walk_kallsyms(&pos))) {
+		if (strncmp(name, prefix_name, prefix_len) == 0) {
+			strncpy(prefix_name, name, strlen(name)+1);
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/*
+ * kdb_symbol_print - Standard method for printing a symbol name and offset.
+ * Inputs:
+ *	addr	Address to be printed.
+ *	symtab	Address of symbol data, if NULL this routine does its
+ *		own lookup.
+ *	punc	Punctuation for string, bit field.
+ * Remarks:
+ *	The string and its punctuation is only printed if the address
+ *	is inside the kernel, except that the value is always printed
+ *	when requested.
+ */
+void kdb_symbol_print(unsigned long addr, const kdb_symtab_t *symtab_p,
+		      unsigned int punc)
+{
+	kdb_symtab_t symtab, *symtab_p2;
+	if (symtab_p) {
+		symtab_p2 = (kdb_symtab_t *)symtab_p;
+	} else {
+		symtab_p2 = &symtab;
+		kdbnearsym(addr, symtab_p2);
+	}
+	if (!(symtab_p2->sym_name || (punc & KDB_SP_VALUE)))
+		return;
+	if (punc & KDB_SP_SPACEB)
+		kdb_printf(" ");
+	if (punc & KDB_SP_VALUE)
+		kdb_printf(kdb_machreg_fmt0, addr);
+	if (symtab_p2->sym_name) {
+		if (punc & KDB_SP_VALUE)
+			kdb_printf(" ");
+		if (punc & KDB_SP_PAREN)
+			kdb_printf("(");
+		if (strcmp(symtab_p2->mod_name, "kernel"))
+			kdb_printf("[%s]", symtab_p2->mod_name);
+		kdb_printf("%s", symtab_p2->sym_name);
+		if (addr != symtab_p2->sym_start)
+			kdb_printf("+0x%lx", addr - symtab_p2->sym_start);
+		if (punc & KDB_SP_SYMSIZE)
+			kdb_printf("/0x%lx",
+				   symtab_p2->sym_end - symtab_p2->sym_start);
+		if (punc & KDB_SP_PAREN)
+			kdb_printf(")");
+	}
+	if (punc & KDB_SP_SPACEA)
+		kdb_printf(" ");
+	if (punc & KDB_SP_NEWLINE)
+		kdb_printf("\n");
+}
+
+/*
+ * kdb_strdup - kdb equivalent of strdup, for disasm code.
+ * Inputs:
+ *	str	The string to duplicate.
+ *	type	Flags to kmalloc for the new string.
+ * Returns:
+ *	Address of the new string, NULL if storage could not be allocated.
+ * Remarks:
+ *	This is not in lib/string.c because it uses kmalloc which is not
+ *	available when string.o is used in boot loaders.
+ */
+char *kdb_strdup(const char *str, gfp_t type)
+{
+	int n = strlen(str)+1;
+	char *s = kmalloc(n, type);
+	if (!s)
+		return NULL;
+	return strcpy(s, str);
+}
+
+/*
+ * kdb_getarea_size - Read an area of data.  The kdb equivalent of
+ *	copy_from_user, with kdb messages for invalid addresses.
+ * Inputs:
+ *	res	Pointer to the area to receive the result.
+ *	addr	Address of the area to copy.
+ *	size	Size of the area.
+ * Returns:
+ *	0 for success, < 0 for error.
+ */
+int kdb_getarea_size(void *res, unsigned long addr, size_t size)
+{
+	int ret = probe_kernel_read((char *)res, (char *)addr, size);
+	if (ret) {
+		if (!KDB_STATE(SUPPRESS)) {
+			kdb_printf("kdb_getarea: Bad address 0x%lx\n", addr);
+			KDB_STATE_SET(SUPPRESS);
+		}
+		ret = KDB_BADADDR;
+	} else {
+		KDB_STATE_CLEAR(SUPPRESS);
+	}
+	return ret;
+}
+
+/*
+ * kdb_putarea_size - Write an area of data.  The kdb equivalent of
+ *	copy_to_user, with kdb messages for invalid addresses.
+ * Inputs:
+ *	addr	Address of the area to write to.
+ *	res	Pointer to the area holding the data.
+ *	size	Size of the area.
+ * Returns:
+ *	0 for success, < 0 for error.
+ */
+int kdb_putarea_size(unsigned long addr, void *res, size_t size)
+{
+	int ret = probe_kernel_write((char *)addr, (char *)res, size);
+	if (ret) {
+		if (!KDB_STATE(SUPPRESS)) {
+			kdb_printf("kdb_putarea: Bad address 0x%lx\n", addr);
+			KDB_STATE_SET(SUPPRESS);
+		}
+		ret = KDB_BADADDR;
+	} else {
+		KDB_STATE_CLEAR(SUPPRESS);
+	}
+	return ret;
+}
+
+/*
+ * kdb_getphys - Read data from a physical address. Validate the
+ * 	address is in range, use kmap_atomic() to get data
+ * 	similar to kdb_getarea() - but for phys addresses
+ * Inputs:
+ * 	res	Pointer to the word to receive the result
+ * 	addr	Physical address of the area to copy
+ * 	size	Size of the area
+ * Returns:
+ *	0 for success, 1 if addr is not backed by a valid page frame.
+ */
+static int kdb_getphys(void *res, unsigned long addr, size_t size)
+{
+	unsigned long pfn;
+	void *vaddr;
+	struct page *page;
+
+	pfn = (addr >> PAGE_SHIFT);
+	if (!pfn_valid(pfn))
+		return 1;
+	page = pfn_to_page(pfn);
+	vaddr = kmap_atomic(page, KM_KDB);
+	memcpy(res, vaddr + (addr & (PAGE_SIZE - 1)), size);
+	kunmap_atomic(vaddr, KM_KDB);
+
+	return 0;
+}
+
+/*
+ * kdb_getphysword
+ * Inputs:
+ *	word	Pointer to the word to receive the result.
+ *	addr	Address of the area to copy.
+ *	size	Size of the area.
+ * Returns:
+ *	0 for success, < 0 for error.
+ */
+int kdb_getphysword(unsigned long *word, unsigned long addr, size_t size)
+{
+	int diag;
+	__u8  w1;
+	__u16 w2;
+	__u32 w4;
+	__u64 w8;
+	*word = 0;	/* Default value if addr or size is invalid */
+
+	switch (size) {
+	case 1:
+		diag = kdb_getphys(&w1, addr, sizeof(w1));
+		if (!diag)
+			*word = w1;
+		break;
+	case 2:
+		diag = kdb_getphys(&w2, addr, sizeof(w2));
+		if (!diag)
+			*word = w2;
+		break;
+	case 4:
+		diag = kdb_getphys(&w4, addr, sizeof(w4));
+		if (!diag)
+			*word = w4;
+		break;
+	case 8:
+		if (size <= sizeof(*word)) {
+			diag = kdb_getphys(&w8, addr, sizeof(w8));
+			if (!diag)
+				*word = w8;
+			break;
+		}
+		/* drop through */
+	default:
+		diag = KDB_BADWIDTH;
+		kdb_printf("kdb_getphysword: bad width %ld\n", (long) size);
+	}
+	return diag;
+}
+
+/*
+ * kdb_getword - Read a binary value.  Unlike kdb_getarea, this treats
+ *	data as numbers.
+ * Inputs:
+ *	word	Pointer to the word to receive the result.
+ *	addr	Address of the area to copy.
+ *	size	Size of the area.
+ * Returns:
+ *	0 for success, < 0 for error.
+ */
+int kdb_getword(unsigned long *word, unsigned long addr, size_t size)
+{
+	int diag;
+	__u8  w1;
+	__u16 w2;
+	__u32 w4;
+	__u64 w8;
+	*word = 0;	/* Default value if addr or size is invalid */
+	switch (size) {
+	case 1:
+		diag = kdb_getarea(w1, addr);
+		if (!diag)
+			*word = w1;
+		break;
+	case 2:
+		diag = kdb_getarea(w2, addr);
+		if (!diag)
+			*word = w2;
+		break;
+	case 4:
+		diag = kdb_getarea(w4, addr);
+		if (!diag)
+			*word = w4;
+		break;
+	case 8:
+		if (size <= sizeof(*word)) {
+			diag = kdb_getarea(w8, addr);
+			if (!diag)
+				*word = w8;
+			break;
+		}
+		/* drop through */
+	default:
+		diag = KDB_BADWIDTH;
+		kdb_printf("kdb_getword: bad width %ld\n", (long) size);
+	}
+	return diag;
+}
+
+/*
+ * kdb_putword - Write a binary value.  Unlike kdb_putarea, this
+ *	treats data as numbers.
+ * Inputs:
+ *	addr	Address of the area to write to.
+ *	word	The value to set.
+ *	size	Size of the area.
+ * Returns:
+ *	0 for success, < 0 for error.
+ */
+int kdb_putword(unsigned long addr, unsigned long word, size_t size)
+{
+	int diag;
+	__u8  w1;
+	__u16 w2;
+	__u32 w4;
+	__u64 w8;
+	switch (size) {
+	case 1:
+		w1 = word;
+		diag = kdb_putarea(addr, w1);
+		break;
+	case 2:
+		w2 = word;
+		diag = kdb_putarea(addr, w2);
+		break;
+	case 4:
+		w4 = word;
+		diag = kdb_putarea(addr, w4);
+		break;
+	case 8:
+		if (size <= sizeof(word)) {
+			w8 = word;
+			diag = kdb_putarea(addr, w8);
+			break;
+		}
+		/* drop through */
+	default:
+		diag = KDB_BADWIDTH;
+		kdb_printf("kdb_putword: bad width %ld\n", (long) size);
+	}
+	return diag;
+}
+
+/*
+ * kdb_task_state_string - Convert a string containing any of the
+ *	letters DRSTCZEUIMA to a mask for the process state field and
+ *	return the value.  If no argument is supplied, return the mask
+ *	that corresponds to environment variable PS, DRSTCZEU by
+ *	default.
+ * Inputs:
+ *	s	String to convert
+ * Returns:
+ *	Mask for process state.
+ * Notes:
+ *	The mask folds data from several sources into a single long value, so
+ *	be careful not to overlap the bits.  TASK_* bits are in the LSB,
+ *	special cases like UNRUNNABLE are in the MSB.  As of 2.6.10-rc1 there
+ *	is no overlap between TASK_* and EXIT_* but that may not always be
+ *	true, so EXIT_* bits are shifted left 16 bits before being stored in
+ *	the mask.
+ */
+
+/* unrunnable is < 0 */
+#define UNRUNNABLE	(1UL << (8*sizeof(unsigned long) - 1))
+#define RUNNING		(1UL << (8*sizeof(unsigned long) - 2))
+#define IDLE		(1UL << (8*sizeof(unsigned long) - 3))
+#define DAEMON		(1UL << (8*sizeof(unsigned long) - 4))
+
+unsigned long kdb_task_state_string(const char *s)
+{
+	long res = 0;
+	if (!s) {
+		s = kdbgetenv("PS");
+		if (!s)
+			s = "DRSTCZEU";	/* default value for ps */
+	}
+	while (*s) {
+		switch (*s) {
+		case 'D':
+			res |= TASK_UNINTERRUPTIBLE;
+			break;
+		case 'R':
+			res |= RUNNING;
+			break;
+		case 'S':
+			res |= TASK_INTERRUPTIBLE;
+			break;
+		case 'T':
+			res |= TASK_STOPPED;
+			break;
+		case 'C':
+			res |= TASK_TRACED;
+			break;
+		case 'Z':
+			res |= EXIT_ZOMBIE << 16;
+			break;
+		case 'E':
+			res |= EXIT_DEAD << 16;
+			break;
+		case 'U':
+			res |= UNRUNNABLE;
+			break;
+		case 'I':
+			res |= IDLE;
+			break;
+		case 'M':
+			res |= DAEMON;
+			break;
+		case 'A':
+			res = ~0UL;
+			break;
+		default:
+			  kdb_printf("%s: unknown flag '%c' ignored\n",
+				     __func__, *s);
+			  break;
+		}
+		++s;
+	}
+	return res;
+}
+
+/*
+ * kdb_task_state_char - Return the character that represents the task state.
+ * Inputs:
+ *	p	struct task for the process
+ * Returns:
+ *	One character to represent the task state.
+ */
+char kdb_task_state_char (const struct task_struct *p)
+{
+	int cpu;
+	char state;
+	unsigned long tmp;
+
+	if (!p || probe_kernel_read(&tmp, (char *)p, sizeof(unsigned long)))
+		return 'E';
+
+	cpu = kdb_process_cpu(p);
+	state = (p->state == 0) ? 'R' :
+		(p->state < 0) ? 'U' :
+		(p->state & TASK_UNINTERRUPTIBLE) ? 'D' :
+		(p->state & TASK_STOPPED) ? 'T' :
+		(p->state & TASK_TRACED) ? 'C' :
+		(p->exit_state & EXIT_ZOMBIE) ? 'Z' :
+		(p->exit_state & EXIT_DEAD) ? 'E' :
+		(p->state & TASK_INTERRUPTIBLE) ? 'S' : '?';
+	if (p->pid == 0) {
+		/* Idle task.  Is it really idle, apart from the kdb
+		 * interrupt? */
+		if (!kdb_task_has_cpu(p) || kgdb_info[cpu].irq_depth == 1) {
+			if (cpu != kdb_initial_cpu)
+				state = 'I';	/* idle task */
+		}
+	} else if (!p->mm && state == 'S') {
+		state = 'M';	/* sleeping system daemon */
+	}
+	return state;
+}
+
+/*
+ * kdb_task_state - Return true if a process has the desired state
+ *	given by the mask.
+ * Inputs:
+ *	p	struct task for the process
+ *	mask	mask from kdb_task_state_string to select processes
+ * Returns:
+ *	True if the process matches at least one criteria defined by the mask.
+ */
+unsigned long kdb_task_state(const struct task_struct *p, unsigned long mask)
+{
+	char state[] = { kdb_task_state_char(p), '\0' };
+	return (mask & kdb_task_state_string(state)) != 0;
+}
+
+/*
+ * kdb_print_nameval - Print a name and its value, converting the
+ *	value to a symbol lookup if possible.
+ * Inputs:
+ *	name	field name to print
+ *	val	value of field
+ */
+void kdb_print_nameval(const char *name, unsigned long val)
+{
+	kdb_symtab_t symtab;
+	kdb_printf("  %-11.11s ", name);
+	if (kdbnearsym(val, &symtab))
+		kdb_symbol_print(val, &symtab,
+				 KDB_SP_VALUE|KDB_SP_SYMSIZE|KDB_SP_NEWLINE);
+	else
+		kdb_printf("0x%lx\n", val);
+}
+
+/* Last ditch allocator for debugging, so we can still debug even when
+ * the GFP_ATOMIC pool has been exhausted.  The algorithms are tuned
+ * for space usage, not for speed.  One smallish memory pool, the free
+ * chain is always in ascending address order to allow coalescing,
+ * allocations are done in brute force best fit.
+ */
+
+struct debug_alloc_header {
+	u32 next;	/* offset of next header from start of pool */
+	u32 size;
+	void *caller;
+};
+
+/* The memory returned by this allocator must be aligned, which means
+ * so must the header size.  Do not assume that sizeof(struct
+ * debug_alloc_header) is a multiple of the alignment, explicitly
+ * calculate the overhead of this header, including the alignment.
+ * The rest of this code must not use sizeof() on any header or
+ * pointer to a header.
+ */
+#define dah_align 8
+#define dah_overhead ALIGN(sizeof(struct debug_alloc_header), dah_align)
+
+static u64 debug_alloc_pool_aligned[256*1024/dah_align];	/* 256K pool */
+static char *debug_alloc_pool = (char *)debug_alloc_pool_aligned;
+static u32 dah_first, dah_first_call = 1, dah_used, dah_used_max;
+
+/* Locking is awkward.  The debug code is called from all contexts,
+ * including non maskable interrupts.  A normal spinlock is not safe
+ * in NMI context.  Try to get the debug allocator lock, if it cannot
+ * be obtained after a second then give up.  If the lock could not be
+ * previously obtained on this cpu then only try once.
+ *
+ * sparse has no annotation for "this function _sometimes_ acquires a
+ * lock", so fudge the acquire/release notation.
+ */
+static DEFINE_SPINLOCK(dap_lock);
+static int get_dap_lock(void)
+	__acquires(dap_lock)
+{
+	static int dap_locked = -1;
+	int count;
+	if (dap_locked == smp_processor_id())
+		count = 1;
+	else
+		count = 1000;
+	while (1) {
+		if (spin_trylock(&dap_lock)) {
+			dap_locked = -1;
+			return 1;
+		}
+		if (!count--)
+			break;
+		udelay(1000);
+	}
+	dap_locked = smp_processor_id();
+	__acquire(dap_lock);
+	return 0;
+}
+
+void *debug_kmalloc(size_t size, gfp_t flags)
+{
+	unsigned int rem, h_offset;
+	struct debug_alloc_header *best, *bestprev, *prev, *h;
+	void *p = NULL;
+	if (!get_dap_lock()) {
+		__release(dap_lock);	/* we never actually got it */
+		return NULL;
+	}
+	h = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+	if (dah_first_call) {
+		h->size = sizeof(debug_alloc_pool_aligned) - dah_overhead;
+		dah_first_call = 0;
+	}
+	size = ALIGN(size ? size : 1, dah_align);	/* no 0-size blocks */
+	prev = best = bestprev = NULL;
+	while (1) {
+		if (h->size >= size && (!best || h->size < best->size)) {
+			best = h;
+			bestprev = prev;
+			if (h->size == size)
+				break;
+		}
+		if (!h->next)
+			break;
+		prev = h;
+		h = (struct debug_alloc_header *)(debug_alloc_pool + h->next);
+	}
+	if (!best)
+		goto out;
+	rem = best->size - size;
+	/* The pool must always contain at least one header */
+	if (best->next == 0 && bestprev == NULL && rem < dah_overhead)
+		goto out;
+	if (rem >= dah_overhead) {
+		best->size = size;
+		h_offset = ((char *)best - debug_alloc_pool) +
+			   dah_overhead + best->size;
+		h = (struct debug_alloc_header *)(debug_alloc_pool + h_offset);
+		h->size = rem - dah_overhead;
+		h->next = best->next;
+	} else
+		h_offset = best->next;
+	best->caller = __builtin_return_address(0);
+	dah_used += best->size;
+	dah_used_max = max(dah_used, dah_used_max);
+	if (bestprev)
+		bestprev->next = h_offset;
+	else
+		dah_first = h_offset;
+	p = (char *)best + dah_overhead;
+	memset(p, POISON_INUSE, best->size - 1);
+	*((char *)p + best->size - 1) = POISON_END;
+out:
+	spin_unlock(&dap_lock);
+	return p;
+}
+
+void debug_kfree(void *p)
+{
+	struct debug_alloc_header *h;
+	unsigned int h_offset;
+	if (!p)
+		return;
+	if ((char *)p < debug_alloc_pool ||
+	    (char *)p >= debug_alloc_pool + sizeof(debug_alloc_pool_aligned)) {
+		kfree(p);
+		return;
+	}
+	if (!get_dap_lock()) {
+		__release(dap_lock);	/* we never actually got it */
+		return;		/* memory leak, cannot be helped */
+	}
+	h = (struct debug_alloc_header *)((char *)p - dah_overhead);
+	memset(p, POISON_FREE, h->size - 1);
+	*((char *)p + h->size - 1) = POISON_END;
+	h->caller = NULL;
+	dah_used -= h->size;
+	h_offset = (char *)h - debug_alloc_pool;
+	if (h_offset < dah_first) {
+		h->next = dah_first;
+		dah_first = h_offset;
+	} else {
+		struct debug_alloc_header *prev;
+		unsigned int prev_offset;
+		prev = (struct debug_alloc_header *)(debug_alloc_pool +
+						     dah_first);
+		while (1) {
+			if (!prev->next || prev->next > h_offset)
+				break;
+			prev = (struct debug_alloc_header *)
+				(debug_alloc_pool + prev->next);
+		}
+		prev_offset = (char *)prev - debug_alloc_pool;
+		if (prev_offset + dah_overhead + prev->size == h_offset) {
+			prev->size += dah_overhead + h->size;
+			memset(h, POISON_FREE, dah_overhead - 1);
+			*((char *)h + dah_overhead - 1) = POISON_END;
+			h = prev;
+			h_offset = prev_offset;
+		} else {
+			h->next = prev->next;
+			prev->next = h_offset;
+		}
+	}
+	if (h_offset + dah_overhead + h->size == h->next) {
+		struct debug_alloc_header *next;
+		next = (struct debug_alloc_header *)
+			(debug_alloc_pool + h->next);
+		h->size += dah_overhead + next->size;
+		h->next = next->next;
+		memset(next, POISON_FREE, dah_overhead - 1);
+		*((char *)next + dah_overhead - 1) = POISON_END;
+	}
+	spin_unlock(&dap_lock);
+}
+
+void debug_kusage(void)
+{
+	struct debug_alloc_header *h_free, *h_used;
+#ifdef	CONFIG_IA64
+	/* FIXME: using dah for ia64 unwind always results in a memory leak.
+	 * Fix that memory leak first, then set debug_kusage_one_time = 1 for
+	 * all architectures.
+	 */
+	static int debug_kusage_one_time;
+#else
+	static int debug_kusage_one_time = 1;
+#endif
+	if (!get_dap_lock()) {
+		__release(dap_lock);	/* we never actually got it */
+		return;
+	}
+	h_free = (struct debug_alloc_header *)(debug_alloc_pool + dah_first);
+	if (dah_first == 0 &&
+	    (h_free->size == sizeof(debug_alloc_pool_aligned) - dah_overhead ||
+	     dah_first_call))
+		goto out;
+	if (!debug_kusage_one_time)
+		goto out;
+	debug_kusage_one_time = 0;
+	kdb_printf("%s: debug_kmalloc memory leak dah_first %d\n",
+		   __func__, dah_first);
+	if (dah_first) {
+		h_used = (struct debug_alloc_header *)debug_alloc_pool;
+		kdb_printf("%s: h_used %p size %d\n", __func__, h_used,
+			   h_used->size);
+	}
+	do {
+		h_used = (struct debug_alloc_header *)
+			  ((char *)h_free + dah_overhead + h_free->size);
+		kdb_printf("%s: h_used %p size %d caller %p\n",
+			   __func__, h_used, h_used->size, h_used->caller);
+		h_free = (struct debug_alloc_header *)
+			  (debug_alloc_pool + h_free->next);
+	} while (h_free->next);
+	h_used = (struct debug_alloc_header *)
+		  ((char *)h_free + dah_overhead + h_free->size);
+	if ((char *)h_used - debug_alloc_pool !=
+	    sizeof(debug_alloc_pool_aligned))
+		kdb_printf("%s: h_used %p size %d caller %p\n",
+			   __func__, h_used, h_used->size, h_used->caller);
+out:
+	spin_unlock(&dap_lock);
+}
+
+/* Maintain a small stack of kdb_flags to allow recursion without disturbing
+ * the global kdb state.
+ */
+
+static int kdb_flags_stack[4], kdb_flags_index;
+
+void kdb_save_flags(void)
+{
+	BUG_ON(kdb_flags_index >= ARRAY_SIZE(kdb_flags_stack));
+	kdb_flags_stack[kdb_flags_index++] = kdb_flags;
+}
+
+void kdb_restore_flags(void)
+{
+	BUG_ON(kdb_flags_index <= 0);
+	kdb_flags = kdb_flags_stack[--kdb_flags_index];
+}
-- 
cgit v1.2.3


From dcc7871128e99458ca86186b7bc8bf27ff0c47b5 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:21 -0500
Subject: kgdb: core changes to support kdb

These are the minimum changes to the kgdb core in order to enable an
API to connect a new front end (kdb) to the debug core.

This patch introduces the dbg_kdb_mode variable, which controls where
the user level I/O is routed.  It will be routed to the gdbstub (kgdb) or
to the kdb front end which is a simple shell available over the kgdboc
connection.

You can switch back and forth between kdb or the gdb stub mode of
operation dynamically.  From gdb stub mode you can blindly type
"$3#33", or from the kdb mode you can enter "kgdb" to switch to the
gdb stub.

The logic in the debug core depends on kdb to look for the typical gdb
connection sequences and return immediately with KGDB_PASS_EVENT if a
gdb serial command sequence is detected.  That should allow a
reasonably seamless transition between kdb -> gdb without leaving the
kernel exception state.  The two gdb serial queries that kdb is
responsible for detecting are the "?" and "qSupported" packets.

CC: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Martin Hicks <mort@sgi.com>
---
 arch/arm/kernel/kgdb.c     |   5 +++
 arch/mips/kernel/kgdb.c    |   5 +++
 arch/powerpc/kernel/kgdb.c |   5 +++
 arch/x86/kernel/kgdb.c     |   5 +++
 include/linux/kgdb.h       |  11 ++++-
 kernel/debug/debug_core.c  | 107 ++++++++++++++++++++++++++++++++++++++-------
 kernel/debug/debug_core.h  |  24 ++++++++++
 kernel/debug/gdbstub.c     |  36 +++++++++++++++
 lib/Kconfig.kgdb           |   8 +++-
 9 files changed, 186 insertions(+), 20 deletions(-)

(limited to 'include/linux')

diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c
index a5b846b9895d..c868a8864117 100644
--- a/arch/arm/kernel/kgdb.c
+++ b/arch/arm/kernel/kgdb.c
@@ -98,6 +98,11 @@ sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task)
 	gdb_regs[_CPSR]		= thread_regs->ARM_cpsr;
 }
 
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->ARM_pc = pc;
+}
+
 static int compiled_break;
 
 int kgdb_arch_handle_exception(int exception_vector, int signo,
diff --git a/arch/mips/kernel/kgdb.c b/arch/mips/kernel/kgdb.c
index 50c9bb880667..6ed4c83c869b 100644
--- a/arch/mips/kernel/kgdb.c
+++ b/arch/mips/kernel/kgdb.c
@@ -180,6 +180,11 @@ void sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *p)
 	*(ptr++) = regs->cp0_epc;
 }
 
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->cp0_epc = pc;
+}
+
 /*
  * Calls linux_debug_hook before the kernel dies. If KGDB is enabled,
  * then try to fall into the debugger
diff --git a/arch/powerpc/kernel/kgdb.c b/arch/powerpc/kernel/kgdb.c
index 41bada0298c8..c81e3de1306e 100644
--- a/arch/powerpc/kernel/kgdb.c
+++ b/arch/powerpc/kernel/kgdb.c
@@ -309,6 +309,11 @@ void gdb_regs_to_pt_regs(unsigned long *gdb_regs, struct pt_regs *regs)
 	       (unsigned long)(((void *)gdb_regs) + NUMREGBYTES));
 }
 
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc)
+{
+	regs->nip = pc;
+}
+
 /*
  * This function does PowerPC specific procesing for interfacing to gdb.
  */
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index b2258ca91003..f95a2c0b915c 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -690,6 +690,11 @@ unsigned long kgdb_arch_pc(int exception, struct pt_regs *regs)
 	return instruction_pointer(regs);
 }
 
+void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long ip)
+{
+	regs->ip = ip;
+}
+
 struct kgdb_arch arch_kgdb_ops = {
 	/* Breakpoint instruction: */
 	.gdb_bpt_instr		= { 0xcc },
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 4830142ec339..5b37df00000d 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -16,10 +16,12 @@
 #include <linux/serial_8250.h>
 #include <linux/linkage.h>
 #include <linux/init.h>
-
 #include <asm/atomic.h>
+#ifdef CONFIG_HAVE_ARCH_KGDB
 #include <asm/kgdb.h>
+#endif
 
+#ifdef CONFIG_KGDB
 struct pt_regs;
 
 /**
@@ -262,6 +264,7 @@ extern struct kgdb_arch		arch_kgdb_ops;
 
 extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
 
+extern void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc);
 extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern struct kgdb_io *dbg_io_ops;
@@ -279,5 +282,9 @@ extern int kgdb_nmicallback(int cpu, void *regs);
 
 extern int			kgdb_single_step;
 extern atomic_t			kgdb_active;
-
+#define in_dbg_master() \
+	(raw_smp_processor_id() == atomic_read(&kgdb_active))
+#else /* ! CONFIG_KGDB */
+#define in_dbg_master() (0)
+#endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 7e03969330bc..6e1fa829fdeb 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -43,6 +43,7 @@
 #include <linux/sysrq.h>
 #include <linux/init.h>
 #include <linux/kgdb.h>
+#include <linux/kdb.h>
 #include <linux/pid.h>
 #include <linux/smp.h>
 #include <linux/mm.h>
@@ -77,6 +78,11 @@ static DEFINE_SPINLOCK(kgdb_registration_lock);
 static int kgdb_con_registered;
 /* determine if kgdb console output should be used */
 static int kgdb_use_con;
+/* Next cpu to become the master debug core */
+int dbg_switch_cpu;
+
+/* Use kdb or gdbserver mode */
+static int dbg_kdb_mode = 1;
 
 static int __init opt_kgdb_con(char *str)
 {
@@ -100,6 +106,7 @@ static struct kgdb_bkpt		kgdb_break[KGDB_MAX_BREAKPOINTS] = {
  * The CPU# of the active CPU, or -1 if none:
  */
 atomic_t			kgdb_active = ATOMIC_INIT(-1);
+EXPORT_SYMBOL_GPL(kgdb_active);
 
 /*
  * We use NR_CPUs not PERCPU, in case kgdb is used to debug early
@@ -301,7 +308,7 @@ int dbg_set_sw_break(unsigned long addr)
 	return 0;
 }
 
-static int kgdb_deactivate_sw_breakpoints(void)
+int dbg_deactivate_sw_breakpoints(void)
 {
 	unsigned long addr;
 	int error;
@@ -395,8 +402,14 @@ static int kgdb_io_ready(int print_wait)
 		return 1;
 	if (atomic_read(&kgdb_setting_breakpoint))
 		return 1;
-	if (print_wait)
+	if (print_wait) {
+#ifdef CONFIG_KGDB_KDB
+		if (!dbg_kdb_mode)
+			printk(KERN_CRIT "KGDB: waiting... or $3#33 for KDB\n");
+#else
 		printk(KERN_CRIT "KGDB: Waiting for remote debugger\n");
+#endif
+	}
 	return 1;
 }
 
@@ -410,7 +423,7 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
 	/* Panic on recursive debugger calls: */
 	exception_level++;
 	addr = kgdb_arch_pc(ks->ex_vector, ks->linux_regs);
-	kgdb_deactivate_sw_breakpoints();
+	dbg_deactivate_sw_breakpoints();
 
 	/*
 	 * If the break point removed ok at the place exception
@@ -443,11 +456,24 @@ static int kgdb_reenter_check(struct kgdb_state *ks)
 	return 1;
 }
 
+static void dbg_cpu_switch(int cpu, int next_cpu)
+{
+	/* Mark the cpu we are switching away from as a slave when it
+	 * holds the kgdb_active token.  This must be done so that it
+	 * will not enter again as the master when it returns to the
+	 * loop that all the cpus wait in for the debug core. */
+	if (cpu == atomic_read(&kgdb_active)) {
+		kgdb_info[cpu].exception_state |= DCPU_IS_SLAVE;
+		kgdb_info[cpu].exception_state &= ~DCPU_WANT_MASTER;
+	}
+	kgdb_info[next_cpu].exception_state |= DCPU_NEXT_MASTER;
+}
+
 static int kgdb_cpu_enter(struct kgdb_state *ks, struct pt_regs *regs)
 {
 	unsigned long flags;
 	int sstep_tries = 100;
-	int error = 0;
+	int error;
 	int i, cpu;
 	int trace_on = 0;
 acquirelock:
@@ -460,6 +486,8 @@ acquirelock:
 	cpu = ks->cpu;
 	kgdb_info[cpu].debuggerinfo = regs;
 	kgdb_info[cpu].task = current;
+	kgdb_info[cpu].ret_state = 0;
+	kgdb_info[cpu].irq_depth = hardirq_count() >> HARDIRQ_SHIFT;
 	/*
 	 * Make sure the above info reaches the primary CPU before
 	 * our cpu_in_kgdb[] flag setting does:
@@ -471,7 +499,11 @@ acquirelock:
 	 * master cpu and acquire the kgdb_active lock:
 	 */
 	while (1) {
-		if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
+cpu_loop:
+		if (kgdb_info[cpu].exception_state & DCPU_NEXT_MASTER) {
+			kgdb_info[cpu].exception_state &= ~DCPU_NEXT_MASTER;
+			goto cpu_master_loop;
+		} else if (kgdb_info[cpu].exception_state & DCPU_WANT_MASTER) {
 			if (atomic_cmpxchg(&kgdb_active, -1, cpu) == cpu)
 				break;
 		} else if (kgdb_info[cpu].exception_state & DCPU_IS_SLAVE) {
@@ -513,7 +545,7 @@ return_normal:
 	}
 
 	if (!kgdb_io_ready(1)) {
-		error = 1;
+		kgdb_info[cpu].ret_state = 1;
 		goto kgdb_restore; /* No I/O connection, resume the system */
 	}
 
@@ -548,7 +580,7 @@ return_normal:
 	 * Wait for the other CPUs to be notified and be waiting for us:
 	 */
 	for_each_online_cpu(i) {
-		while (!atomic_read(&cpu_in_kgdb[i]))
+		while (kgdb_do_roundup && !atomic_read(&cpu_in_kgdb[i]))
 			cpu_relax();
 	}
 
@@ -557,7 +589,7 @@ return_normal:
 	 * in the debugger and all secondary CPUs are quiescent
 	 */
 	kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
-	kgdb_deactivate_sw_breakpoints();
+	dbg_deactivate_sw_breakpoints();
 	kgdb_single_step = 0;
 	kgdb_contthread = current;
 	exception_level = 0;
@@ -565,8 +597,26 @@ return_normal:
 	if (trace_on)
 		tracing_off();
 
-	/* Talk to debugger with gdbserial protocol */
-	error = gdb_serial_stub(ks);
+	while (1) {
+cpu_master_loop:
+		if (dbg_kdb_mode) {
+			kgdb_connected = 1;
+			error = kdb_stub(ks);
+		} else {
+			error = gdb_serial_stub(ks);
+		}
+
+		if (error == DBG_PASS_EVENT) {
+			dbg_kdb_mode = !dbg_kdb_mode;
+			kgdb_connected = 0;
+		} else if (error == DBG_SWITCH_CPU_EVENT) {
+			dbg_cpu_switch(cpu, dbg_switch_cpu);
+			goto cpu_loop;
+		} else {
+			kgdb_info[cpu].ret_state = error;
+			break;
+		}
+	}
 
 	/* Call the I/O driver's post_exception routine */
 	if (dbg_io_ops->post_exception)
@@ -578,11 +628,16 @@ return_normal:
 		for (i = NR_CPUS-1; i >= 0; i--)
 			atomic_dec(&passive_cpu_wait[i]);
 		/*
-		 * Wait till all the CPUs have quit
-		 * from the debugger.
+		 * Wait till all the CPUs have quit from the debugger,
+		 * but allow a CPU that hit an exception and is
+		 * waiting to become the master to remain in the debug
+		 * core.
 		 */
 		for_each_online_cpu(i) {
-			while (atomic_read(&cpu_in_kgdb[i]))
+			while (kgdb_do_roundup &&
+			       atomic_read(&cpu_in_kgdb[i]) &&
+			       !(kgdb_info[i].exception_state &
+				 DCPU_WANT_MASTER))
 				cpu_relax();
 		}
 	}
@@ -603,7 +658,7 @@ kgdb_restore:
 	clocksource_touch_watchdog();
 	local_irq_restore(flags);
 
-	return error;
+	return kgdb_info[cpu].ret_state;
 }
 
 /*
@@ -632,7 +687,8 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 		return 0; /* Ouch, double exception ! */
 	kgdb_info[ks->cpu].exception_state |= DCPU_WANT_MASTER;
 	ret = kgdb_cpu_enter(ks, regs);
-	kgdb_info[ks->cpu].exception_state &= ~DCPU_WANT_MASTER;
+	kgdb_info[ks->cpu].exception_state &= ~(DCPU_WANT_MASTER |
+						DCPU_IS_SLAVE);
 	return ret;
 }
 
@@ -665,7 +721,7 @@ static void kgdb_console_write(struct console *co, const char *s,
 
 	/* If we're debugging, or KGDB has not connected, don't try
 	 * and print. */
-	if (!kgdb_connected || atomic_read(&kgdb_active) != -1)
+	if (!kgdb_connected || atomic_read(&kgdb_active) != -1 || dbg_kdb_mode)
 		return;
 
 	local_irq_save(flags);
@@ -687,8 +743,14 @@ static void sysrq_handle_dbg(int key, struct tty_struct *tty)
 		printk(KERN_CRIT "ERROR: No KGDB I/O module available\n");
 		return;
 	}
-	if (!kgdb_connected)
+	if (!kgdb_connected) {
+#ifdef CONFIG_KGDB_KDB
+		if (!dbg_kdb_mode)
+			printk(KERN_CRIT "KGDB or $3#33 for KDB\n");
+#else
 		printk(KERN_CRIT "Entering KGDB\n");
+#endif
+	}
 
 	kgdb_breakpoint();
 }
@@ -817,6 +879,16 @@ void kgdb_unregister_io_module(struct kgdb_io *old_dbg_io_ops)
 }
 EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
 
+int dbg_io_get_char(void)
+{
+	int ret = dbg_io_ops->read_char();
+	if (!dbg_kdb_mode)
+		return ret;
+	if (ret == 127)
+		return 8;
+	return ret;
+}
+
 /**
  * kgdb_breakpoint - generate breakpoint exception
  *
@@ -839,6 +911,7 @@ static int __init opt_kgdb_wait(char *str)
 {
 	kgdb_break_asap = 1;
 
+	kdb_init(KDB_INIT_EARLY);
 	if (kgdb_io_module_registered)
 		kgdb_initial_breakpoint();
 
diff --git a/kernel/debug/debug_core.h b/kernel/debug/debug_core.h
index db554f9be51d..44cf3de8cf9e 100644
--- a/kernel/debug/debug_core.h
+++ b/kernel/debug/debug_core.h
@@ -38,6 +38,8 @@ struct debuggerinfo_struct {
 	void			*debuggerinfo;
 	struct task_struct	*task;
 	int			exception_state;
+	int			ret_state;
+	int			irq_depth;
 };
 
 extern struct debuggerinfo_struct kgdb_info[];
@@ -47,9 +49,31 @@ extern int dbg_remove_all_break(void);
 extern int dbg_set_sw_break(unsigned long addr);
 extern int dbg_remove_sw_break(unsigned long addr);
 extern int dbg_activate_sw_breakpoints(void);
+extern int dbg_deactivate_sw_breakpoints(void);
+
+/* polled character access to i/o module */
+extern int dbg_io_get_char(void);
+
+/* stub return value for switching between the gdbstub and kdb */
+#define DBG_PASS_EVENT -12345
+/* Switch from one cpu to another */
+#define DBG_SWITCH_CPU_EVENT -123456
+extern int dbg_switch_cpu;
 
 /* gdbstub interface functions */
 extern int gdb_serial_stub(struct kgdb_state *ks);
 extern void gdbstub_msg_write(const char *s, int len);
 
+/* gdbstub functions used for kdb <-> gdbstub transition */
+extern int gdbstub_state(struct kgdb_state *ks, char *cmd);
+
+#ifdef CONFIG_KGDB_KDB
+extern int kdb_stub(struct kgdb_state *ks);
+#else /* ! CONFIG_KGDB_KDB */
+static inline int kdb_stub(struct kgdb_state *ks)
+{
+	return DBG_PASS_EVENT;
+}
+#endif /* CONFIG_KGDB_KDB */
+
 #endif /* _DEBUG_CORE_H_ */
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index ccdf0929f12d..188203a19657 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -887,6 +887,13 @@ int gdb_serial_stub(struct kgdb_state *ks)
 		case 'Z': /* Break point set */
 			gdb_cmd_break(ks);
 			break;
+#ifdef CONFIG_KGDB_KDB
+		case '3': /* Escape back into kdb */
+			if (remcom_in_buffer[1] == '\0') {
+				gdb_cmd_detachkill(ks);
+				return DBG_PASS_EVENT;
+			}
+#endif
 		case 'C': /* Exception passing */
 			tmp = gdb_cmd_exception_pass(ks);
 			if (tmp > 0)
@@ -932,3 +939,32 @@ kgdb_exit:
 		error = 1;
 	return error;
 }
+
+int gdbstub_state(struct kgdb_state *ks, char *cmd)
+{
+	int error;
+
+	switch (cmd[0]) {
+	case 'e':
+		error = kgdb_arch_handle_exception(ks->ex_vector,
+						   ks->signo,
+						   ks->err_code,
+						   remcom_in_buffer,
+						   remcom_out_buffer,
+						   ks->linux_regs);
+		return error;
+	case 's':
+	case 'c':
+		strcpy(remcom_in_buffer, cmd);
+		return 0;
+	case '?':
+		gdb_cmd_status(ks);
+		break;
+	case '\0':
+		strcpy(remcom_out_buffer, "");
+		break;
+	}
+	dbg_io_ops->write_char('+');
+	put_packet(remcom_out_buffer);
+	return 0;
+}
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 9b5d1d7f2ef7..78de43a5e902 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -3,7 +3,7 @@ config HAVE_ARCH_KGDB
 	bool
 
 menuconfig KGDB
-	bool "KGDB: kernel debugging with remote gdb"
+	bool "KGDB: kernel debugger"
 	depends on HAVE_ARCH_KGDB
 	depends on DEBUG_KERNEL && EXPERIMENTAL
 	help
@@ -57,4 +57,10 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_KDB
+	bool "KGDB_KDB: include kdb frontend for kgdb"
+	default n
+	help
+	  KDB frontend for kernel
+
 endif # KGDB
-- 
cgit v1.2.3


From f5316b4aea024da9266d740322a5481657f6ce59 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:22 -0500
Subject: kgdb,8250,pl011: Return immediately from console poll

The design of the kdb shell requires that every device that can
provide input to kdb have a polling routine that exits immediately if
there is no character available.  This is required in order to get the
page scrolling mechanism working.

Changing the kernel debugger I/O API to require all polling character
routines to exit immediately if there is no data allows the kernel
debugger to process multiple input channels.

NO_POLL_CHAR will be the return code from the polling routine whenever
there is no character available.

CC: linux-serial@vger.kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 drivers/serial/8250.c           |  4 ++--
 drivers/serial/amba-pl011.c     |  6 +++---
 include/linux/kdb.h             |  1 +
 include/linux/serial_core.h     |  1 +
 kernel/debug/debug_core.c       |  2 ++
 kernel/debug/gdbstub.c          | 37 +++++++++++++++++++++++++++++++------
 kernel/debug/kdb/kdb_debugger.c | 10 ++++++++++
 7 files changed, 50 insertions(+), 11 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/serial/8250.c b/drivers/serial/8250.c
index 2b1ea3d4c4f4..891e1dd65f24 100644
--- a/drivers/serial/8250.c
+++ b/drivers/serial/8250.c
@@ -1891,8 +1891,8 @@ static int serial8250_get_poll_char(struct uart_port *port)
 	struct uart_8250_port *up = (struct uart_8250_port *)port;
 	unsigned char lsr = serial_inp(up, UART_LSR);
 
-	while (!(lsr & UART_LSR_DR))
-		lsr = serial_inp(up, UART_LSR);
+	if (!(lsr & UART_LSR_DR))
+		return NO_POLL_CHAR;
 
 	return serial_inp(up, UART_RX);
 }
diff --git a/drivers/serial/amba-pl011.c b/drivers/serial/amba-pl011.c
index 743ebf5f16da..eb4cb480b93e 100644
--- a/drivers/serial/amba-pl011.c
+++ b/drivers/serial/amba-pl011.c
@@ -342,9 +342,9 @@ static int pl010_get_poll_char(struct uart_port *port)
 	struct uart_amba_port *uap = (struct uart_amba_port *)port;
 	unsigned int status;
 
-	do {
-		status = readw(uap->port.membase + UART01x_FR);
-	} while (status & UART01x_FR_RXFE);
+	status = readw(uap->port.membase + UART01x_FR);
+	if (status & UART01x_FR_RXFE)
+		return NO_POLL_CHAR;
 
 	return readw(uap->port.membase + UART01x_DR);
 }
diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index 4d93790faec3..d72fa3908128 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -19,6 +19,7 @@
 #include <asm/atomic.h>
 
 #define KDB_POLL_FUNC_MAX	5
+extern int kdb_poll_idx;
 
 /*
  * kdb_initial_cpu is initialized to -1, and is set to the cpu
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 78dd1e7120a9..ad839963fa68 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -246,6 +246,7 @@ struct uart_ops {
 #endif
 };
 
+#define NO_POLL_CHAR		0x00ff0000
 #define UART_CONFIG_TYPE	(1 << 0)
 #define UART_CONFIG_IRQ		(1 << 1)
 
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 6e1fa829fdeb..1d71df66f3fa 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -882,6 +882,8 @@ EXPORT_SYMBOL_GPL(kgdb_unregister_io_module);
 int dbg_io_get_char(void)
 {
 	int ret = dbg_io_ops->read_char();
+	if (ret == NO_POLL_CHAR)
+		return -1;
 	if (!dbg_kdb_mode)
 		return ret;
 	if (ret == 127)
diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c
index 188203a19657..3c000490a7dd 100644
--- a/kernel/debug/gdbstub.c
+++ b/kernel/debug/gdbstub.c
@@ -30,6 +30,7 @@
 
 #include <linux/kernel.h>
 #include <linux/kgdb.h>
+#include <linux/kdb.h>
 #include <linux/reboot.h>
 #include <linux/uaccess.h>
 #include <asm/cacheflush.h>
@@ -62,6 +63,30 @@ static int hex(char ch)
 	return -1;
 }
 
+#ifdef CONFIG_KGDB_KDB
+static int gdbstub_read_wait(void)
+{
+	int ret = -1;
+	int i;
+
+	/* poll any additional I/O interfaces that are defined */
+	while (ret < 0)
+		for (i = 0; kdb_poll_funcs[i] != NULL; i++) {
+			ret = kdb_poll_funcs[i]();
+			if (ret > 0)
+				break;
+		}
+	return ret;
+}
+#else
+static int gdbstub_read_wait(void)
+{
+	int ret = dbg_io_ops->read_char();
+	while (ret == NO_POLL_CHAR)
+		ret = dbg_io_ops->read_char();
+	return ret;
+}
+#endif
 /* scan for the sequence $<data>#<checksum> */
 static void get_packet(char *buffer)
 {
@@ -75,7 +100,7 @@ static void get_packet(char *buffer)
 		 * Spin and wait around for the start character, ignore all
 		 * other characters:
 		 */
-		while ((ch = (dbg_io_ops->read_char())) != '$')
+		while ((ch = (gdbstub_read_wait())) != '$')
 			/* nothing */;
 
 		kgdb_connected = 1;
@@ -88,7 +113,7 @@ static void get_packet(char *buffer)
 		 * now, read until a # or end of buffer is found:
 		 */
 		while (count < (BUFMAX - 1)) {
-			ch = dbg_io_ops->read_char();
+			ch = gdbstub_read_wait();
 			if (ch == '#')
 				break;
 			checksum = checksum + ch;
@@ -98,8 +123,8 @@ static void get_packet(char *buffer)
 		buffer[count] = 0;
 
 		if (ch == '#') {
-			xmitcsum = hex(dbg_io_ops->read_char()) << 4;
-			xmitcsum += hex(dbg_io_ops->read_char());
+			xmitcsum = hex(gdbstub_read_wait()) << 4;
+			xmitcsum += hex(gdbstub_read_wait());
 
 			if (checksum != xmitcsum)
 				/* failed checksum */
@@ -144,10 +169,10 @@ static void put_packet(char *buffer)
 			dbg_io_ops->flush();
 
 		/* Now see what we get in reply. */
-		ch = dbg_io_ops->read_char();
+		ch = gdbstub_read_wait();
 
 		if (ch == 3)
-			ch = dbg_io_ops->read_char();
+			ch = gdbstub_read_wait();
 
 		/* If we get an ACK, we are done. */
 		if (ch == '+')
diff --git a/kernel/debug/kdb/kdb_debugger.c b/kernel/debug/kdb/kdb_debugger.c
index f024c0c4b8c4..bf6e8270e957 100644
--- a/kernel/debug/kdb/kdb_debugger.c
+++ b/kernel/debug/kdb/kdb_debugger.c
@@ -20,7 +20,15 @@
 get_char_func kdb_poll_funcs[] = {
 	dbg_io_get_char,
 	NULL,
+	NULL,
+	NULL,
+	NULL,
+	NULL,
 };
+EXPORT_SYMBOL_GPL(kdb_poll_funcs);
+
+int kdb_poll_idx = 1;
+EXPORT_SYMBOL_GPL(kdb_poll_idx);
 
 int kdb_stub(struct kgdb_state *ks)
 {
@@ -85,6 +93,7 @@ int kdb_stub(struct kgdb_state *ks)
 	kdb_bp_remove();
 	KDB_STATE_CLEAR(DOING_SS);
 	KDB_STATE_CLEAR(DOING_SSB);
+	KDB_STATE_SET(PAGER);
 	/* zero out any offline cpu data */
 	for_each_present_cpu(i) {
 		if (!cpu_online(i)) {
@@ -112,6 +121,7 @@ int kdb_stub(struct kgdb_state *ks)
 	kdb_initial_cpu = -1;
 	kdb_current_task = NULL;
 	kdb_current_regs = NULL;
+	KDB_STATE_CLEAR(PAGER);
 	kdbnearsym_cleanup();
 	if (error == KDB_CMD_KGDB) {
 		if (KDB_STATE(DOING_KGDB) || KDB_STATE(DOING_KGDB2)) {
-- 
cgit v1.2.3


From 84c08fd61e2d21702337e9fe366e97cdf09bf797 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:24 -0500
Subject: kgdb,docs: Update the kgdb docs to include kdb

Update the kgdb docs to reflect the new directory structure and API.

Merge in the kdb shell information.

[Randy Dunlap <rdunlap@xenotime.net>: grammatical corrections]

CC: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 Documentation/DocBook/kgdb.tmpl     | 692 +++++++++++++++++++++++++++---------
 Documentation/kernel-parameters.txt |   6 +-
 include/linux/kgdb.h                |  12 +-
 3 files changed, 532 insertions(+), 178 deletions(-)

(limited to 'include/linux')

diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
index 5cff41a5fa7c..55f12ac37acd 100644
--- a/Documentation/DocBook/kgdb.tmpl
+++ b/Documentation/DocBook/kgdb.tmpl
@@ -4,7 +4,7 @@
 
 <book id="kgdbOnLinux">
  <bookinfo>
-  <title>Using kgdb and the kgdb Internals</title>
+  <title>Using kgdb, kdb and the kernel debugger internals</title>
 
   <authorgroup>
    <author>
@@ -17,33 +17,8 @@
     </affiliation>
    </author>
   </authorgroup>
-
-  <authorgroup>
-   <author>
-    <firstname>Tom</firstname>
-    <surname>Rini</surname>
-    <affiliation>
-     <address>
-      <email>trini@kernel.crashing.org</email>
-     </address>
-    </affiliation>
-   </author>
-  </authorgroup>
-
-  <authorgroup>
-   <author>
-    <firstname>Amit S.</firstname>
-    <surname>Kale</surname>
-    <affiliation>
-     <address>
-      <email>amitkale@linsyssoft.com</email>
-     </address>
-    </affiliation>
-   </author>
-  </authorgroup>
-
   <copyright>
-   <year>2008</year>
+   <year>2008,2010</year>
    <holder>Wind River Systems, Inc.</holder>
   </copyright>
   <copyright>
@@ -69,41 +44,76 @@
   <chapter id="Introduction">
     <title>Introduction</title>
     <para>
-    kgdb is a source level debugger for linux kernel. It is used along
-    with gdb to debug a linux kernel.  The expectation is that gdb can
-    be used to "break in" to the kernel to inspect memory, variables
-    and look through call stack information similar to what an
-    application developer would use gdb for.  It is possible to place
-    breakpoints in kernel code and perform some limited execution
-    stepping.
+    The kernel has two different debugger front ends (kdb and kgdb)
+    which interface to the debug core.  It is possible to use either
+    of the debugger front ends and dynamically transition between them
+    if you configure the kernel properly at compile and runtime.
+    </para>
+    <para>
+    Kdb is a simplistic shell-style interface which you can use on a
+    system console with a keyboard or serial console.  You can use it
+    to inspect memory, registers, process lists, dmesg, and even set
+    breakpoints to stop in a certain location.  Kdb is not a source
+    level debugger, although you can set breakpoints and execute some
+    basic kernel run control.  Kdb is mainly aimed at doing some
+    analysis to aid in development or diagnosing kernel problems.  You
+    can access some symbols by name in kernel built-ins or in kernel
+    modules if the code was built
+    with <symbol>CONFIG_KALLSYMS</symbol>.
+    </para>
+    <para>
+    Kgdb is intended to be used as a source level debugger for the
+    Linux kernel. It is used along with gdb to debug a Linux kernel.
+    The expectation is that gdb can be used to "break in" to the
+    kernel to inspect memory, variables and look through call stack
+    information similar to the way an application developer would use
+    gdb to debug an application.  It is possible to place breakpoints
+    in kernel code and perform some limited execution stepping.
     </para>
     <para>
-    Two machines are required for using kgdb. One of these machines is a
-    development machine and the other is a test machine.  The kernel
-    to be debugged runs on the test machine. The development machine
-    runs an instance of gdb against the vmlinux file which contains
-    the symbols (not boot image such as bzImage, zImage, uImage...).
-    In gdb the developer specifies the connection parameters and
-    connects to kgdb.  The type of connection a developer makes with
-    gdb depends on the availability of kgdb I/O modules compiled as
-    builtin's or kernel modules in the test machine's kernel.
+    Two machines are required for using kgdb. One of these machines is
+    a development machine and the other is the target machine.  The
+    kernel to be debugged runs on the target machine. The development
+    machine runs an instance of gdb against the vmlinux file which
+    contains the symbols (not boot image such as bzImage, zImage,
+    uImage...).  In gdb the developer specifies the connection
+    parameters and connects to kgdb.  The type of connection a
+    developer makes with gdb depends on the availability of kgdb I/O
+    modules compiled as built-ins or loadable kernel modules in the target
+    machine's kernel.
     </para>
   </chapter>
   <chapter id="CompilingAKernel">
-    <title>Compiling a kernel</title>
+  <title>Compiling a kernel</title>
+  <para>
+  <itemizedlist>
+  <listitem><para>In order to enable compilation of kdb, you must first enable kgdb.</para></listitem>
+  <listitem><para>The kgdb test compile options are described in the kgdb test suite chapter.</para></listitem>
+  </itemizedlist>
+  </para>
+  <sect1 id="CompileKGDB">
+    <title>Kernel config options for kgdb</title>
     <para>
     To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
     "Prompt for development and/or incomplete code/drivers"
     (CONFIG_EXPERIMENTAL) in  "General setup", then under the
-    "Kernel debugging" select "KGDB: kernel debugging with remote gdb".
+    "Kernel debugging" select "KGDB: kernel debugger".
+    </para>
+    <para>
+    While it is not a hard requirement that you have symbols in your
+    vmlinux file, gdb tends not to be very useful without the symbolic
+    data, so you will want to turn
+    on <symbol>CONFIG_DEBUG_INFO</symbol> which is called "Compile the
+    kernel with debug info" in the config menu.
     </para>
     <para>
     It is advised, but not required that you turn on the
-    CONFIG_FRAME_POINTER kernel option.  This option inserts code to
-    into the compiled executable which saves the frame information in
-    registers or on the stack at different points which will allow a
-    debugger such as gdb to more accurately construct stack back traces
-    while debugging the kernel.
+    <symbol>CONFIG_FRAME_POINTER</symbol> kernel option which is called "Compile the
+    kernel with frame pointers" in the config menu.  This option
+    inserts code into the compiled executable which saves the frame
+    information in registers or on the stack at different points which
+    allows a debugger such as gdb to more accurately construct
+    stack back traces while debugging the kernel.
     </para>
     <para>
     If the architecture that you are using supports the kernel option
@@ -116,38 +126,160 @@
     this option.
     </para>
     <para>
-    Next you should choose one of more I/O drivers to interconnect debugging
-    host and debugged target.  Early boot debugging requires a KGDB
-    I/O driver that supports early debugging and the driver must be
-    built into the kernel directly. Kgdb I/O driver configuration
-    takes place via kernel or module parameters, see following
-    chapter.
+    Next you should choose one or more I/O drivers to interconnect
+    debugging host and debugged target.  Early boot debugging requires
+    a KGDB I/O driver that supports early debugging and the driver
+    must be built into the kernel directly. Kgdb I/O driver
+    configuration takes place via kernel or module parameters which
+    you can learn more about in the section that describes the
+    parameter "kgdboc".
     </para>
-    <para>
-    The kgdb test compile options are described in the kgdb test suite chapter.
+    <para>Here is an example set of .config symbols to enable or
+    disable for kgdb:
+    <itemizedlist>
+    <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+    <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+    <listitem><para>CONFIG_KGDB=y</para></listitem>
+    <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+    </itemizedlist>
     </para>
-
+  </sect1>
+  <sect1 id="CompileKDB">
+    <title>Kernel config options for kdb</title>
+    <para>Kdb is quite a bit more complex than the simple gdbstub
+    sitting on top of the kernel's debug core.  Kdb must implement a
+    shell, and also adds some helper functions in other parts of the
+    kernel, responsible for printing out interesting data such as what
+    you would see if you ran "lsmod", or "ps".  In order to build kdb
+    into the kernel you follow the same steps as you would for kgdb.
+    </para>
+    <para>The main config option for kdb
+    is <symbol>CONFIG_KGDB_KDB</symbol> which is called "KGDB_KDB:
+    include kdb frontend for kgdb" in the config menu.  In theory you
+    would have already also selected an I/O driver such as the
+    CONFIG_KGDB_SERIAL_CONSOLE interface if you plan on using kdb on a
+    serial port, when you were configuring kgdb.
+    </para>
+    <para>If you want to use a PS/2-style keyboard with kdb, you would
+    select CONFIG_KDB_KEYBOARD which is called "KGDB_KDB: keyboard as
+    input device" in the config menu.  The CONFIG_KDB_KEYBOARD option
+    is not used for anything in the gdb interface to kgdb.  The
+    CONFIG_KDB_KEYBOARD option only works with kdb.
+    </para>
+    <para>Here is an example set of .config symbols to enable/disable kdb:
+    <itemizedlist>
+    <listitem><para># CONFIG_DEBUG_RODATA is not set</para></listitem>
+    <listitem><para>CONFIG_FRAME_POINTER=y</para></listitem>
+    <listitem><para>CONFIG_KGDB=y</para></listitem>
+    <listitem><para>CONFIG_KGDB_SERIAL_CONSOLE=y</para></listitem>
+    <listitem><para>CONFIG_KGDB_KDB=y</para></listitem>
+    <listitem><para>CONFIG_KDB_KEYBOARD=y</para></listitem>
+    </itemizedlist>
+    </para>
+  </sect1>
   </chapter>
-  <chapter id="EnableKGDB">
-   <title>Enable kgdb for debugging</title>
-   <para>
-   In order to use kgdb you must activate it by passing configuration
-   information to one of the kgdb I/O drivers.  If you do not pass any
-   configuration information kgdb will not do anything at all.  Kgdb
-   will only actively hook up to the kernel trap hooks if a kgdb I/O
-   driver is loaded and configured.  If you unconfigure a kgdb I/O
-   driver, kgdb will unregister all the kernel hook points.
+  <chapter id="kgdbKernelArgs">
+  <title>Kernel Debugger Boot Arguments</title>
+  <para>This section describes the various runtime kernel
+  parameters that affect the configuration of the kernel debugger.
+  The following chapter covers using kdb and kgdb as well as
+  provides some examples of the configuration parameters.</para>
+   <sect1 id="kgdboc">
+   <title>Kernel parameter: kgdboc</title>
+   <para>The kgdboc driver was originally an abbreviation meant to
+   stand for "kgdb over console".  Today it is the primary mechanism
+   to configure how to communicate from gdb to kgdb as well as the
+   devices you want to use to interact with the kdb shell.
+   </para>
+   <para>For kgdb/gdb, kgdboc is designed to work with a single serial
+   port. It is intended to cover the circumstance where you want to
+   use a serial console as your primary console as well as using it to
+   perform kernel debugging.  It is also possible to use kgdb on a
+   serial port which is not designated as a system console.  Kgdboc
+   may be configured as a kernel built-in or a kernel loadable module.
+   You can only make use of <constant>kgdbwait</constant> and early
+   debugging if you build kgdboc into the kernel as a built-in.
    </para>
+   <sect2 id="kgdbocArgs">
+   <title>kgdboc arguments</title>
+   <para>Usage: <constant>kgdboc=[kbd][[,]serial_device][,baud]</constant></para>
+   <sect3 id="kgdbocArgs1">
+   <title>Using loadable module or built-in</title>
    <para>
-   All drivers can be reconfigured at run time, if
-   <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
-   are enabled, by echo'ing a new config string to
-   <constant>/sys/module/&lt;driver&gt;/parameter/&lt;option&gt;</constant>.
-   The driver can be unconfigured by passing an empty string.  You cannot
-   change the configuration while the debugger is attached.  Make sure
-   to detach the debugger with the <constant>detach</constant> command
-   prior to trying unconfigure a kgdb I/O driver.
+   <orderedlist>
+   <listitem><para>As a kernel built-in:</para>
+   <para>Use the kernel boot argument: <constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para></listitem>
+   <listitem>
+   <para>As a kernel loadable module:</para>
+   <para>Use the command: <constant>modprobe kgdboc kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+   <para>Here are two examples of how you might format the kgdboc
+   string. The first is for an x86 target using the first serial port.
+   The second example is for the ARM Versatile AB using the second
+   serial port.
+   <orderedlist>
+   <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+   <listitem><para><constant>kgdboc=ttyAMA1,115200</constant></para></listitem>
+   </orderedlist>
    </para>
+   </listitem>
+   </orderedlist></para>
+   </sect3>
+   <sect3 id="kgdbocArgs2">
+   <title>Configure kgdboc at runtime with sysfs</title>
+   <para>At run time you can enable or disable kgdboc by echoing a
+   parameter into sysfs.  Here are two examples:</para>
+   <orderedlist>
+   <listitem><para>Enable kgdboc on ttyS0</para>
+   <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+   <listitem><para>Disable kgdboc</para>
+   <para><constant>echo "" &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+   </orderedlist>
+   <para>NOTE: You do not need to specify the baud if you are
+   configuring the console on a tty which is already configured or
+   open.</para>
+   </sect3>
+   <sect3 id="kgdbocArgs3">
+   <title>More examples</title>
+   <para>You can configure kgdboc to use the keyboard, and/or a serial device
+   depending on if you are using kdb and/or kgdb, in one of the
+   following scenarios.
+   <orderedlist>
+   <listitem><para>kdb and kgdb over only a serial port</para>
+   <para><constant>kgdboc=&lt;serial_device&gt;[,baud]</constant></para>
+   <para>Example: <constant>kgdboc=ttyS0,115200</constant></para>
+   </listitem>
+   <listitem><para>kdb and kgdb with keyboard and a serial port</para>
+   <para><constant>kgdboc=kbd,&lt;serial_device&gt;[,baud]</constant></para>
+   <para>Example: <constant>kgdboc=kbd,ttyS0,115200</constant></para>
+   </listitem>
+   <listitem><para>kdb with a keyboard</para>
+   <para><constant>kgdboc=kbd</constant></para>
+   </listitem>
+   </orderedlist>
+   </para>
+   </sect3>
+   <para>NOTE: Kgdboc does not support interrupting the target via the
+   gdb remote protocol.  You must manually send a sysrq-g unless you
+   have a proxy that splits console output to a terminal program.
+   A console proxy has a separate TCP port for the debugger and a separate
+   TCP port for the "human" console.  The proxy can take care of sending
+   the sysrq-g for you.
+   </para>
+   <para>When using kgdboc with no debugger proxy, you can end up
+    connecting the debugger at one of two entry points.  If an
+    exception occurs after you have loaded kgdboc, a message should
+    print on the console stating it is waiting for the debugger.  In
+    this case you disconnect your terminal program and then connect the
+    debugger in its place.  If you want to interrupt the target system
+    and forcibly enter a debug session you have to issue a Sysrq
+    sequence and then type the letter <constant>g</constant>.  Then
+    you disconnect the terminal session and connect gdb.  Your options
+    if you don't like this are to hack gdb to send the sysrq-g for you
+    as well as on the initial connect, or to use a debugger proxy that
+    allows an unmodified gdb to do the debugging.
+   </para>
+   </sect2>
+   </sect1>
    <sect1 id="kgdbwait">
    <title>Kernel parameter: kgdbwait</title>
    <para>
@@ -162,103 +294,204 @@
    </para>
    <para>
    The kernel will stop and wait as early as the I/O driver and
-   architecture will allow when you use this option.  If you build the
-   kgdb I/O driver as a kernel module kgdbwait will not do anything.
+   architecture allows when you use this option.  If you build the
+   kgdb I/O driver as a loadable kernel module kgdbwait will not do
+   anything.
    </para>
    </sect1>
-  <sect1 id="kgdboc">
-  <title>Kernel parameter: kgdboc</title>
-  <para>
-  The kgdboc driver was originally an abbreviation meant to stand for
-  "kgdb over console".  Kgdboc is designed to work with a single
-  serial port. It was meant to cover the circumstance
-  where you wanted to use a serial console as your primary console as
-  well as using it to perform kernel debugging.  Of course you can
-  also use kgdboc without assigning a console to the same port.
+   <sect1 id="kgdbcon">
+   <title>Kernel parameter: kgdbcon</title>
+   <para> The kgdbcon feature allows you to see printk() messages
+   inside gdb while gdb is connected to the kernel.  Kdb does not make
+    use of the kgdbcon feature.
+   </para>
+   <para>Kgdb supports using the gdb serial protocol to send console
+   messages to the debugger when the debugger is connected and running.
+   There are two ways to activate this feature.
+   <orderedlist>
+   <listitem><para>Activate with the kernel command line option:</para>
+   <para><constant>kgdbcon</constant></para>
+   </listitem>
+   <listitem><para>Use sysfs before configuring an I/O driver</para>
+   <para>
+   <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
+   </para>
+   <para>
+   NOTE: If you do this after you configure the kgdb I/O driver, the
+   setting will not take effect until the next point the I/O is
+   reconfigured.
+   </para>
+   </listitem>
+   </orderedlist>
+   <para>IMPORTANT NOTE: You cannot use kgdboc + kgdbcon on a tty that is an
+   active system console.  An example incorrect usage is <constant>console=ttyS0,115200 kgdboc=ttyS0 kgdbcon</constant>
+   </para>
+   <para>It is possible to use this option with kgdboc on a tty that is not a system console.
+   </para>
   </para>
-  <sect2 id="UsingKgdboc">
-  <title>Using kgdboc</title>
-  <para>
-  You can configure kgdboc via sysfs or a module or kernel boot line
-  parameter depending on if you build with CONFIG_KGDBOC as a module
-  or built-in.
-  <orderedlist>
-  <listitem><para>From the module load or build-in</para>
-  <para><constant>kgdboc=&lt;tty-device&gt;,[baud]</constant></para>
+  </sect1>
+  </chapter>
+  <chapter id="usingKDB">
+  <title>Using kdb</title>
   <para>
-  The example here would be if your console port was typically ttyS0, you would use something like <constant>kgdboc=ttyS0,115200</constant> or on the ARM Versatile AB you would likely use <constant>kgdboc=ttyAMA0,115200</constant>
+  </para>
+  <sect1 id="quickKDBserial">
+  <title>Quick start for kdb on a serial port</title>
+  <para>This is a quick example of how to use kdb.</para>
+  <para><orderedlist>
+  <listitem><para>Boot kernel with arguments:
+  <itemizedlist>
+  <listitem><para><constant>console=ttyS0,115200 kgdboc=ttyS0,115200</constant></para></listitem>
+  </itemizedlist></para>
+  <para>OR</para>
+  <para>Configure kgdboc after the kernel booted; assuming you are using a serial port console:
+  <itemizedlist>
+  <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+  </itemizedlist>
   </para>
   </listitem>
-  <listitem><para>From sysfs</para>
-  <para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para>
+  <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault.  There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+  <itemizedlist>
+  <listitem><para>When logged in as root or with a super user session you can run:</para>
+   <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+  <listitem><para>Example using minicom 2.2</para>
+  <para>Press: <constant>Control-a</constant></para>
+  <para>Press: <constant>f</constant></para>
+  <para>Press: <constant>g</constant></para>
   </listitem>
-  </orderedlist>
-  </para>
-  <para>
-  NOTE: Kgdboc does not support interrupting the target via the
-  gdb remote protocol.  You must manually send a sysrq-g unless you
-  have a proxy that splits console output to a terminal problem and
-  has a separate port for the debugger to connect to that sends the
-  sysrq-g for you.
+  <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+  <para>Press: <constant>Control-]</constant></para>
+  <para>Type in: <constant>send break</constant></para>
+  <para>Press: <constant>Enter</constant></para>
+  <para>Press: <constant>g</constant></para>
+  </listitem>
+  </itemizedlist>
+  </listitem>
+  <listitem><para>From the kdb prompt you can run the "help" command to see a complete list of the commands that are available.</para>
+  <para>Some useful commands in kdb include:
+  <itemizedlist>
+  <listitem><para>lsmod  -- Shows where kernel modules are loaded</para></listitem>
+  <listitem><para>ps -- Displays only the active processes</para></listitem>
+  <listitem><para>ps A -- Shows all the processes</para></listitem>
+  <listitem><para>summary -- Shows kernel version info and memory usage</para></listitem>
+  <listitem><para>bt -- Get a backtrace of the current process using dump_stack()</para></listitem>
+  <listitem><para>dmesg -- View the kernel syslog buffer</para></listitem>
+  <listitem><para>go -- Continue the system</para></listitem>
+  </itemizedlist>
   </para>
-  <para>When using kgdboc with no debugger proxy, you can end up
-  connecting the debugger for one of two entry points.  If an
-  exception occurs after you have loaded kgdboc a message should print
-  on the console stating it is waiting for the debugger.  In case you
-  disconnect your terminal program and then connect the debugger in
-  its place.  If you want to interrupt the target system and forcibly
-  enter a debug session you have to issue a Sysrq sequence and then
-  type the letter <constant>g</constant>.  Then you disconnect the
-  terminal session and connect gdb.  Your options if you don't like
-  this are to hack gdb to send the sysrq-g for you as well as on the
-  initial connect, or to use a debugger proxy that allows an
-  unmodified gdb to do the debugging.
+  </listitem>
+  <listitem>
+  <para>When you are done using kdb you need to consider rebooting the
+  system or using the "go" command to resume normal kernel
+  execution.  If you have paused the kernel for a lengthy period of
+  time, applications that rely on timely networking or anything to do
+  with real wall clock time could be adversely affected, so you
+  should take this into consideration when using the kernel
+  debugger.</para>
+  </listitem>
+  </orderedlist></para>
+  </sect1>
+  <sect1 id="quickKDBkeyboard">
+  <title>Quick start for kdb using a keyboard connected console</title>
+  <para>This is a quick example of how to use kdb with a keyboard.</para>
+  <para><orderedlist>
+  <listitem><para>Boot kernel with arguments:
+  <itemizedlist>
+  <listitem><para><constant>kgdboc=kbd</constant></para></listitem>
+  </itemizedlist></para>
+  <para>OR</para>
+  <para>Configure kgdboc after the kernel booted:
+  <itemizedlist>
+  <listitem><para><constant>echo kbd &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+  </itemizedlist>
   </para>
-  </sect2>
+  </listitem>
+  <listitem><para>Enter the kernel debugger manually or by waiting for an oops or fault.  There are several ways you can enter the kernel debugger manually; all involve using the sysrq-g, which means you must have enabled CONFIG_MAGIC_SYSRQ=y in your kernel config.</para>
+  <itemizedlist>
+  <listitem><para>When logged in as root or with a super user session you can run:</para>
+   <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+  <listitem><para>Example using a laptop keyboard</para>
+  <para>Press and hold down: <constant>Alt</constant></para>
+  <para>Press and hold down: <constant>Fn</constant></para>
+  <para>Press and release the key with the label: <constant>SysRq</constant></para>
+  <para>Release: <constant>Fn</constant></para>
+  <para>Press and release: <constant>g</constant></para>
+  <para>Release: <constant>Alt</constant></para>
+  </listitem>
+  <listitem><para>Example using a PS/2 101-key keyboard</para>
+  <para>Press and hold down: <constant>Alt</constant></para>
+  <para>Press and release the key with the label: <constant>SysRq</constant></para>
+  <para>Press and release: <constant>g</constant></para>
+  <para>Release: <constant>Alt</constant></para>
+  </listitem>
+  </itemizedlist>
+  </listitem>
+  <listitem>
+  <para>Now type in a kdb command such as "help", "dmesg", "bt" or "go" to continue kernel execution.</para>
+  </listitem>
+  </orderedlist></para>
   </sect1>
-  <sect1 id="kgdbcon">
-  <title>Kernel parameter: kgdbcon</title>
-  <para>
-  Kgdb supports using the gdb serial protocol to send console messages
-  to the debugger when the debugger is connected and running.  There
-  are two ways to activate this feature.
+  </chapter>
+  <chapter id="EnableKGDB">
+   <title>Using kgdb / gdb</title>
+   <para>In order to use kgdb you must activate it by passing
+   configuration information to one of the kgdb I/O drivers.  If you
+   do not pass any configuration information kgdb will not do anything
+   at all.  Kgdb will only actively hook up to the kernel trap hooks
+   if a kgdb I/O driver is loaded and configured.  If you unconfigure
+   a kgdb I/O driver, kgdb will unregister all the kernel hook points.
+   </para>
+   <para> All kgdb I/O drivers can be reconfigured at run time, if
+   <symbol>CONFIG_SYSFS</symbol> and <symbol>CONFIG_MODULES</symbol>
+   are enabled, by echo'ing a new config string to
+   <constant>/sys/module/&lt;driver&gt;/parameters/&lt;option&gt;</constant>.
+   The driver can be unconfigured by passing an empty string.  You cannot
+   change the configuration while the debugger is attached.  Make sure
+   to detach the debugger with the <constant>detach</constant> command
+   prior to trying to unconfigure a kgdb I/O driver.
+   </para>
+  <sect1 id="ConnectingGDB">
+  <title>Connecting with gdb to a serial port</title>
   <orderedlist>
-  <listitem><para>Activate with the kernel command line option:</para>
-  <para><constant>kgdbcon</constant></para>
+  <listitem><para>Configure kgdboc</para>
+   <para>Boot kernel with arguments:
+   <itemizedlist>
+    <listitem><para><constant>kgdboc=ttyS0,115200</constant></para></listitem>
+   </itemizedlist></para>
+   <para>OR</para>
+   <para>Configure kgdboc after the kernel booted:
+   <itemizedlist>
+    <listitem><para><constant>echo ttyS0 &gt; /sys/module/kgdboc/parameters/kgdboc</constant></para></listitem>
+   </itemizedlist></para>
   </listitem>
-  <listitem><para>Use sysfs before configuring an io driver</para>
-  <para>
-  <constant>echo 1 &gt; /sys/module/kgdb/parameters/kgdb_use_con</constant>
-  </para>
-  <para>
-  NOTE: If you do this after you configure the kgdb I/O driver, the
-  setting will not take effect until the next point the I/O is
-  reconfigured.
-  </para>
+  <listitem>
+  <para>Stop kernel execution (break into the debugger)</para>
+  <para>In order to connect to gdb via kgdboc, the kernel must
+  first be stopped.  There are several ways to stop the kernel which
+  include using kgdbwait as a boot argument, via a sysrq-g, or running
+  the kernel until it takes an exception where it waits for the
+  debugger to attach.
+  <itemizedlist>
+  <listitem><para>When logged in as root or with a super user session you can run:</para>
+   <para><constant>echo g &gt; /proc/sysrq-trigger</constant></para></listitem>
+  <listitem><para>Example using minicom 2.2</para>
+  <para>Press: <constant>Control-a</constant></para>
+  <para>Press: <constant>f</constant></para>
+  <para>Press: <constant>g</constant></para>
   </listitem>
-  </orderedlist>
-  </para>
-  <para>
-  IMPORTANT NOTE: Using this option with kgdb over the console
-  (kgdboc) is not supported.
+  <listitem><para>When you have telneted to a terminal server that supports sending a remote break</para>
+  <para>Press: <constant>Control-]</constant></para>
+  <para>Type in: <constant>send break</constant></para>
+  <para>Press: <constant>Enter</constant></para>
+  <para>Press: <constant>g</constant></para>
+  </listitem>
+  </itemizedlist>
   </para>
-  </sect1>
-  </chapter>
-  <chapter id="ConnectingGDB">
-  <title>Connecting gdb</title>
-    <para>
-    If you are using kgdboc, you need to have used kgdbwait as a boot
-    argument, issued a sysrq-g, or the system you are going to debug
-    has already taken an exception and is waiting for the debugger to
-    attach before you can connect gdb.
-    </para>
-    <para>
-    If you are not using different kgdb I/O driver other than kgdboc,
-    you should be able to connect and the target will automatically
-    respond.
-    </para>
+  </listitem>
+  <listitem>
+    <para>Connect from gdb</para>
     <para>
-    Example (using a serial port):
+    Example (using a directly connected port):
     </para>
     <programlisting>
     % gdb ./vmlinux
@@ -266,7 +499,7 @@
     (gdb) target remote /dev/ttyS0
     </programlisting>
     <para>
-    Example (kgdb to a terminal server on tcp port 2012):
+    Example (kgdb to a terminal server on TCP port 2012):
     </para>
     <programlisting>
     % gdb ./vmlinux
@@ -283,6 +516,83 @@
     communications.  You do this prior to issuing the <constant>target
     remote</constant> command by typing in: <constant>set debug remote 1</constant>
     </para>
+  </listitem>
+  </orderedlist>
+  <para>Remember if you continue in gdb, and need to "break in" again,
+  you need to issue another sysrq-g.  It is easy to create a simple
+  entry point by putting a breakpoint at <constant>sys_sync</constant>
+  and then you can run "sync" from a shell or script to break into the
+  debugger.</para>
+  </sect1>
+  </chapter>
+  <chapter id="switchKdbKgdb">
+  <title>kgdb and kdb interoperability</title>
+  <para>It is possible to transition between kdb and kgdb dynamically.
+  The debug core will remember which you used the last time and
+  automatically start in the same mode.</para>
+  <sect1>
+  <title>Switching between kdb and kgdb</title>
+  <sect2>
+  <title>Switching from kgdb to kdb</title>
+  <para>
+  There are two ways to switch from kgdb to kdb: you can use gdb to
+  issue a maintenance packet, or you can blindly type the command $3#33.
+  Whenever the kernel debugger stops in kgdb mode it will print the
+  message <constant>KGDB or $3#33 for KDB</constant>.  It is important
+  to note that you have to type the sequence correctly in one pass.
+  You cannot type a backspace or delete because kgdb will interpret
+  that as part of the debug stream.
+  <orderedlist>
+  <listitem><para>Change from kgdb to kdb by blindly typing:</para>
+  <para><constant>$3#33</constant></para></listitem>
+  <listitem><para>Change from kgdb to kdb with gdb</para>
+  <para><constant>maintenance packet 3</constant></para>
+  <para>NOTE: Now you must kill gdb. Typically you press control-z and
+  issue the command: kill -9 %</para></listitem>
+  </orderedlist>
+  </para>
+  </sect2>
+  <sect2>
+  <title>Change from kdb to kgdb</title>
+  <para>There are two ways you can change from kdb to kgdb.  You can
+  manually enter kgdb mode by issuing the kgdb command from the kdb
+  shell prompt, or you can connect gdb while the kdb shell prompt is
+  active.  The kdb shell looks for the typical first commands that gdb
+  would issue with the gdb remote protocol and if it sees one of those
+  commands it automatically changes into kgdb mode.</para>
+  <orderedlist>
+  <listitem><para>From kdb issue the command:</para>
+  <para><constant>kgdb</constant></para>
+  <para>Now disconnect your terminal program and connect gdb in its place</para></listitem>
+  <listitem><para>At the kdb prompt, disconnect the terminal program and connect gdb in its place.</para></listitem>
+  </orderedlist>
+  </sect2>
+  </sect1>
+  <sect1>
+  <title>Running kdb commands from gdb</title>
+  <para>It is possible to run a limited set of kdb commands from gdb,
+  using the gdb monitor command.  You don't want to execute any of the
+  run control or breakpoint operations, because it can disrupt the
+  state of the kernel debugger.  You should be using gdb for
+  breakpoints and run control operations if you have gdb connected.
+  The more useful commands to run are things like lsmod, dmesg, ps or
+  possibly some of the memory information commands.  To see all the kdb
+  commands you can run <constant>monitor help</constant>.</para>
+  <para>Example:
+  <informalexample><programlisting>
+(gdb) monitor ps
+1 idle process (state I) and
+27 sleeping system daemon (state M) processes suppressed,
+use 'ps A' to see all.
+Task Addr       Pid   Parent [*] cpu State Thread     Command
+
+0xc78291d0        1        0  0    0   S  0xc7829404  init
+0xc7954150      942        1  0    0   S  0xc7954384  dropbear
+0xc78789c0      944        1  0    0   S  0xc7878bf4  sh
+(gdb)
+  </programlisting></informalexample>
+  </para>
+  </sect1>
   </chapter>
   <chapter id="KGDBTestSuite">
     <title>kgdb Test Suite</title>
@@ -309,34 +619,36 @@
     </para>
   </chapter>
   <chapter id="CommonBackEndReq">
-  <title>KGDB Internals</title>
+  <title>Kernel Debugger Internals</title>
   <sect1 id="kgdbArchitecture">
     <title>Architecture Specifics</title>
       <para>
-      Kgdb is organized into three basic components:
+      The kernel debugger is organized into a number of components:
       <orderedlist>
-      <listitem><para>kgdb core</para>
+      <listitem><para>The debug core</para>
       <para>
-      The kgdb core is found in kernel/kgdb.c.  It contains:
+      The debug core is found in kernel/debug/debug_core.c.  It contains:
       <itemizedlist>
-      <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
-      <listitem><para>A generic OS exception handler which includes sync'ing the processors into a stopped state on an multi cpu system.</para></listitem>
+      <listitem><para>A generic OS exception handler which includes
+      sync'ing the processors into a stopped state on a multi-CPU
+      system.</para></listitem>
       <listitem><para>The API to talk to the kgdb I/O drivers</para></listitem>
-      <listitem><para>The API to make calls to the arch specific kgdb implementation</para></listitem>
+      <listitem><para>The API to make calls to the arch-specific kgdb implementation</para></listitem>
       <listitem><para>The logic to perform safe memory reads and writes to memory while using the debugger</para></listitem>
       <listitem><para>A full implementation for software breakpoints unless overridden by the arch</para></listitem>
+      <listitem><para>The API to invoke either the kdb or kgdb frontend to the debug core.</para></listitem>
       </itemizedlist>
       </para>
       </listitem>
-      <listitem><para>kgdb arch specific implementation</para>
+      <listitem><para>kgdb arch-specific implementation</para>
       <para>
       This implementation is generally found in arch/*/kernel/kgdb.c.
       As an example, arch/x86/kernel/kgdb.c contains the specifics to
       implement HW breakpoint as well as the initialization to
       dynamically register and unregister for the trap handlers on
-      this architecture.  The arch specific portion implements:
+      this architecture.  The arch-specific portion implements:
       <itemizedlist>
-      <listitem><para>contains an arch specific trap catcher which
+      <listitem><para>contains an arch-specific trap catcher which
       invokes kgdb_handle_exception() to start kgdb about doing its
       work</para></listitem>
       <listitem><para>translation to and from gdb specific packet format to pt_regs</para></listitem>
@@ -347,11 +659,35 @@
       </itemizedlist>
       </para>
       </listitem>
+      <listitem><para>gdbstub frontend (aka kgdb)</para>
+      <para>The gdbstub is located in kernel/debug/gdbstub.c. It contains:</para>
+      <itemizedlist>
+        <listitem><para>All the logic to implement the gdb serial protocol</para></listitem>
+      </itemizedlist>
+      </listitem>
+      <listitem><para>kdb frontend</para>
+      <para>The kdb debugger shell is broken down into a number of
+      components.  The kdb core is located in kernel/debug/kdb.  There
+      are a number of helper functions in some of the other kernel
+      components to make it possible for kdb to examine and report
+      information about the kernel without taking locks that could
+      cause a kernel deadlock.  The kdb core implements the following functionality:</para>
+      <itemizedlist>
+        <listitem><para>A simple shell</para></listitem>
+        <listitem><para>The kdb core command set</para></listitem>
+        <listitem><para>A registration API to register additional kdb shell commands.</para>
+        <para>A good example of a self-contained kdb module is the "ftdump" command for dumping the ftrace buffer.  See: kernel/trace/trace_kdb.c</para></listitem>
+        <listitem><para>The implementation for kdb_printf() which
+        emits messages directly to I/O drivers, bypassing the kernel
+        log.</para></listitem>
+        <listitem><para>SW / HW breakpoint management for the kdb shell</para></listitem>
+      </itemizedlist>
+      </listitem>
       <listitem><para>kgdb I/O driver</para>
       <para>
-      Each kgdb I/O driver has to provide an implemenation for the following:
+      Each kgdb I/O driver has to provide an implementation for the following:
       <itemizedlist>
-      <listitem><para>configuration via builtin or module</para></listitem>
+      <listitem><para>configuration via built-in or module</para></listitem>
       <listitem><para>dynamic configuration and kgdb hook registration calls</para></listitem>
       <listitem><para>read and write character interface</para></listitem>
       <listitem><para>A cleanup handler for unconfiguring from the kgdb core</para></listitem>
@@ -416,15 +752,15 @@
   underlying low level to the hardware driver having "polling hooks"
   which the to which the tty driver is attached.  In the initial
   implementation of kgdboc it the serial_core was changed to expose a
-  low level uart hook for doing polled mode reading and writing of a
+  low level UART hook for doing polled mode reading and writing of a
   single character while in an atomic context.  When kgdb makes an I/O
   request to the debugger, kgdboc invokes a call back in the serial
-  core which in turn uses the call back in the uart driver.  It is
-  certainly possible to extend kgdboc to work with non-uart based
+  core which in turn uses the call back in the UART driver.  It is
+  certainly possible to extend kgdboc to work with non-UART based
   consoles in the future.
   </para>
   <para>
-  When using kgdboc with a uart, the uart driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
+  When using kgdboc with a UART, the UART driver must implement two callbacks in the <constant>struct uart_ops</constant>. Example from drivers/8250.c:<programlisting>
 #ifdef CONFIG_CONSOLE_POLL
 	.poll_get_char = serial8250_get_poll_char,
 	.poll_put_char = serial8250_put_poll_char,
@@ -434,7 +770,7 @@
   <constant>#ifdef CONFIG_CONSOLE_POLL</constant>, as shown above.
   Keep in mind that polling hooks have to be implemented in such a way
   that they can be called from an atomic context and have to restore
-  the state of the uart chip on return such that the system can return
+  the state of the UART chip on return such that the system can return
   to normal when the debugger detaches.  You need to be very careful
   with any kind of lock you consider, because failing here is most
   going to mean pressing the reset button.
@@ -453,6 +789,10 @@
 		<itemizedlist>
 		<listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
 		</itemizedlist>
+                In Jan 2010 this document was updated to include kdb.
+		<itemizedlist>
+		<listitem><para>Jason Wessel<email>jason.wessel@windriver.com</email></para></listitem>
+		</itemizedlist>
 	</para>
   </chapter>
 </book>
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 3845e3a84a52..bdce359820bb 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -58,6 +58,7 @@ parameter is applicable:
 	ISAPNP	ISA PnP code is enabled.
 	ISDN	Appropriate ISDN support is enabled.
 	JOY	Appropriate joystick support is enabled.
+	KGDB	Kernel debugger support is enabled.
 	KVM	Kernel Virtual Machine support is enabled.
 	LIBATA  Libata driver is enabled
 	LP	Printer support is enabled.
@@ -1120,13 +1121,16 @@ and is between 256 and 4096 characters. It is defined in the file
 			use the HighMem zone if it exists, and the Normal
 			zone if it does not.
 
-	kgdboc=		[HW] kgdb over consoles.
+	kgdboc=		[KGDB,HW] kgdb over consoles.
 			Requires a tty driver that supports console polling,
 			or a supported polling keyboard driver (non-usb).
 			Serial only format: <serial_device>[,baud]
 			keyboard only format: kbd
 			keyboard and serial format: kbd,<serial_device>[,baud]
 
+	kgdbwait	[KGDB] Stop kernel execution and enter the
+			kernel debugger at the earliest opportunity.
+
 	kmac=		[MIPS] korina ethernet MAC address.
 			Configure the RouterBoard 532 series on-chip
 			Ethernet adapter MAC address.
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 5b37df00000d..407edb1e0c4d 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -204,6 +204,17 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code,
  */
 extern void kgdb_roundup_cpus(unsigned long flags);
 
+/**
+ *	kgdb_arch_set_pc - Generic call back to update the program counter
+ *	@regs: Current &struct pt_regs.
+ *	@pc: The new value for the program counter
+ *
+ *	This function handles updating the program counter and requires an
+ *	architecture specific implementation.
+ */
+extern void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc);
+
+
 /* Optional functions. */
 extern int kgdb_validate_break_address(unsigned long addr);
 extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr);
@@ -264,7 +275,6 @@ extern struct kgdb_arch		arch_kgdb_ops;
 
 extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
 
-extern void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc);
 extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
 extern struct kgdb_io *dbg_io_ops;
-- 
cgit v1.2.3


From 98ec1878cacb393975cba64f7392eece81716cb4 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Mon, 27 Apr 2009 10:58:06 -0500
Subject: kgdb: remove post_primary_code references

Remove all the references to the kgdb_post_primary_code.  This
function serves no useful purpose because you can obtain the same
information from the "struct kgdb_state *ks" from within the
debugger, if for some reason you want the data.

Also remove the unintentional duplicate assignment for ks->ex_vector.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c    | 29 -----------------------------
 include/linux/kgdb.h      | 14 --------------
 kernel/debug/debug_core.c |  8 --------
 3 files changed, 51 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index f95a2c0b915c..acba57169938 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -47,20 +47,8 @@
 #include <asm/debugreg.h>
 #include <asm/apicdef.h>
 #include <asm/system.h>
-
 #include <asm/apic.h>
 
-/*
- * Put the error code here just in case the user cares:
- */
-static int gdb_x86errcode;
-
-/*
- * Likewise, the vector number here (since GDB only gets the signal
- * number through the usual means, and that's not very specific):
- */
-static int gdb_x86vector = -1;
-
 /**
  *	pt_regs_to_gdb_regs - Convert ptrace regs to GDB regs
  *	@gdb_regs: A pointer to hold the registers in the order GDB wants.
@@ -399,23 +387,6 @@ void kgdb_disable_hw_debug(struct pt_regs *regs)
 	}
 }
 
-/**
- *	kgdb_post_primary_code - Save error vector/code numbers.
- *	@regs: Original pt_regs.
- *	@e_vector: Original error vector.
- *	@err_code: Original error code.
- *
- *	This is needed on architectures which support SMP and KGDB.
- *	This function is called after all the slave cpus have been put
- *	to a know spin state and the primary CPU has control over KGDB.
- */
-void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
-	/* primary processor is completely in the debugger */
-	gdb_x86vector = e_vector;
-	gdb_x86errcode = err_code;
-}
-
 #ifdef CONFIG_SMP
 /**
  *	kgdb_roundup_cpus - Get other CPUs into a holding pattern
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 407edb1e0c4d..406f6f9286f3 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -35,20 +35,6 @@ struct pt_regs;
  */
 extern int kgdb_skipexception(int exception, struct pt_regs *regs);
 
-/**
- *	kgdb_post_primary_code - (optional) Save error vector/code numbers.
- *	@regs: Original pt_regs.
- *	@e_vector: Original error vector.
- *	@err_code: Original error code.
- *
- *	This is usually needed on architectures which support SMP and
- *	KGDB.  This function is called after all the secondary cpus have
- *	been put to a know spin state and the primary CPU has control over
- *	KGDB.
- */
-extern void kgdb_post_primary_code(struct pt_regs *regs, int e_vector,
-				  int err_code);
-
 /**
  *	kgdb_disable_hw_debug - (optional) Disable hardware debugging hook
  *	@regs: Current &struct pt_regs.
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 1aed37b4c564..88a83a225374 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -203,12 +203,6 @@ int __weak kgdb_skipexception(int exception, struct pt_regs *regs)
 	return 0;
 }
 
-void __weak
-kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
-{
-	return;
-}
-
 /**
  *	kgdb_disable_hw_debug - Disable hardware debugging while we in kgdb.
  *	@regs: Current &struct pt_regs.
@@ -588,7 +582,6 @@ return_normal:
 	 * At this point the primary processor is completely
 	 * in the debugger and all secondary CPUs are quiescent
 	 */
-	kgdb_post_primary_code(ks->linux_regs, ks->ex_vector, ks->err_code);
 	dbg_deactivate_sw_breakpoints();
 	kgdb_single_step = 0;
 	kgdb_contthread = current;
@@ -678,7 +671,6 @@ kgdb_handle_exception(int evector, int signo, int ecode, struct pt_regs *regs)
 	ks->cpu			= raw_smp_processor_id();
 	ks->ex_vector		= evector;
 	ks->signo		= signo;
-	ks->ex_vector		= evector;
 	ks->err_code		= ecode;
 	ks->kgdb_usethreadid	= 0;
 	ks->linux_regs		= regs;
-- 
cgit v1.2.3


From f503b5ae53cb557ac351a668fcac1baab1cef0db Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:25 -0500
Subject: x86,kgdb: Add low level debug hook

The only way the debugger can handle a trap inside rcu_lock,
notify_die, or atomic_notifier_call_chain without a triple fault is
to have a low level "first opportunity handler" in the int3 exception
handler.

Generally this will be something the vast majority of folks will not
need, but for those who need it, it is added as a kernel .config
option called KGDB_LOW_LEVEL_TRAP.

CC: Ingo Molnar <mingo@elte.hu>
CC: Thomas Gleixner <tglx@linutronix.de>
CC: H. Peter Anvin <hpa@zytor.com>
CC: x86@kernel.org
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/include/asm/kgdb.h |  3 +++
 arch/x86/kernel/kgdb.c      | 22 +++++++++++++++++++++-
 arch/x86/kernel/traps.c     |  6 ++++++
 include/linux/kgdb.h        |  1 +
 kernel/debug/debug_core.c   |  2 +-
 lib/Kconfig.kgdb            |  9 +++++++++
 6 files changed, 41 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/include/asm/kgdb.h b/arch/x86/include/asm/kgdb.h
index e6c6c808489f..006da3687cdc 100644
--- a/arch/x86/include/asm/kgdb.h
+++ b/arch/x86/include/asm/kgdb.h
@@ -76,4 +76,7 @@ static inline void arch_kgdb_breakpoint(void)
 #define BREAK_INSTR_SIZE	1
 #define CACHE_FLUSH_IS_SAFE	1
 
+extern int kgdb_ll_trap(int cmd, const char *str,
+			struct pt_regs *regs, long err, int trap, int sig);
+
 #endif /* _ASM_X86_KGDB_H */
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index acba57169938..95b89d4cb8f1 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -538,7 +538,7 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 			return NOTIFY_DONE;
 	}
 
-	if (kgdb_handle_exception(args->trapnr, args->signr, args->err, regs))
+	if (kgdb_handle_exception(args->trapnr, args->signr, cmd, regs))
 		return NOTIFY_DONE;
 
 	/* Must touch watchdog before return to normal operation */
@@ -546,6 +546,26 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 	return NOTIFY_STOP;
 }
 
+#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
+int kgdb_ll_trap(int cmd, const char *str,
+		 struct pt_regs *regs, long err, int trap, int sig)
+{
+	struct die_args args = {
+		.regs	= regs,
+		.str	= str,
+		.err	= err,
+		.trapnr	= trap,
+		.signr	= sig,
+
+	};
+
+	if (!kgdb_io_module_registered)
+		return NOTIFY_DONE;
+
+	return __kgdb_notify(&args, cmd);
+}
+#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
+
 static int
 kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr)
 {
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 02cfb9b8f5b1..7eaad4c5110a 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -15,6 +15,7 @@
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
 #include <linux/kdebug.h>
+#include <linux/kgdb.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/ptrace.h>
@@ -451,6 +452,11 @@ void restart_nmi(void)
 /* May run on IST stack. */
 dotraplinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
+#ifdef CONFIG_KGDB_LOW_LEVEL_TRAP
+	if (kgdb_ll_trap(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
+			== NOTIFY_STOP)
+		return;
+#endif /* CONFIG_KGDB_LOW_LEVEL_TRAP */
 #ifdef CONFIG_KPROBES
 	if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
 			== NOTIFY_STOP)
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 406f6f9286f3..19d1b29a2694 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -60,6 +60,7 @@ struct uart_port;
 void kgdb_breakpoint(void);
 
 extern int kgdb_connected;
+extern int kgdb_io_module_registered;
 
 extern atomic_t			kgdb_setting_breakpoint;
 extern atomic_t			kgdb_cpu_doing_single_step;
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 88a83a225374..375e42f0baf0 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -66,7 +66,7 @@ int				kgdb_connected;
 EXPORT_SYMBOL_GPL(kgdb_connected);
 
 /* All the KGDB handlers are installed */
-static int			kgdb_io_module_registered;
+int			kgdb_io_module_registered;
 
 /* Guard for recursive entry */
 static int			exception_level;
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index ee8ae7132f20..c56ccb4ad292 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -57,6 +57,15 @@ config KGDB_TESTS_BOOT_STRING
 	  information about other strings you could use beyond the
 	  default of V1F100.
 
+config KGDB_LOW_LEVEL_TRAP
+       bool "KGDB: Allow debugging with traps in notifiers"
+       depends on X86
+       default n
+       help
+         This will add an extra call back to kgdb for the breakpoint
+         exception handler, which will allow kgdb to step
+         through a notify handler.
+
 config KGDB_KDB
 	bool "KGDB_KDB: include kdb frontend for kgdb"
 	default n
-- 
cgit v1.2.3


From 1cee5e35f15d0893be1ba944f1aec8676e43ab76 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Wed, 3 Jun 2009 14:06:57 -0500
Subject: kgdb: Add the ability to schedule a breakpoint via a tasklet

Some kgdb I/O modules require the ability to create a breakpoint
tasklet, such as kgdboc and external modules such as kgdboe.  The
breakpoint tasklet is used as an asynchronous entry point into the
debugger which will have a different function scope than the current
execution path where it might not be safe to have an inline
breakpoint.  This is true of some of the kgdb I/O drivers which share
code with kgdb and rest of the kernel users.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 include/linux/kgdb.h      |  1 +
 kernel/debug/debug_core.c | 26 ++++++++++++++++++++++++++
 2 files changed, 27 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 19d1b29a2694..ee007ea341b8 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -271,6 +271,7 @@ extern int kgdb_mem2hex(char *mem, char *buf, int count);
 extern int kgdb_hex2mem(char *buf, char *mem, int count);
 
 extern int kgdb_isremovedbreak(unsigned long addr);
+extern void kgdb_schedule_breakpoint(void);
 
 extern int
 kgdb_handle_exception(int ex_vector, int signo, int err_code,
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 375e42f0baf0..fff59019cca0 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -114,6 +114,7 @@ EXPORT_SYMBOL_GPL(kgdb_active);
  */
 static atomic_t			passive_cpu_wait[NR_CPUS];
 static atomic_t			cpu_in_kgdb[NR_CPUS];
+static atomic_t			kgdb_break_tasklet_var;
 atomic_t			kgdb_setting_breakpoint;
 
 struct task_struct		*kgdb_usethread;
@@ -789,6 +790,31 @@ static void kgdb_unregister_callbacks(void)
 	}
 }
 
+/*
+ * There are times a tasklet needs to be used vs a compiled in
+ * break point so as to cause an exception outside a kgdb I/O module,
+ * such as is the case with kgdboe, where calling a breakpoint in the
+ * I/O driver itself would be fatal.
+ */
+static void kgdb_tasklet_bpt(unsigned long ing)
+{
+	kgdb_breakpoint();
+	atomic_set(&kgdb_break_tasklet_var, 0);
+}
+
+static DECLARE_TASKLET(kgdb_tasklet_breakpoint, kgdb_tasklet_bpt, 0);
+
+void kgdb_schedule_breakpoint(void)
+{
+	if (atomic_read(&kgdb_break_tasklet_var) ||
+		atomic_read(&kgdb_active) != -1 ||
+		atomic_read(&kgdb_setting_breakpoint))
+		return;
+	atomic_inc(&kgdb_break_tasklet_var);
+	tasklet_schedule(&kgdb_tasklet_breakpoint);
+}
+EXPORT_SYMBOL_GPL(kgdb_schedule_breakpoint);
+
 static void kgdb_initial_breakpoint(void)
 {
 	kgdb_break_asap = 0;
-- 
cgit v1.2.3


From efe2f29e324fd20e0449bcd6dc6dbe4734c2ba94 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:26 -0500
Subject: kgdboc,kdb: Allow kdb to work on a non open console port

If kdb is open on a serial port that is not actually a console make
sure to call the poll routines to emit and receive characters.

Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Martin Hicks <mort@sgi.com>
---
 drivers/serial/kgdboc.c   | 14 ++++++++++++++
 include/linux/kgdb.h      |  3 +++
 kernel/debug/kdb/kdb_io.c | 16 ++++++++++++++++
 3 files changed, 33 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/serial/kgdboc.c b/drivers/serial/kgdboc.c
index ecef6e1a599a..b765ab48dfe7 100644
--- a/drivers/serial/kgdboc.c
+++ b/drivers/serial/kgdboc.c
@@ -16,6 +16,7 @@
 #include <linux/kgdb.h>
 #include <linux/kdb.h>
 #include <linux/tty.h>
+#include <linux/console.h>
 
 #define MAX_CONFIG_LEN		40
 
@@ -93,12 +94,14 @@ static int configure_kgdboc(void)
 	int tty_line = 0;
 	int err;
 	char *cptr = config;
+	struct console *cons;
 
 	err = kgdboc_option_setup(config);
 	if (err || !strlen(config) || isspace(config[0]))
 		goto noconfig;
 
 	err = -ENODEV;
+	kgdboc_io_ops.is_console = 0;
 	kgdb_tty_driver = NULL;
 
 	if (kgdboc_register_kbd(&cptr))
@@ -108,6 +111,17 @@ static int configure_kgdboc(void)
 	if (!p)
 		goto noconfig;
 
+	cons = console_drivers;
+	while (cons) {
+		int idx;
+		if (cons->device && cons->device(cons, &idx) == p &&
+		    idx == tty_line) {
+			kgdboc_io_ops.is_console = 1;
+			break;
+		}
+		cons = cons->next;
+	}
+
 	kgdb_tty_driver = p;
 	kgdb_tty_line = tty_line;
 
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index ee007ea341b8..6c784ab6856a 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -247,6 +247,8 @@ struct kgdb_arch {
  * the I/O driver.
  * @post_exception: Pointer to a function that will do any cleanup work
  * for the I/O driver.
+ * @is_console: 1 if the end device is a console, 0 if the I/O device is
+ * not a console
  */
 struct kgdb_io {
 	const char		*name;
@@ -256,6 +258,7 @@ struct kgdb_io {
 	int			(*init) (void);
 	void			(*pre_exception) (void);
 	void			(*post_exception) (void);
+	int			is_console;
 };
 
 extern struct kgdb_arch		arch_kgdb_ops;
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 8339b291e8bc..58be7e9c9e95 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -673,6 +673,14 @@ kdb_printit:
 	if (!dbg_kdb_mode && kgdb_connected) {
 		gdbstub_msg_write(kdb_buffer, retlen);
 	} else {
+		if (!dbg_io_ops->is_console) {
+			len = strlen(kdb_buffer);
+			cp = kdb_buffer;
+			while (len--) {
+				dbg_io_ops->write_char(*cp);
+				cp++;
+			}
+		}
 		while (c) {
 			c->write(c, kdb_buffer, retlen);
 			touch_nmi_watchdog();
@@ -719,6 +727,14 @@ kdb_printit:
 		kdb_input_flush();
 		c = console_drivers;
 
+		if (!dbg_io_ops->is_console) {
+			len = strlen(moreprompt);
+			cp = moreprompt;
+			while (len--) {
+				dbg_io_ops->write_char(*cp);
+				cp++;
+			}
+		}
 		while (c) {
 			c->write(c, moreprompt, strlen(moreprompt));
 			touch_nmi_watchdog();
-- 
cgit v1.2.3


From d37d39ae3b4a8f9a21114921fb344fe7cadb1abd Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:27 -0500
Subject: printk,kdb: capture printk() when in kdb shell

Certain calls from the kdb shell will call out to printk(), and any of
these calls should get vectored back to the kdb_printf() so that the
kdb pager and processing can be used, as well as to properly channel
I/O to the polled I/O devices.

CC: Randy Dunlap <rdunlap@xenotime.net>
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
Acked-by: Andrew Morton <akpm@linux-foundation.org>
---
 include/linux/kdb.h         |  3 +++
 kernel/debug/kdb/kdb_bt.c   |  2 ++
 kernel/debug/kdb/kdb_io.c   | 24 ++++++++++++++++++++----
 kernel/debug/kdb/kdb_main.c |  4 ++++
 kernel/printk.c             |  9 +++++++++
 5 files changed, 38 insertions(+), 4 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kdb.h b/include/linux/kdb.h
index d72fa3908128..ccb2b3ec0fe8 100644
--- a/include/linux/kdb.h
+++ b/include/linux/kdb.h
@@ -78,6 +78,9 @@ typedef enum {
 	KDB_REASON_SSTEP,	/* Single Step trap. - regs valid */
 } kdb_reason_t;
 
+extern int kdb_trap_printk;
+extern int vkdb_printf(const char *fmt, va_list args)
+	    __attribute__ ((format (printf, 1, 0)));
 extern int kdb_printf(const char *, ...)
 	    __attribute__ ((format (printf, 1, 2)));
 typedef int (*kdb_printf_t)(const char *, ...)
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 483fa4e7aaac..2f62fe85f16a 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -23,6 +23,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
 {
 	int old_lvl = console_loglevel;
 	console_loglevel = 15;
+	kdb_trap_printk++;
 	kdb_set_current_task(p);
 	if (addr) {
 		show_stack((struct task_struct *)p, addr);
@@ -36,6 +37,7 @@ static void kdb_show_stack(struct task_struct *p, void *addr)
 		show_stack(p, NULL);
 	}
 	console_loglevel = old_lvl;
+	kdb_trap_printk--;
 }
 
 /*
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 58be7e9c9e95..c9b7f4f90bba 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -29,6 +29,7 @@
 #define CMD_BUFLEN 256
 char kdb_prompt_str[CMD_BUFLEN];
 
+int kdb_trap_printk;
 
 static void kgdb_transition_check(char *buffer)
 {
@@ -533,12 +534,12 @@ static int kdb_search_string(char *searched, char *searchfor)
 	return 0;
 }
 
-int kdb_printf(const char *fmt, ...)
+int vkdb_printf(const char *fmt, va_list ap)
 {
-	va_list ap;
 	int diag;
 	int linecount;
 	int logging, saved_loglevel = 0;
+	int saved_trap_printk;
 	int got_printf_lock = 0;
 	int retlen = 0;
 	int fnd, len;
@@ -549,6 +550,9 @@ int kdb_printf(const char *fmt, ...)
 	unsigned long uninitialized_var(flags);
 
 	preempt_disable();
+	saved_trap_printk = kdb_trap_printk;
+	kdb_trap_printk = 0;
+
 	/* Serialize kdb_printf if multiple cpus try to write at once.
 	 * But if any cpu goes recursive in kdb, just print the output,
 	 * even if it is interleaved with any other text.
@@ -575,9 +579,7 @@ int kdb_printf(const char *fmt, ...)
 		next_avail = kdb_buffer;
 		size_avail = sizeof(kdb_buffer);
 	}
-	va_start(ap, fmt);
 	vsnprintf(next_avail, size_avail, fmt, ap);
-	va_end(ap);
 
 	/*
 	 * If kdb_parse() found that the command was cmd xxx | grep yyy
@@ -805,6 +807,20 @@ kdb_print_out:
 	} else {
 		__release(kdb_printf_lock);
 	}
+	kdb_trap_printk = saved_trap_printk;
 	preempt_enable();
 	return retlen;
 }
+
+int kdb_printf(const char *fmt, ...)
+{
+	va_list ap;
+	int r;
+
+	va_start(ap, fmt);
+	r = vkdb_printf(fmt, ap);
+	va_end(ap);
+
+	return r;
+}
+
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 64ef9ac14ba9..b724c791b6d4 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -1056,7 +1056,9 @@ static void kdb_dumpregs(struct pt_regs *regs)
 {
 	int old_lvl = console_loglevel;
 	console_loglevel = 15;
+	kdb_trap_printk++;
 	show_regs(regs);
+	kdb_trap_printk--;
 	kdb_printf("\n");
 	console_loglevel = old_lvl;
 }
@@ -1819,7 +1821,9 @@ static int kdb_sr(int argc, const char **argv)
 	if (argc != 1)
 		return KDB_ARGCOUNT;
 	sysrq_toggle_support(1);
+	kdb_trap_printk++;
 	handle_sysrq(*argv[1], NULL);
+	kdb_trap_printk--;
 
 	return 0;
 }
diff --git a/kernel/printk.c b/kernel/printk.c
index 9213b8b5bb4f..444b770c9595 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -33,6 +33,7 @@
 #include <linux/bootmem.h>
 #include <linux/syscalls.h>
 #include <linux/kexec.h>
+#include <linux/kdb.h>
 #include <linux/ratelimit.h>
 #include <linux/kmsg_dump.h>
 #include <linux/syslog.h>
@@ -602,6 +603,14 @@ asmlinkage int printk(const char *fmt, ...)
 	va_list args;
 	int r;
 
+#ifdef CONFIG_KGDB_KDB
+	if (unlikely(kdb_trap_printk)) {
+		va_start(args, fmt);
+		r = vkdb_printf(fmt, args);
+		va_end(args);
+		return r;
+	}
+#endif
 	va_start(args, fmt);
 	r = vprintk(fmt, args);
 	va_end(args);
-- 
cgit v1.2.3


From 0b4b3827db386ec6034a5aba1261025b039440c2 Mon Sep 17 00:00:00 2001
From: Jason Wessel <jason.wessel@windriver.com>
Date: Thu, 20 May 2010 21:04:29 -0500
Subject: x86, kgdb, init: Add early and late debug states

The kernel debugger can operate well before mm_init(), but the x86
hardware breakpoint code which uses the perf api requires that the
kernel allocators are initialized.

This means the kernel debug core needs to provide an optional arch
specific call back to allow the initialization functions to run after
the kernel has been further initialized.

The kdb shell already had a similar restriction with an early
initialization and late initialization.  The kdb_init() was moved into
the debug core's version of the late init which is called
dbg_late_init();

CC: kgdb-bugreport@lists.sourceforge.net
Signed-off-by: Jason Wessel <jason.wessel@windriver.com>
---
 arch/x86/kernel/kgdb.c    | 17 ++++++++++-------
 include/linux/kgdb.h      | 14 ++++++++++++++
 init/main.c               |  4 ++--
 kernel/debug/debug_core.c | 16 ++++++++++++++++
 4 files changed, 42 insertions(+), 9 deletions(-)

(limited to 'include/linux')

diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 95b89d4cb8f1..2b71ec41869f 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -595,15 +595,16 @@ static struct notifier_block kgdb_notifier = {
  *	specific callbacks.
  */
 int kgdb_arch_init(void)
+{
+	return register_die_notifier(&kgdb_notifier);
+}
+
+void kgdb_arch_late(void)
 {
 	int i, cpu;
-	int ret;
 	struct perf_event_attr attr;
 	struct perf_event **pevent;
 
-	ret = register_die_notifier(&kgdb_notifier);
-	if (ret != 0)
-		return ret;
 	/*
 	 * Pre-allocate the hw breakpoint structions in the non-atomic
 	 * portion of kgdb because this operation requires mutexs to
@@ -615,12 +616,15 @@ int kgdb_arch_init(void)
 	attr.bp_type = HW_BREAKPOINT_W;
 	attr.disabled = 1;
 	for (i = 0; i < 4; i++) {
+		if (breakinfo[i].pev)
+			continue;
 		breakinfo[i].pev = register_wide_hw_breakpoint(&attr, NULL);
 		if (IS_ERR(breakinfo[i].pev)) {
-			printk(KERN_ERR "kgdb: Could not allocate hw breakpoints\n");
+			printk(KERN_ERR "kgdb: Could not allocate hw"
+			       "breakpoints\nDisabling the kernel debugger\n");
 			breakinfo[i].pev = NULL;
 			kgdb_arch_exit();
-			return -1;
+			return;
 		}
 		for_each_online_cpu(cpu) {
 			pevent = per_cpu_ptr(breakinfo[i].pev, cpu);
@@ -631,7 +635,6 @@ int kgdb_arch_init(void)
 			}
 		}
 	}
-	return ret;
 }
 
 /**
diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 6c784ab6856a..9340f34d1bb5 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -207,6 +207,17 @@ extern int kgdb_validate_break_address(unsigned long addr);
 extern int kgdb_arch_set_breakpoint(unsigned long addr, char *saved_instr);
 extern int kgdb_arch_remove_breakpoint(unsigned long addr, char *bundle);
 
+/**
+ *	kgdb_arch_late - Perform any architecture specific initialization.
+ *
+ *	This function will handle the late initialization of any
+ *	architecture specific callbacks.  This is an optional function for
+ *	handling things like late initialization of hw breakpoints.  The
+ *	default implementation does nothing.
+ */
+extern void kgdb_arch_late(void);
+
+
 /**
  * struct kgdb_arch - Describe architecture specific values.
  * @gdb_bpt_instr: The instruction to trigger a breakpoint.
@@ -285,7 +296,10 @@ extern int			kgdb_single_step;
 extern atomic_t			kgdb_active;
 #define in_dbg_master() \
 	(raw_smp_processor_id() == atomic_read(&kgdb_active))
+extern bool dbg_is_early;
+extern void __init dbg_late_init(void);
 #else /* ! CONFIG_KGDB */
 #define in_dbg_master() (0)
+#define dbg_late_init()
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
diff --git a/init/main.c b/init/main.c
index 372771333d98..22881b5e95e3 100644
--- a/init/main.c
+++ b/init/main.c
@@ -62,7 +62,7 @@
 #include <linux/sched.h>
 #include <linux/signal.h>
 #include <linux/idr.h>
-#include <linux/kdb.h>
+#include <linux/kgdb.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
 #include <linux/kmemcheck.h>
@@ -676,7 +676,7 @@ asmlinkage void __init start_kernel(void)
 	buffer_init();
 	key_init();
 	security_init();
-	kdb_init(KDB_INIT_FULL);
+	dbg_late_init();
 	vfs_caches_init(totalram_pages);
 	signals_init();
 	/* rootfs populating might need page-writeback */
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 64b5588c9638..5cb7cd1de10c 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -78,6 +78,8 @@ static DEFINE_SPINLOCK(kgdb_registration_lock);
 static int kgdb_con_registered;
 /* determine if kgdb console output should be used */
 static int kgdb_use_con;
+/* Flag for alternate operations for early debugging */
+bool dbg_is_early = true;
 /* Next cpu to become the master debug core */
 int dbg_switch_cpu;
 
@@ -777,11 +779,25 @@ static struct notifier_block kgdb_panic_event_nb = {
        .priority	= INT_MAX,
 };
 
+void __weak kgdb_arch_late(void)
+{
+}
+
+void __init dbg_late_init(void)
+{
+	dbg_is_early = false;
+	if (kgdb_io_module_registered)
+		kgdb_arch_late();
+	kdb_init(KDB_INIT_FULL);
+}
+
 static void kgdb_register_callbacks(void)
 {
 	if (!kgdb_io_module_registered) {
 		kgdb_io_module_registered = 1;
 		kgdb_arch_init();
+		if (!dbg_is_early)
+			kgdb_arch_late();
 		atomic_notifier_chain_register(&panic_notifier_list,
 					       &kgdb_panic_event_nb);
 #ifdef CONFIG_MAGIC_SYSRQ
-- 
cgit v1.2.3


From 1ff454ef9b1e852e8f4d295a68a715b1dddf233c Mon Sep 17 00:00:00 2001
From: Pavan Savoy <pavan_savoy@ti.com>
Date: Thu, 8 Apr 2010 13:16:52 -0500
Subject: serial: TTY: new ldiscs for staging

Push the max ldiscs by a few number to allow ldiscs
to exist in the staging directory and elsewhere.

Signed-off-by: Pavan Savoy <pavan_savoy@ti.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/tty.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/tty.h b/include/linux/tty.h
index bb44fa9ae135..7e605e4b3da3 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -23,7 +23,7 @@
  */
 #define NR_UNIX98_PTY_DEFAULT	4096      /* Default maximum for Unix98 ptys */
 #define NR_UNIX98_PTY_MAX	(1 << MINORBITS) /* Absolute limit */
-#define NR_LDISCS		21
+#define NR_LDISCS		30
 
 /* line disciplines */
 #define N_TTY		0
-- 
cgit v1.2.3


From e1eaea46bb4020b38a141b84f88565d4603f8dd0 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Fri, 26 Mar 2010 11:32:54 +0000
Subject: tty: n_gsm line discipline

Add an implementation of GSM 0710 MUX. The implementation currently supports

- Basic and advanced framing (as either end of the link)
- UI or UIH data frames
- Adaption layer 1-4 (1 and 2 via tty, 3 and 4 as skbuff lists)
- Modem and control messages including the correct retry process
- Flow control

and exposes the MUX channels as a set of virtual tty devices including modem
signals. This is an experimental driver.

Signed-off-by: Alan Cox <alan@linux.intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/char/Kconfig   |    9 +-
 drivers/char/Makefile  |    1 +
 drivers/char/n_gsm.c   | 2763 ++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/gsmmux.h |   25 +
 include/linux/tty.h    |    1 +
 5 files changed, 2798 insertions(+), 1 deletion(-)
 create mode 100644 drivers/char/n_gsm.c
 create mode 100644 include/linux/gsmmux.h

(limited to 'include/linux')

diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig
index 3141dd3b6e53..51a57009aec8 100644
--- a/drivers/char/Kconfig
+++ b/drivers/char/Kconfig
@@ -276,11 +276,18 @@ config N_HDLC
 	  Allows synchronous HDLC communications with tty device drivers that
 	  support synchronous HDLC such as the Microgate SyncLink adapter.
 
-	  This driver can only be built as a module ( = code which can be
+	  This driver can be built as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want).
 	  The module will be called n_hdlc. If you want to do that, say M
 	  here.
 
+config N_GSM
+	tristate "GSM MUX line discipline support (EXPERIMENTAL)"
+	depends on EXPERIMENTAL
+	help
+	  This line discipline provides support for the GSM MUX protocol and
+	  presents the mux as a set of 61 individual tty devices.
+
 config RISCOM8
 	tristate "SDL RISCom/8 card support"
 	depends on SERIAL_NONSTANDARD
diff --git a/drivers/char/Makefile b/drivers/char/Makefile
index f957edf7e45d..d39be4cf1f5d 100644
--- a/drivers/char/Makefile
+++ b/drivers/char/Makefile
@@ -40,6 +40,7 @@ obj-$(CONFIG_SYNCLINK)		+= synclink.o
 obj-$(CONFIG_SYNCLINKMP)	+= synclinkmp.o
 obj-$(CONFIG_SYNCLINK_GT)	+= synclink_gt.o
 obj-$(CONFIG_N_HDLC)		+= n_hdlc.o
+obj-$(CONFIG_N_GSM)		+= n_gsm.o
 obj-$(CONFIG_AMIGA_BUILTIN_SERIAL) += amiserial.o
 obj-$(CONFIG_SX)		+= sx.o generic_serial.o
 obj-$(CONFIG_RIO)		+= rio/ generic_serial.o
diff --git a/drivers/char/n_gsm.c b/drivers/char/n_gsm.c
new file mode 100644
index 000000000000..c4161d5e053d
--- /dev/null
+++ b/drivers/char/n_gsm.c
@@ -0,0 +1,2763 @@
+/*
+ * n_gsm.c GSM 0710 tty multiplexor
+ * Copyright (c) 2009/10 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *	* THIS IS A DEVELOPMENT SNAPSHOT IT IS NOT A FINAL RELEASE *
+ *
+ * TO DO:
+ *	Mostly done:	ioctls for setting modes/timing
+ *	Partly done: 	hooks so you can pull off frames to non tty devs
+ *	Restart DLCI 0 when it closes ?
+ *	Test basic encoding
+ *	Improve the tx engine
+ *	Resolve tx side locking by adding a queue_head and routing
+ *		all control traffic via it
+ *	General tidy/document
+ *	Review the locking/move to refcounts more (mux now moved to an
+ *		alloc/free model ready)
+ *	Use newest tty open/close port helpers and install hooks
+ *	What to do about power functions ?
+ *	Termios setting and negotiation
+ *	Do we need a 'which mux are you' ioctl to correlate mux and tty sets
+ *
+ */
+
+#include <linux/types.h>
+#include <linux/major.h>
+#include <linux/errno.h>
+#include <linux/signal.h>
+#include <linux/fcntl.h>
+#include <linux/sched.h>
+#include <linux/interrupt.h>
+#include <linux/tty.h>
+#include <linux/timer.h>
+#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <linux/slab.h>
+#include <linux/poll.h>
+#include <linux/bitops.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <linux/module.h>
+#include <linux/timer.h>
+#include <linux/tty_flip.h>
+#include <linux/tty_driver.h>
+#include <linux/serial.h>
+#include <linux/kfifo.h>
+#include <linux/skbuff.h>
+#include <linux/gsmmux.h>
+
+static int debug;
+module_param(debug, int, 0600);
+
+/* Use long timers for testing at low speed with debug on */
+#ifdef DEBUG_TIMING
+#define T1	HZ
+#define T2	(2 * HZ)
+#else	/* normal timers; T1/T2 are only ever defined once */
+#define T1	(HZ/10)
+#define T2	(HZ/3)
+#endif
+#define N2	3
+
+/* Semi-arbitrary buffer size limits. 0710 is normally run with 32-64 byte
+   limits so this is plenty */
+#define MAX_MRU 512
+#define MAX_MTU 512
+
+/*
+ *	Each block of data we have queued to go out is in the form of
+ *	a gsm_msg which holds everything we need in a link layer independent
+ *	format
+ */
+
+struct gsm_msg {
+	struct gsm_msg *next;
+	u8 addr;		/* DLCI address + flags */
+	u8 ctrl;		/* Control byte + flags */
+	unsigned int len;	/* Length of data block (can be zero) */
+	unsigned char *data;	/* Points into buffer but not at the start */
+	unsigned char buffer[0];
+};
+
+/*
+ *	Each active data link has a gsm_dlci structure associated which ties
+ *	the link layer to an optional tty (if the tty side is open). To avoid
+ *	complexity right now these are only ever freed up when the mux is
+ *	shut down.
+ *
+ *	At the moment we don't free DLCI objects until the mux is torn down;
+ *	this avoids object lifetime issues but might be worth review later.
+ */
+
+struct gsm_dlci {
+	struct gsm_mux *gsm;
+	int addr;
+	int state;
+#define DLCI_CLOSED		0
+#define DLCI_OPENING		1	/* Sending SABM not seen UA */
+#define DLCI_OPEN		2	/* SABM/UA complete */
+#define DLCI_CLOSING		3	/* Sending DISC not seen UA/DM */
+
+	/* Link layer */
+	spinlock_t lock;	/* Protects the internal state */
+	struct timer_list t1;	/* Retransmit timer for SABM and UA */
+	int retries;
+	/* Uplink tty if active */
+	struct tty_port port;	/* The tty bound to this DLCI if there is one */
+	struct kfifo *fifo;	/* Queue fifo for the DLCI */
+	struct kfifo _fifo;	/* For new fifo API porting only */
+	int adaption;		/* Adaption layer in use */
+	u32 modem_rx;		/* Our incoming virtual modem lines */
+	u32 modem_tx;		/* Our outgoing modem lines */
+	int dead;		/* Refuse re-open */
+	/* Flow control */
+	int throttled;		/* Private copy of throttle state */
+	int constipated;	/* Throttle status for outgoing */
+	/* Packetised I/O */
+	struct sk_buff *skb;	/* Frame being sent */
+	struct sk_buff_head skb_list;	/* Queued frames */
+	/* Data handling callback */
+	void (*data)(struct gsm_dlci *dlci, u8 *data, int len);
+};
+
+/* DLCI 0, 62/63 are special or reserved see gsmtty_open */
+
+#define NUM_DLCI		64
+
+/*
+ *	DLCI 0 is used to pass control blocks out of band of the data
+ *	flow (and with a higher link priority). One command can be outstanding
+ *	at a time and we use this structure to manage them. They are created
+ *	and destroyed by the user context, and updated by the receive paths
+ *	and timers
+ */
+
+struct gsm_control {
+	u8 cmd;		/* Command we are issuing */
+	u8 *data;	/* Data for the command in case we retransmit */
+	int len;	/* Length of block for retransmission */
+	int done;	/* Done flag */
+	int error;	/* Error if any */
+};
+
+/*
+ *	Each GSM mux we have is represented by this structure. If we are
+ *	operating as an ldisc then we use this structure as our ldisc
+ *	state. We need to sort out lifetimes and locking with respect
+ *	to the gsm mux array. For now we don't free DLCI objects that
+ *	have been instantiated until the mux itself is terminated.
+ *
+ *	To consider further: tty open versus mux shutdown.
+ */
+
+struct gsm_mux {
+	struct tty_struct *tty;		/* The tty our ldisc is bound to */
+	spinlock_t lock;
+
+	/* Events on the GSM channel */
+	wait_queue_head_t event;
+
+	/* Bits for GSM mode decoding */
+
+	/* Framing Layer */
+	unsigned char *buf;
+	int state;
+#define GSM_SEARCH		0
+#define GSM_START		1
+#define GSM_ADDRESS		2
+#define GSM_CONTROL		3
+#define GSM_LEN			4
+#define GSM_DATA		5
+#define GSM_FCS			6
+#define GSM_OVERRUN		7
+	unsigned int len;
+	unsigned int address;
+	unsigned int count;
+	int escape;
+	int encoding;
+	u8 control;
+	u8 fcs;
+	u8 *txframe;			/* TX framing buffer */
+
+	/* Methods for the receiver side */
+	void (*receive)(struct gsm_mux *gsm, u8 ch);
+	void (*error)(struct gsm_mux *gsm, u8 ch, u8 flag);
+	/* And transmit side */
+	int (*output)(struct gsm_mux *mux, u8 *data, int len);
+
+	/* Link Layer */
+	unsigned int mru;
+	unsigned int mtu;
+	int initiator;			/* Did we initiate connection */
+	int dead;			/* Has the mux been shut down */
+	struct gsm_dlci *dlci[NUM_DLCI];
+	int constipated;		/* Asked by remote to shut up */
+
+	spinlock_t tx_lock;
+	unsigned int tx_bytes;		/* TX data outstanding */
+#define TX_THRESH_HI		8192
+#define TX_THRESH_LO		2048
+	struct gsm_msg *tx_head;	/* Pending data packets */
+	struct gsm_msg *tx_tail;
+
+	/* Control messages */
+	struct timer_list t2_timer;	/* Retransmit timer for commands */
+	int cretries;			/* Command retry counter */
+	struct gsm_control *pending_cmd;/* Our current pending command */
+	spinlock_t control_lock;	/* Protects the pending command */
+
+	/* Configuration */
+	int adaption;		/* 1 or 2 supported */
+	u8 ftype;		/* UI or UIH */
+	int t1, t2;		/* Timers in 1/100th of a sec */
+	int n2;			/* Retry count */
+
+	/* Statistics (not currently exposed) */
+	unsigned long bad_fcs;
+	unsigned long malformed;
+	unsigned long io_error;
+	unsigned long bad_size;
+	unsigned long unsupported;
+};
+
+
+/*
+ *	Mux objects - needed so that we can translate a tty index into the
+ *	relevant mux and DLCI.
+ */
+
+#define MAX_MUX		4			/* 256 minors */
+static struct gsm_mux *gsm_mux[MAX_MUX];	/* GSM muxes */
+static spinlock_t gsm_mux_lock;
+
+/*
+ *	This section of the driver logic implements the GSM encodings
+ *	both the basic and the 'advanced'. Reliable transport is not
+ *	supported.
+ */
+
+#define CR			0x02
+#define EA			0x01
+#define	PF			0x10
+
+/* I is special: the rest are ..*/
+#define RR			0x01
+#define UI			0x03
+#define RNR			0x05
+#define REJ			0x09
+#define DM			0x0F
+#define SABM			0x2F
+#define DISC			0x43
+#define UA			0x63
+#define	UIH			0xEF
+
+/* Channel commands */
+#define CMD_NSC			0x09
+#define CMD_TEST		0x11
+#define CMD_PSC			0x21
+#define CMD_RLS			0x29
+#define CMD_FCOFF		0x31
+#define CMD_PN			0x41
+#define CMD_RPN			0x49
+#define CMD_FCON		0x51
+#define CMD_CLD			0x61
+#define CMD_SNC			0x69
+#define CMD_MSC			0x71
+
+/* Virtual modem bits */
+#define MDM_FC			0x01
+#define MDM_RTC			0x02
+#define MDM_RTR			0x04
+#define MDM_IC			0x20
+#define MDM_DV			0x40
+
+#define GSM0_SOF		0xF9
+#define GSM1_SOF 		0x7E
+#define GSM1_ESCAPE		0x7D
+#define GSM1_ESCAPE_BITS	0x20
+#define XON			0x11
+#define XOFF			0x13
+
+static const struct tty_port_operations gsm_port_ops;
+
+/*
+ *	CRC table for GSM 0710
+ */
+
+static const u8 gsm_fcs8[256] = {
+	0x00, 0x91, 0xE3, 0x72, 0x07, 0x96, 0xE4, 0x75,
+	0x0E, 0x9F, 0xED, 0x7C, 0x09, 0x98, 0xEA, 0x7B,
+	0x1C, 0x8D, 0xFF, 0x6E, 0x1B, 0x8A, 0xF8, 0x69,
+	0x12, 0x83, 0xF1, 0x60, 0x15, 0x84, 0xF6, 0x67,
+	0x38, 0xA9, 0xDB, 0x4A, 0x3F, 0xAE, 0xDC, 0x4D,
+	0x36, 0xA7, 0xD5, 0x44, 0x31, 0xA0, 0xD2, 0x43,
+	0x24, 0xB5, 0xC7, 0x56, 0x23, 0xB2, 0xC0, 0x51,
+	0x2A, 0xBB, 0xC9, 0x58, 0x2D, 0xBC, 0xCE, 0x5F,
+	0x70, 0xE1, 0x93, 0x02, 0x77, 0xE6, 0x94, 0x05,
+	0x7E, 0xEF, 0x9D, 0x0C, 0x79, 0xE8, 0x9A, 0x0B,
+	0x6C, 0xFD, 0x8F, 0x1E, 0x6B, 0xFA, 0x88, 0x19,
+	0x62, 0xF3, 0x81, 0x10, 0x65, 0xF4, 0x86, 0x17,
+	0x48, 0xD9, 0xAB, 0x3A, 0x4F, 0xDE, 0xAC, 0x3D,
+	0x46, 0xD7, 0xA5, 0x34, 0x41, 0xD0, 0xA2, 0x33,
+	0x54, 0xC5, 0xB7, 0x26, 0x53, 0xC2, 0xB0, 0x21,
+	0x5A, 0xCB, 0xB9, 0x28, 0x5D, 0xCC, 0xBE, 0x2F,
+	0xE0, 0x71, 0x03, 0x92, 0xE7, 0x76, 0x04, 0x95,
+	0xEE, 0x7F, 0x0D, 0x9C, 0xE9, 0x78, 0x0A, 0x9B,
+	0xFC, 0x6D, 0x1F, 0x8E, 0xFB, 0x6A, 0x18, 0x89,
+	0xF2, 0x63, 0x11, 0x80, 0xF5, 0x64, 0x16, 0x87,
+	0xD8, 0x49, 0x3B, 0xAA, 0xDF, 0x4E, 0x3C, 0xAD,
+	0xD6, 0x47, 0x35, 0xA4, 0xD1, 0x40, 0x32, 0xA3,
+	0xC4, 0x55, 0x27, 0xB6, 0xC3, 0x52, 0x20, 0xB1,
+	0xCA, 0x5B, 0x29, 0xB8, 0xCD, 0x5C, 0x2E, 0xBF,
+	0x90, 0x01, 0x73, 0xE2, 0x97, 0x06, 0x74, 0xE5,
+	0x9E, 0x0F, 0x7D, 0xEC, 0x99, 0x08, 0x7A, 0xEB,
+	0x8C, 0x1D, 0x6F, 0xFE, 0x8B, 0x1A, 0x68, 0xF9,
+	0x82, 0x13, 0x61, 0xF0, 0x85, 0x14, 0x66, 0xF7,
+	0xA8, 0x39, 0x4B, 0xDA, 0xAF, 0x3E, 0x4C, 0xDD,
+	0xA6, 0x37, 0x45, 0xD4, 0xA1, 0x30, 0x42, 0xD3,
+	0xB4, 0x25, 0x57, 0xC6, 0xB3, 0x22, 0x50, 0xC1,
+	0xBA, 0x2B, 0x59, 0xC8, 0xBD, 0x2C, 0x5E, 0xCF
+};
+
+#define INIT_FCS	0xFF
+#define GOOD_FCS	0xCF
+
+/**
+ *	gsm_fcs_add	-	update FCS
+ *	@fcs: Current FCS
+ *	@c: Next data
+ *
+ *	Update the FCS to include c. Uses the algorithm in the specification
+ *	notes.
+ */
+
+static inline u8 gsm_fcs_add(u8 fcs, u8 c)
+{
+	return gsm_fcs8[fcs ^ c];
+}
+
+/**
+ *	gsm_fcs_add_block	-	update FCS for a block
+ *	@fcs: Current FCS
+ *	@c: buffer of data
+ *	@len: length of buffer
+ *
+ *	Update the FCS to include c. Uses the algorithm in the specification
+ *	notes.
+ */
+
+static inline u8 gsm_fcs_add_block(u8 fcs, u8 *c, int len)
+{
+	while (len--)
+		fcs = gsm_fcs8[fcs ^ *c++];
+	return fcs;
+}
+
+/**
+ *	gsm_read_ea		-	read a byte into an EA
+ *	@val: variable holding value
+ *	@c: byte going into the EA
+ *
+ *	Processes one byte of an EA. Updates the passed variable
+ *	and returns 1 if the EA is now completely read
+ */
+
+static int gsm_read_ea(unsigned int *val, u8 c)
+{
+	/* Add the next 7 bits into the value */
+	*val <<= 7;
+	*val |= c >> 1;
+	/* Was this the last byte of the EA 1 = yes*/
+	return c & EA;
+}
+
+/**
+ *	gsm_encode_modem	-	encode modem data bits
+ *	@dlci: DLCI to encode from
+ *
+ *	Returns the correct GSM encoded modem status bits (6 bit field) for
+ *	the current status of the DLCI and attached tty object
+ */
+
+static u8 gsm_encode_modem(const struct gsm_dlci *dlci)
+{
+	u8 modembits = 0;
+	/* FC is true flow control not modem bits */
+	if (dlci->throttled)
+		modembits |= MDM_FC;
+	if (dlci->modem_tx & TIOCM_DTR)
+		modembits |= MDM_RTC;
+	if (dlci->modem_tx & TIOCM_RTS)
+		modembits |= MDM_RTR;
+	if (dlci->modem_tx & TIOCM_RI)
+		modembits |= MDM_IC;
+	if (dlci->modem_tx & TIOCM_CD)
+		modembits |= MDM_DV;
+	return modembits;
+}
+
+/**
+ *	gsm_print_packet	-	display a frame for debug
+ *	@hdr: header to print before decode
+ *	@addr: address EA from the frame
+ *	@cr: C/R bit from the frame
+ *	@control: control including PF bit
+ *	@data: following data bytes
+ *	@dlen: length of data
+ *
+ *	Displays a packet in human readable format for debugging purposes. The
+ *	style is based on amateur radio LAP-B dump display.
+ */
+
+static void gsm_print_packet(const char *hdr, int addr, int cr,
+					u8 control, const u8 *data, int dlen)
+{
+	if (!(debug & 1))
+		return;
+
+	printk(KERN_INFO "%s %d) %c: ", hdr, addr, "RC"[cr]);
+
+	switch (control & ~PF) {
+	case SABM:
+		printk(KERN_CONT "SABM");
+		break;
+	case UA:
+		printk(KERN_CONT "UA");
+		break;
+	case DISC:
+		printk(KERN_CONT "DISC");
+		break;
+	case DM:
+		printk(KERN_CONT "DM");
+		break;
+	case UI:
+		printk(KERN_CONT "UI");
+		break;
+	case UIH:
+		printk(KERN_CONT "UIH");
+		break;
+	default:
+		if (!(control & 0x01)) {	/* low bit clear: I frame */
+			printk(KERN_CONT "I N(S)%d N(R)%d",
+				(control & 0x0E) >> 1, (control & 0xE0) >> 5);
+		} else switch (control & 0x0F) {
+		case RR:
+			printk(KERN_CONT "RR(%d)", (control & 0xE0) >> 5);
+			break;
+		case RNR:
+			printk(KERN_CONT "RNR(%d)", (control & 0xE0) >> 5);
+			break;
+		case REJ:
+			printk(KERN_CONT "REJ(%d)", (control & 0xE0) >> 5);
+			break;
+		default:
+			printk(KERN_CONT "[%02X]", control);
+		}
+	}
+
+	if (control & PF)
+		printk(KERN_CONT "(P)");
+	else
+		printk(KERN_CONT "(F)");
+
+	if (dlen) {
+		int ct = 0;
+		while (dlen--) {
+			if (ct % 8 == 0)
+				printk(KERN_CONT "\n    ");
+			printk(KERN_CONT "%02X ", *data++);
+			ct++;
+		}
+	}
+	printk(KERN_CONT "\n");
+}
+
+
+/*
+ *	Link level transmission side
+ */
+
+/**
+ *	gsm_stuff_frame	-	bytestuff a packet
+ *	@input: input
+ *	@output: output
+ *	@len: length of input
+ *
+ *	Expand a buffer by bytestuffing it. The worst case size change
+ *	is doubling and the caller is responsible for handing out
+ *	suitable sized buffers.
+ */
+
+static int gsm_stuff_frame(const u8 *input, u8 *output, int len)
+{
+	int olen = 0;
+	while (len--) {
+		if (*input == GSM1_SOF || *input == GSM1_ESCAPE
+		    || *input == XON || *input == XOFF) {
+			*output++ = GSM1_ESCAPE;
+			*output++ = *input++ ^ GSM1_ESCAPE_BITS;
+			olen++;
+		} else
+			*output++ = *input++;
+		olen++;
+	}
+	return olen;
+}
+
+static void hex_packet(const unsigned char *p, int len)
+{
+	int i;
+	for (i = 0; i < len; i++) {
+		if (i && (i % 16) == 0)
+			printk("\n");
+		printk("%02X ", *p++);
+	}
+	printk("\n");
+}
+
+/**
+ *	gsm_send	-	send a control frame
+ *	@gsm: our GSM mux
+ *	@addr: address for control frame
+ *	@cr: command/response bit
+ *	@control:  control byte including PF bit
+ *
+ *	Format up and transmit a control frame. These do not go via the
+ *	queueing logic as they should be transmitted ahead of data when
+ *	they are needed.
+ *
+ *	FIXME: Lock versus data TX path
+ */
+
+static void gsm_send(struct gsm_mux *gsm, int addr, int cr, int control)
+{
+	int len;
+	u8 cbuf[10];
+	u8 ibuf[3];
+
+	switch (gsm->encoding) {
+	case 0:
+		cbuf[0] = GSM0_SOF;
+		cbuf[1] = (addr << 2) | (cr << 1) | EA;
+		cbuf[2] = control;
+		cbuf[3] = EA;	/* Length of data = 0 */
+		cbuf[4] = 0xFF - gsm_fcs_add_block(INIT_FCS, cbuf + 1, 3);
+		cbuf[5] = GSM0_SOF;
+		len = 6;
+		break;
+	case 1:
+	case 2:
+		/* Control frame + packing (but not frame stuffing) in mode 1 */
+		ibuf[0] = (addr << 2) | (cr << 1) | EA;
+		ibuf[1] = control;
+		ibuf[2] = 0xFF - gsm_fcs_add_block(INIT_FCS, ibuf, 2);
+		/* Stuffing may double the size worst case */
+		len = gsm_stuff_frame(ibuf, cbuf + 1, 3);
+		/* Now add the SOF markers */
+		cbuf[0] = GSM1_SOF;
+		cbuf[len + 1] = GSM1_SOF;
+		/* FIXME: we can omit the lead one in many cases */
+		len += 2;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+	gsm->output(gsm, cbuf, len);
+	gsm_print_packet("-->", addr, cr, control, NULL, 0);
+}
+
+/**
+ *	gsm_response	-	send a control response
+ *	@gsm: our GSM mux
+ *	@addr: address for control frame
+ *	@control:  control byte including PF bit
+ *
+ *	Format up and transmit a link level response frame.
+ */
+
+static inline void gsm_response(struct gsm_mux *gsm, int addr, int control)
+{
+	gsm_send(gsm, addr, 0, control);
+}
+
+/**
+ *	gsm_command	-	send a control command
+ *	@gsm: our GSM mux
+ *	@addr: address for control frame
+ *	@control:  control byte including PF bit
+ *
+ *	Format up and transmit a link level command frame.
+ */
+
+static inline void gsm_command(struct gsm_mux *gsm, int addr, int control)
+{
+	gsm_send(gsm, addr, 1, control);
+}
+
+/* Data transmission */
+
+#define HDR_LEN		6	/* ADDR CTRL [LEN.2] DATA FCS */
+
+/**
+ *	gsm_data_alloc		-	allocate data frame
+ *	@gsm: GSM mux
+ *	@addr: DLCI address
+ *	@len: length excluding header and FCS
+ *	@ctrl: control byte
+ *
+ *	Allocate a new data buffer for sending frames with data. Space is left
+ *	at the front for header bytes but that is treated as an implementation
+ *	detail and not for the high level code to use
+ */
+
+static struct gsm_msg *gsm_data_alloc(struct gsm_mux *gsm, u8 addr, int len,
+								u8 ctrl)
+{
+	struct gsm_msg *m = kmalloc(sizeof(struct gsm_msg) + len + HDR_LEN,
+								GFP_ATOMIC);
+	if (m == NULL)
+		return NULL;
+	m->data = m->buffer + HDR_LEN - 1;	/* Allow for FCS */
+	m->len = len;
+	m->addr = addr;
+	m->ctrl = ctrl;
+	m->next = NULL;
+	return m;
+}
+
+/**
+ *	gsm_data_kick		-	poke the queue
+ *	@gsm: GSM Mux
+ *
+ *	The tty device has called us to indicate that room has appeared in
+ *	the transmit queue. Ram more data into the pipe if we have any
+ *
+ *	FIXME: lock against link layer control transmissions
+ */
+
+static void gsm_data_kick(struct gsm_mux *gsm)
+{
+	struct gsm_msg *msg = gsm->tx_head;
+	int len;
+	int skip_sof = 0;
+
+	/* FIXME: We need to apply this solely to data messages */
+	if (gsm->constipated)
+		return;
+
+	while (gsm->tx_head != NULL) {
+		msg = gsm->tx_head;
+		if (gsm->encoding != 0) {
+			gsm->txframe[0] = GSM1_SOF;
+			len = gsm_stuff_frame(msg->data,
+						gsm->txframe + 1, msg->len);
+			gsm->txframe[len + 1] = GSM1_SOF;
+			len += 2;
+		} else {
+			gsm->txframe[0] = GSM0_SOF;
+			memcpy(gsm->txframe + 1 , msg->data, msg->len);
+			gsm->txframe[msg->len + 1] = GSM0_SOF;
+			len = msg->len + 2;
+		}
+
+		if (debug & 4) {
+			printk("gsm_data_kick: \n");
+			hex_packet(gsm->txframe, len);
+		}
+
+		if (gsm->output(gsm, gsm->txframe + skip_sof,
+						len - skip_sof) < 0)
+			break;
+		/* FIXME: Can eliminate one SOF in many more cases */
+		gsm->tx_head = msg->next;
+		if (gsm->tx_head == NULL)
+			gsm->tx_tail = NULL;
+		gsm->tx_bytes -= msg->len;
+		kfree(msg);
+		/* For a burst of frames skip the extra SOF within the
+		   burst */
+		skip_sof = 1;
+	}
+}
+
+/**
+ *	__gsm_data_queue		-	queue a UI or UIH frame
+ *	@dlci: DLCI sending the data
+ *	@msg: message queued
+ *
+ *	Add data to the transmit queue and try and get stuff moving
+ *	out of the mux tty if not already doing so. The Caller must hold
+ *	the gsm tx lock.
+ */
+
+static void __gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+	u8 *dp = msg->data;
+	u8 *fcs = dp + msg->len;
+
+	/* Fill in the header */
+	if (gsm->encoding == 0) {
+		if (msg->len < 128)
+			*--dp = (msg->len << 1) | EA;
+		else {
+			*--dp = (msg->len >> 6) | EA;
+			*--dp = (msg->len & 127) << 1;
+		}
+	}
+
+	*--dp = msg->ctrl;
+	if (gsm->initiator)
+		*--dp = (msg->addr << 2) | 2 | EA;
+	else
+		*--dp = (msg->addr << 2) | EA;
+	*fcs = gsm_fcs_add_block(INIT_FCS, dp , msg->data - dp);
+	/* Ugly protocol layering violation */
+	if (msg->ctrl == UI || msg->ctrl == (UI|PF))
+		*fcs = gsm_fcs_add_block(*fcs, msg->data, msg->len);
+	*fcs = 0xFF - *fcs;
+
+	gsm_print_packet("Q> ", msg->addr, gsm->initiator, msg->ctrl,
+							msg->data, msg->len);
+
+	/* Move the header back and adjust the length, also allow for the FCS
+	   now tacked on the end */
+	msg->len += (msg->data - dp) + 1;
+	msg->data = dp;
+
+	/* Add to the actual output queue */
+	if (gsm->tx_tail)
+		gsm->tx_tail->next = msg;
+	else
+		gsm->tx_head = msg;
+	gsm->tx_tail = msg;
+	gsm->tx_bytes += msg->len;
+	gsm_data_kick(gsm);
+}
+
+/**
+ *	gsm_data_queue		-	queue a UI or UIH frame
+ *	@dlci: DLCI sending the data
+ *	@msg: message queued
+ *
+ *	Add data to the transmit queue and try and get stuff moving
+ *	out of the mux tty if not already doing so. Takes the gsm tx
+ *	lock itself, so the caller must not already hold it.
+ */
+
+static void gsm_data_queue(struct gsm_dlci *dlci, struct gsm_msg *msg)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&dlci->gsm->tx_lock, flags);
+	__gsm_data_queue(dlci, msg);
+	spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags);
+}
+
+/**
+ *	gsm_dlci_data_output	-	try and push data out of a DLCI
+ *	@gsm: mux
+ *	@dlci: the DLCI to pull data from
+ *
+ *	Pull data from a DLCI and send it into the transmit queue if there
+ *	is data. Keep to the MTU of the mux. This path handles the usual tty
+ *	interface which is a byte stream with optional modem data. Returns
+ *	the size queued, 0 if the fifo was empty or -ENOMEM.
+ *	Caller must hold the tx_lock of the mux.
+ */
+
+static int gsm_dlci_data_output(struct gsm_mux *gsm, struct gsm_dlci *dlci)
+{
+	struct gsm_msg *msg;
+	u8 *dp;
+	int len, size;
+	int h = dlci->adaption - 1;
+
+	len = kfifo_len(dlci->fifo);
+	if (len == 0)
+		return 0;
+
+	/* MTU/MRU count only the data bits */
+	if (len > gsm->mtu)
+		len = gsm->mtu;
+
+	size = len + h;
+
+	msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+	/* FIXME: need a timer or something to kick this so it can't
+	   get stuck with no work outstanding and no buffer free */
+	if (msg == NULL)
+		return -ENOMEM;
+	dp = msg->data;
+	switch (dlci->adaption) {
+	case 1:	/* Unstructured */
+		break;
+	case 2:	/* Unstructured with modem bits. Always one byte as we never
+		   send inline break data */
+		*dp++ = gsm_encode_modem(dlci);
+		/* len is still the fifo byte count: size allowed for this */
+		break;
+	}
+	WARN_ON(kfifo_out_locked(dlci->fifo, dp , len, &dlci->lock) != len);
+	__gsm_data_queue(dlci, msg);
+	/* Bytes of data we used up */
+	return size;
+}
+
+/**
+ *	gsm_dlci_data_output_framed  -	try and push data out of a DLCI
+ *	@gsm: mux
+ *	@dlci: the DLCI to pull data from
+ *
+ *	Pull data from a DLCI and send it into the transmit queue if there
+ *	is data. Keep to the MTU of the mux. This path handles framed data
+ *	queued as skbuffs to the DLCI.
+ *
+ *	Caller must hold the tx_lock of the mux.
+ */
+
+static int gsm_dlci_data_output_framed(struct gsm_mux *gsm,
+						struct gsm_dlci *dlci)
+{
+	struct gsm_msg *msg;
+	u8 *dp;
+	int len, size, last = 0, first = 0, overhead = 0;
+
+	/* One byte per frame is used for B/F flags */
+	if (dlci->adaption == 4)
+		overhead = 1;
+
+	/* dlci->skb is locked by tx_lock */
+	if (dlci->skb == NULL) {
+		dlci->skb = skb_dequeue(&dlci->skb_list);
+		if (dlci->skb == NULL)
+			return 0;
+		first = 1;
+	}
+	len = dlci->skb->len + overhead;
+
+	/* MTU/MRU count only the data bits */
+	if (len > gsm->mtu) {
+		if (dlci->adaption == 3) {
+			/* Over long frame, bin it */
+			kfree_skb(dlci->skb);
+			dlci->skb = NULL;
+			return 0;
+		}
+		len = gsm->mtu;
+	} else
+		last = 1;
+
+	size = len + overhead;
+	msg = gsm_data_alloc(gsm, dlci->addr, size, gsm->ftype);
+	/* FIXME: need a timer or something to kick this so it can't
+	   get stuck with no work outstanding and no buffer free */
+	if (msg == NULL)
+		return -ENOMEM;
+	dp = msg->data;
+	if (dlci->adaption == 4) { /* Interruptible framed (Packetised Data) */
+		/* Flag byte to carry the start/end info */
+		*dp++ = last << 7 | first << 6 | 1;	/* EA */
+		len--;
+	}
+	/* Copy first: skb_pull() returns the data pointer after the pull */
+	memcpy(dp, dlci->skb->data, len);
+	skb_pull(dlci->skb, len);
+	__gsm_data_queue(dlci, msg);
+	if (last) {
+		kfree_skb(dlci->skb);	/* fully sent: drop our reference */
+		dlci->skb = NULL;
+	}
+	return size;
+}
+
+/**
+ *	gsm_dlci_data_sweep		-	look for data to send
+ *	@gsm: the GSM mux
+ *
+ *	Sweep the GSM mux channels in priority order looking for ones with
+ *	data to send. We could do with optimising this scan a bit. We aim
+ *	to fill the queue totally or up to TX_THRESH_HI bytes. Once we hit
+ *	TX_THRESH_LO we get called again
+ *
+ *	FIXME: We should round robin between groups and in theory you can
+ *	renegotiate DLCI priorities with optional stuff. Needs optimising.
+ */
+
+static void gsm_dlci_data_sweep(struct gsm_mux *gsm)
+{
+	int len;
+	/* Priority ordering: We should do priority with RR of the groups */
+	int i = 1;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gsm->tx_lock, flags);
+	while (i < NUM_DLCI) {
+		struct gsm_dlci *dlci;
+
+		if (gsm->tx_bytes > TX_THRESH_HI)
+			break;
+		dlci = gsm->dlci[i];
+		if (dlci == NULL || dlci->constipated) {
+			i++;
+			continue;
+		}
+		if (dlci->adaption < 3)
+			len = gsm_dlci_data_output(gsm, dlci);
+		else
+			len = gsm_dlci_data_output_framed(gsm, dlci);
+		if (len < 0)	/* Out of memory: stop, but still unlock */
+			break;
+		/* DLCI empty - try the next */
+		if (len == 0)
+			i++;
+	}
+	spin_unlock_irqrestore(&gsm->tx_lock, flags);
+}
+
+/**
+ *	gsm_dlci_data_kick	-	transmit if possible
+ *	@dlci: DLCI to kick
+ *
+ *	Transmit data from this DLCI if the queue is empty. We can't rely on
+ *	a tty wakeup except when we filled the pipe so we need to fire off
+ *	new data ourselves in other cases.
+ */
+
+static void gsm_dlci_data_kick(struct gsm_dlci *dlci)
+{
+	unsigned long flags;
+	unsigned int queued;	/* tx_bytes before we add anything */
+	spin_lock_irqsave(&dlci->gsm->tx_lock, flags);
+	queued = dlci->gsm->tx_bytes;
+	if (queued == 0)	/* Nothing running so we need to fire up */
+		gsm_dlci_data_output(dlci->gsm, dlci);
+	spin_unlock_irqrestore(&dlci->gsm->tx_lock, flags);
+	if (queued && queued < TX_THRESH_LO)	/* sweep takes tx_lock itself */
+		gsm_dlci_data_sweep(dlci->gsm);
+}
+
+/*
+ *	Control message processing
+ */
+
+
+/**
+ *	gsm_control_reply	-	send a response frame to a control
+ *	@gsm: gsm channel
+ *	@cmd: the command to use
+ *	@data: data to follow encoded info
+ *	@dlen: length of data
+ *
+ *	Encode up and queue a UI/UIH frame containing our response.
+ */
+
+static void gsm_control_reply(struct gsm_mux *gsm, int cmd, u8 *data, int dlen)
+{
+	struct gsm_msg *msg = gsm_data_alloc(gsm, 0, dlen + 2, gsm->ftype);
+	if (msg == NULL)	/* GFP_ATOMIC allocation failed: drop the reply */
+		return;
+	msg->data[0] = (cmd & 0xFE) << 1 | EA;	/* Clear C/R */
+	msg->data[1] = (dlen << 1) | EA;
+	memcpy(msg->data + 2, data, dlen);
+	gsm_data_queue(gsm->dlci[0], msg);
+}
+
+/**
+ *	gsm_process_modem	-	process received modem status
+ *	@tty: virtual tty bound to the DLCI
+ *	@dlci: DLCI to affect
+ *	@modem: modem bits (full EA)
+ *
+ *	Used when a modem control message or line state inline in adaption
+ *	layer 2 is processed. Sort out the local modem state and throttles
+ */
+
+static void gsm_process_modem(struct tty_struct *tty, struct gsm_dlci *dlci,
+							u32 modem)
+{
+	int  mlines = 0;
+	u8 brk = modem >> 6;
+
+	/* Flow control/ready to communicate */
+	if (modem & MDM_FC) {
+		/* Need to throttle our output on this device */
+		dlci->constipated = 1;
+	}
+	if (modem & MDM_RTC) {
+		mlines |= TIOCM_DSR | TIOCM_DTR;
+		dlci->constipated = 0;
+		gsm_dlci_data_kick(dlci);
+	}
+	/* Map modem bits */
+	if (modem & MDM_RTR)
+		mlines |= TIOCM_RTS | TIOCM_CTS;
+	if (modem & MDM_IC)
+		mlines |= TIOCM_RI;
+	if (modem & MDM_DV)
+		mlines |= TIOCM_CD;
+
+	/* Carrier drop -> hangup */
+	if (tty) {
+		if ((mlines & TIOCM_CD) == 0 && (dlci->modem_rx & TIOCM_CD))
+			if (!(tty->termios->c_cflag & CLOCAL))
+				tty_hangup(tty);
+		if (brk & 0x01)
+			tty_insert_flip_char(tty, 0, TTY_BREAK);
+	}
+	dlci->modem_rx = mlines;
+}
+
+/**
+ *	gsm_control_modem	-	modem status received
+ *	@gsm: GSM channel
+ *	@data: data following command
+ *	@clen: command length
+ *
+ *	We have received a modem status control message. This is used by
+ *	the GSM mux protocol to pass virtual modem line status and optionally
+ *	to indicate break signals. Unpack it, convert to Linux representation
+ *	and if need be stuff a break message down the tty.
+ */
+
+static void gsm_control_modem(struct gsm_mux *gsm, u8 *data, int clen)
+{
+	unsigned int addr = 0;
+	unsigned int modem = 0;
+	struct gsm_dlci *dlci;
+	int len = clen;
+	u8 *dp = data;
+	struct tty_struct *tty;
+
+	while (gsm_read_ea(&addr, *dp++) == 0) {
+		len--;
+		if (len == 0)
+			return;
+	}
+	/* Must be at least one byte following the EA */
+	len--;
+	if (len <= 0)
+		return;
+
+	addr >>= 1;
+	/* Closed port, or invalid ? */
+	if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL)
+		return;
+	dlci = gsm->dlci[addr];
+
+	while (gsm_read_ea(&modem, *dp++) == 0) {
+		len--;
+		if (len == 0)
+			return;
+	}
+	tty = tty_port_tty_get(&dlci->port);
+	gsm_process_modem(tty, dlci, modem);
+	if (tty) {
+		tty_wakeup(tty);
+		tty_kref_put(tty);
+	}
+	gsm_control_reply(gsm, CMD_MSC, data, clen);
+}
+
+/**
+ *	gsm_control_rls		-	remote line status
+ *	@gsm: GSM channel
+ *	@data: data bytes
+ *	@clen: data length
+ *
+ *	The modem sends us a two byte message on the control channel whenever
+ *	it wishes to send us an error state from the virtual link. Stuff
+ *	this into the uplink tty if present
+ */
+
+static void gsm_control_rls(struct gsm_mux *gsm, u8 *data, int clen)
+{
+	struct tty_struct *tty;
+	unsigned int addr = 0 ;
+	u8 bits;
+	int len = clen;
+	u8 *dp = data;
+
+	while (gsm_read_ea(&addr, *dp++) == 0) {
+		len--;
+		if (len == 0)
+			return;
+	}
+	/* Must be at least one byte following ea */
+	len--;
+	if (len <= 0)
+		return;
+	addr >>= 1;
+	/* Closed port, or invalid ? */
+	if (addr == 0 || addr >= NUM_DLCI || gsm->dlci[addr] == NULL)
+		return;
+	/* No error ? */
+	bits = *dp;
+	if ((bits & 1) == 0)
+		return;
+	/* See if we have an uplink tty */
+	tty = tty_port_tty_get(&gsm->dlci[addr]->port);
+
+	if (tty) {
+		if (bits & 2)
+			tty_insert_flip_char(tty, 0, TTY_OVERRUN);
+		if (bits & 4)
+			tty_insert_flip_char(tty, 0, TTY_PARITY);
+		if (bits & 8)
+			tty_insert_flip_char(tty, 0, TTY_FRAME);
+		tty_flip_buffer_push(tty);
+		tty_kref_put(tty);
+	}
+	gsm_control_reply(gsm, CMD_RLS, data, clen);
+}
+
+static void gsm_dlci_begin_close(struct gsm_dlci *dlci);
+
+/**
+ *	gsm_control_message	-	DLCI 0 control processing
+ *	@gsm: our GSM mux
+ *	@command:  the command EA
+ *	@data: data beyond the command/length EAs
+ *	@clen: length
+ *
+ *	Input processor for control messages from the other end of the link.
+ *	Processes the incoming request and queues a response frame or an
+ *	NSC response if not supported
+ */
+
+static void gsm_control_message(struct gsm_mux *gsm, unsigned int command,
+							u8 *data, int clen)
+{
+	u8 buf[1];
+	switch (command) {
+	case CMD_CLD: {
+		struct gsm_dlci *dlci = gsm->dlci[0];
+		/* Modem wishes to close down */
+		if (dlci) {
+			dlci->dead = 1;
+			gsm->dead = 1;
+			gsm_dlci_begin_close(dlci);
+		}
+		}
+		break;
+	case CMD_TEST:
+		/* Modem wishes to test, reply with the data */
+		gsm_control_reply(gsm, CMD_TEST, data, clen);
+		break;
+	case CMD_FCON:
+		/* FCon: modem can accept data again */
+		gsm->constipated = 0;
+		gsm_control_reply(gsm, CMD_FCON, NULL, 0);
+		/* Kick the link in case it is idling */
+		gsm_data_kick(gsm);
+		break;
+	case CMD_FCOFF:
+		/* FCoff: modem wants us to stop sending */
+		gsm->constipated = 1;
+		gsm_control_reply(gsm, CMD_FCOFF, NULL, 0);
+		break;
+	case CMD_MSC:
+		/* Out of band modem line change indicator for a DLCI */
+		gsm_control_modem(gsm, data, clen);
+		break;
+	case CMD_RLS:
+		/* Out of band error reception for a DLCI */
+		gsm_control_rls(gsm, data, clen);
+		break;
+	case CMD_PSC:
+		/* Modem wishes to enter power saving state */
+		gsm_control_reply(gsm, CMD_PSC, NULL, 0);
+		break;
+		/* Optional unsupported commands */
+	case CMD_PN:	/* Parameter negotiation */
+	case CMD_RPN:	/* Remote port negotiation */
+	case CMD_SNC:	/* Service negotiation command */
+	default:
+		/* Reply to bad commands with an NSC */
+		buf[0] = command;
+		gsm_control_reply(gsm, CMD_NSC, buf, 1);
+		break;
+	}
+}
+
+/**
+ *	gsm_control_response	-	process a response to our control
+ *	@gsm: our GSM mux
+ *	@command: the command (response) EA
+ *	@data: data beyond the command/length EA
+ *	@clen: length
+ *
+ *	Process a response to an outstanding command. We only allow a single
+ *	control message in flight so this is fairly easy. All the clean up
+ *	is done by the caller, we just update the fields, flag it as done
+ *	and return
+ */
+
+static void gsm_control_response(struct gsm_mux *gsm, unsigned int command,
+							u8 *data, int clen)
+{
+	struct gsm_control *pending;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gsm->control_lock, flags);
+
+	pending = gsm->pending_cmd;
+	/* Set bit 0 so the value compares against the command we stored */
+	command |= 1;
+	if (pending == NULL)
+		goto out_unlock;
+	/* Only a matching reply or an NSC rejection completes the command */
+	if (command != pending->cmd && command != CMD_NSC)
+		goto out_unlock;
+
+	/* Answered: stop retransmitting and release the pending slot */
+	del_timer(&gsm->t2_timer);
+	gsm->pending_cmd = NULL;
+	if (command == CMD_NSC)
+		pending->error = -EOPNOTSUPP;
+	pending->done = 1;
+	wake_up(&gsm->event);
+
+out_unlock:
+	spin_unlock_irqrestore(&gsm->control_lock, flags);
+}
+
+/**
+ *	gsm_control_transmit 	-	send control packet
+ *	@gsm: gsm mux
+ *	@ctrl: frame to send
+ *
+ *	Send out a pending control command (called under control lock)
+ */
+
+static void gsm_control_transmit(struct gsm_mux *gsm, struct gsm_control *ctrl)
+{
+	struct gsm_msg *msg;
+
+	/* One extra byte in front of the payload for the command octet */
+	msg = gsm_data_alloc(gsm, 0, ctrl->len + 1, gsm->ftype|PF);
+	if (!msg)
+		return;
+	msg->data[0] = (ctrl->cmd << 1) | 2 | EA;
+	memcpy(&msg->data[1], ctrl->data, ctrl->len);
+	/* Control traffic goes out on DLCI 0 */
+	gsm_data_queue(gsm->dlci[0], msg);
+}
+
+/**
+ *	gsm_control_retransmit	-	retransmit a control frame
+ *	@data: pointer to our gsm object
+ *
+ *	Called off the T2 timer expiry in order to retransmit control frames
+ *	that have been lost in the system somewhere. The control_lock protects
+ *	us from colliding with another sender or a receive completion event.
+ *	In that situation the timer may still occur in a small window but
+ *	gsm->pending_cmd will be NULL and we just let the timer expire.
+ */
+
+static void gsm_control_retransmit(unsigned long data)
+{
+	struct gsm_mux *gsm = (struct gsm_mux *)data;
+	struct gsm_control *pending;
+	unsigned long flags;
+
+	spin_lock_irqsave(&gsm->control_lock, flags);
+	pending = gsm->pending_cmd;
+	/* Nothing outstanding: the command completed before we ran */
+	if (pending == NULL)
+		goto out_unlock;
+
+	if (--gsm->cretries == 0) {
+		/* Retry budget used up: fail the command with a timeout */
+		gsm->pending_cmd = NULL;
+		pending->error = -ETIMEDOUT;
+		pending->done = 1;
+		spin_unlock_irqrestore(&gsm->control_lock, flags);
+		wake_up(&gsm->event);
+		return;
+	}
+	/* Send it again and re-arm T2 */
+	gsm_control_transmit(gsm, pending);
+	mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
+
+out_unlock:
+	spin_unlock_irqrestore(&gsm->control_lock, flags);
+}
+
+/**
+ *	gsm_control_send	-	send a control frame on DLCI 0
+ *	@gsm: the GSM channel
+ *	@command: command  to send including CR bit
+ *	@data: bytes of data (must be kmalloced)
+ *	@clen: length of the block to send
+ *
+ *	Queue and dispatch a control command. Only one command can be
+ *	active at a time. In theory more can be outstanding but the matching
+ *	gets really complicated so for now stick to one outstanding.
+ */
+
+static struct gsm_control *gsm_control_send(struct gsm_mux *gsm,
+		unsigned int command, u8 *data, int clen)
+{
+	struct gsm_control *ctrl;
+	unsigned long flags;
+
+	ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		return NULL;
+
+	/*
+	 * Sleep until the single command slot looks free, then confirm it
+	 * under the lock; go round again if someone else took it first.
+	 */
+	for (;;) {
+		wait_event(gsm->event, gsm->pending_cmd == NULL);
+		spin_lock_irqsave(&gsm->control_lock, flags);
+		if (gsm->pending_cmd == NULL)
+			break;
+		spin_unlock_irqrestore(&gsm->control_lock, flags);
+	}
+
+	/* control_lock is held from here until the unlock below */
+	ctrl->cmd = command;
+	ctrl->data = data;
+	ctrl->len = clen;
+	gsm->pending_cmd = ctrl;
+	gsm->cretries = gsm->n2;
+	mod_timer(&gsm->t2_timer, jiffies + gsm->t2 * HZ / 100);
+	gsm_control_transmit(gsm, ctrl);
+	spin_unlock_irqrestore(&gsm->control_lock, flags);
+	return ctrl;
+}
+
+/**
+ *	gsm_control_wait	-	wait for a control to finish
+ *	@gsm: GSM mux
+ *	@control: control we are waiting on
+ *
+ *	Waits for the control to complete or time out. Frees any used
+ *	resources and returns 0 for success, or an error if the remote
+ *	rejected or ignored the request.
+ */
+
+static int gsm_control_wait(struct gsm_mux *gsm, struct gsm_control *control)
+{
+	int result;
+
+	/* Block until the control is flagged done */
+	wait_event(gsm->event, control->done == 1);
+	/* Read the outcome before the control block is released */
+	result = control->error;
+	kfree(control);
+	return result;
+}
+
+
+/*
+ *	DLCI level handling: Needs krefs
+ */
+
+/*
+ *	State transitions and timers
+ */
+
+/**
+ *	gsm_dlci_close		-	a DLCI has closed
+ *	@dlci: DLCI that closed
+ *
+ *	Perform processing when moving a DLCI into closed state. If there
+ *	is an attached tty this is hung up
+ */
+
+static void gsm_dlci_close(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+	struct tty_struct *tty;
+
+	del_timer(&dlci->t1);
+	if (debug & 8)
+		printk("DLCI %d goes closed.\n", dlci->addr);
+	dlci->state = DLCI_CLOSED;
+
+	if (dlci->addr == 0) {
+		/* A DLCI 0 close is a MUX termination so we need to kick that
+		   back to userspace somehow; for now just flag the mux dead */
+		gsm->dead = 1;
+	} else {
+		/* Data channel: hang up any attached tty and drop queued data */
+		tty = tty_port_tty_get(&dlci->port);
+		if (tty != NULL) {
+			tty_hangup(tty);
+			tty_kref_put(tty);
+		}
+		kfifo_reset(dlci->fifo);
+	}
+	wake_up(&gsm->event);
+}
+
+/**
+ *	gsm_dlci_open		-	a DLCI has opened
+ *	@dlci: DLCI that opened
+ *
+ *	Perform processing when moving a DLCI into open state.
+ */
+
+static void gsm_dlci_open(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+
+	/*
+	 * A lost first UA (SABM UA .. SABM UA) can bring us here while
+	 * already open, so open -> open has to be harmless.
+	 */
+	del_timer(&dlci->t1);
+	/* Waiters on the mux event queue check this state */
+	dlci->state = DLCI_OPEN;
+	if (debug & 8)
+		printk("DLCI %d goes open.\n", dlci->addr);
+	wake_up(&gsm->event);
+}
+
+/**
+ *	gsm_dlci_t1		-	T1 timer expiry
+ *	@data: the DLCI whose T1 timer expired, cast to unsigned long
+ *
+ *	The T1 timer handles retransmits of control frames (essentially of
+ *	SABM and DISC). We resend the command until the retry count runs out
+ *	in which case an opening port goes back to closed and a closing port
+ *	is simply put into closed state (any further frames from the other
+ *	end will get a DM response)
+ */
+
+static void gsm_dlci_t1(unsigned long data)
+{
+	struct gsm_dlci *dlci = (struct gsm_dlci *)data;
+	struct gsm_mux *gsm = dlci->gsm;
+	int frame;
+
+	/* Choose the frame to repeat; other states have nothing to resend */
+	if (dlci->state == DLCI_OPENING)
+		frame = SABM|PF;
+	else if (dlci->state == DLCI_CLOSING)
+		frame = DISC|PF;
+	else
+		return;
+
+	if (--dlci->retries == 0) {
+		/* Retries exhausted: treat the channel as closed */
+		gsm_dlci_close(dlci);
+		return;
+	}
+	gsm_command(gsm, dlci->addr, frame);
+	mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+}
+
+/**
+ *	gsm_dlci_begin_open	-	start channel open procedure
+ *	@dlci: DLCI to open
+ *
+ *	Commence opening a DLCI from the Linux side. We issue SABM messages
+ *	to the modem which should then reply with a UA, at which point we
+ *	will move into open state. Opening is done asynchronously with retry
+ *	running off timers and the responses.
+ */
+
+static void gsm_dlci_begin_open(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+
+	/* Already open, or an open is in flight: nothing to start */
+	switch (dlci->state) {
+	case DLCI_OPEN:
+	case DLCI_OPENING:
+		return;
+	default:
+		break;
+	}
+
+	dlci->retries = gsm->n2;
+	dlci->state = DLCI_OPENING;
+	/* First SABM goes out now, T1 drives the repeats */
+	gsm_command(gsm, dlci->addr, SABM|PF);
+	mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+}
+
+/**
+ *	gsm_dlci_begin_close	-	start channel close procedure
+ *	@dlci: DLCI to close
+ *
+ *	Commence closing a DLCI from the Linux side. We issue DISC messages
+ *	to the modem which should then reply with a UA, at which point we
+ *	will move into closed state. Closing is done asynchronously with retry
+ *	off timers. We may also receive a DM reply from the other end which
+ *	indicates the channel was already closed.
+ */
+
+static void gsm_dlci_begin_close(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+
+	/* Already closed, or a close is in flight: nothing to start */
+	switch (dlci->state) {
+	case DLCI_CLOSED:
+	case DLCI_CLOSING:
+		return;
+	default:
+		break;
+	}
+
+	dlci->retries = gsm->n2;
+	dlci->state = DLCI_CLOSING;
+	/* First DISC goes out now, T1 drives the repeats */
+	gsm_command(gsm, dlci->addr, DISC|PF);
+	mod_timer(&dlci->t1, jiffies + gsm->t1 * HZ / 100);
+}
+
+/**
+ *	gsm_dlci_data		-	data arrived
+ *	@dlci: channel
+ *	@data: block of bytes received
+ *	@len: length of received block
+ *
+ *	A UI or UIH frame has arrived which contains data for a channel
+ *	other than the control channel. If the relevant virtual tty is
+ *	open we shovel the bits down it, if not we drop them.
+ */
+
+static void gsm_dlci_data(struct gsm_dlci *dlci, u8 *data, int len)
+{
+	/* krefs .. */
+	struct tty_port *port = &dlci->port;
+	struct tty_struct *tty = tty_port_tty_get(port);
+	unsigned int modem = 0;
+
+	if (debug & 16)
+		printk("%d bytes for tty %p\n", len, tty);
+	/* No tty attached: the data is dropped */
+	if (tty == NULL)
+		return;
+
+	switch (dlci->adaption)  {
+	/* Unsupported types */
+	/* Packetised interruptible data */
+	case 4:
+		break;
+	/* Packetised uninterruptible voice/data */
+	case 3:
+		break;
+	/* Asynchronous serial with line state in each frame */
+	case 2:
+		/*
+		 * Strip the leading modem status EA. Every byte consumed,
+		 * including the terminating one, comes off len so that only
+		 * the real payload is handed to the tty. Every exit goes
+		 * through "out" so the tty reference is always dropped.
+		 */
+		do {
+			if (len <= 0)
+				goto out;
+			len--;
+		} while (gsm_read_ea(&modem, *data++) == 0);
+		gsm_process_modem(tty, dlci, modem);
+		/* Fall through to deliver the remaining payload */
+	/* Line state will go via DLCI 0 controls only */
+	case 1:
+	default:
+		tty_insert_flip_string(tty, data, len);
+		tty_flip_buffer_push(tty);
+	}
+out:
+	tty_kref_put(tty);
+}
+
+/**
+ *	gsm_dlci_command	-	data arrived on control channel
+ *	@dlci: channel
+ *	@data: block of bytes received
+ *	@len: length of received block
+ *
+ *	A UI or UIH frame has arrived which contains data for DLCI 0 the
+ *	control channel. This should contain a command EA followed by
+ *	control data bytes. The command EA contains a command/response bit
+ *	and we divide up the work accordingly.
+ */
+
+static void gsm_dlci_command(struct gsm_dlci *dlci, u8 *data, int len)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+	unsigned int command = 0;
+	int clen;
+
+	/* Accumulate the command EA one byte at a time */
+	while (len > 0) {
+		len--;
+		if (gsm_read_ea(&command, *data++) != 1)
+			continue;
+
+		/* FIXME: this is properly an EA */
+		clen = *data++;
+		len--;
+		clen >>= 1;
+		/* Claimed length exceeds what is left: malformed, drop it */
+		if (clen > len)
+			return;
+		/* Bit 0 set is a command for us, clear is a response */
+		if (command & 1)
+			gsm_control_message(gsm, command, data, clen);
+		else
+			gsm_control_response(gsm, command, data, clen);
+		return;
+	}
+}
+
+/*
+ *	Allocate/Free DLCI channels
+ */
+
+/**
+ *	gsm_dlci_alloc		-	allocate a DLCI
+ *	@gsm: GSM mux
+ *	@addr: address of the DLCI
+ *
+ *	Allocate and install a new DLCI object into the GSM mux.
+ *
+ *	FIXME: review locking races
+ */
+
+static struct gsm_dlci *gsm_dlci_alloc(struct gsm_mux *gsm, int addr)
+{
+	struct gsm_dlci *dlci;
+
+	dlci = kzalloc(sizeof(*dlci), GFP_ATOMIC);
+	if (!dlci)
+		return NULL;
+
+	spin_lock_init(&dlci->lock);
+	dlci->fifo = &dlci->_fifo;
+	/* NOTE(review): GFP_KERNEL here versus GFP_ATOMIC above - confirm
+	   which one the calling context actually needs */
+	if (kfifo_alloc(dlci->fifo, 4096, GFP_KERNEL) < 0) {
+		kfree(dlci);
+		return NULL;
+	}
+
+	skb_queue_head_init(&dlci->skb_list);
+
+	/* T1 retransmit timer gets this DLCI as its argument */
+	init_timer(&dlci->t1);
+	dlci->t1.function = gsm_dlci_t1;
+	dlci->t1.data = (unsigned long)dlci;
+
+	tty_port_init(&dlci->port);
+	dlci->port.ops = &gsm_port_ops;
+
+	dlci->gsm = gsm;
+	dlci->addr = addr;
+	dlci->adaption = gsm->adaption;
+	dlci->state = DLCI_CLOSED;
+	/* Address 0 is the control channel, everything else carries data */
+	dlci->data = addr ? gsm_dlci_data : gsm_dlci_command;
+
+	/* Publish the new channel in the mux table */
+	gsm->dlci[addr] = dlci;
+	return dlci;
+}
+
+/**
+ *	gsm_dlci_free		-	release DLCI
+ *	@dlci: DLCI to destroy
+ *
+ *	Free up a DLCI. Currently to keep the lifetime rules sane we only
+ *	clean up DLCI objects when the MUX closes rather than as the port
+ *	is closed down on both the tty and mux levels.
+ *
+ *	Can sleep.
+ */
+static void gsm_dlci_free(struct gsm_dlci *dlci)
+{
+	struct gsm_mux *gsm = dlci->gsm;
+	struct tty_struct *tty;
+
+	/* Hang up any tty still attached to the port */
+	tty = tty_port_tty_get(&dlci->port);
+	if (tty != NULL) {
+		tty_vhangup(tty);
+		tty_kref_put(tty);
+	}
+	/* Wait out a running T1 handler before the memory goes away */
+	del_timer_sync(&dlci->t1);
+	gsm->dlci[dlci->addr] = NULL;
+	kfifo_free(dlci->fifo);
+	kfree(dlci);
+}
+
+
+/*
+ *	LAPBish link layer logic
+ */
+
+/**
+ *	gsm_queue		-	a GSM frame is ready to process
+ *	@gsm: pointer to our gsm mux
+ *
+ *	At this point in time a frame has arrived and been demangled from
+ *	the line encoding. All the differences between the encodings have
+ *	been handled below us and the frame is unpacked into the structures.
+ *	The fcs holds the header FCS but any data FCS must be added here.
+ */
+
+static void gsm_queue(struct gsm_mux *gsm)
+{
+	struct gsm_dlci *dlci;
+	u8 cr;
+	int address;
+	/* We have to sneak a look at the packet body to do the FCS.
+	   A somewhat layering violation in the spec */
+
+	/* For UI frames (with or without PF) the data is folded into the FCS */
+	if ((gsm->control & ~PF) == UI)
+		gsm->fcs = gsm_fcs_add_block(gsm->fcs, gsm->buf, gsm->len);
+	/* Count and drop frames whose checksum does not come out right */
+	if (gsm->fcs != GOOD_FCS) {
+		gsm->bad_fcs++;
+		if (debug & 4)
+			printk("BAD FCS %02x\n", gsm->fcs);
+		return;
+	}
+	/* Bit 0 of the address field is C/R, the rest is the DLCI number */
+	address = gsm->address >> 1;
+	if (address >= NUM_DLCI)
+		goto invalid;
+
+	cr = gsm->address & 1;		/* C/R bit */
+
+	gsm_print_packet("<--", address, cr, gsm->control, gsm->buf, gsm->len);
+
+	cr ^= 1 - gsm->initiator;	/* Flip so 1 always means command */
+	/* May be NULL: the channel is only allocated on SABM below */
+	dlci = gsm->dlci[address];
+
+	switch (gsm->control) {
+	case SABM|PF:
+		/* Open request: must be a command */
+		if (cr == 0)
+			goto invalid;
+		if (dlci == NULL)
+			dlci = gsm_dlci_alloc(gsm, address);
+		/* Allocation failed: the request goes unanswered */
+		if (dlci == NULL)
+			return;
+		/* Dead channels refuse with DM, others acknowledge and open */
+		if (dlci->dead)
+			gsm_response(gsm, address, DM);
+		else {
+			gsm_response(gsm, address, UA);
+			gsm_dlci_open(dlci);
+		}
+		break;
+	case DISC|PF:
+		/* Close request: must be a command */
+		if (cr == 0)
+			goto invalid;
+		/* Nothing to close: answer DM */
+		if (dlci == NULL || dlci->state == DLCI_CLOSED) {
+			gsm_response(gsm, address, DM);
+			return;
+		}
+		/* Real close complete */
+		gsm_response(gsm, address, UA);
+		gsm_dlci_close(dlci);
+		break;
+	case UA:
+	case UA|PF:
+		/* Ignored when cr is 0 or the channel does not exist */
+		if (cr == 0 || dlci == NULL)
+			break;
+		/* A UA completes whichever transition is in progress */
+		switch (dlci->state) {
+		case DLCI_CLOSING:
+			gsm_dlci_close(dlci);
+			break;
+		case DLCI_OPENING:
+			gsm_dlci_open(dlci);
+			break;
+		}
+		break;
+	case DM:	/* DM can be valid unsolicited */
+	case DM|PF:
+		/* Rejected as malformed when cr is set */
+		if (cr)
+			goto invalid;
+		if (dlci == NULL)
+			return;
+		gsm_dlci_close(dlci);
+		break;
+	case UI:
+	case UI|PF:
+	case UIH:
+	case UIH|PF:
+		/* C/R checking for data frames is compiled out */
+#if 0
+		if (cr)
+			goto invalid;
+#endif
+		/* Data for a channel that is not open is answered with DM */
+		if (dlci == NULL || dlci->state != DLCI_OPEN) {
+			gsm_command(gsm, address, DM|PF);
+			return;
+		}
+		/* Hand the payload to the per-channel data handler */
+		dlci->data(dlci, gsm->buf, gsm->len);
+		break;
+	default:
+		goto invalid;
+	}
+	return;
+invalid:
+	/* Count frames we could not make sense of */
+	gsm->malformed++;
+	return;
+}
+
+
+/**
+ *	gsm0_receive	-	perform processing for non-transparency
+ *	@gsm: gsm data for this ldisc instance
+ *	@c: character
+ *
+ *	Receive bytes in gsm mode 0
+ */
+
+static void gsm0_receive(struct gsm_mux *gsm, unsigned char c)
+{
+	switch (gsm->state) {
+	case GSM_SEARCH:	/* SOF marker */
+		/* Everything is skipped until a start of frame shows up */
+		if (c == GSM0_SOF) {
+			gsm->state = GSM_ADDRESS;
+			gsm->address = 0;
+			gsm->len = 0;
+			gsm->fcs = INIT_FCS;
+		}
+		break;
+	case GSM_ADDRESS:	/* Address EA */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+		if (gsm_read_ea(&gsm->address, c))
+			gsm->state = GSM_CONTROL;
+		break;
+	case GSM_CONTROL:	/* Control Byte */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+		gsm->control = c;
+		gsm->state = GSM_LEN;
+		break;
+	case GSM_LEN:		/* Length EA */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+		if (gsm_read_ea(&gsm->len, c)) {
+			/* Frames longer than the MRU are counted and dropped */
+			if (gsm->len > gsm->mru) {
+				gsm->bad_size++;
+				gsm->state = GSM_SEARCH;
+				break;
+			}
+			gsm->count = 0;
+			/*
+			 * A frame with no data goes straight to the FCS.
+			 * Entering GSM_DATA with len == 0 would store the
+			 * next byte and then never see count == len, running
+			 * off the end of the receive buffer.
+			 */
+			if (gsm->len == 0)
+				gsm->state = GSM_FCS;
+			else
+				gsm->state = GSM_DATA;
+		}
+		break;
+	case GSM_DATA:		/* Data */
+		gsm->buf[gsm->count++] = c;
+		if (gsm->count == gsm->len)
+			gsm->state = GSM_FCS;
+		break;
+	case GSM_FCS:		/* FCS follows the packet */
+		gsm->fcs = c;
+		gsm_queue(gsm);
+		/* And then back for the next frame */
+		gsm->state = GSM_SEARCH;
+		break;
+	}
+}
+
+/**
+ *	gsm1_receive	-	perform processing for the advanced option
+ *	@gsm: gsm data for this ldisc instance
+ *	@c: character
+ *
+ *	Receive bytes in mode 1 (Advanced option)
+ */
+
+static void gsm1_receive(struct gsm_mux *gsm, unsigned char c)
+{
+	/* An unescaped SOF byte both ends one frame and starts the next */
+	if (c == GSM1_SOF) {
+		/* EOF is only valid in frame if we have got to the data state
+		   and received at least one byte (the FCS) */
+		if (gsm->state == GSM_DATA && gsm->count) {
+			/* Extract the FCS */
+			/* The last buffered byte is the FCS, not payload */
+			gsm->count--;
+			gsm->fcs = gsm_fcs_add(gsm->fcs, gsm->buf[gsm->count]);
+			gsm->len = gsm->count;
+			gsm_queue(gsm);
+			gsm->state  = GSM_START;
+			return;
+		}
+		/* Any partial frame was a runt so go back to start */
+		if (gsm->state != GSM_START) {
+			gsm->malformed++;
+			gsm->state = GSM_START;
+		}
+		/* A SOF in GSM_START means we are still reading idling or
+		   framing bytes */
+		return;
+	}
+
+	/* Remember the escape; it is applied to the byte that follows */
+	if (c == GSM1_ESCAPE) {
+		gsm->escape = 1;
+		return;
+	}
+
+	/* Only an unescaped SOF gets us out of GSM search */
+	if (gsm->state == GSM_SEARCH)
+		return;
+
+	/* Undo the escaping on the byte after a GSM1_ESCAPE */
+	if (gsm->escape) {
+		c ^= GSM1_ESCAPE_BITS;
+		gsm->escape = 0;
+	}
+	switch (gsm->state) {
+	case GSM_START:		/* First byte after SOF */
+		gsm->address = 0;
+		gsm->state = GSM_ADDRESS;
+		gsm->fcs = INIT_FCS;
+		/* Drop through */
+	case GSM_ADDRESS:	/* Address continuation */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+		if (gsm_read_ea(&gsm->address, c))
+			gsm->state = GSM_CONTROL;
+		break;
+	case GSM_CONTROL:	/* Control Byte */
+		gsm->fcs = gsm_fcs_add(gsm->fcs, c);
+		gsm->control = c;
+		/* This mode has no length state: data follows directly */
+		gsm->count = 0;
+		gsm->state = GSM_DATA;
+		break;
+	case GSM_DATA:		/* Data */
+		if (gsm->count > gsm->mru ) {	/* Allow one for the FCS */
+			gsm->state = GSM_OVERRUN;
+			gsm->bad_size++;
+		} else
+			gsm->buf[gsm->count++] = c;
+		break;
+	case GSM_OVERRUN:	/* Over-long - eg a dropped SOF */
+		/* Bytes are discarded until the next SOF resets the state */
+		break;
+	}
+}
+
+/**
+ *	gsm_error		-	handle tty error
+ *	@gsm: ldisc data
+ *	@data: byte received (may be invalid)
+ *	@flag: error received
+ *
+ *	Handle an error in the receipt of data for a frame. Currently we just
+ *	go back to hunting for a SOF.
+ *
+ *	FIXME: better diagnostics ?
+ */
+
+static void gsm_error(struct gsm_mux *gsm,
+				unsigned char data, unsigned char flag)
+{
+	/* The byte and flag are unused for now; the error is just counted */
+	gsm->io_error++;
+	/* Abandon the current frame and hunt for the next SOF */
+	gsm->state = GSM_SEARCH;
+}
+
+/**
+ *	gsm_cleanup_mux		-	generic GSM protocol cleanup
+ *	@gsm: our mux
+ *
+ *	Clean up the bits of the mux which are the same for all framing
+ *	protocols. Remove the mux from the mux table, stop all the timers
+ *	and then shut down each device hanging up the channels as we go.
+ */
+
+void gsm_cleanup_mux(struct gsm_mux *gsm)
+{
+	int i;
+	struct gsm_dlci *dlci = gsm->dlci[0];
+	struct gsm_msg *txq;
+
+	/* Flag the mux dead before anything is torn down */
+	gsm->dead = 1;
+
+	/* Drop this mux from the global table */
+	spin_lock(&gsm_mux_lock);
+	for (i = 0; i < MAX_MUX; i++) {
+		if (gsm_mux[i] == gsm) {
+			gsm_mux[i] = NULL;
+			break;
+		}
+	}
+	spin_unlock(&gsm_mux_lock);
+	/* Warn if the mux was never in the table */
+	WARN_ON(i == MAX_MUX);
+
+	del_timer_sync(&gsm->t2_timer);
+	/* Now we are sure T2 has stopped */
+	if (dlci) {
+		/* Close the control channel and wait for that to complete;
+		   the wait is interruptible and its result is not checked */
+		dlci->dead = 1;
+		gsm_dlci_begin_close(dlci);
+		wait_event_interruptible(gsm->event,
+					dlci->state == DLCI_CLOSED);
+	}
+	/* Free up any link layer users */
+	for (i = 0; i < NUM_DLCI; i++)
+		if (gsm->dlci[i])
+			gsm_dlci_free(gsm->dlci[i]);
+	/* Now wipe the queues */
+	for (txq = gsm->tx_head; txq != NULL; txq = gsm->tx_head) {
+		gsm->tx_head = txq->next;
+		kfree(txq);
+	}
+	gsm->tx_tail = NULL;
+}
+EXPORT_SYMBOL_GPL(gsm_cleanup_mux);
+
+/**
+ *	gsm_activate_mux	-	generic GSM setup
+ *	@gsm: our mux
+ *
+ *	Set up the bits of the mux which are the same for all framing
+ *	protocols. Add the mux to the mux table so it can be opened and
+ *	finally kick off connecting to DLCI 0 on the modem.
+ */
+
+int gsm_activate_mux(struct gsm_mux *gsm)
+{
+	int slot;
+
+	/* T2 timer runs gsm_control_retransmit with this mux as argument */
+	init_timer(&gsm->t2_timer);
+	gsm->t2_timer.function = gsm_control_retransmit;
+	gsm->t2_timer.data = (unsigned long)gsm;
+	init_waitqueue_head(&gsm->event);
+	spin_lock_init(&gsm->control_lock);
+	spin_lock_init(&gsm->tx_lock);
+
+	/* Encoding 0 uses the mode 0 receiver, anything else mode 1 */
+	gsm->receive = (gsm->encoding == 0) ? gsm0_receive : gsm1_receive;
+	gsm->error = gsm_error;
+
+	/* Claim the first free entry in the mux table */
+	spin_lock(&gsm_mux_lock);
+	for (slot = 0; slot < MAX_MUX; slot++) {
+		if (gsm_mux[slot] != NULL)
+			continue;
+		gsm_mux[slot] = gsm;
+		break;
+	}
+	spin_unlock(&gsm_mux_lock);
+	if (slot == MAX_MUX)
+		return -EBUSY;
+
+	/* NOTE(review): if this allocation fails the table entry claimed
+	   above stays in place - confirm callers cope with that */
+	if (gsm_dlci_alloc(gsm, 0) == NULL)
+		return -ENOMEM;
+	gsm->dead = 0;		/* Tty opens are now permissible */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(gsm_activate_mux);
+
+/**
+ *	gsm_free_mux		-	free up a mux
+ *	@mux: mux to free
+ *
+ *	Dispose of allocated resources for a dead mux. No refcounting
+ *	at present so the mux must be truly dead.
+ */
+void gsm_free_mux(struct gsm_mux *gsm)
+{
+	/* Release the receive and transmit buffers, then the mux itself */
+	kfree(gsm->buf);
+	kfree(gsm->txframe);
+	kfree(gsm);
+}
+EXPORT_SYMBOL_GPL(gsm_free_mux);
+
+/**
+ *	gsm_alloc_mux		-	allocate a mux
+ *
+ *	Creates a new mux ready for activation.
+ */
+
+struct gsm_mux *gsm_alloc_mux(void)
+{
+	struct gsm_mux *gsm;
+
+	gsm = kzalloc(sizeof(*gsm), GFP_KERNEL);
+	if (!gsm)
+		return NULL;
+
+	/* Receive buffer */
+	gsm->buf = kmalloc(MAX_MRU + 1, GFP_KERNEL);
+	if (!gsm->buf)
+		goto err_free_mux;
+	/* Transmit frame buffer */
+	gsm->txframe = kmalloc(2 * MAX_MRU + 2, GFP_KERNEL);
+	if (!gsm->txframe)
+		goto err_free_buf;
+
+	spin_lock_init(&gsm->lock);
+
+	/* Default protocol parameters */
+	gsm->t1 = T1;
+	gsm->t2 = T2;
+	gsm->n2 = N2;
+	gsm->ftype = UIH;
+	gsm->initiator = 0;
+	gsm->adaption = 1;
+	gsm->encoding = 1;
+	gsm->mru = 64;	/* Default to encoding 1 so these should be 64 */
+	gsm->mtu = 64;
+	gsm->dead = 1;	/* Avoid early tty opens */
+
+	return gsm;
+
+err_free_buf:
+	kfree(gsm->buf);
+err_free_mux:
+	kfree(gsm);
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(gsm_alloc_mux);
+
+
+
+
+/**
+ *	gsmld_output		-	write to link
+ *	@gsm: our mux
+ *	@data: bytes to output
+ *	@len: size
+ *
+ *	Write a block of data from the GSM mux to the data channel. This
+ *	will eventually be serialized from above but at the moment isn't.
+ */
+
+static int gsmld_output(struct gsm_mux *gsm, u8 *data, int len)
+{
+	struct tty_struct *tty = gsm->tty;
+
+	/* Whole block or nothing: ask for a wakeup when it will not fit */
+	if (tty_write_room(tty) < len) {
+		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+		return -ENOSPC;
+	}
+	if (debug & 4) {
+		printk("-->%d bytes out\n", len);
+		hex_packet(data, len);
+	}
+	/* The driver's own return value is not looked at */
+	tty->ops->write(tty, data, len);
+	return len;
+}
+
+/**
+ *	gsmld_attach_gsm	-	mode set up
+ *	@tty: our tty structure
+ *	@gsm: our mux
+ *
+ *	Set up the MUX for basic mode and commence connecting to the
+ *	modem. Currently called from the line discipline set up but
+ *	will need moving to an ioctl path.
+ */
+
+static int gsmld_attach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
+{
+	int err;
+
+	/* The mux holds its own reference on the tty while attached */
+	gsm->tty = tty_kref_get(tty);
+	gsm->output = gsmld_output;
+
+	err = gsm_activate_mux(gsm);
+	if (err == 0)
+		return 0;
+
+	/* Activation failed: give the reference back */
+	tty_kref_put(gsm->tty);
+	return err;
+}
+
+
+/**
+ *	gsmld_detach_gsm	-	stop doing 0710 mux
+ *	@tty: tty attached to the mux
+ *	@gsm: mux
+ *
+ *	Shutdown and then clean up the resources used by the line discipline
+ */
+
+static void gsmld_detach_gsm(struct tty_struct *tty, struct gsm_mux *gsm)
+{
+	struct tty_struct *held;
+
+	/* The mux should be attached to the tty we were handed */
+	WARN_ON(tty != gsm->tty);
+	gsm_cleanup_mux(gsm);
+	/* Drop the reference held in gsm->tty and clear the pointer */
+	held = gsm->tty;
+	tty_kref_put(held);
+	gsm->tty = NULL;
+}
+
+/*
+ *	Receive handler of the line discipline: walks the byte and flag
+ *	arrays in step and feeds each byte to the mux receive or error
+ *	method depending on its flag.
+ */
+static void gsmld_receive_buf(struct tty_struct *tty, const unsigned char *cp,
+			      char *fp, int count)
+{
+	struct gsm_mux *gsm = tty->disc_data;
+	const unsigned char *ch = cp;
+	char *flagp = fp;
+	int remaining = count;
+	char name[64];
+	char flag;
+
+	if (debug & 4) {
+		printk("Inbytes %dd\n", count);
+		hex_packet(cp, count);
+	}
+
+	while (remaining) {
+		flag = *flagp++;
+		switch (flag) {
+		case TTY_NORMAL:
+			/* Clean byte: run it through the framing receiver */
+			gsm->receive(gsm, *ch);
+			break;
+		case TTY_OVERRUN:
+		case TTY_BREAK:
+		case TTY_PARITY:
+		case TTY_FRAME:
+			/* Flagged byte: report it to the error method */
+			gsm->error(gsm, *ch, flag);
+			break;
+		default:
+			printk(KERN_ERR "%s: unknown flag %d\n",
+			       tty_name(tty, name), flag);
+			break;
+		}
+		ch++;
+		remaining--;
+	}
+	/* FASYNC if needed ? */
+	/* If clogged call tty_throttle(tty); */
+}
+
+/**
+ *	gsmld_chars_in_buffer	-	report available bytes
+ *	@tty: tty device
+ *
+ *	Report the number of characters buffered to be delivered to user
+ *	at this instant in time.
+ *
+ *	Locking: gsm lock
+ */
+
+static ssize_t gsmld_chars_in_buffer(struct tty_struct *tty)
+{
+	/* Stub: always reports zero buffered characters */
+	return 0;
+}
+
+/**
+ *	gsmld_flush_buffer	-	clean input queue
+ *	@tty:	terminal device
+ *
+ *	Flush the input buffer. Called when the line discipline is
+ *	being closed, when the tty layer wants the buffer flushed (eg
+ *	at hangup).
+ */
+
+static void gsmld_flush_buffer(struct tty_struct *tty)
+{
+	/* Intentionally empty: nothing is flushed here at present */
+}
+
+/**
+ *	gsmld_close		-	close the ldisc for this tty
+ *	@tty: device
+ *
+ *	Called from the terminal layer when this line discipline is
+ *	being shut down, either because of a close or because of a
+ *	discipline change. The function will not be called while other
+ *	ldisc methods are in progress.
+ */
+
+static void gsmld_close(struct tty_struct *tty)
+{
+	struct gsm_mux *mux = tty->disc_data;
+
+	/* Shut the mux down and detach it from the tty */
+	gsmld_detach_gsm(tty, mux);
+	gsmld_flush_buffer(tty);
+	/* Do other clean up here */
+	/* Finally release the mux memory */
+	gsm_free_mux(mux);
+}
+
+/**
+ *	gsmld_open		-	open an ldisc
+ *	@tty: terminal to open
+ *
+ *	Called when this line discipline is being attached to the
+ *	terminal device. Can sleep. Called serialized so that no
+ *	other events will occur in parallel. No further open will occur
+ *	until a close.
+ */
+
+static int gsmld_open(struct tty_struct *tty)
+{
+	struct gsm_mux *mux;
+
+	/* The mux needs a driver it can write to */
+	if (!tty->ops->write)
+		return -EINVAL;
+
+	mux = gsm_alloc_mux();
+	if (!mux)
+		return -ENOMEM;
+
+	/* Hang the mux off the tty as the ldisc private data */
+	tty->disc_data = mux;
+	tty->receive_room = 65536;
+
+	/* Attach the initial passive connection */
+	mux->encoding = 1;
+	/* NOTE(review): if the attach fails the mux is not freed on this
+	   path and disc_data still points at it - confirm intended */
+	return gsmld_attach_gsm(tty, mux);
+}
+
+/**
+ *	gsmld_write_wakeup	-	asynchronous I/O notifier
+ *	@tty: tty device
+ *
+ *	Required for the ptys, serial driver etc. since processes
+ *	that attach themselves to the master and rely on ASYNC
+ *	IO must be woken up
+ */
+
+static void gsmld_write_wakeup(struct tty_struct *tty)
+{
+	struct gsm_mux *mux = tty->disc_data;
+
+	/* Wakeup delivered: clear the request and push queued frames */
+	clear_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+	gsm_data_kick(mux);
+	/* Below the low threshold: sweep the DLCIs for more data */
+	if (mux->tx_bytes >= TX_THRESH_LO)
+		return;
+	gsm_dlci_data_sweep(mux);
+}
+
+/**
+ *	gsmld_read		-	read function for tty
+ *	@tty: tty device
+ *	@file: file object
+ *	@buf: userspace buffer pointer
+ *	@nr: size of I/O
+ *
+ *	Perform reads for the line discipline. We are guaranteed that the
+ *	line discipline will not be closed under us but we may get multiple
+ *	parallel readers and must handle this ourselves. We may also get
+ *	a hangup. Always called in user context, may sleep.
+ *
+ *	This code must be sure never to sleep through a hangup.
+ */
+
+static ssize_t gsmld_read(struct tty_struct *tty, struct file *file,
+			 unsigned char __user *buf, size_t nr)
+{
+	/* Reading is not implemented: every call fails with -EOPNOTSUPP */
+	return -EOPNOTSUPP;
+}
+
+/**
+ *	gsmld_write		-	write function for tty
+ *	@tty: tty device
+ *	@file: file object
+ *	@buf: userspace buffer pointer
+ *	@nr: size of I/O
+ *
+ *	Called when the owner of the device wants to send a frame
+ *	itself (or some other control data). The data is transferred
+ *	as-is and must be properly framed and checksummed as appropriate
+ *	by userspace. Frames are either sent whole or not at all as this
+ *	avoids pain user side.
+ */
+
+static ssize_t gsmld_write(struct tty_struct *tty, struct file *file,
+			   const unsigned char *buf, size_t nr)
+{
+	int space = tty_write_room(tty);
+
+	/* Frames go out whole or not at all */
+	if (space < nr) {
+		/* No room: request a wakeup and let the caller retry */
+		set_bit(TTY_DO_WRITE_WAKEUP, &tty->flags);
+		return -ENOBUFS;
+	}
+	return tty->ops->write(tty, buf, nr);
+}
+
+/**
+ *	gsmld_poll		-	poll method for N_GSM0710
+ *	@tty: terminal device
+ *	@file: file accessing it
+ *	@wait: poll table
+ *
+ *	Called when the line discipline is asked to poll() for data or
+ *	for special events. This code is not serialized with respect to
+ *	other events save open/close.
+ *
+ *	This code must be sure never to sleep through a hangup.
+ *	Called without the kernel lock held - fine
+ */
+
+static unsigned int gsmld_poll(struct tty_struct *tty, struct file *file,
+							poll_table *wait)
+{
+	struct gsm_mux *mux = tty->disc_data;
+	unsigned int events = 0;
+
+	/* Register on both tty wait queues */
+	poll_wait(file, &tty->read_wait, wait);
+	poll_wait(file, &tty->write_wait, wait);
+
+	if (tty_hung_up_p(file))
+		events |= POLLHUP;
+	/* Writable when not write-locked and the driver has room */
+	if (!tty_is_writelocked(tty) && tty_write_room(tty) > 0)
+		events |= POLLOUT | POLLWRNORM;
+	/* A dead mux is reported as a hangup too */
+	if (mux->dead)
+		events |= POLLHUP;
+	return events;
+}
+
+/*
+ *	Apply a configuration supplied by the GSMIOC_SETCONF ioctl.
+ *
+ *	Validates the request, closes DLCI 0 and/or restarts the mux when a
+ *	changed field requires it, stores the new parameters and brings the
+ *	mux back up. Returns 0 on success or a negative errno: -EOPNOTSUPP
+ *	or -EINVAL for a rejected configuration, -EINTR if the wait for
+ *	DLCI 0 to close was interrupted, or the error from reactivation.
+ */
+static int gsmld_config(struct tty_struct *tty, struct gsm_mux *gsm,
+							struct gsm_config *c)
+{
+	int need_close = 0;
+	int need_restart = 0;
+	int ret;
+
+	/* Stuff we don't support yet - UI or I frame transport, windowing */
+	if ((c->adaption !=1 && c->adaption != 2) || c->k)
+		return -EOPNOTSUPP;
+	/* Check the MRU/MTU range looks sane */
+	if (c->mru > MAX_MRU || c->mtu > MAX_MTU || c->mru < 8 || c->mtu < 8)
+		return -EINVAL;
+	if (c->n2 < 3)
+		return -EINVAL;
+	if (c->encapsulation > 1)	/* Basic, advanced, no I */
+		return -EINVAL;
+	if (c->initiator > 1)
+		return -EINVAL;
+	if (c->i == 0 || c->i > 2)	/* UIH and UI only */
+		return -EINVAL;
+	/*
+	 *	See what is needed for reconfiguration
+	 */
+
+	/* Timing fields */
+	if (c->t1 != 0 && c->t1 != gsm->t1)
+		need_restart = 1;
+	if (c->t2 != 0 && c->t2 != gsm->t2)
+		need_restart = 1;
+	if (c->encapsulation != gsm->encoding)
+		need_restart = 1;
+	if (c->adaption != gsm->adaption)
+		need_restart = 1;
+	/* Requires care */
+	if (c->initiator != gsm->initiator)
+		need_close = 1;
+	if (c->mru != gsm->mru)
+		need_restart = 1;
+	if (c->mtu != gsm->mtu)
+		need_restart = 1;
+
+	/*
+	 *	Close down what is needed, restart and initiate the new
+	 *	configuration
+	 */
+
+	if (need_close || need_restart) {
+		gsm_dlci_begin_close(gsm->dlci[0]);
+		/* This will timeout if the link is down due to N2 expiring */
+		wait_event_interruptible(gsm->event,
+				gsm->dlci[0]->state == DLCI_CLOSED);
+		if (signal_pending(current))
+			return -EINTR;
+	}
+	if (need_restart)
+		gsm_cleanup_mux(gsm);
+
+	gsm->initiator = c->initiator;
+	gsm->mru = c->mru;
+	/* The MTU was validated above but previously never stored, so every
+	   later SETCONF saw a mismatch and forced a restart */
+	gsm->mtu = c->mtu;
+	gsm->encoding = c->encapsulation;
+	gsm->adaption = c->adaption;
+	/* Likewise N2 was range checked but never applied */
+	gsm->n2 = c->n2;
+
+	if (c->i == 1)
+		gsm->ftype = UIH;
+	else if (c->i == 2)
+		gsm->ftype = UI;
+
+	if (c->t1)
+		gsm->t1 = c->t1;
+	if (c->t2)
+		gsm->t2 = c->t2;
+
+	/* FIXME: We need to separate activation/deactivation from adding
+	   and removing from the mux array */
+	if (need_restart) {
+		/* If activation fails DLCI 0 does not exist, so stop here
+		   rather than hand a NULL channel to the open below */
+		ret = gsm_activate_mux(gsm);
+		if (ret != 0)
+			return ret;
+	}
+	if (gsm->initiator && need_close)
+		gsm_dlci_begin_open(gsm->dlci[0]);
+	return 0;
+}
+
+/*
+ *	ioctl handler of the line discipline. GSMIOC_GETCONF copies the
+ *	current mux settings to userspace, GSMIOC_SETCONF applies new ones
+ *	through gsmld_config(), anything else goes to n_tty_ioctl_helper().
+ */
+static int gsmld_ioctl(struct tty_struct *tty, struct file *file,
+		       unsigned int cmd, unsigned long arg)
+{
+	struct gsm_mux *gsm = tty->disc_data;
+	struct gsm_config conf;
+
+	if (cmd == GSMIOC_SETCONF) {
+		if (copy_from_user(&conf, (void *)arg, sizeof(conf)))
+			return -EFAULT;
+		return gsmld_config(tty, gsm, &conf);
+	}
+	if (cmd != GSMIOC_GETCONF)
+		return n_tty_ioctl_helper(tty, file, cmd, arg);
+
+	/* GSMIOC_GETCONF: start from zeroes, then fill in the fields */
+	memset(&conf, 0, sizeof(conf));
+	conf.adaption = gsm->adaption;
+	conf.encapsulation = gsm->encoding;
+	conf.initiator = gsm->initiator;
+	conf.t1 = gsm->t1;
+	conf.t2 = gsm->t2;
+	conf.t3 = 0;	/* Not supported */
+	conf.n2 = gsm->n2;
+	/* Frame type is reported as 1 for UIH and 2 otherwise */
+	conf.i = (gsm->ftype == UIH) ? 1 : 2;
+	printk("Ftype %d i %d\n", gsm->ftype, conf.i);
+	conf.mru = gsm->mru;
+	conf.mtu = gsm->mtu;
+	conf.k = 0;
+	if (copy_to_user((void *)arg, &conf, sizeof(conf)))
+		return -EFAULT;
+	return 0;
+}
+
+
+/*
+ *	Line discipline operations for the real tty that carries the mux.
+ *	gsm_init() registers this table under N_GSM0710.
+ */
+struct tty_ldisc_ops tty_ldisc_packet = {
+	.owner		 = THIS_MODULE,
+	.magic           = TTY_LDISC_MAGIC,
+	.name            = "n_gsm",
+	.open            = gsmld_open,
+	.close           = gsmld_close,
+	.flush_buffer    = gsmld_flush_buffer,
+	.chars_in_buffer = gsmld_chars_in_buffer,
+	.read            = gsmld_read,
+	.write           = gsmld_write,
+	.ioctl           = gsmld_ioctl,
+	.poll            = gsmld_poll,
+	.receive_buf     = gsmld_receive_buf,
+	.write_wakeup    = gsmld_write_wakeup
+};
+
+/*
+ *	Virtual tty side
+ */
+
+#define TX_SIZE		512
+
+/*
+ *	Build an MSC control message carrying the modem signals of this DLCI
+ *	(plus a break octet when brk is non zero), send it on the mux and
+ *	wait for the control transaction to finish.
+ *
+ *	Returns -ENOMEM if the control message could not be queued, otherwise
+ *	the result of gsm_control_wait().
+ */
+static int gsmtty_modem_update(struct gsm_dlci *dlci, u8 brk)
+{
+	u8 modembits[5];
+	struct gsm_control *ctrl;
+	/* DLCI octet + signal octet, plus one more when a break is sent */
+	int len = brk ? 3 : 2;
+
+	modembits[0] = len << 1 | EA;		/* Data bytes */
+	modembits[1] = dlci->addr << 2 | 3;	/* DLCI, EA, 1 */
+	modembits[2] = gsm_encode_modem(dlci) << 1 | EA;
+	if (brk)
+		modembits[3] = brk << 4 | 2 | EA;	/* Valid, EA */
+
+	/* The length octet itself is sent too, hence len + 1 */
+	ctrl = gsm_control_send(dlci->gsm, CMD_MSC, modembits, len + 1);
+	if (!ctrl)
+		return -ENOMEM;
+
+	return gsm_control_wait(dlci->gsm, ctrl);
+}
+
+/*
+ *	tty_port carrier_raised hook. Reports no carrier until the DLCI is
+ *	open; once open, debug bit 2 forces carrier on, otherwise the CD bit
+ *	of the received modem state is returned.
+ */
+static int gsm_carrier_raised(struct tty_port *port)
+{
+	struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
+
+	if (dlci->state == DLCI_OPEN) {
+		if (debug & 2)
+			return 1;
+		return dlci->modem_rx & TIOCM_CD;
+	}
+
+	/* Not yet open so no carrier info */
+	return 0;
+}
+
+/*
+ *	tty_port dtr_rts hook. Raises or drops DTR and RTS together in the
+ *	transmitted modem state and sends a modem status update only when
+ *	that actually changes something.
+ */
+static void gsm_dtr_rts(struct tty_port *port, int onoff)
+{
+	const unsigned int lines = TIOCM_DTR | TIOCM_RTS;
+	struct gsm_dlci *dlci = container_of(port, struct gsm_dlci, port);
+	unsigned int wanted = dlci->modem_tx;
+
+	wanted = onoff ? (wanted | lines) : (wanted & ~lines);
+	if (wanted == dlci->modem_tx)
+		return;
+
+	dlci->modem_tx = wanted;
+	gsmtty_modem_update(dlci, 0);
+}
+
+/* tty_port callbacks for the virtual ttys: carrier query and DTR/RTS control */
+static const struct tty_port_operations gsm_port_ops = {
+	.carrier_raised = gsm_carrier_raised,
+	.dtr_rts = gsm_dtr_rts,
+};
+
+
+/*
+ *	Open one of the virtual ttys. The bits of the tty index above the low
+ *	six select the mux and the low six bits select the DLCI on it. DLCI 0
+ *	and 62/63 cannot be opened. The DLCI is allocated on first use, an
+ *	open is started on it and the caller then blocks in
+ *	tty_port_block_til_ready().
+ *
+ *	Returns the result of tty_port_block_til_ready(), or -ENXIO, -EUNATCH,
+ *	-ECHRNG, -EL2HLT or -ENOMEM when the line cannot be opened.
+ */
+static int gsmtty_open(struct tty_struct *tty, struct file *filp)
+{
+	struct gsm_mux *gsm;
+	struct gsm_dlci *dlci;
+	struct tty_port *port;
+	unsigned int line = tty->index;
+	unsigned int mux = line >> 6;
+
+	line = line & 0x3F;
+
+	if (mux >= MAX_MUX)
+		return -ENXIO;
+	/* FIXME: we need to lock gsm_mux for lifetimes of ttys eventually */
+	if (gsm_mux[mux] == NULL)
+		return -EUNATCH;
+	if (line == 0 || line > 61)	/* 62/63 reserved */
+		return -ECHRNG;
+	gsm = gsm_mux[mux];
+	if (gsm->dead)
+		return -EL2HLT;
+	/* Create the DLCI the first time this line is opened */
+	dlci = gsm->dlci[line];
+	if (dlci == NULL)
+		dlci = gsm_dlci_alloc(gsm, line);
+	if (dlci == NULL)
+		return -ENOMEM;
+	port = &dlci->port;
+	port->count++;
+	tty->driver_data = dlci;
+	tty_port_tty_set(port, tty);
+
+	/* Start from "no signals seen" for the received modem state */
+	dlci->modem_rx = 0;
+	/* We could in theory open and close before we wait - eg if we get
+	   a DM straight back. This is ok as that will have caused a hangup */
+	set_bit(ASYNCB_INITIALIZED, &port->flags);
+	/* Start sending off SABM messages */
+	gsm_dlci_begin_open(dlci);
+	/* And wait for virtual carrier */
+	return tty_port_block_til_ready(port, tty, filp);
+}
+
+/*
+ *	Close a virtual tty. Does nothing if the tty never got a DLCI attached
+ *	or if tty_port_close_start() returns 0; otherwise the DLCI close is
+ *	started and the port is detached from the tty.
+ */
+static void gsmtty_close(struct tty_struct *tty, struct file *filp)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	struct tty_port *port;
+
+	if (!dlci)
+		return;
+
+	port = &dlci->port;
+	if (!tty_port_close_start(port, tty, filp))
+		return;
+
+	gsm_dlci_begin_close(dlci);
+	tty_port_close_end(port, tty);
+	tty_port_tty_set(port, NULL);
+}
+
+/*
+ *	Hang up a virtual tty: hang up the tty_port first, then start closing
+ *	the DLCI behind it.
+ */
+static void gsmtty_hangup(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	tty_port_hangup(&dlci->port);
+	gsm_dlci_begin_close(dlci);
+}
+
+/*
+ *	Queue data written to a virtual tty. The bytes go into the DLCI fifo
+ *	under the DLCI lock and the channel is then kicked. Returns the number
+ *	of bytes the fifo accepted.
+ */
+static int gsmtty_write(struct tty_struct *tty, const unsigned char *buf,
+								    int len)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	int queued;
+
+	/* Stuff the bytes into the fifo queue */
+	queued = kfifo_in_locked(dlci->fifo, buf, len, &dlci->lock);
+
+	/* Need to kick the channel */
+	gsm_dlci_data_kick(dlci);
+
+	return queued;
+}
+
+/* Free space for writers: TX_SIZE minus what is already in the DLCI fifo */
+static int gsmtty_write_room(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	unsigned int queued = kfifo_len(dlci->fifo);
+
+	return TX_SIZE - queued;
+}
+
+/* Number of bytes still waiting in the DLCI transmit fifo */
+static int gsmtty_chars_in_buffer(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci;
+
+	dlci = tty->driver_data;
+	return kfifo_len(dlci->fifo);
+}
+
+/* Throw away everything queued in the DLCI transmit fifo */
+static void gsmtty_flush_buffer(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	/* Caution needed: If we implement reliable transport classes
+	   then the data being transmitted can't simply be junked once
+	   it has first hit the stack. Until then we can just blow it
+	   away */
+	kfifo_reset(dlci->fifo);
+	/* Need to unhook this DLCI from the transmit queue logic */
+}
+
+/* wait_until_sent hook: intentionally empty, see the note in the body */
+static void gsmtty_wait_until_sent(struct tty_struct *tty, int timeout)
+{
+	/* The FIFO handles the queue so the kernel will do the right
+	   thing waiting on chars_in_buffer before calling us. No work
+	   to do here */
+}
+
+/* TIOCMGET: report the modem signals last received for this DLCI */
+static int gsmtty_tiocmget(struct tty_struct *tty, struct file *filp)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	return dlci->modem_rx;
+}
+
+/*
+ *	TIOCMSET/TIOCMBIS/TIOCMBIC handler for a virtual tty.
+ *
+ *	@set holds the signals to raise and @clear the signals to drop. The
+ *	bits in @clear are removed from the transmitted modem state first and
+ *	the bits in @set are then added. A modem status update is sent only
+ *	when the resulting state differs from the current one.
+ *
+ *	Returns 0 if nothing changed, otherwise the result of
+ *	gsmtty_modem_update().
+ */
+static int gsmtty_tiocmset(struct tty_struct *tty, struct file *filp,
+	unsigned int set, unsigned int clear)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	unsigned int modem_tx = dlci->modem_tx;
+
+	/* clear is a mask of bits to drop, so it has to be inverted here */
+	modem_tx &= ~clear;
+	modem_tx |= set;
+
+	if (modem_tx != dlci->modem_tx) {
+		dlci->modem_tx = modem_tx;
+		return gsmtty_modem_update(dlci, 0);
+	}
+	return 0;
+}
+
+
+/* The virtual ttys implement no private ioctls: always -ENOIOCTLCMD */
+static int gsmtty_ioctl(struct tty_struct *tty, struct file *filp,
+			unsigned int cmd, unsigned long arg)
+{
+	return -ENOIOCTLCMD;
+}
+
+/* set_termios hook: copies the hardware settings of the old termios back */
+static void gsmtty_set_termios(struct tty_struct *tty, struct ktermios *old)
+{
+	/* For the moment it's fixed. In actual fact the speed information
+	   for the virtual channel can be propagated in both directions by
+	   the RPN control message. This however rapidly gets nasty as we
+	   then have to remap modem signals each way according to whether
+	   our virtual cable is null modem etc .. */
+	tty_termios_copy_hw(tty->termios, old);
+}
+
+/*
+ *	Throttle a virtual tty. Marks the DLCI throttled and sends a modem
+ *	status update; DTR is dropped in that update only when the tty has
+ *	CRTSCTS set.
+ */
+static void gsmtty_throttle(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	const int hw_flow = (tty->termios->c_cflag & CRTSCTS) != 0;
+
+	if (hw_flow)
+		dlci->modem_tx &= ~TIOCM_DTR;
+	dlci->throttled = 1;
+
+	/* Tell the other end about the (possibly changed) signals */
+	gsmtty_modem_update(dlci, 0);
+}
+
+/*
+ *	Unthrottle a virtual tty. Clears the throttled mark and sends a modem
+ *	status update; DTR is raised in that update only when the tty has
+ *	CRTSCTS set.
+ */
+static void gsmtty_unthrottle(struct tty_struct *tty)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	const int hw_flow = (tty->termios->c_cflag & CRTSCTS) != 0;
+
+	if (hw_flow)
+		dlci->modem_tx |= TIOCM_DTR;
+	dlci->throttled = 0;
+
+	/* Tell the other end about the (possibly changed) signals */
+	gsmtty_modem_update(dlci, 0);
+}
+
+/*
+ *	break_ctl hook. Translates the requested break into the 4 bit value
+ *	carried by the modem status message: 0 for off, state / 200 for a
+ *	timed break (capped at 0x0F) and 0x0F for state == -1.
+ *	Returns the result of gsmtty_modem_update().
+ */
+static int gsmtty_break_ctl(struct tty_struct *tty, int state)
+{
+	struct gsm_dlci *dlci = tty->driver_data;
+	int encode;
+
+	if (state == -1) {
+		/* "On indefinitely" - we can't encode this properly */
+		encode = 0x0F;
+	} else if (state > 0) {
+		encode = state / 200;	/* mS to encoding */
+		if (encode > 0x0F)
+			encode = 0x0F;	/* Best effort */
+	} else {
+		encode = 0;	/* Off */
+	}
+
+	return gsmtty_modem_update(dlci, encode);
+}
+
+static struct tty_driver *gsm_tty_driver;
+
+/*
+ *	Operations for the virtual ttys of the demux. gsm_init() installs
+ *	this table on gsm_tty_driver.
+ */
+static const struct tty_operations gsmtty_ops = {
+	.open			= gsmtty_open,
+	.close			= gsmtty_close,
+	.write			= gsmtty_write,
+	.write_room		= gsmtty_write_room,
+	.chars_in_buffer	= gsmtty_chars_in_buffer,
+	.flush_buffer		= gsmtty_flush_buffer,
+	.ioctl			= gsmtty_ioctl,
+	.throttle		= gsmtty_throttle,
+	.unthrottle		= gsmtty_unthrottle,
+	.set_termios		= gsmtty_set_termios,
+	.hangup			= gsmtty_hangup,
+	.wait_until_sent	= gsmtty_wait_until_sent,
+	.tiocmget		= gsmtty_tiocmget,
+	.tiocmset		= gsmtty_tiocmset,
+	.break_ctl		= gsmtty_break_ctl,
+};
+
+
+
+/*
+ *	Module init: register the N_GSM0710 line discipline, then allocate,
+ *	fill in and register the tty driver for the virtual ttys.
+ *
+ *	Returns 0 on success. On failure everything registered so far is
+ *	undone and the error of the step that failed is returned (-ENOMEM if
+ *	the tty driver could not be allocated).
+ */
+static int __init gsm_init(void)
+{
+	int status;
+
+	/* Set the lock up before the ldisc registration makes us reachable */
+	spin_lock_init(&gsm_mux_lock);
+
+	/* Fill in our line protocol discipline, and register it */
+	status = tty_register_ldisc(N_GSM0710, &tty_ldisc_packet);
+	if (status != 0) {
+		printk(KERN_ERR "n_gsm: can't register line discipline (err = %d)\n", status);
+		return status;
+	}
+
+	gsm_tty_driver = alloc_tty_driver(256);
+	if (!gsm_tty_driver) {
+		printk(KERN_ERR "gsm_init: tty allocation failed.\n");
+		status = -ENOMEM;
+		goto err_unreg_ldisc;
+	}
+	gsm_tty_driver->owner	= THIS_MODULE;
+	gsm_tty_driver->driver_name	= "gsmtty";
+	gsm_tty_driver->name		= "gsmtty";
+	gsm_tty_driver->major		= 0;	/* Dynamic */
+	gsm_tty_driver->minor_start	= 0;
+	gsm_tty_driver->type		= TTY_DRIVER_TYPE_SERIAL;
+	gsm_tty_driver->subtype	= SERIAL_TYPE_NORMAL;
+	gsm_tty_driver->flags	= TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV
+							| TTY_DRIVER_HARDWARE_BREAK;
+	gsm_tty_driver->init_termios	= tty_std_termios;
+	/* Fixme */
+	gsm_tty_driver->init_termios.c_lflag &= ~ECHO;
+	tty_set_operations(gsm_tty_driver, &gsmtty_ops);
+
+	status = tty_register_driver(gsm_tty_driver);
+	if (status) {
+		printk(KERN_ERR "gsm_init: tty registration failed.\n");
+		goto err_put_driver;
+	}
+	printk(KERN_INFO "gsm_init: loaded as %d,%d.\n", gsm_tty_driver->major, gsm_tty_driver->minor_start);
+	return 0;
+
+err_put_driver:
+	put_tty_driver(gsm_tty_driver);
+err_unreg_ldisc:
+	tty_unregister_ldisc(N_GSM0710);
+	return status;
+}
+
+/*
+ *	Module exit: drop the line discipline (logging if that fails), then
+ *	unregister and release the tty driver.
+ */
+static void __exit gsm_exit(void)
+{
+	int status;
+
+	status = tty_unregister_ldisc(N_GSM0710);
+	if (status)
+		printk(KERN_ERR "n_gsm: can't unregister line discipline (err = %d)\n", status);
+
+	tty_unregister_driver(gsm_tty_driver);
+	put_tty_driver(gsm_tty_driver);
+
+	printk(KERN_INFO "gsm_init: unloaded.\n");
+}
+
+module_init(gsm_init);
+module_exit(gsm_exit);
+
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS_LDISC(N_GSM0710);
diff --git a/include/linux/gsmmux.h b/include/linux/gsmmux.h
new file mode 100644
index 000000000000..378de4195caf
--- /dev/null
+++ b/include/linux/gsmmux.h
@@ -0,0 +1,25 @@
+#ifndef _LINUX_GSMMUX_H
+#define _LINUX_GSMMUX_H
+
+/*
+ * Mux configuration exchanged with the n_gsm line discipline through
+ * GSMIOC_GETCONF and GSMIOC_SETCONF.
+ */
+struct gsm_config
+{
+	unsigned int adaption;		/* Adaption setting of the mux */
+	unsigned int encapsulation;	/* Encoding used by the mux */
+	unsigned int initiator;		/* 0 or 1; larger values are rejected */
+	unsigned int t1;		/* T1 timer; 0 on set keeps current */
+	unsigned int t2;		/* T2 timer; 0 on set keeps current */
+	unsigned int t3;		/* Not supported, read back as 0 */
+	unsigned int n2;		/* N2 value as reported by GETCONF */
+	unsigned int mru;
+	unsigned int mtu;
+	unsigned int k;			/* Read back as 0 */
+	unsigned int i;			/* Frame type: 1 = UIH, 2 = UI */
+	unsigned int unused[8];		/* Padding for expansion without
+					   breaking stuff */
+};
+
+#define GSMIOC_GETCONF		_IOR('G', 0, struct gsm_config)
+#define GSMIOC_SETCONF		_IOW('G', 1, struct gsm_config)
+
+
+#endif
diff --git a/include/linux/tty.h b/include/linux/tty.h
index 7e605e4b3da3..931078b73226 100644
--- a/include/linux/tty.h
+++ b/include/linux/tty.h
@@ -48,6 +48,7 @@
 #define N_PPS		18	/* Pulse per Second */
 #define N_V253		19	/* Codec control over voice modem */
 #define N_CAIF		20      /* CAIF protocol for talking to modems */
+#define N_GSM0710	21	/* GSM 0710 Mux */
 
 /*
  * This character is the same as _POSIX_VDISABLE: it cannot be used as
-- 
cgit v1.2.3


From 5bcd601049c6b2ad52733d4cd2794bfbaf1b9314 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 5 May 2010 10:35:22 +0200
Subject: serial: Add driver for the Altera JTAG UART

Add an UART driver for the JTAG UART component available as a SOPC
(System on Programmable Chip) component for Altera FPGAs.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/Kconfig           |  21 ++
 drivers/serial/Makefile          |   1 +
 drivers/serial/altera_jtaguart.c | 504 +++++++++++++++++++++++++++++++++++++++
 include/linux/altera_jtaguart.h  |  16 ++
 include/linux/serial_core.h      |   3 +
 5 files changed, 545 insertions(+)
 create mode 100644 drivers/serial/altera_jtaguart.c
 create mode 100644 include/linux/altera_jtaguart.h

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index 577d2762a4f4..f4050e678460 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1523,4 +1523,25 @@ config SERIAL_GRLIB_GAISLER_APBUART_CONSOLE
 	help
 	Support for running a console on the GRLIB APBUART
 
+config SERIAL_ALTERA_JTAGUART
+	tristate "Altera JTAG UART support"
+	select SERIAL_CORE
+	help
+	  This driver supports the Altera JTAG UART port.
+
+config SERIAL_ALTERA_JTAGUART_CONSOLE
+	bool "Altera JTAG UART console support"
+	depends on SERIAL_ALTERA_JTAGUART=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  Enable an Altera JTAG UART port to be the system console.
+
+config SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS
+	bool "Bypass output when no connection"
+	depends on SERIAL_ALTERA_JTAGUART_CONSOLE
+	select SERIAL_CORE_CONSOLE
+	help
+	  Bypass console output and keep going even if there is no
+	  JTAG terminal connection with the host.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 328f107346c4..7f9d225d0430 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -82,3 +82,4 @@ obj-$(CONFIG_KGDB_SERIAL_CONSOLE) += kgdboc.o
 obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
 obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
 obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
+obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o
diff --git a/drivers/serial/altera_jtaguart.c b/drivers/serial/altera_jtaguart.c
new file mode 100644
index 000000000000..f9b49b5ff5e1
--- /dev/null
+++ b/drivers/serial/altera_jtaguart.c
@@ -0,0 +1,504 @@
+/*
+ * altera_jtaguart.c -- Altera JTAG UART driver
+ *
+ * Based on mcf.c -- Freescale ColdFire UART driver
+ *
+ * (C) Copyright 2003-2007, Greg Ungerer <gerg@snapgear.com>
+ * (C) Copyright 2008, Thomas Chou <thomas@wytron.com.tw>
+ * (C) Copyright 2010, Tobias Klauser <tklauser@distanz.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/altera_jtaguart.h>
+
+#define DRV_NAME "altera_jtaguart"
+
+/*
+ * Altera JTAG UART register definitions according to the Altera JTAG UART
+ * datasheet: http://www.altera.com/literature/hb/nios2/n2cpu_nii51009.pdf
+ */
+
+#define ALTERA_JTAGUART_SIZE			8
+
+#define ALTERA_JTAGUART_DATA_REG		0
+
+#define ALTERA_JTAGUART_DATA_DATA_MSK		0x000000FF
+#define ALTERA_JTAGUART_DATA_RVALID_MSK		0x00008000
+#define ALTERA_JTAGUART_DATA_RAVAIL_MSK		0xFFFF0000
+#define ALTERA_JTAGUART_DATA_RAVAIL_OFF		16
+
+#define ALTERA_JTAGUART_CONTROL_REG		4
+
+#define ALTERA_JTAGUART_CONTROL_RE_MSK		0x00000001
+#define ALTERA_JTAGUART_CONTROL_WE_MSK		0x00000002
+#define ALTERA_JTAGUART_CONTROL_RI_MSK		0x00000100
+#define ALTERA_JTAGUART_CONTROL_RI_OFF		8
+#define ALTERA_JTAGUART_CONTROL_WI_MSK		0x00000200
+#define ALTERA_JTAGUART_CONTROL_AC_MSK		0x00000400
+#define ALTERA_JTAGUART_CONTROL_WSPACE_MSK	0xFFFF0000
+#define ALTERA_JTAGUART_CONTROL_WSPACE_OFF	16
+
+/*
+ * Local per-uart structure. Wraps the serial core port so that
+ * container_of() can get from a struct uart_port back to the driver state.
+ */
+struct altera_jtaguart {
+	struct uart_port port;
+	unsigned int sigs;	/* Local copy of line sigs */
+	unsigned long imr;	/* Local IMR mirror, written to the control reg */
+};
+
+/*
+ * tx_empty hook: reports TIOCSER_TEMT whenever the WSPACE field of the
+ * control register is non-zero, 0 otherwise.
+ */
+static unsigned int altera_jtaguart_tx_empty(struct uart_port *port)
+{
+	unsigned int ctrl = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG);
+
+	if (ctrl & ALTERA_JTAGUART_CONTROL_WSPACE_MSK)
+		return TIOCSER_TEMT;
+	return 0;
+}
+
+/* get_mctrl hook: always reports carrier, DSR and CTS as asserted */
+static unsigned int altera_jtaguart_get_mctrl(struct uart_port *port)
+{
+	return TIOCM_CAR | TIOCM_DSR | TIOCM_CTS;
+}
+
+/* set_mctrl hook: intentionally empty, the requested signals are ignored */
+static void altera_jtaguart_set_mctrl(struct uart_port *port, unsigned int sigs)
+{
+}
+
+/* start_tx hook: set the WE bit in the IMR mirror and write it out */
+static void altera_jtaguart_start_tx(struct uart_port *port)
+{
+	struct altera_jtaguart *jtag;
+
+	jtag = container_of(port, struct altera_jtaguart, port);
+	jtag->imr = jtag->imr | ALTERA_JTAGUART_CONTROL_WE_MSK;
+	writel(jtag->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+}
+
+/* stop_tx hook: clear the WE bit in the IMR mirror and write it out */
+static void altera_jtaguart_stop_tx(struct uart_port *port)
+{
+	struct altera_jtaguart *jtag;
+
+	jtag = container_of(port, struct altera_jtaguart, port);
+	jtag->imr = jtag->imr & ~ALTERA_JTAGUART_CONTROL_WE_MSK;
+	writel(jtag->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+}
+
+/* stop_rx hook: clear the RE bit in the IMR mirror and write it out */
+static void altera_jtaguart_stop_rx(struct uart_port *port)
+{
+	struct altera_jtaguart *jtag;
+
+	jtag = container_of(port, struct altera_jtaguart, port);
+	jtag->imr = jtag->imr & ~ALTERA_JTAGUART_CONTROL_RE_MSK;
+	writel(jtag->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+}
+
+/* break_ctl hook: intentionally empty, break requests are ignored */
+static void altera_jtaguart_break_ctl(struct uart_port *port, int break_state)
+{
+}
+
+/* enable_ms hook: intentionally empty */
+static void altera_jtaguart_enable_ms(struct uart_port *port)
+{
+}
+
+/*
+ * set_termios hook: nothing is programmed into the device. When an old
+ * termios is supplied its hardware settings are copied back over the
+ * requested ones.
+ */
+static void altera_jtaguart_set_termios(struct uart_port *port,
+					struct ktermios *termios,
+					struct ktermios *old)
+{
+	if (!old)
+		return;
+
+	tty_termios_copy_hw(termios, old);
+}
+
+/*
+ * Receive path: keep reading the data register for as long as RVALID is
+ * set, hand every character that is not consumed as a sysrq character to
+ * the tty layer, then push the flip buffer.
+ */
+static void altera_jtaguart_rx_chars(struct altera_jtaguart *pp)
+{
+	struct uart_port *port = &pp->port;
+	unsigned long data;
+	unsigned char c;
+
+	for (;;) {
+		data = readl(port->membase + ALTERA_JTAGUART_DATA_REG);
+		if (!(data & ALTERA_JTAGUART_DATA_RVALID_MSK))
+			break;
+
+		c = data & ALTERA_JTAGUART_DATA_DATA_MSK;
+		port->icount.rx++;
+
+		if (!uart_handle_sysrq_char(port, c))
+			uart_insert_char(port, 0, 0, c, TTY_NORMAL);
+	}
+
+	tty_flip_buffer_push(port->state->port.tty);
+}
+
+/*
+ * Transmit path. A pending x_char is sent on its own. Otherwise as many
+ * bytes from the circular transmit buffer are written as the WSPACE field
+ * of the control register allows, writers are woken once fewer than
+ * WAKEUP_CHARS remain, and the WE bit is cleared when nothing is left.
+ */
+static void altera_jtaguart_tx_chars(struct altera_jtaguart *pp)
+{
+	struct uart_port *port = &pp->port;
+	struct circ_buf *xmit = &port->state->xmit;
+	unsigned int pending, room, i;
+
+	if (port->x_char) {
+		/* Send special char - probably flow control */
+		writel(port->x_char, port->membase + ALTERA_JTAGUART_DATA_REG);
+		port->x_char = 0;
+		port->icount.tx++;
+		return;
+	}
+
+	pending = uart_circ_chars_pending(xmit);
+	if (pending > 0) {
+		room = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG);
+		room &= ALTERA_JTAGUART_CONTROL_WSPACE_MSK;
+		room >>= ALTERA_JTAGUART_CONTROL_WSPACE_OFF;
+		/* Never send more than is actually queued */
+		if (room > pending)
+			room = pending;
+
+		if (room > 0) {
+			for (i = 0; i < room; i++) {
+				writel(xmit->buf[xmit->tail],
+				       port->membase + ALTERA_JTAGUART_DATA_REG);
+				xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+				port->icount.tx++;
+			}
+			pending -= room;
+			if (pending < WAKEUP_CHARS)
+				uart_write_wakeup(port);
+		}
+	}
+
+	if (pending == 0) {
+		pp->imr &= ~ALTERA_JTAGUART_CONTROL_WE_MSK;
+		writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	}
+}
+
+/*
+ * Interrupt handler. The control register is shifted down by RI_OFF so
+ * that its interrupt bits line up with the enable bits, and is then masked
+ * with the IMR mirror. Receive and transmit work run under the port lock.
+ * Returns IRQ_RETVAL() of the masked bits.
+ */
+static irqreturn_t altera_jtaguart_interrupt(int irq, void *data)
+{
+	struct uart_port *port = data;
+	struct altera_jtaguart *pp =
+	    container_of(port, struct altera_jtaguart, port);
+	unsigned int active;
+
+	active = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) >>
+		 ALTERA_JTAGUART_CONTROL_RI_OFF;
+	active &= pp->imr;
+
+	spin_lock(&port->lock);
+	if (active & ALTERA_JTAGUART_CONTROL_RE_MSK)
+		altera_jtaguart_rx_chars(pp);
+	if (active & ALTERA_JTAGUART_CONTROL_WE_MSK)
+		altera_jtaguart_tx_chars(pp);
+	spin_unlock(&port->lock);
+
+	return IRQ_RETVAL(active);
+}
+
+/* config_port hook: set the port type and write 0 to the control register */
+static void altera_jtaguart_config_port(struct uart_port *port, int flags)
+{
+	port->type = PORT_ALTERA_JTAGUART;
+
+	/* Clear mask, so no surprise interrupts. */
+	writel(0, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+}
+
+/*
+ * startup hook: claim the port interrupt, then enable receive interrupts
+ * under the port lock. Returns 0, or the request_irq() error.
+ */
+static int altera_jtaguart_startup(struct uart_port *port)
+{
+	struct altera_jtaguart *pp =
+	    container_of(port, struct altera_jtaguart, port);
+	unsigned long irqflags;
+	int err;
+
+	err = request_irq(port->irq, altera_jtaguart_interrupt, IRQF_DISABLED,
+			DRV_NAME, port);
+	if (err) {
+		pr_err(DRV_NAME ": unable to attach Altera JTAG UART %d "
+		       "interrupt vector=%d\n", port->line, port->irq);
+		return err;
+	}
+
+	spin_lock_irqsave(&port->lock, irqflags);
+	/* Enable RX interrupts now */
+	pp->imr = ALTERA_JTAGUART_CONTROL_RE_MSK;
+	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, irqflags);
+
+	return 0;
+}
+
+/*
+ * shutdown hook: clear every enable bit under the port lock, then release
+ * the interrupt taken in startup.
+ */
+static void altera_jtaguart_shutdown(struct uart_port *port)
+{
+	struct altera_jtaguart *pp =
+	    container_of(port, struct altera_jtaguart, port);
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&port->lock, irqflags);
+	/* Disable all interrupts now */
+	pp->imr = 0;
+	writel(pp->imr, port->membase + ALTERA_JTAGUART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, irqflags);
+
+	free_irq(port->irq, port);
+}
+
+/* type hook: name of the port, or NULL if it is not one of ours */
+static const char *altera_jtaguart_type(struct uart_port *port)
+{
+	if (port->type != PORT_ALTERA_JTAGUART)
+		return NULL;
+	return "Altera JTAG UART";
+}
+
+/* request_port hook: nothing is claimed here, always succeeds */
+static int altera_jtaguart_request_port(struct uart_port *port)
+{
+	/* UARTs always present */
+	return 0;
+}
+
+/* release_port hook: counterpart of request_port, equally empty */
+static void altera_jtaguart_release_port(struct uart_port *port)
+{
+	/* Nothing to release... */
+}
+
+/*
+ * verify_port hook: accept a port type of PORT_UNKNOWN or
+ * PORT_ALTERA_JTAGUART, reject everything else with -EINVAL.
+ */
+static int altera_jtaguart_verify_port(struct uart_port *port,
+				       struct serial_struct *ser)
+{
+	if (ser->type == PORT_UNKNOWN || ser->type == PORT_ALTERA_JTAGUART)
+		return 0;
+
+	return -EINVAL;
+}
+
+/*
+ *	Define the basic serial functions we support. Every port set up by
+ *	this driver points its ops member at this table.
+ */
+static struct uart_ops altera_jtaguart_ops = {
+	.tx_empty	= altera_jtaguart_tx_empty,
+	.get_mctrl	= altera_jtaguart_get_mctrl,
+	.set_mctrl	= altera_jtaguart_set_mctrl,
+	.start_tx	= altera_jtaguart_start_tx,
+	.stop_tx	= altera_jtaguart_stop_tx,
+	.stop_rx	= altera_jtaguart_stop_rx,
+	.enable_ms	= altera_jtaguart_enable_ms,
+	.break_ctl	= altera_jtaguart_break_ctl,
+	.startup	= altera_jtaguart_startup,
+	.shutdown	= altera_jtaguart_shutdown,
+	.set_termios	= altera_jtaguart_set_termios,
+	.type		= altera_jtaguart_type,
+	.request_port	= altera_jtaguart_request_port,
+	.release_port	= altera_jtaguart_release_port,
+	.config_port	= altera_jtaguart_config_port,
+	.verify_port	= altera_jtaguart_verify_port,
+};
+
+/* Fixed table of per-port state, indexed by port line / console index */
+#define ALTERA_JTAGUART_MAXPORTS 1
+static struct altera_jtaguart altera_jtaguart_ports[ALTERA_JTAGUART_MAXPORTS];
+
+#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE)
+
+/*
+ * Fill in the port table from the platform description so the console can
+ * be used before the platform driver is probed. Entries are taken until
+ * the table is full or an entry with a zero mapbase is found.
+ * Always returns 0.
+ */
+int __init early_altera_jtaguart_setup(struct altera_jtaguart_platform_uart
+				       *platp)
+{
+	struct uart_port *port;
+	int i = 0;
+
+	while (i < ALTERA_JTAGUART_MAXPORTS && platp[i].mapbase) {
+		port = &altera_jtaguart_ports[i].port;
+
+		port->line = i;
+		port->type = PORT_ALTERA_JTAGUART;
+		port->iotype = SERIAL_IO_MEM;
+		port->flags = ASYNC_BOOT_AUTOCONF;
+		port->ops = &altera_jtaguart_ops;
+		port->irq = platp[i].irq;
+		port->mapbase = platp[i].mapbase;
+		port->membase = ioremap(port->mapbase, ALTERA_JTAGUART_SIZE);
+
+		i++;
+	}
+
+	return 0;
+}
+
+#if defined(CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS)
+/*
+ * Console output of one character, bypass variant. Waits for write space
+ * in the FIFO, but gives up and drops the character as soon as the AC bit
+ * of the control register is clear while the FIFO is full. The port lock
+ * is released around every cpu_relax() while polling.
+ */
+static void altera_jtaguart_console_putc(struct console *co, const char c)
+{
+	struct uart_port *port = &(altera_jtaguart_ports + co->index)->port;
+	unsigned long status;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	while (((status = readl(port->membase + ALTERA_JTAGUART_CONTROL_REG)) &
+		ALTERA_JTAGUART_CONTROL_WSPACE_MSK) == 0) {
+		if ((status & ALTERA_JTAGUART_CONTROL_AC_MSK) == 0) {
+			spin_unlock_irqrestore(&port->lock, flags);
+			return;	/* no connection activity */
+		}
+		spin_unlock_irqrestore(&port->lock, flags);
+		cpu_relax();
+		spin_lock_irqsave(&port->lock, flags);
+	}
+	writel(c, port->membase + ALTERA_JTAGUART_DATA_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+#else
+/*
+ * Console output of one character, blocking variant. Polls until the
+ * control register reports write space, then writes the character. The
+ * port lock is released around every cpu_relax() while polling.
+ */
+static void altera_jtaguart_console_putc(struct console *co, const char c)
+{
+	struct uart_port *port = &(altera_jtaguart_ports + co->index)->port;
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	while ((readl(port->membase + ALTERA_JTAGUART_CONTROL_REG) &
+		ALTERA_JTAGUART_CONTROL_WSPACE_MSK) == 0) {
+		spin_unlock_irqrestore(&port->lock, flags);
+		cpu_relax();
+		spin_lock_irqsave(&port->lock, flags);
+	}
+	writel(c, port->membase + ALTERA_JTAGUART_DATA_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+#endif
+
+/*
+ * Console write hook: emit count characters from s one at a time. Every
+ * '\n' is followed by an extra '\r'.
+ */
+static void altera_jtaguart_console_write(struct console *co, const char *s,
+					  unsigned int count)
+{
+	const char *end = s + count;
+
+	while (s != end) {
+		char c = *s++;
+
+		altera_jtaguart_console_putc(co, c);
+		if (c == '\n')
+			altera_jtaguart_console_putc(co, '\r');
+	}
+}
+
+/*
+ * Console setup hook. Returns -EINVAL for an index outside the port table,
+ * -ENODEV if the selected port has no register mapping, 0 otherwise.
+ * The options string is not used.
+ */
+static int __init altera_jtaguart_console_setup(struct console *co,
+						char *options)
+{
+	int idx = co->index;
+
+	if (idx < 0 || idx >= ALTERA_JTAGUART_MAXPORTS)
+		return -EINVAL;
+
+	if (!altera_jtaguart_ports[idx].port.membase)
+		return -ENODEV;
+
+	return 0;
+}
+
+/* Forward declaration: the console below needs the driver's address */
+static struct uart_driver altera_jtaguart_driver;
+
+/* Console description for the "ttyJ" ports */
+static struct console altera_jtaguart_console = {
+	.name	= "ttyJ",
+	.write	= altera_jtaguart_console_write,
+	.device	= uart_console_device,
+	.setup	= altera_jtaguart_console_setup,
+	.flags	= CON_PRINTBUFFER,
+	.index	= -1,
+	.data	= &altera_jtaguart_driver,
+};
+
+/* Console initcall: register the JTAG UART console. Always returns 0. */
+static int __init altera_jtaguart_console_init(void)
+{
+	register_console(&altera_jtaguart_console);
+	return 0;
+}
+
+console_initcall(altera_jtaguart_console_init);
+
+#define	ALTERA_JTAGUART_CONSOLE	(&altera_jtaguart_console)
+
+#else
+
+#define	ALTERA_JTAGUART_CONSOLE	NULL
+
+#endif /* CONFIG_SERIAL_ALTERA_JTAGUART_CONSOLE */
+
+/*
+ * Serial core driver description. ALTERA_JTAGUART_CONSOLE is NULL when
+ * console support is not configured.
+ */
+static struct uart_driver altera_jtaguart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= "altera_jtaguart",
+	.dev_name	= "ttyJ",
+	.major		= ALTERA_JTAGUART_MAJOR,
+	.minor		= ALTERA_JTAGUART_MINOR,
+	.nr		= ALTERA_JTAGUART_MAXPORTS,
+	.cons		= ALTERA_JTAGUART_CONSOLE,
+};
+
+/*
+ * Platform probe. Walks the platform data table until it is exhausted or
+ * an entry with a zero mapbase is found, maps the registers of each entry
+ * and registers the port with the serial core.
+ *
+ * Returns 0 on success, -ENODEV if the device carries no platform data,
+ * -ENOMEM if the registers cannot be mapped, or the error returned by
+ * uart_add_one_port(). On failure the ports registered earlier in the same
+ * call are removed again.
+ */
+static int __devinit altera_jtaguart_probe(struct platform_device *pdev)
+{
+	struct altera_jtaguart_platform_uart *platp = pdev->dev.platform_data;
+	struct uart_port *port;
+	void __iomem *base;
+	void __iomem *prev_base;
+	int i, ret;
+
+	/* Nothing can be set up without the board's port table */
+	if (!platp)
+		return -ENODEV;
+
+	for (i = 0; i < ALTERA_JTAGUART_MAXPORTS && platp[i].mapbase; i++) {
+		port = &altera_jtaguart_ports[i].port;
+
+		/* Map first so a failure leaves the port slot untouched */
+		base = ioremap(platp[i].mapbase, ALTERA_JTAGUART_SIZE);
+		if (!base) {
+			ret = -ENOMEM;
+			goto err_unwind;
+		}
+		prev_base = port->membase;
+
+		port->line = i;
+		port->type = PORT_ALTERA_JTAGUART;
+		port->mapbase = platp[i].mapbase;
+		port->membase = base;
+		port->iotype = SERIAL_IO_MEM;
+		port->irq = platp[i].irq;
+		port->ops = &altera_jtaguart_ops;
+		port->flags = ASYNC_BOOT_AUTOCONF;
+
+		ret = uart_add_one_port(&altera_jtaguart_driver, port);
+		if (ret) {
+			/* Put back whatever mapping the slot had before */
+			port->membase = prev_base;
+			iounmap(base);
+			goto err_unwind;
+		}
+	}
+
+	return 0;
+
+err_unwind:
+	/* Undo the ports registered before the one that failed */
+	while (--i >= 0) {
+		port = &altera_jtaguart_ports[i].port;
+		uart_remove_one_port(&altera_jtaguart_driver, port);
+		iounmap(port->membase);
+		port->membase = NULL;
+	}
+	return ret;
+}
+
+/*
+ * Platform remove. Unregisters every port slot that has been filled in
+ * (non-zero mapbase, the same condition probe uses to decide which entries
+ * to register) and releases its register mapping. Slots that were never
+ * set up are skipped. Always returns 0.
+ */
+static int __devexit altera_jtaguart_remove(struct platform_device *pdev)
+{
+	struct uart_port *port;
+	int i;
+
+	for (i = 0; i < ALTERA_JTAGUART_MAXPORTS; i++) {
+		port = &altera_jtaguart_ports[i].port;
+
+		/* The address of an array slot is never NULL; test its state */
+		if (!port->mapbase)
+			continue;
+
+		uart_remove_one_port(&altera_jtaguart_driver, port);
+		if (port->membase) {
+			iounmap(port->membase);
+			port->membase = NULL;
+		}
+		port->mapbase = 0;
+	}
+
+	return 0;
+}
+
+/* Platform driver glue; the driver name is DRV_NAME */
+static struct platform_driver altera_jtaguart_platform_driver = {
+	.probe	= altera_jtaguart_probe,
+	.remove	= __devexit_p(altera_jtaguart_remove),
+	.driver	= {
+		.name	= DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+};
+
+/*
+ * Module init: register the uart driver, then the platform driver. If the
+ * platform driver cannot be registered the uart driver is unregistered
+ * again. Returns 0 or the error of the step that failed.
+ */
+static int __init altera_jtaguart_init(void)
+{
+	int rc = uart_register_driver(&altera_jtaguart_driver);
+
+	if (rc)
+		return rc;
+
+	rc = platform_driver_register(&altera_jtaguart_platform_driver);
+	if (rc)
+		uart_unregister_driver(&altera_jtaguart_driver);
+
+	return rc;
+}
+
+/* Module exit: unregister in the reverse order of altera_jtaguart_init() */
+static void __exit altera_jtaguart_exit(void)
+{
+	platform_driver_unregister(&altera_jtaguart_platform_driver);
+	uart_unregister_driver(&altera_jtaguart_driver);
+}
+
+module_init(altera_jtaguart_init);
+module_exit(altera_jtaguart_exit);
+
+MODULE_DESCRIPTION("Altera JTAG UART driver");
+MODULE_AUTHOR("Thomas Chou <thomas@wytron.com.tw>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/altera_jtaguart.h b/include/linux/altera_jtaguart.h
new file mode 100644
index 000000000000..953b178a1650
--- /dev/null
+++ b/include/linux/altera_jtaguart.h
@@ -0,0 +1,16 @@
+/*
+ * altera_jtaguart.h -- Altera JTAG UART driver defines.
+ */
+
+#ifndef	__ALTJUART_H
+#define	__ALTJUART_H
+
+#define ALTERA_JTAGUART_MAJOR	204
+#define ALTERA_JTAGUART_MINOR	186
+
+/*
+ * Board description of one JTAG UART. The driver is handed an array of
+ * these and stops at the first entry whose mapbase is 0.
+ */
+struct altera_jtaguart_platform_uart {
+	unsigned long mapbase;	/* Physical address base */
+	unsigned int irq;	/* Interrupt vector */
+};
+
+#endif /* __ALTJUART_H */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index 78dd1e7120a9..ade1536e1a48 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -182,6 +182,9 @@
 /* Aeroflex Gaisler GRLIB APBUART */
 #define PORT_APBUART    90
 
+/* Altera UARTs */
+#define PORT_ALTERA_JTAGUART	91
+
 #ifdef __KERNEL__
 
 #include <linux/compiler.h>
-- 
cgit v1.2.3


From 6b7d8f8b5c43f7bedda750d8a9dab0658da1d2ba Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 5 May 2010 10:35:23 +0200
Subject: serial: Add driver for the Altera UART

Add an UART driver for the UART component available as a SOPC (System on
Programmable Chip) component for Altera FPGAs.

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/serial/Kconfig       |  31 +++
 drivers/serial/Makefile      |   1 +
 drivers/serial/altera_uart.c | 570 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/altera_uart.h  |  14 ++
 include/linux/serial_core.h  |   1 +
 5 files changed, 617 insertions(+)
 create mode 100644 drivers/serial/altera_uart.c
 create mode 100644 include/linux/altera_uart.h

(limited to 'include/linux')

diff --git a/drivers/serial/Kconfig b/drivers/serial/Kconfig
index f4050e678460..8b23165bc5dc 100644
--- a/drivers/serial/Kconfig
+++ b/drivers/serial/Kconfig
@@ -1544,4 +1544,35 @@ config SERIAL_ALTERA_JTAGUART_CONSOLE_BYPASS
 	  Bypass console output and keep going even if there is no
 	  JTAG terminal connection with the host.
 
+config SERIAL_ALTERA_UART
+	tristate "Altera UART support"
+	select SERIAL_CORE
+	help
+	  This driver supports the Altera softcore UART port.
+
+config SERIAL_ALTERA_UART_MAXPORTS
+	int "Maximum number of Altera UART ports"
+	depends on SERIAL_ALTERA_UART
+	default 4
+	help
+	  This setting lets you define the maximum number of the Altera
+	  UART ports. The usual default varies from board to board, and
+	  this setting is a way of catering for that.
+
+config SERIAL_ALTERA_UART_BAUDRATE
+	int "Default baudrate for Altera UART ports"
+	depends on SERIAL_ALTERA_UART
+	default 115200
+	help
+	  This setting lets you define what the default baudrate is for the
+	  Altera UART ports. The usual default varies from board to board,
+	  and this setting is a way of catering for that.
+
+config SERIAL_ALTERA_UART_CONSOLE
+	bool "Altera UART console support"
+	depends on SERIAL_ALTERA_UART=y
+	select SERIAL_CORE_CONSOLE
+	help
+	  Enable an Altera UART port to be the system console.
+
 endmenu
diff --git a/drivers/serial/Makefile b/drivers/serial/Makefile
index 7f9d225d0430..208a85572c32 100644
--- a/drivers/serial/Makefile
+++ b/drivers/serial/Makefile
@@ -83,3 +83,4 @@ obj-$(CONFIG_SERIAL_QE) += ucc_uart.o
 obj-$(CONFIG_SERIAL_TIMBERDALE)	+= timbuart.o
 obj-$(CONFIG_SERIAL_GRLIB_GAISLER_APBUART) += apbuart.o
 obj-$(CONFIG_SERIAL_ALTERA_JTAGUART) += altera_jtaguart.o
+obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
diff --git a/drivers/serial/altera_uart.c b/drivers/serial/altera_uart.c
new file mode 100644
index 000000000000..bcee156d2f2e
--- /dev/null
+++ b/drivers/serial/altera_uart.c
@@ -0,0 +1,570 @@
+/*
+ * altera_uart.c -- Altera UART driver
+ *
+ * Based on mcf.c -- Freescale ColdFire UART driver
+ *
+ * (C) Copyright 2003-2007, Greg Ungerer <gerg@snapgear.com>
+ * (C) Copyright 2008, Thomas Chou <thomas@wytron.com.tw>
+ * (C) Copyright 2010, Tobias Klauser <tklauser@distanz.ch>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/serial.h>
+#include <linux/serial_core.h>
+#include <linux/platform_device.h>
+#include <linux/io.h>
+#include <linux/altera_uart.h>
+
+#define DRV_NAME "altera_uart"
+
+/*
+ * Altera UART register definitions according to the Nios UART datasheet:
+ * http://www.altera.com/literature/ds/ds_nios_uart.pdf
+ */
+
+#define ALTERA_UART_SIZE		32
+
+#define ALTERA_UART_RXDATA_REG		0
+#define ALTERA_UART_TXDATA_REG		4
+#define ALTERA_UART_STATUS_REG		8
+#define ALTERA_UART_CONTROL_REG		12
+#define ALTERA_UART_DIVISOR_REG		16
+#define ALTERA_UART_EOP_REG		20
+
+#define ALTERA_UART_STATUS_PE_MSK	0x0001	/* parity error */
+#define ALTERA_UART_STATUS_FE_MSK	0x0002	/* framing error */
+#define ALTERA_UART_STATUS_BRK_MSK	0x0004	/* break */
+#define ALTERA_UART_STATUS_ROE_MSK	0x0008	/* RX overrun error */
+#define ALTERA_UART_STATUS_TOE_MSK	0x0010	/* TX overrun error */
+#define ALTERA_UART_STATUS_TMT_MSK	0x0020	/* TX shift register state */
+#define ALTERA_UART_STATUS_TRDY_MSK	0x0040	/* TX ready */
+#define ALTERA_UART_STATUS_RRDY_MSK	0x0080	/* RX ready */
+#define ALTERA_UART_STATUS_E_MSK	0x0100	/* exception condition */
+#define ALTERA_UART_STATUS_DCTS_MSK	0x0400	/* CTS logic-level change */
+#define ALTERA_UART_STATUS_CTS_MSK	0x0800	/* CTS logic state */
+#define ALTERA_UART_STATUS_EOP_MSK	0x1000	/* EOP written/read */
+
+						/* Enable interrupt on... */
+#define ALTERA_UART_CONTROL_PE_MSK	0x0001	/* ...parity error */
+#define ALTERA_UART_CONTROL_FE_MSK	0x0002	/* ...framing error */
+#define ALTERA_UART_CONTROL_BRK_MSK	0x0004	/* ...break */
+#define ALTERA_UART_CONTROL_ROE_MSK	0x0008	/* ...RX overrun */
+#define ALTERA_UART_CONTROL_TOE_MSK	0x0010	/* ...TX overrun */
+#define ALTERA_UART_CONTROL_TMT_MSK	0x0020	/* ...TX shift register empty */
+#define ALTERA_UART_CONTROL_TRDY_MSK	0x0040	/* ...TX ready */
+#define ALTERA_UART_CONTROL_RRDY_MSK	0x0080	/* ...RX ready */
+#define ALTERA_UART_CONTROL_E_MSK	0x0100	/* ...exception*/
+
+#define ALTERA_UART_CONTROL_TRBK_MSK	0x0200	/* TX break */
+#define ALTERA_UART_CONTROL_DCTS_MSK	0x0400	/* Interrupt on CTS change */
+#define ALTERA_UART_CONTROL_RTS_MSK	0x0800	/* RTS signal */
+#define ALTERA_UART_CONTROL_EOP_MSK	0x1000	/* Interrupt on EOP */
+
+/*
+ * Local per-uart structure.
+ */
+struct altera_uart {
+	struct uart_port port;
+	unsigned int sigs;	/* Local copy of line sigs */
+	unsigned short imr;	/* Local IMR mirror */
+};
+
+static unsigned int altera_uart_tx_empty(struct uart_port *port)
+{
+	return (readl(port->membase + ALTERA_UART_STATUS_REG) &
+		ALTERA_UART_STATUS_TMT_MSK) ? TIOCSER_TEMT : 0;
+}
+
+static unsigned int altera_uart_get_mctrl(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+	unsigned int sigs;
+
+	spin_lock_irqsave(&port->lock, flags);
+	sigs =
+	    (readl(port->membase + ALTERA_UART_STATUS_REG) &
+	     ALTERA_UART_STATUS_CTS_MSK) ? TIOCM_CTS : 0;
+	sigs |= (pp->sigs & TIOCM_RTS);
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return sigs;
+}
+
+static void altera_uart_set_mctrl(struct uart_port *port, unsigned int sigs)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	pp->sigs = sigs;
+	if (sigs & TIOCM_RTS)
+		pp->imr |= ALTERA_UART_CONTROL_RTS_MSK;
+	else
+		pp->imr &= ~ALTERA_UART_CONTROL_RTS_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_start_tx(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	pp->imr |= ALTERA_UART_CONTROL_TRDY_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_stop_tx(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	pp->imr &= ~ALTERA_UART_CONTROL_TRDY_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_stop_rx(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	pp->imr &= ~ALTERA_UART_CONTROL_RRDY_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_break_ctl(struct uart_port *port, int break_state)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+	if (break_state == -1)
+		pp->imr |= ALTERA_UART_CONTROL_TRBK_MSK;
+	else
+		pp->imr &= ~ALTERA_UART_CONTROL_TRBK_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_enable_ms(struct uart_port *port)
+{
+}
+
+static void altera_uart_set_termios(struct uart_port *port,
+				    struct ktermios *termios,
+				    struct ktermios *old)
+{
+	unsigned long flags;
+	unsigned int baud, baudclk;
+
+	baud = uart_get_baud_rate(port, termios, old, 0, 4000000);
+	baudclk = port->uartclk / baud;
+
+	if (old)
+		tty_termios_copy_hw(termios, old);
+	tty_termios_encode_baud_rate(termios, baud, baud);
+
+	spin_lock_irqsave(&port->lock, flags);
+	writel(baudclk, port->membase + ALTERA_UART_DIVISOR_REG);
+	spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void altera_uart_rx_chars(struct altera_uart *pp)
+{
+	struct uart_port *port = &pp->port;
+	unsigned char ch, flag;
+	unsigned short status;
+
+	while ((status = readl(port->membase + ALTERA_UART_STATUS_REG)) &
+	       ALTERA_UART_STATUS_RRDY_MSK) {
+		ch = readl(port->membase + ALTERA_UART_RXDATA_REG);
+		flag = TTY_NORMAL;
+		port->icount.rx++;
+
+		if (status & ALTERA_UART_STATUS_E_MSK) {
+			writel(status, port->membase + ALTERA_UART_STATUS_REG);
+
+			if (status & ALTERA_UART_STATUS_BRK_MSK) {
+				port->icount.brk++;
+				if (uart_handle_break(port))
+					continue;
+			} else if (status & ALTERA_UART_STATUS_PE_MSK) {
+				port->icount.parity++;
+			} else if (status & ALTERA_UART_STATUS_ROE_MSK) {
+				port->icount.overrun++;
+			} else if (status & ALTERA_UART_STATUS_FE_MSK) {
+				port->icount.frame++;
+			}
+
+			status &= port->read_status_mask;
+
+			if (status & ALTERA_UART_STATUS_BRK_MSK)
+				flag = TTY_BREAK;
+			else if (status & ALTERA_UART_STATUS_PE_MSK)
+				flag = TTY_PARITY;
+			else if (status & ALTERA_UART_STATUS_FE_MSK)
+				flag = TTY_FRAME;
+		}
+
+		if (uart_handle_sysrq_char(port, ch))
+			continue;
+		uart_insert_char(port, status, ALTERA_UART_STATUS_ROE_MSK, ch,
+				 flag);
+	}
+
+	tty_flip_buffer_push(port->state->port.tty);
+}
+
+static void altera_uart_tx_chars(struct altera_uart *pp)
+{
+	struct uart_port *port = &pp->port;
+	struct circ_buf *xmit = &port->state->xmit;
+
+	if (port->x_char) {
+		/* Send special char - probably flow control */
+		writel(port->x_char, port->membase + ALTERA_UART_TXDATA_REG);
+		port->x_char = 0;
+		port->icount.tx++;
+		return;
+	}
+
+	while (readl(port->membase + ALTERA_UART_STATUS_REG) &
+	       ALTERA_UART_STATUS_TRDY_MSK) {
+		if (xmit->head == xmit->tail)
+			break;
+		writel(xmit->buf[xmit->tail],
+		       port->membase + ALTERA_UART_TXDATA_REG);
+		xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
+		port->icount.tx++;
+	}
+
+	if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
+		uart_write_wakeup(port);
+
+	if (xmit->head == xmit->tail) {
+		pp->imr &= ~ALTERA_UART_CONTROL_TRDY_MSK;
+		writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+	}
+}
+
+static irqreturn_t altera_uart_interrupt(int irq, void *data)
+{
+	struct uart_port *port = data;
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned int isr;
+
+	isr = readl(port->membase + ALTERA_UART_STATUS_REG) & pp->imr;
+	if (isr & ALTERA_UART_STATUS_RRDY_MSK)
+		altera_uart_rx_chars(pp);
+	if (isr & ALTERA_UART_STATUS_TRDY_MSK)
+		altera_uart_tx_chars(pp);
+	return IRQ_RETVAL(isr);
+}
+
+static void altera_uart_config_port(struct uart_port *port, int flags)
+{
+	port->type = PORT_ALTERA_UART;
+
+	/* Clear mask, so no surprise interrupts. */
+	writel(0, port->membase + ALTERA_UART_CONTROL_REG);
+	/* Clear status register */
+	writel(0, port->membase + ALTERA_UART_STATUS_REG);
+}
+
+static int altera_uart_startup(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+	int ret;
+
+	ret = request_irq(port->irq, altera_uart_interrupt, IRQF_DISABLED,
+			DRV_NAME, port);
+	if (ret) {
+		pr_err(DRV_NAME ": unable to attach Altera UART %d "
+		       "interrupt vector=%d\n", port->line, port->irq);
+		return ret;
+	}
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* Enable RX interrupts now */
+	pp->imr = ALTERA_UART_CONTROL_RRDY_MSK;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	return 0;
+}
+
+static void altera_uart_shutdown(struct uart_port *port)
+{
+	struct altera_uart *pp = container_of(port, struct altera_uart, port);
+	unsigned long flags;
+
+	spin_lock_irqsave(&port->lock, flags);
+
+	/* Disable all interrupts now */
+	pp->imr = 0;
+	writel(pp->imr, port->membase + ALTERA_UART_CONTROL_REG);
+
+	spin_unlock_irqrestore(&port->lock, flags);
+
+	free_irq(port->irq, port);
+}
+
+static const char *altera_uart_type(struct uart_port *port)
+{
+	return (port->type == PORT_ALTERA_UART) ? "Altera UART" : NULL;
+}
+
+static int altera_uart_request_port(struct uart_port *port)
+{
+	/* UARTs always present */
+	return 0;
+}
+
+static void altera_uart_release_port(struct uart_port *port)
+{
+	/* Nothing to release... */
+}
+
+static int altera_uart_verify_port(struct uart_port *port,
+				   struct serial_struct *ser)
+{
+	if ((ser->type != PORT_UNKNOWN) && (ser->type != PORT_ALTERA_UART))
+		return -EINVAL;
+	return 0;
+}
+
+/*
+ *	Define the basic serial functions we support.
+ */
+static struct uart_ops altera_uart_ops = {
+	.tx_empty	= altera_uart_tx_empty,
+	.get_mctrl	= altera_uart_get_mctrl,
+	.set_mctrl	= altera_uart_set_mctrl,
+	.start_tx	= altera_uart_start_tx,
+	.stop_tx	= altera_uart_stop_tx,
+	.stop_rx	= altera_uart_stop_rx,
+	.enable_ms	= altera_uart_enable_ms,
+	.break_ctl	= altera_uart_break_ctl,
+	.startup	= altera_uart_startup,
+	.shutdown	= altera_uart_shutdown,
+	.set_termios	= altera_uart_set_termios,
+	.type		= altera_uart_type,
+	.request_port	= altera_uart_request_port,
+	.release_port	= altera_uart_release_port,
+	.config_port	= altera_uart_config_port,
+	.verify_port	= altera_uart_verify_port,
+};
+
+static struct altera_uart altera_uart_ports[CONFIG_SERIAL_ALTERA_UART_MAXPORTS];
+
+#if defined(CONFIG_SERIAL_ALTERA_UART_CONSOLE)
+
+int __init early_altera_uart_setup(struct altera_uart_platform_uart *platp)
+{
+	struct uart_port *port;
+	int i;
+
+	for (i = 0; i < CONFIG_SERIAL_ALTERA_UART_MAXPORTS && platp[i].mapbase; i++) {
+		port = &altera_uart_ports[i].port;
+
+		port->line = i;
+		port->type = PORT_ALTERA_UART;
+		port->mapbase = platp[i].mapbase;
+		port->membase = ioremap(port->mapbase, ALTERA_UART_SIZE);
+		port->iotype = SERIAL_IO_MEM;
+		port->irq = platp[i].irq;
+		port->uartclk = platp[i].uartclk;
+		port->flags = ASYNC_BOOT_AUTOCONF;
+		port->ops = &altera_uart_ops;
+	}
+
+	return 0;
+}
+
+static void altera_uart_console_putc(struct console *co, const char c)
+{
+	struct uart_port *port = &(altera_uart_ports + co->index)->port;
+	int i;
+
+	for (i = 0; i < 0x10000; i++) {
+		if (readl(port->membase + ALTERA_UART_STATUS_REG) &
+		    ALTERA_UART_STATUS_TRDY_MSK)
+			break;
+	}
+	writel(c, port->membase + ALTERA_UART_TXDATA_REG);
+	for (i = 0; i < 0x10000; i++) {
+		if (readl(port->membase + ALTERA_UART_STATUS_REG) &
+		    ALTERA_UART_STATUS_TRDY_MSK)
+			break;
+	}
+}
+
+static void altera_uart_console_write(struct console *co, const char *s,
+				      unsigned int count)
+{
+	for (; count; count--, s++) {
+		altera_uart_console_putc(co, *s);
+		if (*s == '\n')
+			altera_uart_console_putc(co, '\r');
+	}
+}
+
+static int __init altera_uart_console_setup(struct console *co, char *options)
+{
+	struct uart_port *port;
+	int baud = CONFIG_SERIAL_ALTERA_UART_BAUDRATE;
+	int bits = 8;
+	int parity = 'n';
+	int flow = 'n';
+
+	if (co->index < 0 || co->index >= CONFIG_SERIAL_ALTERA_UART_MAXPORTS)
+		return -EINVAL;
+	port = &altera_uart_ports[co->index].port;
+	if (port->membase == 0)
+		return -ENODEV;
+
+	if (options)
+		uart_parse_options(options, &baud, &parity, &bits, &flow);
+
+	return uart_set_options(port, co, baud, parity, bits, flow);
+}
+
+static struct uart_driver altera_uart_driver;
+
+static struct console altera_uart_console = {
+	.name	= "ttyS",
+	.write	= altera_uart_console_write,
+	.device	= uart_console_device,
+	.setup	= altera_uart_console_setup,
+	.flags	= CON_PRINTBUFFER,
+	.index	= -1,
+	.data	= &altera_uart_driver,
+};
+
+static int __init altera_uart_console_init(void)
+{
+	register_console(&altera_uart_console);
+	return 0;
+}
+
+console_initcall(altera_uart_console_init);
+
+#define	ALTERA_UART_CONSOLE	(&altera_uart_console)
+
+#else
+
+#define	ALTERA_UART_CONSOLE	NULL
+
+#endif /* CONFIG_SERIAL_ALTERA_UART_CONSOLE */
+
+/*
+ *	Define the altera_uart UART driver structure.
+ */
+static struct uart_driver altera_uart_driver = {
+	.owner		= THIS_MODULE,
+	.driver_name	= DRV_NAME,
+	.dev_name	= "ttyS",
+	.major		= TTY_MAJOR,
+	.minor		= 64,
+	.nr		= CONFIG_SERIAL_ALTERA_UART_MAXPORTS,
+	.cons		= ALTERA_UART_CONSOLE,
+};
+
+static int __devinit altera_uart_probe(struct platform_device *pdev)
+{
+	struct altera_uart_platform_uart *platp = pdev->dev.platform_data;
+	struct uart_port *port;
+	int i;
+
+	for (i = 0; i < CONFIG_SERIAL_ALTERA_UART_MAXPORTS && platp[i].mapbase; i++) {
+		port = &altera_uart_ports[i].port;
+
+		port->line = i;
+		port->type = PORT_ALTERA_UART;
+		port->mapbase = platp[i].mapbase;
+		port->membase = ioremap(port->mapbase, ALTERA_UART_SIZE);
+		port->iotype = SERIAL_IO_MEM;
+		port->irq = platp[i].irq;
+		port->uartclk = platp[i].uartclk;
+		port->ops = &altera_uart_ops;
+		port->flags = ASYNC_BOOT_AUTOCONF;
+
+		uart_add_one_port(&altera_uart_driver, port);
+	}
+
+	return 0;
+}
+
+static int altera_uart_remove(struct platform_device *pdev)
+{
+	struct uart_port *port;
+	int i;
+
+	for (i = 0; i < CONFIG_SERIAL_ALTERA_UART_MAXPORTS; i++) {
+		port = &altera_uart_ports[i].port;
+		if (port)
+			uart_remove_one_port(&altera_uart_driver, port);
+	}
+
+	return 0;
+}
+
+static struct platform_driver altera_uart_platform_driver = {
+	.probe	= altera_uart_probe,
+	.remove	= __devexit_p(altera_uart_remove),
+	.driver	= {
+		.name	= DRV_NAME,
+		.owner	= THIS_MODULE,
+		.pm	= NULL,
+	},
+};
+
+static int __init altera_uart_init(void)
+{
+	int rc;
+
+	rc = uart_register_driver(&altera_uart_driver);
+	if (rc)
+		return rc;
+	rc = platform_driver_register(&altera_uart_platform_driver);
+	if (rc) {
+		uart_unregister_driver(&altera_uart_driver);
+		return rc;
+	}
+	return 0;
+}
+
+static void __exit altera_uart_exit(void)
+{
+	platform_driver_unregister(&altera_uart_platform_driver);
+	uart_unregister_driver(&altera_uart_driver);
+}
+
+module_init(altera_uart_init);
+module_exit(altera_uart_exit);
+
+MODULE_DESCRIPTION("Altera UART driver");
+MODULE_AUTHOR("Thomas Chou <thomas@wytron.com.tw>");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("platform:" DRV_NAME);
diff --git a/include/linux/altera_uart.h b/include/linux/altera_uart.h
new file mode 100644
index 000000000000..8d441064a30d
--- /dev/null
+++ b/include/linux/altera_uart.h
@@ -0,0 +1,14 @@
+/*
+ * altera_uart.h -- Altera UART driver defines.
+ */
+
+#ifndef	__ALTUART_H
+#define	__ALTUART_H
+
+struct altera_uart_platform_uart {
+	unsigned long mapbase;	/* Physical address base */
+	unsigned int irq;	/* Interrupt vector */
+	unsigned int uartclk;	/* UART clock rate */
+};
+
+#endif /* __ALTUART_H */
diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h
index ade1536e1a48..09d0d2d5a08b 100644
--- a/include/linux/serial_core.h
+++ b/include/linux/serial_core.h
@@ -184,6 +184,7 @@
 
 /* Altera UARTs */
 #define PORT_ALTERA_JTAGUART	91
+#define PORT_ALTERA_UART	92
 
 #ifdef __KERNEL__
 
-- 
cgit v1.2.3


From dd336c554d8926c3348a2d5f2a5ef5597f6d1a06 Mon Sep 17 00:00:00 2001
From: David Woodhouse <David.Woodhouse@intel.com>
Date: Sun, 2 May 2010 11:21:21 +0300
Subject: firmware_class: fix memory leak - free allocated pages

fix memory leak introduced by the patch 6e03a201bbe:
firmware: speed up request_firmware()

1. vfree won't release pages that were allocated explicitly and mapped
using vmap. The memory has to be vunmap-ed and the pages need
to be freed explicitly.

2. page array is moved into the 'struct
firmware' so that we can free it from release_firmware()
and not only in fw_dev_release()

The fix doesn't break the firmware load speed.

Cc: Johannes Berg <johannes@sipsolutions.net>
Cc: Ming Lei <tom.leiming@gmail.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Signed-off-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: David Woodhouse <David.Woodhouse@intel.com>
Signed-off-by: Tomas Winkler <tomas.winkler@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/firmware_class.c | 26 ++++++++++++++++++++------
 include/linux/firmware.h      |  1 +
 2 files changed, 21 insertions(+), 6 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index 985da11174e7..4c70b9148b28 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -130,6 +130,17 @@ static ssize_t firmware_loading_show(struct device *dev,
 	return sprintf(buf, "%d\n", loading);
 }
 
+static void firmware_free_data(const struct firmware *fw)
+{
+	int i;
+	vunmap(fw->data);
+	if (fw->pages) {
+		for (i = 0; i < PFN_UP(fw->size); i++)
+			__free_page(fw->pages[i]);
+		kfree(fw->pages);
+	}
+}
+
 /* Some architectures don't have PAGE_KERNEL_RO */
 #ifndef PAGE_KERNEL_RO
 #define PAGE_KERNEL_RO PAGE_KERNEL
@@ -162,21 +173,21 @@ static ssize_t firmware_loading_store(struct device *dev,
 			mutex_unlock(&fw_lock);
 			break;
 		}
-		vfree(fw_priv->fw->data);
-		fw_priv->fw->data = NULL;
+		firmware_free_data(fw_priv->fw);
+		memset(fw_priv->fw, 0, sizeof(struct firmware));
+		/* If the pages are not owned by 'struct firmware' */
 		for (i = 0; i < fw_priv->nr_pages; i++)
 			__free_page(fw_priv->pages[i]);
 		kfree(fw_priv->pages);
 		fw_priv->pages = NULL;
 		fw_priv->page_array_size = 0;
 		fw_priv->nr_pages = 0;
-		fw_priv->fw->size = 0;
 		set_bit(FW_STATUS_LOADING, &fw_priv->status);
 		mutex_unlock(&fw_lock);
 		break;
 	case 0:
 		if (test_bit(FW_STATUS_LOADING, &fw_priv->status)) {
-			vfree(fw_priv->fw->data);
+			vunmap(fw_priv->fw->data);
 			fw_priv->fw->data = vmap(fw_priv->pages,
 						 fw_priv->nr_pages,
 						 0, PAGE_KERNEL_RO);
@@ -184,7 +195,10 @@ static ssize_t firmware_loading_store(struct device *dev,
 				dev_err(dev, "%s: vmap() failed\n", __func__);
 				goto err;
 			}
-			/* Pages will be freed by vfree() */
+			/* Pages are now owned by 'struct firmware' */
+			fw_priv->fw->pages = fw_priv->pages;
+			fw_priv->pages = NULL;
+
 			fw_priv->page_array_size = 0;
 			fw_priv->nr_pages = 0;
 			complete(&fw_priv->completion);
@@ -578,7 +592,7 @@ release_firmware(const struct firmware *fw)
 			if (fw->data == builtin->data)
 				goto free_fw;
 		}
-		vfree(fw->data);
+		firmware_free_data(fw);
 	free_fw:
 		kfree(fw);
 	}
diff --git a/include/linux/firmware.h b/include/linux/firmware.h
index 043811f0d277..53d1e6c4f848 100644
--- a/include/linux/firmware.h
+++ b/include/linux/firmware.h
@@ -12,6 +12,7 @@
 struct firmware {
 	size_t size;
 	const u8 *data;
+	struct page **pages;
 };
 
 struct device;
-- 
cgit v1.2.3


From db1afffab0b5d9f6d31f8f4bea44c9cb3bc59351 Mon Sep 17 00:00:00 2001
From: NeilBrown <neilb@suse.de>
Date: Tue, 16 Mar 2010 15:14:51 +1100
Subject: kref: remove kref_set

Of the three uses of kref_set in the kernel:

 One really should be kref_put as the code is letting go of a
    reference,
 Two really should be kref_init because the kref is being
    initialised.

This suggests that making kref_set available encourages bad code.
So fix the three uses and remove kref_set completely.

Signed-off-by: NeilBrown <neilb@suse.de>
Acked-by: Mimi Zohar <zohar@us.ibm.com>
Acked-by: Serge Hallyn <serue@us.ibm.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kref.h              |  1 -
 kernel/user_namespace.c           |  4 ++--
 lib/kref.c                        | 15 ++-------------
 security/integrity/ima/ima_iint.c |  4 ++--
 4 files changed, 6 insertions(+), 18 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/kref.h b/include/linux/kref.h
index baf4b9e4b194..6cc38fc07ab7 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -21,7 +21,6 @@ struct kref {
 	atomic_t refcount;
 };
 
-void kref_set(struct kref *kref, int num);
 void kref_init(struct kref *kref);
 void kref_get(struct kref *kref);
 int kref_put(struct kref *kref, void (*release) (struct kref *kref));
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 076c7c8215b0..b2d70d38dff4 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -54,8 +54,8 @@ int create_user_ns(struct cred *new)
 #endif
 	/* tgcred will be cleared in our caller bc CLONE_THREAD won't be set */
 
-	/* alloc_uid() incremented the userns refcount.  Just set it to 1 */
-	kref_set(&ns->kref, 1);
+	/* root_user holds a reference to ns, our reference can be dropped */
+	put_user_ns(ns);
 
 	return 0;
 }
diff --git a/lib/kref.c b/lib/kref.c
index 6d19f690380b..d3d227a08a4b 100644
--- a/lib/kref.c
+++ b/lib/kref.c
@@ -15,24 +15,14 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 
-/**
- * kref_set - initialize object and set refcount to requested number.
- * @kref: object in question.
- * @num: initial reference counter
- */
-void kref_set(struct kref *kref, int num)
-{
-	atomic_set(&kref->refcount, num);
-	smp_mb();
-}
-
 /**
  * kref_init - initialize object.
  * @kref: object in question.
  */
 void kref_init(struct kref *kref)
 {
-	kref_set(kref, 1);
+	atomic_set(&kref->refcount, 1);
+	smp_mb();
 }
 
 /**
@@ -72,7 +62,6 @@ int kref_put(struct kref *kref, void (*release)(struct kref *kref))
 	return 0;
 }
 
-EXPORT_SYMBOL(kref_set);
 EXPORT_SYMBOL(kref_init);
 EXPORT_SYMBOL(kref_get);
 EXPORT_SYMBOL(kref_put);
diff --git a/security/integrity/ima/ima_iint.c b/security/integrity/ima/ima_iint.c
index 2dc2d6594145..7625b85c2274 100644
--- a/security/integrity/ima/ima_iint.c
+++ b/security/integrity/ima/ima_iint.c
@@ -94,7 +94,7 @@ void iint_free(struct kref *kref)
 		       iint->opencount);
 		iint->opencount = 0;
 	}
-	kref_set(&iint->refcount, 1);
+	kref_init(&iint->refcount);
 	kmem_cache_free(iint_cache, iint);
 }
 
@@ -133,7 +133,7 @@ static void init_once(void *foo)
 	iint->readcount = 0;
 	iint->writecount = 0;
 	iint->opencount = 0;
-	kref_set(&iint->refcount, 1);
+	kref_init(&iint->refcount);
 }
 
 static int __init ima_iintcache_init(void)
-- 
cgit v1.2.3


From da5e4ef7fdb8f2fb0878dee3bd9d4dd10cea8cf1 Mon Sep 17 00:00:00 2001
From: Peter Korsgaard <jacmet@sunsite.dk>
Date: Tue, 16 Mar 2010 21:55:21 +0100
Subject: devtmpfs: support !CONFIG_TMPFS

Make devtmpfs available on (embedded) configurations without SHMEM/TMPFS,
using ramfs instead.

Saves ~15KB.

Signed-off-by: Peter Korsgaard <jacmet@sunsite.dk>
Acked-by: Kay Sievers <kay.sievers@vrfy.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/Kconfig    | 7 +++++--
 drivers/base/devtmpfs.c | 5 +++++
 fs/ramfs/inode.c        | 2 +-
 include/linux/ramfs.h   | 2 ++
 4 files changed, 13 insertions(+), 3 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
index fd52c48ee762..ef38aff737eb 100644
--- a/drivers/base/Kconfig
+++ b/drivers/base/Kconfig
@@ -18,9 +18,9 @@ config UEVENT_HELPER_PATH
 
 config DEVTMPFS
 	bool "Maintain a devtmpfs filesystem to mount at /dev"
-	depends on HOTPLUG && SHMEM && TMPFS
+	depends on HOTPLUG
 	help
-	  This creates a tmpfs filesystem instance early at bootup.
+	  This creates a tmpfs/ramfs filesystem instance early at bootup.
 	  In this filesystem, the kernel driver core maintains device
 	  nodes with their default names and permissions for all
 	  registered devices with an assigned major/minor number.
@@ -33,6 +33,9 @@ config DEVTMPFS
 	  functional /dev without any further help. It also allows simple
 	  rescue systems, and reliably handles dynamic major/minor numbers.
 
+	  Notice: if CONFIG_TMPFS isn't enabled, the simpler ramfs
+	  file system will be used instead.
+
 config DEVTMPFS_MOUNT
 	bool "Automount devtmpfs at /dev, after the kernel mounted the rootfs"
 	depends on DEVTMPFS
diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c
index 057cf11326bf..af0600143d1c 100644
--- a/drivers/base/devtmpfs.c
+++ b/drivers/base/devtmpfs.c
@@ -20,6 +20,7 @@
 #include <linux/namei.h>
 #include <linux/fs.h>
 #include <linux/shmem_fs.h>
+#include <linux/ramfs.h>
 #include <linux/cred.h>
 #include <linux/sched.h>
 #include <linux/init_task.h>
@@ -45,7 +46,11 @@ __setup("devtmpfs.mount=", mount_param);
 static int dev_get_sb(struct file_system_type *fs_type, int flags,
 		      const char *dev_name, void *data, struct vfsmount *mnt)
 {
+#ifdef CONFIG_TMPFS
 	return get_sb_single(fs_type, flags, data, shmem_fill_super, mnt);
+#else
+	return get_sb_single(fs_type, flags, data, ramfs_fill_super, mnt);
+#endif
 }
 
 static struct file_system_type dev_fs_type = {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index c94853473ca9..f47cd212dee1 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -214,7 +214,7 @@ static int ramfs_parse_options(char *data, struct ramfs_mount_opts *opts)
 	return 0;
 }
 
-static int ramfs_fill_super(struct super_block * sb, void * data, int silent)
+int ramfs_fill_super(struct super_block *sb, void *data, int silent)
 {
 	struct ramfs_fs_info *fsi;
 	struct inode *inode = NULL;
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 4e768dda87b0..8600508c77a6 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -20,4 +20,6 @@ extern const struct file_operations ramfs_file_operations;
 extern const struct vm_operations_struct generic_file_vm_ops;
 extern int __init init_rootfs(void);
 
+int ramfs_fill_super(struct super_block *sb, void *data, int silent);
+
 #endif
-- 
cgit v1.2.3


From 3142788b7967ccfd2f1813ee9e11aeb1e1cf7de2 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Fri, 29 Jan 2010 20:39:02 +0000
Subject: drivers/base: Convert dev->sem to mutex

The semaphore is semantically a mutex. Convert it to a real mutex and
fix up a few places where code was relying on semaphore.h to be included
by device.h, as well as the users of the trylock function, as that value
is now reversed.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c                              | 3 +--
 drivers/net/mlx4/mlx4.h                          | 1 +
 drivers/staging/comedi/drivers/quatech_daqp_cs.c | 1 +
 drivers/usb/core/usb.c                           | 2 +-
 include/linux/device.h                           | 9 ++++-----
 5 files changed, 8 insertions(+), 8 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index e11c8c3e7416..cf507a7d200c 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -20,7 +20,6 @@
 #include <linux/notifier.h>
 #include <linux/genhd.h>
 #include <linux/kallsyms.h>
-#include <linux/semaphore.h>
 #include <linux/mutex.h>
 #include <linux/async.h>
 
@@ -559,7 +558,7 @@ void device_initialize(struct device *dev)
 	dev->kobj.kset = devices_kset;
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
-	init_MUTEX(&dev->sem);
+	mutex_init(&dev->mutex);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
 	device_pm_init(dev);
diff --git a/drivers/net/mlx4/mlx4.h b/drivers/net/mlx4/mlx4.h
index bc72d6e4919b..13343e884999 100644
--- a/drivers/net/mlx4/mlx4.h
+++ b/drivers/net/mlx4/mlx4.h
@@ -40,6 +40,7 @@
 #include <linux/mutex.h>
 #include <linux/radix-tree.h>
 #include <linux/timer.h>
+#include <linux/semaphore.h>
 #include <linux/workqueue.h>
 
 #include <linux/mlx4/device.h>
diff --git a/drivers/staging/comedi/drivers/quatech_daqp_cs.c b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
index 1786db2f3378..8b274b708259 100644
--- a/drivers/staging/comedi/drivers/quatech_daqp_cs.c
+++ b/drivers/staging/comedi/drivers/quatech_daqp_cs.c
@@ -48,6 +48,7 @@ Devices: [Quatech] DAQP-208 (daqp), DAQP-308
 */
 
 #include "../comedidev.h"
+#include <linux/semaphore.h>
 
 #include <pcmcia/cs_types.h>
 #include <pcmcia/cs.h>
diff --git a/drivers/usb/core/usb.c b/drivers/usb/core/usb.c
index 397b678f1c47..5ae14f6c1e7a 100644
--- a/drivers/usb/core/usb.c
+++ b/drivers/usb/core/usb.c
@@ -573,7 +573,7 @@ int usb_lock_device_for_reset(struct usb_device *udev,
 			iface->condition == USB_INTERFACE_UNBOUND))
 		return -EINTR;
 
-	while (usb_trylock_device(udev) != 0) {
+	while (!usb_trylock_device(udev)) {
 
 		/* If we can't acquire the lock after waiting one second,
 		 * we're probably deadlocked */
diff --git a/include/linux/device.h b/include/linux/device.h
index 241b96bcd7ad..6f9619190aaf 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -22,7 +22,6 @@
 #include <linux/types.h>
 #include <linux/module.h>
 #include <linux/pm.h>
-#include <linux/semaphore.h>
 #include <asm/atomic.h>
 #include <asm/device.h>
 
@@ -404,7 +403,7 @@ struct device {
 	const char		*init_name; /* initial name of the device */
 	struct device_type	*type;
 
-	struct semaphore	sem;	/* semaphore to synchronize calls to
+	struct mutex		mutex;	/* mutex to synchronize calls to
 					 * its driver.
 					 */
 
@@ -514,17 +513,17 @@ static inline bool device_async_suspend_enabled(struct device *dev)
 
 static inline void device_lock(struct device *dev)
 {
-	down(&dev->sem);
+	mutex_lock(&dev->mutex);
 }
 
 static inline int device_trylock(struct device *dev)
 {
-	return down_trylock(&dev->sem);
+	return mutex_trylock(&dev->mutex);
 }
 
 static inline void device_unlock(struct device *dev)
 {
-	up(&dev->sem);
+	mutex_unlock(&dev->mutex);
 }
 
 void driver_init(void);
-- 
cgit v1.2.3


From 1704f47b50b5d9e1b825e43e1baaf2c5897baf03 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 19 Mar 2010 01:37:42 +0100
Subject: lockdep: Add novalidate class for dev->mutex conversion

The conversion of device->sem to device->mutex resulted in lockdep
warnings. Create a novalidate class for now until the driver folks
come up with separate classes. That way we have at least the basic
mutex debugging coverage.

Add a checkpatch error so the usage is reserved for device->mutex.

[ tglx: checkpatch and compile fix for LOCKDEP=n ]

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/core.c     |  1 +
 include/linux/lockdep.h |  8 ++++++++
 kernel/lockdep.c        |  5 +++++
 scripts/checkpatch.pl   | 11 +++++++++++
 4 files changed, 25 insertions(+)

(limited to 'include/linux')

diff --git a/drivers/base/core.c b/drivers/base/core.c
index cf507a7d200c..4c5be85016b6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -559,6 +559,7 @@ void device_initialize(struct device *dev)
 	kobject_init(&dev->kobj, &device_ktype);
 	INIT_LIST_HEAD(&dev->dma_pools);
 	mutex_init(&dev->mutex);
+	lockdep_set_novalidate_class(&dev->mutex);
 	spin_lock_init(&dev->devres_lock);
 	INIT_LIST_HEAD(&dev->devres_head);
 	device_pm_init(dev);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index a03977a96d7e..06aed8305bf3 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -44,6 +44,8 @@ struct lock_class_key {
 	struct lockdep_subclass_key	subkeys[MAX_LOCKDEP_SUBCLASSES];
 };
 
+extern struct lock_class_key __lockdep_no_validate__;
+
 #define LOCKSTAT_POINTS		4
 
 /*
@@ -270,6 +272,9 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name,
 #define lockdep_set_subclass(lock, sub)	\
 		lockdep_init_map(&(lock)->dep_map, #lock, \
 				 (lock)->dep_map.key, sub)
+
+#define lockdep_set_novalidate_class(lock) \
+	lockdep_set_class(lock, &__lockdep_no_validate__)
 /*
  * Compare locking classes
  */
@@ -354,6 +359,9 @@ static inline void lockdep_on(void)
 #define lockdep_set_class_and_subclass(lock, key, sub) \
 		do { (void)(key); } while (0)
 #define lockdep_set_subclass(lock, sub)		do { } while (0)
+
+#define lockdep_set_novalidate_class(lock) do { } while (0)
+
 /*
  * We don't define lockdep_match_class() and lockdep_match_key() for !LOCKDEP
  * case since the result is not well defined and the caller should rather
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index ec21304856d1..54286798c37b 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -2711,6 +2711,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
+struct lock_class_key __lockdep_no_validate__;
+
 /*
  * This gets called for every mutex_lock*()/spin_lock*() operation.
  * We maintain the dependency maps and validate the locking attempt:
@@ -2745,6 +2747,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
 		return 0;
 	}
 
+	if (lock->key == &__lockdep_no_validate__)
+		check = 1;
+
 	if (!subclass)
 		class = lock->class_cache;
 	/*
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index a4d74344d805..f2bbea900700 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -2656,6 +2656,7 @@ sub process {
 # check for semaphores used as mutexes
 		if ($line =~ /^.\s*init_MUTEX_LOCKED\s*\(/) {
 			WARN("consider using a completion\n" . $herecurr);
+
 		}
 # recommend strict_strto* over simple_strto*
 		if ($line =~ /\bsimple_(strto.*?)\s*\(/) {
@@ -2740,6 +2741,16 @@ sub process {
 				WARN("use of in_atomic() is incorrect outside core kernel code\n" . $herecurr);
 			}
 		}
+
+# check for lockdep_set_novalidate_class
+		if ($line =~ /^.\s*lockdep_set_novalidate_class\s*\(/ ||
+		    $line =~ /__lockdep_no_validate__\s*\)/ ) {
+			if ($realfile !~ m@^kernel/lockdep@ &&
+			    $realfile !~ m@^include/linux/lockdep@ &&
+			    $realfile !~ m@^drivers/base/core@) {
+				ERROR("lockdep_no_validate class is reserved for device->mutex.\n" . $herecurr);
+			}
+		}
 	}
 
 	# If we have no input at all, then there is nothing to report on
-- 
cgit v1.2.3


From bc451f2058238013e1cdf4acd443c01734d332f0 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 11:31:25 -0700
Subject: kobj: Add basic infrastructure for dealing with namespaces.

Move complete knowledge of namespaces into the kobject layer
so we can use that information when reporting kobjects to
userspace.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/base/class.c    |   9 +++++
 drivers/base/core.c     |  77 +++++++++++++++++++++++++++++-------
 include/linux/device.h  |   3 ++
 include/linux/kobject.h |  26 ++++++++++++
 lib/kobject.c           | 103 ++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 204 insertions(+), 14 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/base/class.c b/drivers/base/class.c
index 9c6a0d6408e7..8e231d05b400 100644
--- a/drivers/base/class.c
+++ b/drivers/base/class.c
@@ -63,6 +63,14 @@ static void class_release(struct kobject *kobj)
 	kfree(cp);
 }
 
+static const struct kobj_ns_type_operations *class_child_ns_type(struct kobject *kobj)
+{
+	struct class_private *cp = to_class(kobj);
+	struct class *class = cp->class;
+
+	return class->ns_type;
+}
+
 static const struct sysfs_ops class_sysfs_ops = {
 	.show	= class_attr_show,
 	.store	= class_attr_store,
@@ -71,6 +79,7 @@ static const struct sysfs_ops class_sysfs_ops = {
 static struct kobj_type class_ktype = {
 	.sysfs_ops	= &class_sysfs_ops,
 	.release	= class_release,
+	.child_ns_type	= class_child_ns_type,
 };
 
 /* Hotplug events for classes go to the class class_subsys */
diff --git a/drivers/base/core.c b/drivers/base/core.c
index 356dd011b8f9..f0699918e2f6 100644
--- a/drivers/base/core.c
+++ b/drivers/base/core.c
@@ -131,9 +131,21 @@ static void device_release(struct kobject *kobj)
 	kfree(p);
 }
 
+static const void *device_namespace(struct kobject *kobj)
+{
+	struct device *dev = to_dev(kobj);
+	const void *ns = NULL;
+
+	if (dev->class && dev->class->ns_type)
+		ns = dev->class->namespace(dev);
+
+	return ns;
+}
+
 static struct kobj_type device_ktype = {
 	.release	= device_release,
 	.sysfs_ops	= &dev_sysfs_ops,
+	.namespace	= device_namespace,
 };
 
 
@@ -595,11 +607,59 @@ static struct kobject *virtual_device_parent(struct device *dev)
 	return virtual_dir;
 }
 
-static struct kobject *get_device_parent(struct device *dev,
-					 struct device *parent)
+struct class_dir {
+	struct kobject kobj;
+	struct class *class;
+};
+
+#define to_class_dir(obj) container_of(obj, struct class_dir, kobj)
+
+static void class_dir_release(struct kobject *kobj)
+{
+	struct class_dir *dir = to_class_dir(kobj);
+	kfree(dir);
+}
+
+static const
+struct kobj_ns_type_operations *class_dir_child_ns_type(struct kobject *kobj)
 {
+	struct class_dir *dir = to_class_dir(kobj);
+	return dir->class->ns_type;
+}
+
+static struct kobj_type class_dir_ktype = {
+	.release	= class_dir_release,
+	.sysfs_ops	= &kobj_sysfs_ops,
+	.child_ns_type	= class_dir_child_ns_type
+};
+
+static struct kobject *
+class_dir_create_and_add(struct class *class, struct kobject *parent_kobj)
+{
+	struct class_dir *dir;
 	int retval;
 
+	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
+	if (!dir)
+		return NULL;
+
+	dir->class = class;
+	kobject_init(&dir->kobj, &class_dir_ktype);
+
+	dir->kobj.kset = &class->p->class_dirs;
+
+	retval = kobject_add(&dir->kobj, parent_kobj, "%s", class->name);
+	if (retval < 0) {
+		kobject_put(&dir->kobj);
+		return NULL;
+	}
+	return &dir->kobj;
+}
+
+
+static struct kobject *get_device_parent(struct device *dev,
+					 struct device *parent)
+{
 	if (dev->class) {
 		static DEFINE_MUTEX(gdp_mutex);
 		struct kobject *kobj = NULL;
@@ -634,18 +694,7 @@ static struct kobject *get_device_parent(struct device *dev,
 		}
 
 		/* or create a new class-directory at the parent device */
-		k = kobject_create();
-		if (!k) {
-			mutex_unlock(&gdp_mutex);
-			return NULL;
-		}
-		k->kset = &dev->class->p->class_dirs;
-		retval = kobject_add(k, parent_kobj, "%s", dev->class->name);
-		if (retval < 0) {
-			mutex_unlock(&gdp_mutex);
-			kobject_put(k);
-			return NULL;
-		}
+		k = class_dir_create_and_add(dev->class, parent_kobj);
 		/* do not emit an uevent for this simple "glue" directory */
 		mutex_unlock(&gdp_mutex);
 		return k;
diff --git a/include/linux/device.h b/include/linux/device.h
index 6f9619190aaf..7bb9f426f3e6 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -202,6 +202,9 @@ struct class {
 	int (*suspend)(struct device *dev, pm_message_t state);
 	int (*resume)(struct device *dev);
 
+	const struct kobj_ns_type_operations *ns_type;
+	const void *(*namespace)(struct device *dev);
+
 	const struct dev_pm_ops *pm;
 
 	struct class_private *p;
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index 3950d3c2850d..d9456f69904f 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -108,6 +108,8 @@ struct kobj_type {
 	void (*release)(struct kobject *kobj);
 	const struct sysfs_ops *sysfs_ops;
 	struct attribute **default_attrs;
+	const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj);
+	const void *(*namespace)(struct kobject *kobj);
 };
 
 struct kobj_uevent_env {
@@ -134,6 +136,30 @@ struct kobj_attribute {
 
 extern const struct sysfs_ops kobj_sysfs_ops;
 
+enum kobj_ns_type {
+	KOBJ_NS_TYPE_NONE = 0,
+	KOBJ_NS_TYPES
+};
+
+struct sock;
+struct kobj_ns_type_operations {
+	enum kobj_ns_type type;
+	const void *(*current_ns)(void);
+	const void *(*netlink_ns)(struct sock *sk);
+	const void *(*initial_ns)(void);
+};
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops);
+int kobj_ns_type_registered(enum kobj_ns_type type);
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent);
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj);
+
+const void *kobj_ns_current(enum kobj_ns_type type);
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk);
+const void *kobj_ns_initial(enum kobj_ns_type type);
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns);
+
+
 /**
  * struct kset - a set of kobjects of a specific type, belonging to a specific subsystem.
  *
diff --git a/lib/kobject.c b/lib/kobject.c
index 8115eb1bbf4d..bbb2bb40ee1f 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -850,6 +850,109 @@ struct kset *kset_create_and_add(const char *name,
 }
 EXPORT_SYMBOL_GPL(kset_create_and_add);
 
+
+static DEFINE_SPINLOCK(kobj_ns_type_lock);
+static const struct kobj_ns_type_operations *kobj_ns_ops_tbl[KOBJ_NS_TYPES];
+
+int kobj_ns_type_register(const struct kobj_ns_type_operations *ops)
+{
+	enum kobj_ns_type type = ops->type;
+	int error;
+
+	spin_lock(&kobj_ns_type_lock);
+
+	error = -EINVAL;
+	if (type >= KOBJ_NS_TYPES)
+		goto out;
+
+	error = -EINVAL;
+	if (type <= KOBJ_NS_TYPE_NONE)
+		goto out;
+
+	error = -EBUSY;
+	if (kobj_ns_ops_tbl[type])
+		goto out;
+
+	error = 0;
+	kobj_ns_ops_tbl[type] = ops;
+
+out:
+	spin_unlock(&kobj_ns_type_lock);
+	return error;
+}
+
+int kobj_ns_type_registered(enum kobj_ns_type type)
+{
+	int registered = 0;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES))
+		registered = kobj_ns_ops_tbl[type] != NULL;
+	spin_unlock(&kobj_ns_type_lock);
+
+	return registered;
+}
+
+const struct kobj_ns_type_operations *kobj_child_ns_ops(struct kobject *parent)
+{
+	const struct kobj_ns_type_operations *ops = NULL;
+
+	if (parent && parent->ktype->child_ns_type)
+		ops = parent->ktype->child_ns_type(parent);
+
+	return ops;
+}
+
+const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj)
+{
+	return kobj_child_ns_ops(kobj->parent);
+}
+
+
+const void *kobj_ns_current(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->current_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_netlink(enum kobj_ns_type type, struct sock *sk)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->netlink_ns(sk);
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+const void *kobj_ns_initial(enum kobj_ns_type type)
+{
+	const void *ns = NULL;
+
+	spin_lock(&kobj_ns_type_lock);
+	if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) &&
+	    kobj_ns_ops_tbl[type])
+		ns = kobj_ns_ops_tbl[type]->initial_ns();
+	spin_unlock(&kobj_ns_type_lock);
+
+	return ns;
+}
+
+void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
+{
+}
+
+
 EXPORT_SYMBOL(kobject_get);
 EXPORT_SYMBOL(kobject_put);
 EXPORT_SYMBOL(kobject_del);
-- 
cgit v1.2.3


From 3ff195b011d7decf501a4d55aeed312731094796 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 11:31:26 -0700
Subject: sysfs: Implement sysfs tagged directory support.

The problem.  When implementing a network namespace I need to be able
to have multiple network devices with the same name.  Currently this
is a problem for /sys/class/net/*, /sys/devices/virtual/net/*, and
potentially a few other directories of the form /sys/ ... /net/*.

What this patch does is to add an additional tag field to the
sysfs dirent structure.  For directories that should show different
contents depending on the context such as /sys/class/net/, and
/sys/devices/virtual/net/ this tag field is used to specify the
context in which those directories should be visible.  Effectively
this is the same as creating multiple distinct directories with
the same name but internally to sysfs the result is nicer.

I am calling the concept of a single directory that looks like multiple
directories all at the same path in the filesystem tagged directories.

For the networking namespace the set of directories whose contents I need
to filter with tags can depend on the presence or absence of hotplug
hardware or which modules are currently loaded.  Which means I need
a simple race free way to setup those directories as tagged.

To achieve a race free design all tagged directories are created
and managed by sysfs itself.

Users of this interface:
- define a type in the sysfs_tag_type enumeration.
- call sysfs_register_ns_types with the type and its operations
- sysfs_exit_ns when an individual tag is no longer valid

- Implement mount_ns() which returns the ns of the calling process
  so we can attach it to a sysfs superblock.
- Implement ktype.namespace() which returns the ns of a sysfs kobject.

Everything else is left up to sysfs and the driver layer.

For the network namespace mount_ns and namespace() are essentially
one line functions, and look to remain that.

Tags are currently represented as const void * pointers as that is
both generic, provides enough information for equality comparisons,
and is trivial to create for current users, as it is just the
existing namespace pointer.

The work needed in sysfs is more extensive.  At each directory
or symlink creation I need to check if the directory it is being
created in is a tagged directory and if so generate the appropriate
tag to place on the sysfs_dirent.  Likewise at each symlink or
directory removal I need to check if the sysfs directory it is
being removed from is a tagged directory and if so figure out
which tag goes along with the name I am deleting.

Currently only directories which hold kobjects, and
symlinks are supported.  There is not enough information
in the current file attribute interfaces to give us anything
to discriminate on which makes it useless, and there are
no potential users which makes it an uninteresting problem
to solve.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 drivers/gpio/gpiolib.c |   2 +-
 drivers/md/bitmap.c    |   4 +-
 drivers/md/md.c        |   6 +--
 fs/sysfs/bin.c         |   2 +-
 fs/sysfs/dir.c         | 112 ++++++++++++++++++++++++++++++++++++++-----------
 fs/sysfs/file.c        |  17 ++++----
 fs/sysfs/group.c       |   6 +--
 fs/sysfs/inode.c       |   4 +-
 fs/sysfs/mount.c       |  33 ++++++++++++++-
 fs/sysfs/symlink.c     |  15 +++++--
 fs/sysfs/sysfs.h       |  20 +++++++--
 include/linux/sysfs.h  |  10 +++++
 lib/kobject.c          |   1 +
 13 files changed, 181 insertions(+), 51 deletions(-)

(limited to 'include/linux')

diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
index eb0c3fe44b29..cae1b8c5b08c 100644
--- a/drivers/gpio/gpiolib.c
+++ b/drivers/gpio/gpiolib.c
@@ -399,7 +399,7 @@ static int gpio_setup_irq(struct gpio_desc *desc, struct device *dev,
 			goto free_id;
 		}
 
-		pdesc->value_sd = sysfs_get_dirent(dev->kobj.sd, "value");
+		pdesc->value_sd = sysfs_get_dirent(dev->kobj.sd, NULL, "value");
 		if (!pdesc->value_sd) {
 			ret = -ENODEV;
 			goto free_id;
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 26ac8aad0b19..f084249295d9 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1678,9 +1678,9 @@ int bitmap_create(mddev_t *mddev)
 
 	bitmap->mddev = mddev;
 
-	bm = sysfs_get_dirent(mddev->kobj.sd, "bitmap");
+	bm = sysfs_get_dirent(mddev->kobj.sd, NULL, "bitmap");
 	if (bm) {
-		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, "can_clear");
+		bitmap->sysfs_can_clear = sysfs_get_dirent(bm, NULL, "can_clear");
 		sysfs_put(bm);
 	} else
 		bitmap->sysfs_can_clear = NULL;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index cefd63daff31..a9fd491796ac 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1766,7 +1766,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
 		kobject_del(&rdev->kobj);
 		goto fail;
 	}
-	rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, "state");
+	rdev->sysfs_state = sysfs_get_dirent(rdev->kobj.sd, NULL, "state");
 
 	list_add_rcu(&rdev->same_set, &mddev->disks);
 	bd_claim_by_disk(rdev->bdev, rdev->bdev->bd_holder, mddev->gendisk);
@@ -4189,7 +4189,7 @@ static int md_alloc(dev_t dev, char *name)
 	mutex_unlock(&disks_mutex);
 	if (!error) {
 		kobject_uevent(&mddev->kobj, KOBJ_ADD);
-		mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, "array_state");
+		mddev->sysfs_state = sysfs_get_dirent(mddev->kobj.sd, NULL, "array_state");
 	}
 	mddev_put(mddev);
 	return error;
@@ -4398,7 +4398,7 @@ static int do_md_run(mddev_t * mddev)
 			printk(KERN_WARNING
 			       "md: cannot register extra attributes for %s\n",
 			       mdname(mddev));
-		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, "sync_action");
+		mddev->sysfs_action = sysfs_get_dirent(mddev->kobj.sd, NULL, "sync_action");
 	} else if (mddev->ro == 2) /* auto-readonly not meaningful */
 		mddev->ro = 0;
 
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index e9d293593e52..806b277453f9 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -501,7 +501,7 @@ int sysfs_create_bin_file(struct kobject *kobj,
 void sysfs_remove_bin_file(struct kobject *kobj,
 			   const struct bin_attribute *attr)
 {
-	sysfs_hash_and_remove(kobj->sd, attr->attr.name);
+	sysfs_hash_and_remove(kobj->sd, NULL, attr->attr.name);
 }
 
 EXPORT_SYMBOL_GPL(sysfs_create_bin_file);
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 590717861c7a..b2b83067ccc8 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -380,9 +380,15 @@ int __sysfs_add_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd)
 {
 	struct sysfs_inode_attrs *ps_iattr;
 
-	if (sysfs_find_dirent(acxt->parent_sd, sd->s_name))
+	if (sysfs_find_dirent(acxt->parent_sd, sd->s_ns, sd->s_name))
 		return -EEXIST;
 
+	if (sysfs_ns_type(acxt->parent_sd) && !sd->s_ns) {
+		WARN(1, KERN_WARNING "sysfs: ns required in '%s' for '%s'\n",
+			acxt->parent_sd->s_name, sd->s_name);
+		return -EINVAL;
+	}
+
 	sd->s_parent = sysfs_get(acxt->parent_sd);
 
 	sysfs_link_sibling(sd);
@@ -533,13 +539,17 @@ void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt)
  *	Pointer to sysfs_dirent if found, NULL if not.
  */
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+				       const void *ns,
 				       const unsigned char *name)
 {
 	struct sysfs_dirent *sd;
 
-	for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling)
+	for (sd = parent_sd->s_dir.children; sd; sd = sd->s_sibling) {
+		if (sd->s_ns != ns)
+			continue;
 		if (!strcmp(sd->s_name, name))
 			return sd;
+	}
 	return NULL;
 }
 
@@ -558,12 +568,13 @@ struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
  *	Pointer to sysfs_dirent if found, NULL if not.
  */
 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const void *ns,
 				      const unsigned char *name)
 {
 	struct sysfs_dirent *sd;
 
 	mutex_lock(&sysfs_mutex);
-	sd = sysfs_find_dirent(parent_sd, name);
+	sd = sysfs_find_dirent(parent_sd, ns, name);
 	sysfs_get(sd);
 	mutex_unlock(&sysfs_mutex);
 
@@ -572,7 +583,8 @@ struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
 EXPORT_SYMBOL_GPL(sysfs_get_dirent);
 
 static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
-		      const char *name, struct sysfs_dirent **p_sd)
+	enum kobj_ns_type type, const void *ns, const char *name,
+	struct sysfs_dirent **p_sd)
 {
 	umode_t mode = S_IFDIR| S_IRWXU | S_IRUGO | S_IXUGO;
 	struct sysfs_addrm_cxt acxt;
@@ -583,6 +595,9 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 	sd = sysfs_new_dirent(name, mode, SYSFS_DIR);
 	if (!sd)
 		return -ENOMEM;
+
+	sd->s_flags |= (type << SYSFS_NS_TYPE_SHIFT);
+	sd->s_ns = ns;
 	sd->s_dir.kobj = kobj;
 
 	/* link in */
@@ -601,7 +616,25 @@ static int create_dir(struct kobject *kobj, struct sysfs_dirent *parent_sd,
 int sysfs_create_subdir(struct kobject *kobj, const char *name,
 			struct sysfs_dirent **p_sd)
 {
-	return create_dir(kobj, kobj->sd, name, p_sd);
+	return create_dir(kobj, kobj->sd,
+			  KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
+}
+
+static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
+{
+	const struct kobj_ns_type_operations *ops;
+	enum kobj_ns_type type;
+
+	ops = kobj_child_ns_ops(kobj);
+	if (!ops)
+		return KOBJ_NS_TYPE_NONE;
+
+	type = ops->type;
+	BUG_ON(type <= KOBJ_NS_TYPE_NONE);
+	BUG_ON(type >= KOBJ_NS_TYPES);
+	BUG_ON(!kobj_ns_type_registered(type));
+
+	return type;
 }
 
 /**
@@ -610,7 +643,9 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
  */
 int sysfs_create_dir(struct kobject * kobj)
 {
+	enum kobj_ns_type type;
 	struct sysfs_dirent *parent_sd, *sd;
+	const void *ns = NULL;
 	int error = 0;
 
 	BUG_ON(!kobj);
@@ -620,7 +655,11 @@ int sysfs_create_dir(struct kobject * kobj)
 	else
 		parent_sd = &sysfs_root;
 
-	error = create_dir(kobj, parent_sd, kobject_name(kobj), &sd);
+	if (sysfs_ns_type(parent_sd))
+		ns = kobj->ktype->namespace(kobj);
+	type = sysfs_read_ns_type(kobj);
+
+	error = create_dir(kobj, parent_sd, type, ns, kobject_name(kobj), &sd);
 	if (!error)
 		kobj->sd = sd;
 	return error;
@@ -630,13 +669,19 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 				struct nameidata *nd)
 {
 	struct dentry *ret = NULL;
-	struct sysfs_dirent *parent_sd = dentry->d_parent->d_fsdata;
+	struct dentry *parent = dentry->d_parent;
+	struct sysfs_dirent *parent_sd = parent->d_fsdata;
 	struct sysfs_dirent *sd;
 	struct inode *inode;
+	enum kobj_ns_type type;
+	const void *ns;
 
 	mutex_lock(&sysfs_mutex);
 
-	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
+	type = sysfs_ns_type(parent_sd);
+	ns = sysfs_info(dir->i_sb)->ns[type];
+
+	sd = sysfs_find_dirent(parent_sd, ns, dentry->d_name.name);
 
 	/* no such entry */
 	if (!sd) {
@@ -735,7 +780,8 @@ void sysfs_remove_dir(struct kobject * kobj)
 }
 
 int sysfs_rename(struct sysfs_dirent *sd,
-	struct sysfs_dirent *new_parent_sd, const char *new_name)
+	struct sysfs_dirent *new_parent_sd, const void *new_ns,
+	const char *new_name)
 {
 	const char *dup_name = NULL;
 	int error;
@@ -743,12 +789,12 @@ int sysfs_rename(struct sysfs_dirent *sd,
 	mutex_lock(&sysfs_mutex);
 
 	error = 0;
-	if ((sd->s_parent == new_parent_sd) &&
+	if ((sd->s_parent == new_parent_sd) && (sd->s_ns == new_ns) &&
 	    (strcmp(sd->s_name, new_name) == 0))
 		goto out;	/* nothing to rename */
 
 	error = -EEXIST;
-	if (sysfs_find_dirent(new_parent_sd, new_name))
+	if (sysfs_find_dirent(new_parent_sd, new_ns, new_name))
 		goto out;
 
 	/* rename sysfs_dirent */
@@ -770,6 +816,7 @@ int sysfs_rename(struct sysfs_dirent *sd,
 		sd->s_parent = new_parent_sd;
 		sysfs_link_sibling(sd);
 	}
+	sd->s_ns = new_ns;
 
 	error = 0;
  out:
@@ -780,19 +827,28 @@ int sysfs_rename(struct sysfs_dirent *sd,
 
 int sysfs_rename_dir(struct kobject *kobj, const char *new_name)
 {
-	return sysfs_rename(kobj->sd, kobj->sd->s_parent, new_name);
+	struct sysfs_dirent *parent_sd = kobj->sd->s_parent;
+	const void *new_ns = NULL;
+
+	if (sysfs_ns_type(parent_sd))
+		new_ns = kobj->ktype->namespace(kobj);
+
+	return sysfs_rename(kobj->sd, parent_sd, new_ns, new_name);
 }
 
 int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 {
 	struct sysfs_dirent *sd = kobj->sd;
 	struct sysfs_dirent *new_parent_sd;
+	const void *new_ns = NULL;
 
 	BUG_ON(!sd->s_parent);
+	if (sysfs_ns_type(sd->s_parent))
+		new_ns = kobj->ktype->namespace(kobj);
 	new_parent_sd = new_parent_kobj && new_parent_kobj->sd ?
 		new_parent_kobj->sd : &sysfs_root;
 
-	return sysfs_rename(sd, new_parent_sd, sd->s_name);
+	return sysfs_rename(sd, new_parent_sd, new_ns, sd->s_name);
 }
 
 /* Relationship between s_mode and the DT_xxx types */
@@ -807,32 +863,35 @@ static int sysfs_dir_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-static struct sysfs_dirent *sysfs_dir_pos(struct sysfs_dirent *parent_sd,
-	ino_t ino, struct sysfs_dirent *pos)
+static struct sysfs_dirent *sysfs_dir_pos(const void *ns,
+	struct sysfs_dirent *parent_sd,	ino_t ino, struct sysfs_dirent *pos)
 {
 	if (pos) {
 		int valid = !(pos->s_flags & SYSFS_FLAG_REMOVED) &&
 			pos->s_parent == parent_sd &&
 			ino == pos->s_ino;
 		sysfs_put(pos);
-		if (valid)
-			return pos;
+		if (!valid)
+			pos = NULL;
 	}
-	pos = NULL;
-	if ((ino > 1) && (ino < INT_MAX)) {
+	if (!pos && (ino > 1) && (ino < INT_MAX)) {
 		pos = parent_sd->s_dir.children;
 		while (pos && (ino > pos->s_ino))
 			pos = pos->s_sibling;
 	}
+	while (pos && pos->s_ns != ns)
+		pos = pos->s_sibling;
 	return pos;
 }
 
-static struct sysfs_dirent *sysfs_dir_next_pos(struct sysfs_dirent *parent_sd,
-	ino_t ino, struct sysfs_dirent *pos)
+static struct sysfs_dirent *sysfs_dir_next_pos(const void *ns,
+	struct sysfs_dirent *parent_sd,	ino_t ino, struct sysfs_dirent *pos)
 {
-	pos = sysfs_dir_pos(parent_sd, ino, pos);
+	pos = sysfs_dir_pos(ns, parent_sd, ino, pos);
 	if (pos)
 		pos = pos->s_sibling;
+	while (pos && pos->s_ns != ns)
+		pos = pos->s_sibling;
 	return pos;
 }
 
@@ -841,8 +900,13 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 	struct dentry *dentry = filp->f_path.dentry;
 	struct sysfs_dirent * parent_sd = dentry->d_fsdata;
 	struct sysfs_dirent *pos = filp->private_data;
+	enum kobj_ns_type type;
+	const void *ns;
 	ino_t ino;
 
+	type = sysfs_ns_type(parent_sd);
+	ns = sysfs_info(dentry->d_sb)->ns[type];
+
 	if (filp->f_pos == 0) {
 		ino = parent_sd->s_ino;
 		if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) == 0)
@@ -857,9 +921,9 @@ static int sysfs_readdir(struct file * filp, void * dirent, filldir_t filldir)
 			filp->f_pos++;
 	}
 	mutex_lock(&sysfs_mutex);
-	for (pos = sysfs_dir_pos(parent_sd, filp->f_pos, pos);
+	for (pos = sysfs_dir_pos(ns, parent_sd, filp->f_pos, pos);
 	     pos;
-	     pos = sysfs_dir_next_pos(parent_sd, filp->f_pos, pos)) {
+	     pos = sysfs_dir_next_pos(ns, parent_sd, filp->f_pos, pos)) {
 		const char * name;
 		unsigned int type;
 		int len, ret;
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index e222b2582746..1beaa739d0a6 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -478,9 +478,12 @@ void sysfs_notify(struct kobject *k, const char *dir, const char *attr)
 	mutex_lock(&sysfs_mutex);
 
 	if (sd && dir)
-		sd = sysfs_find_dirent(sd, dir);
+		/* Only directories are tagged, so no need to pass
+		 * a tag explicitly.
+		 */
+		sd = sysfs_find_dirent(sd, NULL, dir);
 	if (sd && attr)
-		sd = sysfs_find_dirent(sd, attr);
+		sd = sysfs_find_dirent(sd, NULL, attr);
 	if (sd)
 		sysfs_notify_dirent(sd);
 
@@ -569,7 +572,7 @@ int sysfs_add_file_to_group(struct kobject *kobj,
 	int error;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 
@@ -599,7 +602,7 @@ int sysfs_chmod_file(struct kobject *kobj, struct attribute *attr, mode_t mode)
 	mutex_lock(&sysfs_mutex);
 
 	rc = -ENOENT;
-	sd = sysfs_find_dirent(kobj->sd, attr->name);
+	sd = sysfs_find_dirent(kobj->sd, NULL, attr->name);
 	if (!sd)
 		goto out;
 
@@ -624,7 +627,7 @@ EXPORT_SYMBOL_GPL(sysfs_chmod_file);
 
 void sysfs_remove_file(struct kobject * kobj, const struct attribute * attr)
 {
-	sysfs_hash_and_remove(kobj->sd, attr->name);
+	sysfs_hash_and_remove(kobj->sd, NULL, attr->name);
 }
 
 void sysfs_remove_files(struct kobject * kobj, const struct attribute **ptr)
@@ -646,11 +649,11 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 	struct sysfs_dirent *dir_sd;
 
 	if (group)
-		dir_sd = sysfs_get_dirent(kobj->sd, group);
+		dir_sd = sysfs_get_dirent(kobj->sd, NULL, group);
 	else
 		dir_sd = sysfs_get(kobj->sd);
 	if (dir_sd) {
-		sysfs_hash_and_remove(dir_sd, attr->name);
+		sysfs_hash_and_remove(dir_sd, NULL, attr->name);
 		sysfs_put(dir_sd);
 	}
 }
diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c
index fe611949a7f7..23c1e598792a 100644
--- a/fs/sysfs/group.c
+++ b/fs/sysfs/group.c
@@ -23,7 +23,7 @@ static void remove_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 	int i;
 
 	for (i = 0, attr = grp->attrs; *attr; i++, attr++)
-		sysfs_hash_and_remove(dir_sd, (*attr)->name);
+		sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
 }
 
 static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
@@ -39,7 +39,7 @@ static int create_files(struct sysfs_dirent *dir_sd, struct kobject *kobj,
 		 * visibility.  Do this by first removing then
 		 * re-adding (if required) the file */
 		if (update)
-			sysfs_hash_and_remove(dir_sd, (*attr)->name);
+			sysfs_hash_and_remove(dir_sd, NULL, (*attr)->name);
 		if (grp->is_visible) {
 			mode = grp->is_visible(kobj, *attr, i);
 			if (!mode)
@@ -132,7 +132,7 @@ void sysfs_remove_group(struct kobject * kobj,
 	struct sysfs_dirent *sd;
 
 	if (grp->name) {
-		sd = sysfs_get_dirent(dir_sd, grp->name);
+		sd = sysfs_get_dirent(dir_sd, NULL, grp->name);
 		if (!sd) {
 			WARN(!sd, KERN_WARNING "sysfs group %p not found for "
 				"kobject '%s'\n", grp, kobject_name(kobj));
diff --git a/fs/sysfs/inode.c b/fs/sysfs/inode.c
index a4a0a9419711..cf2bad1462ea 100644
--- a/fs/sysfs/inode.c
+++ b/fs/sysfs/inode.c
@@ -324,7 +324,7 @@ void sysfs_delete_inode(struct inode *inode)
 	sysfs_put(sd);
 }
 
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name)
 {
 	struct sysfs_addrm_cxt acxt;
 	struct sysfs_dirent *sd;
@@ -334,7 +334,7 @@ int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name)
 
 	sysfs_addrm_start(&acxt, dir_sd);
 
-	sd = sysfs_find_dirent(dir_sd, name);
+	sd = sysfs_find_dirent(dir_sd, ns, name);
 	if (sd)
 		sysfs_remove_one(&acxt, sd);
 
diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c
index 50e4fb6a7403..1afa32ba242c 100644
--- a/fs/sysfs/mount.c
+++ b/fs/sysfs/mount.c
@@ -35,7 +35,7 @@ static const struct super_operations sysfs_ops = {
 struct sysfs_dirent sysfs_root = {
 	.s_name		= "",
 	.s_count	= ATOMIC_INIT(1),
-	.s_flags	= SYSFS_DIR,
+	.s_flags	= SYSFS_DIR | (KOBJ_NS_TYPE_NONE << SYSFS_NS_TYPE_SHIFT),
 	.s_mode		= S_IFDIR | S_IRWXU | S_IRUGO | S_IXUGO,
 	.s_ino		= 1,
 };
@@ -76,7 +76,13 @@ static int sysfs_test_super(struct super_block *sb, void *data)
 {
 	struct sysfs_super_info *sb_info = sysfs_info(sb);
 	struct sysfs_super_info *info = data;
+	enum kobj_ns_type type;
 	int found = 1;
+
+	for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++) {
+		if (sb_info->ns[type] != info->ns[type])
+			found = 0;
+	}
 	return found;
 }
 
@@ -93,6 +99,7 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
 	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
 {
 	struct sysfs_super_info *info;
+	enum kobj_ns_type type;
 	struct super_block *sb;
 	int error;
 
@@ -100,6 +107,10 @@ static int sysfs_get_sb(struct file_system_type *fs_type,
 	info = kzalloc(sizeof(*info), GFP_KERNEL);
 	if (!info)
 		goto out;
+
+	for (type = KOBJ_NS_TYPE_NONE; type < KOBJ_NS_TYPES; type++)
+		info->ns[type] = kobj_ns_current(type);
+
 	sb = sget(fs_type, sysfs_test_super, sysfs_set_super, info);
 	if (IS_ERR(sb) || sb->s_fs_info != info)
 		kfree(info);
@@ -137,6 +148,26 @@ static struct file_system_type sysfs_fs_type = {
 	.kill_sb	= sysfs_kill_sb,
 };
 
+void sysfs_exit_ns(enum kobj_ns_type type, const void *ns)
+{
+	struct super_block *sb;
+
+	mutex_lock(&sysfs_mutex);
+	spin_lock(&sb_lock);
+	list_for_each_entry(sb, &sysfs_fs_type.fs_supers, s_instances) {
+		struct sysfs_super_info *info = sysfs_info(sb);
+		/* Ignore superblocks that are in the process of unmounting */
+		if (sb->s_count <= S_BIAS)
+			continue;
+		/* Ignore superblocks with the wrong ns */
+		if (info->ns[type] != ns)
+			continue;
+		info->ns[type] = NULL;
+	}
+	spin_unlock(&sb_lock);
+	mutex_unlock(&sysfs_mutex);
+}
+
 int __init sysfs_init(void)
 {
 	int err = -ENOMEM;
diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index 942f239a2132..b6ebdaa00f37 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -58,6 +58,8 @@ static int sysfs_do_create_link(struct kobject *kobj, struct kobject *target,
 	if (!sd)
 		goto out_put;
 
+	if (sysfs_ns_type(parent_sd))
+		sd->s_ns = target->ktype->namespace(target);
 	sd->s_symlink.target_sd = target_sd;
 	target_sd = NULL;	/* reference is now owned by the symlink */
 
@@ -121,7 +123,7 @@ void sysfs_remove_link(struct kobject * kobj, const char * name)
 	else
 		parent_sd = kobj->sd;
 
-	sysfs_hash_and_remove(parent_sd, name);
+	sysfs_hash_and_remove(parent_sd, NULL, name);
 }
 
 /**
@@ -137,6 +139,7 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 			const char *old, const char *new)
 {
 	struct sysfs_dirent *parent_sd, *sd = NULL;
+	const void *old_ns = NULL, *new_ns = NULL;
 	int result;
 
 	if (!kobj)
@@ -144,8 +147,11 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 	else
 		parent_sd = kobj->sd;
 
+	if (targ->sd)
+		old_ns = targ->sd->s_ns;
+
 	result = -ENOENT;
-	sd = sysfs_get_dirent(parent_sd, old);
+	sd = sysfs_get_dirent(parent_sd, old_ns, old);
 	if (!sd)
 		goto out;
 
@@ -155,7 +161,10 @@ int sysfs_rename_link(struct kobject *kobj, struct kobject *targ,
 	if (sd->s_symlink.target_sd->s_dir.kobj != targ)
 		goto out;
 
-	result = sysfs_rename(sd, parent_sd, new);
+	if (sysfs_ns_type(parent_sd))
+		new_ns = targ->ktype->namespace(targ);
+
+	result = sysfs_rename(sd, parent_sd, new_ns, new);
 
 out:
 	sysfs_put(sd);
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 030a39dbb02c..93847d54c2e3 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -58,6 +58,7 @@ struct sysfs_dirent {
 	struct sysfs_dirent	*s_sibling;
 	const char		*s_name;
 
+	const void		*s_ns;
 	union {
 		struct sysfs_elem_dir		s_dir;
 		struct sysfs_elem_symlink	s_symlink;
@@ -81,14 +82,22 @@ struct sysfs_dirent {
 #define SYSFS_COPY_NAME			(SYSFS_DIR | SYSFS_KOBJ_LINK)
 #define SYSFS_ACTIVE_REF		(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
 
-#define SYSFS_FLAG_MASK			~SYSFS_TYPE_MASK
-#define SYSFS_FLAG_REMOVED		0x0200
+#define SYSFS_NS_TYPE_MASK		0xff00
+#define SYSFS_NS_TYPE_SHIFT		8
+
+#define SYSFS_FLAG_MASK			~(SYSFS_NS_TYPE_MASK|SYSFS_TYPE_MASK)
+#define SYSFS_FLAG_REMOVED		0x020000
 
 static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 {
 	return sd->s_flags & SYSFS_TYPE_MASK;
 }
 
+static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
+{
+	return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
+}
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 #define sysfs_dirent_init_lockdep(sd)				\
 do {								\
@@ -115,6 +124,7 @@ struct sysfs_addrm_cxt {
  * mount.c
  */
 struct sysfs_super_info {
+	const void *ns[KOBJ_NS_TYPES];
 };
 #define sysfs_info(SB) ((struct sysfs_super_info *)(SB->s_fs_info))
 extern struct sysfs_dirent sysfs_root;
@@ -140,8 +150,10 @@ void sysfs_remove_one(struct sysfs_addrm_cxt *acxt, struct sysfs_dirent *sd);
 void sysfs_addrm_finish(struct sysfs_addrm_cxt *acxt);
 
 struct sysfs_dirent *sysfs_find_dirent(struct sysfs_dirent *parent_sd,
+				       const void *ns,
 				       const unsigned char *name);
 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const void *ns,
 				      const unsigned char *name);
 struct sysfs_dirent *sysfs_new_dirent(const char *name, umode_t mode, int type);
 
@@ -152,7 +164,7 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
 void sysfs_remove_subdir(struct sysfs_dirent *sd);
 
 int sysfs_rename(struct sysfs_dirent *sd,
-	struct sysfs_dirent *new_parent_sd, const char *new_name);
+	struct sysfs_dirent *new_parent_sd, const void *ns, const char *new_name);
 
 static inline struct sysfs_dirent *__sysfs_get(struct sysfs_dirent *sd)
 {
@@ -182,7 +194,7 @@ int sysfs_setattr(struct dentry *dentry, struct iattr *iattr);
 int sysfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat);
 int sysfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 		size_t size, int flags);
-int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const char *name);
+int sysfs_hash_and_remove(struct sysfs_dirent *dir_sd, const void *ns, const char *name);
 int sysfs_inode_init(void);
 
 /*
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index f0496b3d1811..1885d21b0c80 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -20,6 +20,7 @@
 
 struct kobject;
 struct module;
+enum kobj_ns_type;
 
 /* FIXME
  * The *owner field is no longer used.
@@ -168,10 +169,14 @@ void sysfs_remove_file_from_group(struct kobject *kobj,
 void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr);
 void sysfs_notify_dirent(struct sysfs_dirent *sd);
 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const void *ns,
 				      const unsigned char *name);
 struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
 void sysfs_put(struct sysfs_dirent *sd);
 void sysfs_printk_last_file(void);
+
+void sysfs_exit_ns(enum kobj_ns_type type, const void *tag);
+
 int __must_check sysfs_init(void);
 
 #else /* CONFIG_SYSFS */
@@ -301,6 +306,7 @@ static inline void sysfs_notify_dirent(struct sysfs_dirent *sd)
 }
 static inline
 struct sysfs_dirent *sysfs_get_dirent(struct sysfs_dirent *parent_sd,
+				      const void *ns,
 				      const unsigned char *name)
 {
 	return NULL;
@@ -313,6 +319,10 @@ static inline void sysfs_put(struct sysfs_dirent *sd)
 {
 }
 
+static inline void sysfs_exit_ns(enum kobj_ns_type type, const void *tag)
+{
+}
+
 static inline int __must_check sysfs_init(void)
 {
 	return 0;
diff --git a/lib/kobject.c b/lib/kobject.c
index bbb2bb40ee1f..b2c6d1f56e65 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -950,6 +950,7 @@ const void *kobj_ns_initial(enum kobj_ns_type type)
 
 void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
 {
+	sysfs_exit_ns(type, ns);
 }
 
 
-- 
cgit v1.2.3


From 746edb7ae8a1abdd39be2b28c03aa073183340db Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 30 Mar 2010 11:31:28 -0700
Subject: sysfs: Implement sysfs_delete_link

When removing a symlink sysfs_remove_link does not provide
enough information to figure out which tagged directory the symlink
falls in.  So I need sysfs_delete_link which is passed the target
of the symlink to delete.

sysfs_rename_link is updated to call sysfs_delete_link instead
of sysfs_remove_link as we have all of the information necessary
and the callers are interesting.

Both of these functions now have enough information to find a symlink
in a tagged directory.  The only restriction is that they must be called
before the target kobject is renamed or deleted.  If they are called
later I lose track of which tag the target kobject was marked with
and can no longer find the old symlink to remove it.

This patch was split from an earlier patch.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Benjamin Thery <benjamin.thery@bull.net>
Signed-off-by: Daniel Lezcano <dlezcano@fr.ibm.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/symlink.c    | 20 ++++++++++++++++++++
 include/linux/sysfs.h |  8 ++++++++
 2 files changed, 28 insertions(+)

(limited to 'include/linux')

diff --git a/fs/sysfs/symlink.c b/fs/sysfs/symlink.c
index b6ebdaa00f37..f71246bebfe4 100644
--- a/fs/sysfs/symlink.c
+++ b/fs/sysfs/symlink.c
@@ -108,6 +108,26 @@ int sysfs_create_link_nowarn(struct kobject *kobj, struct kobject *target,
 	return sysfs_do_create_link(kobj, target, name, 0);
 }
 
+/**
+ *	sysfs_delete_link - remove symlink in object's directory.
+ *	@kobj:	object we're acting for.
+ *	@targ:	object we're pointing to.
+ *	@name:	name of the symlink to remove.
+ *
+ *	Unlike sysfs_remove_link sysfs_delete_link has enough information
+ *	to successfully delete symlinks in tagged directories.
+ */
+void sysfs_delete_link(struct kobject *kobj, struct kobject *targ,
+			const char *name)
+{
+	const void *ns = NULL;
+	spin_lock(&sysfs_assoc_lock);
+	if (targ->sd)
+		ns = targ->sd->s_ns;
+	spin_unlock(&sysfs_assoc_lock);
+	sysfs_hash_and_remove(kobj->sd, ns, name);
+}
+
 /**
  *	sysfs_remove_link - remove symlink in object's directory.
  *	@kobj:	object we're acting for.
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 1885d21b0c80..976c4664b216 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -155,6 +155,9 @@ void sysfs_remove_link(struct kobject *kobj, const char *name);
 int sysfs_rename_link(struct kobject *kobj, struct kobject *target,
 			const char *old_name, const char *new_name);
 
+void sysfs_delete_link(struct kobject *dir, struct kobject *targ,
+			const char *name);
+
 int __must_check sysfs_create_group(struct kobject *kobj,
 				    const struct attribute_group *grp);
 int sysfs_update_group(struct kobject *kobj,
@@ -269,6 +272,11 @@ static inline int sysfs_rename_link(struct kobject *k, struct kobject *t,
 	return 0;
 }
 
+static inline void sysfs_delete_link(struct kobject *k, struct kobject *t,
+				     const char *name)
+{
+}
+
 static inline int sysfs_create_group(struct kobject *kobj,
 				     const struct attribute_group *grp)
 {
-- 
cgit v1.2.3


From be867b194a3ae3c680c29521287ae49b4d44d420 Mon Sep 17 00:00:00 2001
From: "Serge E. Hallyn" <serue@us.ibm.com>
Date: Mon, 3 May 2010 16:23:15 -0500
Subject: sysfs: Comment sysfs directory tagging logic

Add some in-line comments to explain the new infrastructure, which
was introduced to support sysfs directory tagging with namespaces.
I think an overall description someplace might be good too, but it
didn't really seem to fit into Documentation/filesystems/sysfs.txt,
which appears more geared toward users, rather than maintainers, of
sysfs.

(Tejun, please let me know if I can make anything clearer or failed
altogether to comment something that should be commented.)

Signed-off-by: Serge E. Hallyn <serue@us.ibm.com>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 fs/sysfs/dir.c          |  8 ++++++++
 fs/sysfs/sysfs.h        | 13 ++++++++++++-
 include/linux/kobject.h | 11 +++++++++++
 include/linux/sysfs.h   |  1 +
 lib/kobject.c           | 11 +++++++++++
 5 files changed, 43 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index a63eb4ba7867..7e54bac8c4b0 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -614,6 +614,14 @@ int sysfs_create_subdir(struct kobject *kobj, const char *name,
 			  KOBJ_NS_TYPE_NONE, NULL, name, p_sd);
 }
 
+/**
+ *	sysfs_read_ns_type: return associated ns_type
+ *	@kobj: the kobject being queried
+ *
+ *	Each kobject can be tagged with exactly one namespace type
+ *	(e.g. network or user).  Return the ns_type associated with
+ *	this object if any
+ */
 static enum kobj_ns_type sysfs_read_ns_type(struct kobject *kobj)
 {
 	const struct kobj_ns_type_operations *ops;
diff --git a/fs/sysfs/sysfs.h b/fs/sysfs/sysfs.h
index 93847d54c2e3..6a13105b5594 100644
--- a/fs/sysfs/sysfs.h
+++ b/fs/sysfs/sysfs.h
@@ -58,7 +58,7 @@ struct sysfs_dirent {
 	struct sysfs_dirent	*s_sibling;
 	const char		*s_name;
 
-	const void		*s_ns;
+	const void		*s_ns; /* namespace tag */
 	union {
 		struct sysfs_elem_dir		s_dir;
 		struct sysfs_elem_symlink	s_symlink;
@@ -82,6 +82,7 @@ struct sysfs_dirent {
 #define SYSFS_COPY_NAME			(SYSFS_DIR | SYSFS_KOBJ_LINK)
 #define SYSFS_ACTIVE_REF		(SYSFS_KOBJ_ATTR | SYSFS_KOBJ_BIN_ATTR)
 
+/* identify any namespace tag on sysfs_dirents */
 #define SYSFS_NS_TYPE_MASK		0xff00
 #define SYSFS_NS_TYPE_SHIFT		8
 
@@ -93,6 +94,10 @@ static inline unsigned int sysfs_type(struct sysfs_dirent *sd)
 	return sd->s_flags & SYSFS_TYPE_MASK;
 }
 
+/*
+ * Return any namespace tags on this dirent.
+ * enum kobj_ns_type is defined in linux/kobject.h
+ */
 static inline enum kobj_ns_type sysfs_ns_type(struct sysfs_dirent *sd)
 {
 	return (sd->s_flags & SYSFS_NS_TYPE_MASK) >> SYSFS_NS_TYPE_SHIFT;
@@ -123,6 +128,12 @@ struct sysfs_addrm_cxt {
 /*
  * mount.c
  */
+
+/*
+ * Each sb is associated with a set of namespace tags (e.g.
+ * the network namespace of the task which mounted this sysfs
+ * instance).
+ */
 struct sysfs_super_info {
 	const void *ns[KOBJ_NS_TYPES];
 };
diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index d9456f69904f..b60d2dfe4e69 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -136,12 +136,23 @@ struct kobj_attribute {
 
 extern const struct sysfs_ops kobj_sysfs_ops;
 
+/*
+ * Namespace types which are used to tag kobjects and sysfs entries.
+ * Network namespace will likely be the first.
+ */
 enum kobj_ns_type {
 	KOBJ_NS_TYPE_NONE = 0,
 	KOBJ_NS_TYPES
 };
 
 struct sock;
+
+/*
+ * Callbacks so sysfs can determine namespaces
+ *   @current_ns: return calling task's namespace
+ *   @netlink_ns: return namespace to which a sock belongs (right?)
+ *   @initial_ns: return the initial namespace (e.g. init_net_ns)
+ */
 struct kobj_ns_type_operations {
 	enum kobj_ns_type type;
 	const void *(*current_ns)(void);
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 976c4664b216..47f1c95b5298 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -178,6 +178,7 @@ struct sysfs_dirent *sysfs_get(struct sysfs_dirent *sd);
 void sysfs_put(struct sysfs_dirent *sd);
 void sysfs_printk_last_file(void);
 
+/* Called to clear a ns tag when it is no longer valid */
 void sysfs_exit_ns(enum kobj_ns_type type, const void *tag);
 
 int __must_check sysfs_init(void);
diff --git a/lib/kobject.c b/lib/kobject.c
index b2c6d1f56e65..f07c57252e82 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -948,6 +948,17 @@ const void *kobj_ns_initial(enum kobj_ns_type type)
 	return ns;
 }
 
+/*
+ * kobj_ns_exit - invalidate a namespace tag
+ *
+ * @type: the namespace type (e.g. KOBJ_NS_TYPE_NET)
+ * @ns: the actual namespace being invalidated
+ *
+ * This is called when a tag is no longer valid.  For instance,
+ * when a network namespace exits, it uses this helper to
+ * make sure no sb's sysfs_info points to the now-invalidated
+ * netns.
+ */
 void kobj_ns_exit(enum kobj_ns_type type, const void *ns)
 {
 	sysfs_exit_ns(type, ns);
-- 
cgit v1.2.3


From 27eabc7cb4b357c68e4ce77d0b014f7a23add752 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Wed, 5 May 2010 14:54:00 -0700
Subject: sysfs: Don't use enums in inline function declaration.

It appears gcc can't cope with using an enum that is only declared in
an inline function declaration, that doesn't even use the variable
that is so declared.

Avoid the silliness and replace the enum with an int, and make gcc
happy.

Signed-off-by: Eric W. Biederman <ebiederm@aristanetworks.com>
Acked-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/sysfs.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 47f1c95b5298..6903e9204032 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -328,7 +328,7 @@ static inline void sysfs_put(struct sysfs_dirent *sd)
 {
 }
 
-static inline void sysfs_exit_ns(enum kobj_ns_type type, const void *tag)
+static inline void sysfs_exit_ns(int type, const void *tag)
 {
 }
 
-- 
cgit v1.2.3


From 2c3c8bea608866d8bd9dcf92657d57fdcac011c5 Mon Sep 17 00:00:00 2001
From: Chris Wright <chrisw@sous-sol.org>
Date: Wed, 12 May 2010 18:28:57 -0700
Subject: sysfs: add struct file* to bin_attr callbacks

This allows bin_attr->read,write,mmap callbacks to check file specific data
(such as inode owner) as part of any privilege validation.

Signed-off-by: Chris Wright <chrisw@sous-sol.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 arch/alpha/kernel/pci-sysfs.c        |  8 ++++---
 arch/mips/txx9/generic/setup.c       |  4 ++--
 arch/powerpc/sysdev/mv64x60_pci.c    |  4 ++--
 arch/s390/kernel/ipl.c               | 14 ++++++------
 drivers/acpi/system.c                |  2 +-
 drivers/base/firmware_class.c        | 11 ++++++----
 drivers/firmware/dcdbas.c            |  4 ++--
 drivers/firmware/dell_rbu.c          | 10 ++++-----
 drivers/firmware/efivars.c           |  4 ++--
 drivers/gpu/drm/drm_sysfs.c          |  5 +++--
 drivers/misc/c2port/core.c           |  4 ++--
 drivers/misc/ds1682.c                |  6 ++++--
 drivers/misc/eeprom/at24.c           |  6 ++++--
 drivers/misc/eeprom/at25.c           |  6 ++++--
 drivers/misc/eeprom/eeprom.c         |  3 ++-
 drivers/misc/eeprom/max6875.c        |  2 +-
 drivers/net/netxen/netxen_nic_main.c | 11 ++++++----
 drivers/net/qlcnic/qlcnic_main.c     | 12 +++++++----
 drivers/pci/hotplug/acpiphp_ibm.c    |  5 +++--
 drivers/pci/pci-sysfs.c              | 42 +++++++++++++++++++++++++-----------
 drivers/pcmcia/cistpl.c              |  4 ++--
 drivers/power/olpc_battery.c         |  2 +-
 drivers/rapidio/rio-sysfs.c          |  6 ++++--
 drivers/rtc/rtc-cmos.c               |  6 ++++--
 drivers/rtc/rtc-ds1305.c             |  6 ++++--
 drivers/rtc/rtc-ds1307.c             |  6 ++++--
 drivers/rtc/rtc-ds1511.c             | 10 +++++----
 drivers/rtc/rtc-ds1553.c             |  4 ++--
 drivers/rtc/rtc-ds1742.c             |  4 ++--
 drivers/rtc/rtc-m48t59.c             |  4 ++--
 drivers/rtc/rtc-stk17ta8.c           |  4 ++--
 drivers/rtc/rtc-tx4939.c             |  4 ++--
 drivers/s390/cio/chp.c               |  5 +++--
 drivers/scsi/3w-sas.c                |  4 ++--
 drivers/scsi/arcmsr/arcmsr_attr.c    |  9 +++++---
 drivers/scsi/ibmvscsi/ibmvfc.c       |  3 ++-
 drivers/scsi/ipr.c                   |  9 +++++---
 drivers/scsi/lpfc/lpfc_attr.c        | 20 ++++++++++++-----
 drivers/scsi/qla2xxx/qla_attr.c      | 32 +++++++++++++--------------
 drivers/staging/udlfb/udlfb.c        |  3 ++-
 drivers/usb/core/sysfs.c             |  3 ++-
 drivers/video/aty/radeon_base.c      |  4 ++--
 drivers/w1/slaves/w1_ds2431.c        |  4 ++--
 drivers/w1/slaves/w1_ds2433.c        |  4 ++--
 drivers/w1/slaves/w1_ds2760.c        |  2 +-
 drivers/w1/w1.c                      |  4 ++--
 drivers/zorro/zorro-sysfs.c          |  2 +-
 fs/sysfs/bin.c                       | 24 ++++++++++-----------
 include/linux/sysfs.h                |  7 +++---
 kernel/ksysfs.c                      |  3 ++-
 kernel/module.c                      |  2 +-
 net/bridge/br_sysfs_br.c             |  2 +-
 52 files changed, 220 insertions(+), 149 deletions(-)

(limited to 'include/linux')

diff --git a/arch/alpha/kernel/pci-sysfs.c b/arch/alpha/kernel/pci-sysfs.c
index d979e7c7bc4b..a5fffc882c72 100644
--- a/arch/alpha/kernel/pci-sysfs.c
+++ b/arch/alpha/kernel/pci-sysfs.c
@@ -53,6 +53,7 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
 
 /**
  * pci_mmap_resource - map a PCI resource into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject for mapping
  * @attr: struct bin_attribute for the file being mapped
  * @vma: struct vm_area_struct passed into the mmap
@@ -60,7 +61,8 @@ static int __pci_mmap_fits(struct pci_dev *pdev, int num,
  *
  * Use the bus mapping routines to map a PCI resource into userspace.
  */
-static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
+static int pci_mmap_resource(struct file *filp, struct kobject *kobj,
+			     struct bin_attribute *attr,
 			     struct vm_area_struct *vma, int sparse)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj,
@@ -89,14 +91,14 @@ static int pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 	return hose_mmap_page_range(pdev->sysdata, vma, mmap_type, sparse);
 }
 
-static int pci_mmap_resource_sparse(struct kobject *kobj,
+static int pci_mmap_resource_sparse(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *attr,
 				    struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 1);
 }
 
-static int pci_mmap_resource_dense(struct kobject *kobj,
+static int pci_mmap_resource_dense(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *attr,
 				   struct vm_area_struct *vma)
 {
diff --git a/arch/mips/txx9/generic/setup.c b/arch/mips/txx9/generic/setup.c
index adc69291f9e2..575d219b8001 100644
--- a/arch/mips/txx9/generic/setup.c
+++ b/arch/mips/txx9/generic/setup.c
@@ -905,7 +905,7 @@ struct txx9_sramc_sysdev {
 	void __iomem *base;
 };
 
-static ssize_t txx9_sram_read(struct kobject *kobj,
+static ssize_t txx9_sram_read(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t pos, size_t size)
 {
@@ -920,7 +920,7 @@ static ssize_t txx9_sram_read(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t txx9_sram_write(struct kobject *kobj,
+static ssize_t txx9_sram_write(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t pos, size_t size)
 {
diff --git a/arch/powerpc/sysdev/mv64x60_pci.c b/arch/powerpc/sysdev/mv64x60_pci.c
index 1456015a22d8..198f288570cc 100644
--- a/arch/powerpc/sysdev/mv64x60_pci.c
+++ b/arch/powerpc/sysdev/mv64x60_pci.c
@@ -24,7 +24,7 @@
 #define MV64X60_VAL_LEN_MAX		11
 #define MV64X60_PCICFG_CPCI_HOTSWAP	0x68
 
-static ssize_t mv64x60_hs_reg_read(struct kobject *kobj,
+static ssize_t mv64x60_hs_reg_read(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *attr, char *buf,
 				   loff_t off, size_t count)
 {
@@ -45,7 +45,7 @@ static ssize_t mv64x60_hs_reg_read(struct kobject *kobj,
 	return sprintf(buf, "0x%08x\n", v);
 }
 
-static ssize_t mv64x60_hs_reg_write(struct kobject *kobj,
+static ssize_t mv64x60_hs_reg_write(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *attr, char *buf,
 				    loff_t off, size_t count)
 {
diff --git a/arch/s390/kernel/ipl.c b/arch/s390/kernel/ipl.c
index 72c8b0d070c8..a689070be287 100644
--- a/arch/s390/kernel/ipl.c
+++ b/arch/s390/kernel/ipl.c
@@ -403,8 +403,9 @@ static ssize_t sys_ipl_device_show(struct kobject *kobj,
 static struct kobj_attribute sys_ipl_device_attr =
 	__ATTR(device, S_IRUGO, sys_ipl_device_show, NULL);
 
-static ssize_t ipl_parameter_read(struct kobject *kobj, struct bin_attribute *attr,
-				  char *buf, loff_t off, size_t count)
+static ssize_t ipl_parameter_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr, char *buf,
+				  loff_t off, size_t count)
 {
 	return memory_read_from_buffer(buf, count, &off, IPL_PARMBLOCK_START,
 					IPL_PARMBLOCK_SIZE);
@@ -419,8 +420,9 @@ static struct bin_attribute ipl_parameter_attr = {
 	.read = &ipl_parameter_read,
 };
 
-static ssize_t ipl_scp_data_read(struct kobject *kobj, struct bin_attribute *attr,
-				 char *buf, loff_t off, size_t count)
+static ssize_t ipl_scp_data_read(struct file *filp, struct kobject *kobj,
+				 struct bin_attribute *attr, char *buf,
+				 loff_t off, size_t count)
 {
 	unsigned int size = IPL_PARMBLOCK_START->ipl_info.fcp.scp_data_len;
 	void *scp_data = &IPL_PARMBLOCK_START->ipl_info.fcp.scp_data;
@@ -694,7 +696,7 @@ static struct kobj_attribute sys_reipl_ccw_vmparm_attr =
 
 /* FCP reipl device attributes */
 
-static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
+static ssize_t reipl_fcp_scpdata_read(struct file *filp, struct kobject *kobj,
 				      struct bin_attribute *attr,
 				      char *buf, loff_t off, size_t count)
 {
@@ -704,7 +706,7 @@ static ssize_t reipl_fcp_scpdata_read(struct kobject *kobj,
 	return memory_read_from_buffer(buf, count, &off, scp_data, size);
 }
 
-static ssize_t reipl_fcp_scpdata_write(struct kobject *kobj,
+static ssize_t reipl_fcp_scpdata_write(struct file *filp, struct kobject *kobj,
 				       struct bin_attribute *attr,
 				       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/acpi/system.c b/drivers/acpi/system.c
index e35525b39f6b..c79e789ed03a 100644
--- a/drivers/acpi/system.c
+++ b/drivers/acpi/system.c
@@ -71,7 +71,7 @@ struct acpi_table_attr {
 	struct list_head node;
 };
 
-static ssize_t acpi_table_show(struct kobject *kobj,
+static ssize_t acpi_table_show(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr, char *buf,
 			       loff_t offset, size_t count)
 {
diff --git a/drivers/base/firmware_class.c b/drivers/base/firmware_class.c
index d98e424675cf..3f093b0dd217 100644
--- a/drivers/base/firmware_class.c
+++ b/drivers/base/firmware_class.c
@@ -278,8 +278,9 @@ static ssize_t firmware_loading_store(struct device *dev,
 static DEVICE_ATTR(loading, 0644, firmware_loading_show, firmware_loading_store);
 
 static ssize_t
-firmware_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
-		   char *buffer, loff_t offset, size_t count)
+firmware_data_read(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr, char *buffer, loff_t offset,
+		   size_t count)
 {
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
@@ -362,6 +363,7 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
 
 /**
  * firmware_data_write - write method for firmware
+ * @filp: open sysfs file
  * @kobj: kobject for the device
  * @bin_attr: bin_attr structure
  * @buffer: buffer being written
@@ -372,8 +374,9 @@ fw_realloc_buffer(struct firmware_priv *fw_priv, int min_size)
  *	the driver as a firmware image.
  **/
 static ssize_t
-firmware_data_write(struct kobject *kobj, struct bin_attribute *bin_attr,
-		    char *buffer, loff_t offset, size_t count)
+firmware_data_write(struct file* filp, struct kobject *kobj,
+		    struct bin_attribute *bin_attr, char *buffer,
+		    loff_t offset, size_t count)
 {
 	struct device *dev = to_dev(kobj);
 	struct firmware_priv *fw_priv = dev_get_drvdata(dev);
diff --git a/drivers/firmware/dcdbas.c b/drivers/firmware/dcdbas.c
index fb09bb3c0ad6..aa9bc9e980e1 100644
--- a/drivers/firmware/dcdbas.c
+++ b/drivers/firmware/dcdbas.c
@@ -149,7 +149,7 @@ static ssize_t smi_data_buf_size_store(struct device *dev,
 	return count;
 }
 
-static ssize_t smi_data_read(struct kobject *kobj,
+static ssize_t smi_data_read(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
@@ -162,7 +162,7 @@ static ssize_t smi_data_read(struct kobject *kobj,
 	return ret;
 }
 
-static ssize_t smi_data_write(struct kobject *kobj,
+static ssize_t smi_data_write(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t pos, size_t count)
 {
diff --git a/drivers/firmware/dell_rbu.c b/drivers/firmware/dell_rbu.c
index 3a4460265b10..2f452f1f7c8a 100644
--- a/drivers/firmware/dell_rbu.c
+++ b/drivers/firmware/dell_rbu.c
@@ -522,7 +522,7 @@ static ssize_t read_rbu_mono_data(char *buffer, loff_t pos, size_t count)
 			rbu_data.image_update_buffer, rbu_data.bios_image_size);
 }
 
-static ssize_t read_rbu_data(struct kobject *kobj,
+static ssize_t read_rbu_data(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buffer, loff_t pos, size_t count)
 {
@@ -576,7 +576,7 @@ static void callbackfn_rbu(const struct firmware *fw, void *context)
 	release_firmware(fw);
 }
 
-static ssize_t read_rbu_image_type(struct kobject *kobj,
+static ssize_t read_rbu_image_type(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t count)
 {
@@ -586,7 +586,7 @@ static ssize_t read_rbu_image_type(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t write_rbu_image_type(struct kobject *kobj,
+static ssize_t write_rbu_image_type(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buffer, loff_t pos, size_t count)
 {
@@ -647,7 +647,7 @@ static ssize_t write_rbu_image_type(struct kobject *kobj,
 	return rc;
 }
 
-static ssize_t read_rbu_packet_size(struct kobject *kobj,
+static ssize_t read_rbu_packet_size(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buffer, loff_t pos, size_t count)
 {
@@ -660,7 +660,7 @@ static ssize_t read_rbu_packet_size(struct kobject *kobj,
 	return size;
 }
 
-static ssize_t write_rbu_packet_size(struct kobject *kobj,
+static ssize_t write_rbu_packet_size(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *buffer, loff_t pos, size_t count)
 {
diff --git a/drivers/firmware/efivars.c b/drivers/firmware/efivars.c
index 81b70bd07586..2a62ec6390e0 100644
--- a/drivers/firmware/efivars.c
+++ b/drivers/firmware/efivars.c
@@ -402,7 +402,7 @@ efivar_unregister(struct efivar_entry *var)
 }
 
 
-static ssize_t efivar_create(struct kobject *kobj,
+static ssize_t efivar_create(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
@@ -461,7 +461,7 @@ static ssize_t efivar_create(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t efivar_delete(struct kobject *kobj,
+static ssize_t efivar_delete(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t pos, size_t count)
 {
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 25bbd30ed7af..387166d5a109 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -193,8 +193,9 @@ static ssize_t enabled_show(struct device *device,
 			"disabled");
 }
 
-static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *attr,
-			 char *buf, loff_t off, size_t count)
+static ssize_t edid_show(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *attr, char *buf, loff_t off,
+			 size_t count)
 {
 	struct device *connector_dev = container_of(kobj, struct device, kobj);
 	struct drm_connector *connector = to_drm_connector(connector_dev);
diff --git a/drivers/misc/c2port/core.c b/drivers/misc/c2port/core.c
index ed090e77c9cd..19fc7c1cb428 100644
--- a/drivers/misc/c2port/core.c
+++ b/drivers/misc/c2port/core.c
@@ -707,7 +707,7 @@ static ssize_t __c2port_read_flash_data(struct c2port_device *dev,
 	return nread;
 }
 
-static ssize_t c2port_read_flash_data(struct kobject *kobj,
+static ssize_t c2port_read_flash_data(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr,
 				char *buffer, loff_t offset, size_t count)
 {
@@ -824,7 +824,7 @@ static ssize_t __c2port_write_flash_data(struct c2port_device *dev,
 	return nwrite;
 }
 
-static ssize_t c2port_write_flash_data(struct kobject *kobj,
+static ssize_t c2port_write_flash_data(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *attr,
 				char *buffer, loff_t offset, size_t count)
 {
diff --git a/drivers/misc/ds1682.c b/drivers/misc/ds1682.c
index 9197cfc55015..a513f0aa6432 100644
--- a/drivers/misc/ds1682.c
+++ b/drivers/misc/ds1682.c
@@ -140,7 +140,8 @@ static const struct attribute_group ds1682_group = {
 /*
  * User data attribute
  */
-static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t ds1682_eeprom_read(struct file *filp, struct kobject *kobj,
+				  struct bin_attribute *attr,
 				  char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = kobj_to_i2c_client(kobj);
@@ -163,7 +164,8 @@ static ssize_t ds1682_eeprom_read(struct kobject *kobj, struct bin_attribute *at
 	return count;
 }
 
-static ssize_t ds1682_eeprom_write(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t ds1682_eeprom_write(struct file *filp, struct kobject *kobj,
+				   struct bin_attribute *attr,
 				   char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = kobj_to_i2c_client(kobj);
diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c
index db7d0f21b65d..a79a62f75481 100644
--- a/drivers/misc/eeprom/at24.c
+++ b/drivers/misc/eeprom/at24.c
@@ -274,7 +274,8 @@ static ssize_t at24_read(struct at24_data *at24,
 	return retval;
 }
 
-static ssize_t at24_bin_read(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_bin_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct at24_data *at24;
@@ -395,7 +396,8 @@ static ssize_t at24_write(struct at24_data *at24, const char *buf, loff_t off,
 	return retval;
 }
 
-static ssize_t at24_bin_write(struct kobject *kobj, struct bin_attribute *attr,
+static ssize_t at24_bin_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct at24_data *at24;
diff --git a/drivers/misc/eeprom/at25.c b/drivers/misc/eeprom/at25.c
index d194212a41f6..c627e4174ccd 100644
--- a/drivers/misc/eeprom/at25.c
+++ b/drivers/misc/eeprom/at25.c
@@ -126,7 +126,8 @@ at25_ee_read(
 }
 
 static ssize_t
-at25_bin_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+at25_bin_read(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct device		*dev;
@@ -253,7 +254,8 @@ at25_ee_write(struct at25_data *at25, const char *buf, loff_t off,
 }
 
 static ssize_t
-at25_bin_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+at25_bin_write(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *bin_attr,
 	       char *buf, loff_t off, size_t count)
 {
 	struct device		*dev;
diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c
index e306a8cd2f96..45060ddc4e59 100644
--- a/drivers/misc/eeprom/eeprom.c
+++ b/drivers/misc/eeprom/eeprom.c
@@ -81,7 +81,8 @@ exit:
 	mutex_unlock(&data->update_lock);
 }
 
-static ssize_t eeprom_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+static ssize_t eeprom_read(struct file *filp, struct kobject *kobj,
+			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
 	struct i2c_client *client = to_i2c_client(container_of(kobj, struct device, kobj));
diff --git a/drivers/misc/eeprom/max6875.c b/drivers/misc/eeprom/max6875.c
index fe2909278507..5653a3ce0517 100644
--- a/drivers/misc/eeprom/max6875.c
+++ b/drivers/misc/eeprom/max6875.c
@@ -107,7 +107,7 @@ exit_up:
 	mutex_unlock(&data->update_lock);
 }
 
-static ssize_t max6875_read(struct kobject *kobj,
+static ssize_t max6875_read(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr,
 			    char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/net/netxen/netxen_nic_main.c b/drivers/net/netxen/netxen_nic_main.c
index c61a61f177b7..6ce6ce1df6d2 100644
--- a/drivers/net/netxen/netxen_nic_main.c
+++ b/drivers/net/netxen/netxen_nic_main.c
@@ -2560,7 +2560,8 @@ netxen_sysfs_validate_crb(struct netxen_adapter *adapter,
 }
 
 static ssize_t
-netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_read_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2587,7 +2588,8 @@ netxen_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-netxen_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_write_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2627,7 +2629,8 @@ netxen_sysfs_validate_mem(struct netxen_adapter *adapter,
 }
 
 static ssize_t
-netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
+netxen_sysfs_read_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2647,7 +2650,7 @@ netxen_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
 	return size;
 }
 
-static ssize_t netxen_sysfs_write_mem(struct kobject *kobj,
+static ssize_t netxen_sysfs_write_mem(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr, char *buf,
 		loff_t offset, size_t size)
 {
diff --git a/drivers/net/qlcnic/qlcnic_main.c b/drivers/net/qlcnic/qlcnic_main.c
index 1003eb76fda3..23ea9caa5261 100644
--- a/drivers/net/qlcnic/qlcnic_main.c
+++ b/drivers/net/qlcnic/qlcnic_main.c
@@ -2464,7 +2464,8 @@ qlcnic_sysfs_validate_crb(struct qlcnic_adapter *adapter,
 }
 
 static ssize_t
-qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_read_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2488,7 +2489,8 @@ qlcnic_sysfs_read_crb(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-qlcnic_sysfs_write_crb(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_write_crb(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2525,7 +2527,8 @@ qlcnic_sysfs_validate_mem(struct qlcnic_adapter *adapter,
 }
 
 static ssize_t
-qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_read_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -2546,7 +2549,8 @@ qlcnic_sysfs_read_mem(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-qlcnic_sysfs_write_mem(struct kobject *kobj, struct bin_attribute *attr,
+qlcnic_sysfs_write_mem(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t offset, size_t size)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c
index 6ecbfb27db9d..e525263210ee 100644
--- a/drivers/pci/hotplug/acpiphp_ibm.c
+++ b/drivers/pci/hotplug/acpiphp_ibm.c
@@ -108,7 +108,7 @@ static int ibm_set_attention_status(struct hotplug_slot *slot, u8 status);
 static int ibm_get_attention_status(struct hotplug_slot *slot, u8 *status);
 static void ibm_handle_events(acpi_handle handle, u32 event, void *context);
 static int ibm_get_table_from_acpi(char **bufp);
-static ssize_t ibm_read_apci_table(struct kobject *kobj,
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t size);
 static acpi_status __init ibm_find_acpi_device(acpi_handle handle,
@@ -351,6 +351,7 @@ read_table_done:
 
 /**
  * ibm_read_apci_table - callback for the sysfs apci_table file
+ * @filp: the open sysfs file
  * @kobj: the kobject this binary attribute is a part of
  * @bin_attr: struct bin_attribute for this file
  * @buffer: the kernel space buffer to fill
@@ -364,7 +365,7 @@ read_table_done:
  * things get really tricky here...
  * our solution is to only allow reading the table in all at once.
  */
-static ssize_t ibm_read_apci_table(struct kobject *kobj,
+static ssize_t ibm_read_apci_table(struct file *filp, struct kobject *kobj,
 				   struct bin_attribute *bin_attr,
 				   char *buffer, loff_t pos, size_t size)
 {
diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c
index fad93983bfed..ad44557e65c4 100644
--- a/drivers/pci/pci-sysfs.c
+++ b/drivers/pci/pci-sysfs.c
@@ -357,7 +357,8 @@ boot_vga_show(struct device *dev, struct device_attribute *attr, char *buf)
 struct device_attribute vga_attr = __ATTR_RO(boot_vga);
 
 static ssize_t
-pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_config(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
@@ -430,7 +431,8 @@ pci_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_config(struct file* filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev = to_pci_dev(container_of(kobj,struct device,kobj));
@@ -487,7 +489,8 @@ pci_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+read_vpd_attr(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
@@ -502,7 +505,8 @@ read_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
+write_vpd_attr(struct file *filp, struct kobject *kobj,
+	       struct bin_attribute *bin_attr,
 	       char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *dev =
@@ -519,6 +523,7 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
 #ifdef HAVE_PCI_LEGACY
 /**
  * pci_read_legacy_io - read byte(s) from legacy I/O port space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to file to read from
  * @bin_attr: struct bin_attribute for this file
  * @buf: buffer to store results
@@ -529,7 +534,8 @@ write_vpd_attr(struct kobject *kobj, struct bin_attribute *bin_attr,
  * callback routine (pci_legacy_read).
  */
 static ssize_t
-pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_legacy_io(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -545,6 +551,7 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_write_legacy_io - write byte(s) to legacy I/O port space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to file to read from
  * @bin_attr: struct bin_attribute for this file
  * @buf: buffer containing value to be written
@@ -555,7 +562,8 @@ pci_read_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * callback routine (pci_legacy_write).
  */
 static ssize_t
-pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_legacy_io(struct file *filp, struct kobject *kobj,
+		    struct bin_attribute *bin_attr,
 		    char *buf, loff_t off, size_t count)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -570,6 +578,7 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_mmap_legacy_mem - map legacy PCI memory into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to device to be mapped
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
@@ -579,7 +588,8 @@ pci_write_legacy_io(struct kobject *kobj, struct bin_attribute *bin_attr,
  * memory space.
  */
 static int
-pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_legacy_mem(struct file *filp, struct kobject *kobj,
+		    struct bin_attribute *attr,
                     struct vm_area_struct *vma)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -591,6 +601,7 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
 
 /**
  * pci_mmap_legacy_io - map legacy PCI IO into user memory space
+ * @filp: open sysfs file
  * @kobj: kobject corresponding to device to be mapped
  * @attr: struct bin_attribute for this file
  * @vma: struct vm_area_struct passed to mmap
@@ -600,7 +611,8 @@ pci_mmap_legacy_mem(struct kobject *kobj, struct bin_attribute *attr,
  * memory space. Returns -ENOSYS if the operation isn't supported
  */
 static int
-pci_mmap_legacy_io(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_legacy_io(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *attr,
 		   struct vm_area_struct *vma)
 {
         struct pci_bus *bus = to_pci_bus(container_of(kobj,
@@ -750,14 +762,16 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static int
-pci_mmap_resource_uc(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_resource_uc(struct file *filp, struct kobject *kobj,
+		     struct bin_attribute *attr,
 		     struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 0);
 }
 
 static int
-pci_mmap_resource_wc(struct kobject *kobj, struct bin_attribute *attr,
+pci_mmap_resource_wc(struct file *filp, struct kobject *kobj,
+		     struct bin_attribute *attr,
 		     struct vm_area_struct *vma)
 {
 	return pci_mmap_resource(kobj, attr, vma, 1);
@@ -861,6 +875,7 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
 
 /**
  * pci_write_rom - used to enable access to the PCI ROM display
+ * @filp: sysfs file
  * @kobj: kernel object handle
  * @bin_attr: struct bin_attribute for this file
  * @buf: user input
@@ -870,7 +885,8 @@ void __weak pci_remove_resource_files(struct pci_dev *dev) { return; }
  * writing anything except 0 enables it
  */
 static ssize_t
-pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_write_rom(struct file *filp, struct kobject *kobj,
+	      struct bin_attribute *bin_attr,
 	      char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
@@ -885,6 +901,7 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * pci_read_rom - read a PCI ROM
+ * @filp: sysfs file
  * @kobj: kernel object handle
  * @bin_attr: struct bin_attribute for this file
  * @buf: where to put the data we read from the ROM
@@ -895,7 +912,8 @@ pci_write_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
  * device corresponding to @kobj.
  */
 static ssize_t
-pci_read_rom(struct kobject *kobj, struct bin_attribute *bin_attr,
+pci_read_rom(struct file *filp, struct kobject *kobj,
+	     struct bin_attribute *bin_attr,
 	     char *buf, loff_t off, size_t count)
 {
 	struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj));
diff --git a/drivers/pcmcia/cistpl.c b/drivers/pcmcia/cistpl.c
index 60d428be0b07..8844bc3e3118 100644
--- a/drivers/pcmcia/cistpl.c
+++ b/drivers/pcmcia/cistpl.c
@@ -1531,7 +1531,7 @@ static ssize_t pccard_extract_cis(struct pcmcia_socket *s, char *buf,
 }
 
 
-static ssize_t pccard_show_cis(struct kobject *kobj,
+static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -1562,7 +1562,7 @@ static ssize_t pccard_show_cis(struct kobject *kobj,
 }
 
 
-static ssize_t pccard_store_cis(struct kobject *kobj,
+static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/power/olpc_battery.c b/drivers/power/olpc_battery.c
index 8fefe5a73558..baefcf1cffc9 100644
--- a/drivers/power/olpc_battery.c
+++ b/drivers/power/olpc_battery.c
@@ -354,7 +354,7 @@ static enum power_supply_property olpc_bat_props[] = {
 #define EEPROM_END	0x80
 #define EEPROM_SIZE	(EEPROM_END - EEPROM_START)
 
-static ssize_t olpc_bat_eeprom_read(struct kobject *kobj,
+static ssize_t olpc_bat_eeprom_read(struct file *filp, struct kobject *kobj,
 		struct bin_attribute *attr, char *buf, loff_t off, size_t count)
 {
 	uint8_t ec_byte;
diff --git a/drivers/rapidio/rio-sysfs.c b/drivers/rapidio/rio-sysfs.c
index ba742e82c57d..00b475658356 100644
--- a/drivers/rapidio/rio-sysfs.c
+++ b/drivers/rapidio/rio-sysfs.c
@@ -68,7 +68,8 @@ struct device_attribute rio_dev_attrs[] = {
 };
 
 static ssize_t
-rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+rio_read_config(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct rio_dev *dev =
@@ -139,7 +140,8 @@ rio_read_config(struct kobject *kobj, struct bin_attribute *bin_attr,
 }
 
 static ssize_t
-rio_write_config(struct kobject *kobj, struct bin_attribute *bin_attr,
+rio_write_config(struct file *filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct rio_dev *dev =
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index e9aa814ddd23..ece4dbddc0ea 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -519,7 +519,8 @@ static const struct rtc_class_ops cmos_rtc_ops = {
 #define NVRAM_OFFSET	(RTC_REG_D + 1)
 
 static ssize_t
-cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+cmos_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	int	retval;
@@ -547,7 +548,8 @@ cmos_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-cmos_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+cmos_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct cmos_rtc	*cmos;
diff --git a/drivers/rtc/rtc-ds1305.c b/drivers/rtc/rtc-ds1305.c
index 7836c9cec557..48da85e97ca4 100644
--- a/drivers/rtc/rtc-ds1305.c
+++ b/drivers/rtc/rtc-ds1305.c
@@ -542,7 +542,8 @@ static void msg_init(struct spi_message *m, struct spi_transfer *x,
 }
 
 static ssize_t
-ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+ds1305_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct spi_device	*spi;
@@ -572,7 +573,8 @@ ds1305_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-ds1305_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+ds1305_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct spi_device	*spi;
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index c4ec5c158aa1..de033b7ac21f 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -556,7 +556,8 @@ static const struct rtc_class_ops ds13xx_rtc_ops = {
 #define NVRAM_SIZE	56
 
 static ssize_t
-ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
+ds1307_nvram_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct i2c_client	*client;
@@ -580,7 +581,8 @@ ds1307_nvram_read(struct kobject *kobj, struct bin_attribute *attr,
 }
 
 static ssize_t
-ds1307_nvram_write(struct kobject *kobj, struct bin_attribute *attr,
+ds1307_nvram_write(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct i2c_client	*client;
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 06b8566c4532..37268e97de49 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -423,8 +423,9 @@ static const struct rtc_class_ops ds1511_rtc_ops = {
 };
 
  static ssize_t
-ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba,
-				char *buf, loff_t pos, size_t size)
+ds1511_nvram_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *ba,
+		  char *buf, loff_t pos, size_t size)
 {
 	ssize_t count;
 
@@ -452,8 +453,9 @@ ds1511_nvram_read(struct kobject *kobj, struct bin_attribute *ba,
 }
 
  static ssize_t
-ds1511_nvram_write(struct kobject *kobj, struct bin_attribute *bin_attr,
-				char *buf, loff_t pos, size_t size)
+ds1511_nvram_write(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
+		   char *buf, loff_t pos, size_t size)
 {
 	ssize_t count;
 
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 244f9994bcbb..ff432e2ca275 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -252,7 +252,7 @@ static const struct rtc_class_ops ds1553_rtc_ops = {
 	.update_irq_enable	= ds1553_rtc_update_irq_enable,
 };
 
-static ssize_t ds1553_nvram_read(struct kobject *kobj,
+static ssize_t ds1553_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t size)
 {
@@ -267,7 +267,7 @@ static ssize_t ds1553_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t ds1553_nvram_write(struct kobject *kobj,
+static ssize_t ds1553_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 2b4b0bc42d6f..042630c90dd3 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -128,7 +128,7 @@ static const struct rtc_class_ops ds1742_rtc_ops = {
 	.set_time	= ds1742_rtc_set_time,
 };
 
-static ssize_t ds1742_nvram_read(struct kobject *kobj,
+static ssize_t ds1742_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t size)
 {
@@ -143,7 +143,7 @@ static ssize_t ds1742_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t ds1742_nvram_write(struct kobject *kobj,
+static ssize_t ds1742_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-m48t59.c b/drivers/rtc/rtc-m48t59.c
index 365ff3ac2348..be8359fdb65a 100644
--- a/drivers/rtc/rtc-m48t59.c
+++ b/drivers/rtc/rtc-m48t59.c
@@ -343,7 +343,7 @@ static const struct rtc_class_ops m48t02_rtc_ops = {
 	.set_time	= m48t59_rtc_set_time,
 };
 
-static ssize_t m48t59_nvram_read(struct kobject *kobj,
+static ssize_t m48t59_nvram_read(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t pos, size_t size)
 {
@@ -363,7 +363,7 @@ static ssize_t m48t59_nvram_read(struct kobject *kobj,
 	return cnt;
 }
 
-static ssize_t m48t59_nvram_write(struct kobject *kobj,
+static ssize_t m48t59_nvram_write(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index b53a00198dbe..3b943673cd3e 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -244,7 +244,7 @@ static const struct rtc_class_ops stk17ta8_rtc_ops = {
 	.alarm_irq_enable	= stk17ta8_rtc_alarm_irq_enable,
 };
 
-static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
+static ssize_t stk17ta8_nvram_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *attr, char *buf,
 				 loff_t pos, size_t size)
 {
@@ -259,7 +259,7 @@ static ssize_t stk17ta8_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t stk17ta8_nvram_write(struct kobject *kobj,
+static ssize_t stk17ta8_nvram_write(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *attr, char *buf,
 				  loff_t pos, size_t size)
 {
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index 20bfc64a15c8..ec6313d15359 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -188,7 +188,7 @@ static const struct rtc_class_ops tx4939_rtc_ops = {
 	.alarm_irq_enable	= tx4939_rtc_alarm_irq_enable,
 };
 
-static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
+static ssize_t tx4939_rtc_nvram_read(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *buf, loff_t pos, size_t size)
 {
@@ -207,7 +207,7 @@ static ssize_t tx4939_rtc_nvram_read(struct kobject *kobj,
 	return count;
 }
 
-static ssize_t tx4939_rtc_nvram_write(struct kobject *kobj,
+static ssize_t tx4939_rtc_nvram_write(struct file *filp, struct kobject *kobj,
 				      struct bin_attribute *bin_attr,
 				      char *buf, loff_t pos, size_t size)
 {
diff --git a/drivers/s390/cio/chp.c b/drivers/s390/cio/chp.c
index 1d16189f2f2d..6c9fa15aac7b 100644
--- a/drivers/s390/cio/chp.c
+++ b/drivers/s390/cio/chp.c
@@ -135,7 +135,8 @@ static int s390_vary_chpid(struct chp_id chpid, int on)
 /*
  * Channel measurement related functions
  */
-static ssize_t chp_measurement_chars_read(struct kobject *kobj,
+static ssize_t chp_measurement_chars_read(struct file *filp,
+					  struct kobject *kobj,
 					  struct bin_attribute *bin_attr,
 					  char *buf, loff_t off, size_t count)
 {
@@ -182,7 +183,7 @@ static void chp_measurement_copy_block(struct cmg_entry *buf,
 	} while (reference_buf.values[0] != buf->values[0]);
 }
 
-static ssize_t chp_measurement_read(struct kobject *kobj,
+static ssize_t chp_measurement_read(struct file *filp, struct kobject *kobj,
 				    struct bin_attribute *bin_attr,
 				    char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/3w-sas.c b/drivers/scsi/3w-sas.c
index 54c5ffb1eaa1..d38000db9237 100644
--- a/drivers/scsi/3w-sas.c
+++ b/drivers/scsi/3w-sas.c
@@ -98,7 +98,7 @@ static int twl_reset_device_extension(TW_Device_Extension *tw_dev, int ioctl_res
 /* Functions */
 
 /* This function returns AENs through sysfs */
-static ssize_t twl_sysfs_aen_read(struct kobject *kobj,
+static ssize_t twl_sysfs_aen_read(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *outbuf, loff_t offset, size_t count)
 {
@@ -129,7 +129,7 @@ static struct bin_attribute twl_sysfs_aen_read_attr = {
 };
 
 /* This function returns driver compatibility info through sysfs */
-static ssize_t twl_sysfs_compat_info(struct kobject *kobj,
+static ssize_t twl_sysfs_compat_info(struct file *filp, struct kobject *kobj,
 				     struct bin_attribute *bin_attr,
 				     char *outbuf, loff_t offset, size_t count)
 {
diff --git a/drivers/scsi/arcmsr/arcmsr_attr.c b/drivers/scsi/arcmsr/arcmsr_attr.c
index 5877f29a6005..a4e04c50c436 100644
--- a/drivers/scsi/arcmsr/arcmsr_attr.c
+++ b/drivers/scsi/arcmsr/arcmsr_attr.c
@@ -59,7 +59,8 @@
 
 struct device_attribute *arcmsr_host_attrs[];
 
-static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_read(struct file *filp,
+					     struct kobject *kobj,
 					     struct bin_attribute *bin,
 					     char *buf, loff_t off,
 					     size_t count)
@@ -105,7 +106,8 @@ static ssize_t arcmsr_sysfs_iop_message_read(struct kobject *kobj,
 	return (allxfer_len);
 }
 
-static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_write(struct file *filp,
+					      struct kobject *kobj,
 					      struct bin_attribute *bin,
 					      char *buf, loff_t off,
 					      size_t count)
@@ -153,7 +155,8 @@ static ssize_t arcmsr_sysfs_iop_message_write(struct kobject *kobj,
 	}
 }
 
-static ssize_t arcmsr_sysfs_iop_message_clear(struct kobject *kobj,
+static ssize_t arcmsr_sysfs_iop_message_clear(struct file *filp,
+					      struct kobject *kobj,
 					      struct bin_attribute *bin,
 					      char *buf, loff_t off,
 					      size_t count)
diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index d18f45c95639..3eb2b7b3d8b0 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -2919,6 +2919,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
 #ifdef CONFIG_SCSI_IBMVFC_TRACE
 /**
  * ibmvfc_read_trace - Dump the adapter trace
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:	bin_attribute struct
  * @buf:		buffer
@@ -2928,7 +2929,7 @@ static DEVICE_ATTR(log_level, S_IRUGO | S_IWUSR,
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ibmvfc_read_trace(struct kobject *kobj,
+static ssize_t ibmvfc_read_trace(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/ipr.c b/drivers/scsi/ipr.c
index b90c118119d7..6a6661c35b2f 100644
--- a/drivers/scsi/ipr.c
+++ b/drivers/scsi/ipr.c
@@ -3120,6 +3120,7 @@ restart:
 #ifdef CONFIG_SCSI_IPR_TRACE
 /**
  * ipr_read_trace - Dump the adapter trace
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3129,7 +3130,7 @@ restart:
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_read_trace(struct kobject *kobj,
+static ssize_t ipr_read_trace(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
@@ -3764,6 +3765,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
 #ifdef CONFIG_SCSI_IPR_DUMP
 /**
  * ipr_read_dump - Dump the adapter
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3773,7 +3775,7 @@ static struct device_attribute *ipr_ioa_attrs[] = {
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_read_dump(struct kobject *kobj,
+static ssize_t ipr_read_dump(struct file *filp, struct kobject *kobj,
 			     struct bin_attribute *bin_attr,
 			     char *buf, loff_t off, size_t count)
 {
@@ -3927,6 +3929,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg)
 
 /**
  * ipr_write_dump - Setup dump state of adapter
+ * @filp:		open sysfs file
  * @kobj:		kobject struct
  * @bin_attr:		bin_attribute struct
  * @buf:		buffer
@@ -3936,7 +3939,7 @@ static int ipr_free_dump(struct ipr_ioa_cfg *ioa_cfg)
  * Return value:
  *	number of bytes printed to buffer
  **/
-static ssize_t ipr_write_dump(struct kobject *kobj,
+static ssize_t ipr_write_dump(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c
index 2e5f376d9ccc..bf33b315f93e 100644
--- a/drivers/scsi/lpfc/lpfc_attr.c
+++ b/drivers/scsi/lpfc/lpfc_attr.c
@@ -2643,6 +2643,7 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR,
 
 /**
  * sysfs_drvr_stat_data_read - Read function for lpfc_drvr_stat_data attribute
+ * @filp: sysfs file
  * @kobj: Pointer to the kernel object
  * @bin_attr: Attribute object
  * @buff: Buffer pointer
@@ -2654,7 +2655,8 @@ static DEVICE_ATTR(lpfc_stat_data_ctrl, S_IRUGO | S_IWUSR,
  * applications.
  **/
 static ssize_t
-sysfs_drvr_stat_data_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_drvr_stat_data_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device,
@@ -3362,6 +3364,7 @@ struct device_attribute *lpfc_vport_attrs[] = {
 
 /**
  * sysfs_ctlreg_write - Write method for writing to ctlreg
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be written to the adapter IOREG space.
@@ -3379,7 +3382,8 @@ struct device_attribute *lpfc_vport_attrs[] = {
  * value of count, buf contents written
  **/
 static ssize_t
-sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_ctlreg_write(struct file *filp, struct kobject *kobj,
+		   struct bin_attribute *bin_attr,
 		   char *buf, loff_t off, size_t count)
 {
 	size_t buf_off;
@@ -3415,6 +3419,7 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * sysfs_ctlreg_read - Read method for reading from ctlreg
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: if successful contains the data from the adapter IOREG space.
@@ -3431,7 +3436,8 @@ sysfs_ctlreg_write(struct kobject *kobj, struct bin_attribute *bin_attr,
  * value of count, buf contents read
  **/
 static ssize_t
-sysfs_ctlreg_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_ctlreg_read(struct file *filp, struct kobject *kobj,
+		  struct bin_attribute *bin_attr,
 		  char *buf, loff_t off, size_t count)
 {
 	size_t buf_off;
@@ -3496,6 +3502,7 @@ sysfs_mbox_idle(struct lpfc_hba *phba)
 
 /**
  * sysfs_mbox_write - Write method for writing information via mbox
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be written to sysfs mbox.
@@ -3516,7 +3523,8 @@ sysfs_mbox_idle(struct lpfc_hba *phba)
  * count number of bytes transferred
  **/
 static ssize_t
-sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_mbox_write(struct file *filp, struct kobject *kobj,
+		 struct bin_attribute *bin_attr,
 		 char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
@@ -3571,6 +3579,7 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
 
 /**
  * sysfs_mbox_read - Read method for reading information via mbox
+ * @filp: open sysfs file
  * @kobj: kernel kobject that contains the kernel class device.
  * @bin_attr: kernel attributes passed to us.
  * @buf: contains the data to be read from sysfs mbox.
@@ -3593,7 +3602,8 @@ sysfs_mbox_write(struct kobject *kobj, struct bin_attribute *bin_attr,
  * count number of bytes transferred
  **/
 static ssize_t
-sysfs_mbox_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+sysfs_mbox_read(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *bin_attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 3b708606b932..1e4cafabba15 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -16,7 +16,7 @@ static int qla24xx_vport_disable(struct fc_vport *, bool);
 /* SYSFS attributes --------------------------------------------------------- */
 
 static ssize_t
-qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_read_fw_dump(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -32,7 +32,7 @@ qla2x00_sysfs_read_fw_dump(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_fw_dump(struct kobject *kobj,
+qla2x00_sysfs_write_fw_dump(struct file *filp, struct kobject *kobj,
 			    struct bin_attribute *bin_attr,
 			    char *buf, loff_t off, size_t count)
 {
@@ -92,7 +92,7 @@ static struct bin_attribute sysfs_fw_dump_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_nvram(struct kobject *kobj,
+qla2x00_sysfs_read_nvram(struct file *filp, struct kobject *kobj,
 			 struct bin_attribute *bin_attr,
 			 char *buf, loff_t off, size_t count)
 {
@@ -111,7 +111,7 @@ qla2x00_sysfs_read_nvram(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_nvram(struct kobject *kobj,
+qla2x00_sysfs_write_nvram(struct file *filp, struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
@@ -177,7 +177,7 @@ static struct bin_attribute sysfs_nvram_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_optrom(struct kobject *kobj,
+qla2x00_sysfs_read_optrom(struct file *filp, struct kobject *kobj,
 			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
@@ -193,7 +193,7 @@ qla2x00_sysfs_read_optrom(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_optrom(struct kobject *kobj,
+qla2x00_sysfs_write_optrom(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -224,7 +224,7 @@ static struct bin_attribute sysfs_optrom_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_optrom_ctl(struct kobject *kobj,
+qla2x00_sysfs_write_optrom_ctl(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -387,7 +387,7 @@ static struct bin_attribute sysfs_optrom_ctl_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_vpd(struct kobject *kobj,
+qla2x00_sysfs_read_vpd(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -408,7 +408,7 @@ qla2x00_sysfs_read_vpd(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_write_vpd(struct kobject *kobj,
+qla2x00_sysfs_write_vpd(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -461,7 +461,7 @@ static struct bin_attribute sysfs_vpd_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_sfp(struct kobject *kobj,
+qla2x00_sysfs_read_sfp(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -522,7 +522,7 @@ static struct bin_attribute sysfs_sfp_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_reset(struct kobject *kobj,
+qla2x00_sysfs_write_reset(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -592,7 +592,7 @@ static struct bin_attribute sysfs_reset_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_edc(struct kobject *kobj,
+qla2x00_sysfs_write_edc(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -650,7 +650,7 @@ static struct bin_attribute sysfs_edc_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_write_edc_status(struct kobject *kobj,
+qla2x00_sysfs_write_edc_status(struct file *filp, struct kobject *kobj,
 			struct bin_attribute *bin_attr,
 			char *buf, loff_t off, size_t count)
 {
@@ -700,7 +700,7 @@ qla2x00_sysfs_write_edc_status(struct kobject *kobj,
 }
 
 static ssize_t
-qla2x00_sysfs_read_edc_status(struct kobject *kobj,
+qla2x00_sysfs_read_edc_status(struct file *filp, struct kobject *kobj,
 			   struct bin_attribute *bin_attr,
 			   char *buf, loff_t off, size_t count)
 {
@@ -730,7 +730,7 @@ static struct bin_attribute sysfs_edc_status_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_xgmac_stats(struct kobject *kobj,
+qla2x00_sysfs_read_xgmac_stats(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
@@ -782,7 +782,7 @@ static struct bin_attribute sysfs_xgmac_stats_attr = {
 };
 
 static ssize_t
-qla2x00_sysfs_read_dcbx_tlv(struct kobject *kobj,
+qla2x00_sysfs_read_dcbx_tlv(struct file *filp, struct kobject *kobj,
 		       struct bin_attribute *bin_attr,
 		       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/staging/udlfb/udlfb.c b/drivers/staging/udlfb/udlfb.c
index aa8195199a2c..577f2bf6eb23 100644
--- a/drivers/staging/udlfb/udlfb.c
+++ b/drivers/staging/udlfb/udlfb.c
@@ -1063,7 +1063,8 @@ static ssize_t metrics_misc_show(struct device *fbdev,
 			atomic_read(&dev->lost_pixels) ? "yes" : "no");
 }
 
-static ssize_t edid_show(struct kobject *kobj, struct bin_attribute *a,
+static ssize_t edid_show(struct file *filp, struct kobject *kobj,
+			 struct bin_attribute *a,
 			 char *buf, loff_t off, size_t count) {
 	struct device *fbdev = container_of(kobj, struct device, kobj);
 	struct fb_info *fb_info = dev_get_drvdata(fbdev);
diff --git a/drivers/usb/core/sysfs.c b/drivers/usb/core/sysfs.c
index 06863befaf3a..448f5b47fc48 100644
--- a/drivers/usb/core/sysfs.c
+++ b/drivers/usb/core/sysfs.c
@@ -658,7 +658,8 @@ const struct attribute_group *usb_device_groups[] = {
 /* Binary descriptors */
 
 static ssize_t
-read_descriptors(struct kobject *kobj, struct bin_attribute *attr,
+read_descriptors(struct file *filp, struct kobject *kobj,
+		struct bin_attribute *attr,
 		char *buf, loff_t off, size_t count)
 {
 	struct device *dev = container_of(kobj, struct device, kobj);
diff --git a/drivers/video/aty/radeon_base.c b/drivers/video/aty/radeon_base.c
index 6c37e8ee5efe..3c1e13ed1cba 100644
--- a/drivers/video/aty/radeon_base.c
+++ b/drivers/video/aty/radeon_base.c
@@ -2099,7 +2099,7 @@ static ssize_t radeon_show_one_edid(char *buf, loff_t off, size_t count, const u
 }
 
 
-static ssize_t radeon_show_edid1(struct kobject *kobj,
+static ssize_t radeon_show_edid1(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
@@ -2112,7 +2112,7 @@ static ssize_t radeon_show_edid1(struct kobject *kobj,
 }
 
 
-static ssize_t radeon_show_edid2(struct kobject *kobj,
+static ssize_t radeon_show_edid2(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2431.c b/drivers/w1/slaves/w1_ds2431.c
index 2c6c0cf6a20f..84e2410aec1d 100644
--- a/drivers/w1/slaves/w1_ds2431.c
+++ b/drivers/w1/slaves/w1_ds2431.c
@@ -96,7 +96,7 @@ static int w1_f2d_readblock(struct w1_slave *sl, int off, int count, char *buf)
 	return -1;
 }
 
-static ssize_t w1_f2d_read_bin(struct kobject *kobj,
+static ssize_t w1_f2d_read_bin(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -202,7 +202,7 @@ retry:
 	return 0;
 }
 
-static ssize_t w1_f2d_write_bin(struct kobject *kobj,
+static ssize_t w1_f2d_write_bin(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2433.c b/drivers/w1/slaves/w1_ds2433.c
index d2bf32118a98..0f7b8f9c509a 100644
--- a/drivers/w1/slaves/w1_ds2433.c
+++ b/drivers/w1/slaves/w1_ds2433.c
@@ -92,7 +92,7 @@ static int w1_f23_refresh_block(struct w1_slave *sl, struct w1_f23_data *data,
 }
 #endif	/* CONFIG_W1_SLAVE_DS2433_CRC */
 
-static ssize_t w1_f23_read_bin(struct kobject *kobj,
+static ssize_t w1_f23_read_bin(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
@@ -206,7 +206,7 @@ static int w1_f23_write(struct w1_slave *sl, int addr, int len, const u8 *data)
 	return 0;
 }
 
-static ssize_t w1_f23_write_bin(struct kobject *kobj,
+static ssize_t w1_f23_write_bin(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/slaves/w1_ds2760.c b/drivers/w1/slaves/w1_ds2760.c
index 6e153343e117..483d45180911 100644
--- a/drivers/w1/slaves/w1_ds2760.c
+++ b/drivers/w1/slaves/w1_ds2760.c
@@ -97,7 +97,7 @@ int w1_ds2760_recall_eeprom(struct device *dev, int addr)
 	return w1_ds2760_eeprom_cmd(dev, addr, W1_DS2760_RECALL_DATA);
 }
 
-static ssize_t w1_ds2760_read_bin(struct kobject *kobj,
+static ssize_t w1_ds2760_read_bin(struct file *filp, struct kobject *kobj,
 				  struct bin_attribute *bin_attr,
 				  char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/w1/w1.c b/drivers/w1/w1.c
index ad5897dc4495..2839e281cd65 100644
--- a/drivers/w1/w1.c
+++ b/drivers/w1/w1.c
@@ -120,7 +120,7 @@ static struct device_attribute w1_slave_attr_id =
 
 /* Default family */
 
-static ssize_t w1_default_write(struct kobject *kobj,
+static ssize_t w1_default_write(struct file *filp, struct kobject *kobj,
 				struct bin_attribute *bin_attr,
 				char *buf, loff_t off, size_t count)
 {
@@ -139,7 +139,7 @@ out_up:
 	return count;
 }
 
-static ssize_t w1_default_read(struct kobject *kobj,
+static ssize_t w1_default_read(struct file *filp, struct kobject *kobj,
 			       struct bin_attribute *bin_attr,
 			       char *buf, loff_t off, size_t count)
 {
diff --git a/drivers/zorro/zorro-sysfs.c b/drivers/zorro/zorro-sysfs.c
index eb924e0a64ce..26f7184ef9e1 100644
--- a/drivers/zorro/zorro-sysfs.c
+++ b/drivers/zorro/zorro-sysfs.c
@@ -49,7 +49,7 @@ static ssize_t zorro_show_resource(struct device *dev, struct device_attribute *
 
 static DEVICE_ATTR(resource, S_IRUGO, zorro_show_resource, NULL);
 
-static ssize_t zorro_read_config(struct kobject *kobj,
+static ssize_t zorro_read_config(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t off, size_t count)
 {
diff --git a/fs/sysfs/bin.c b/fs/sysfs/bin.c
index 806b277453f9..4e321f7353fa 100644
--- a/fs/sysfs/bin.c
+++ b/fs/sysfs/bin.c
@@ -46,9 +46,9 @@ struct bin_buffer {
 };
 
 static int
-fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
+fill_read(struct file *file, char *buffer, loff_t off, size_t count)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
 	int rc;
@@ -59,7 +59,7 @@ fill_read(struct dentry *dentry, char *buffer, loff_t off, size_t count)
 
 	rc = -EIO;
 	if (attr->read)
-		rc = attr->read(kobj, attr, buffer, off, count);
+		rc = attr->read(file, kobj, attr, buffer, off, count);
 
 	sysfs_put_active(attr_sd);
 
@@ -70,8 +70,7 @@ static ssize_t
 read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	struct dentry *dentry = file->f_path.dentry;
-	int size = dentry->d_inode->i_size;
+	int size = file->f_path.dentry->d_inode->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -92,7 +91,7 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 
 	mutex_lock(&bb->mutex);
 
-	count = fill_read(dentry, bb->buffer, offs, count);
+	count = fill_read(file, bb->buffer, offs, count);
 	if (count < 0) {
 		mutex_unlock(&bb->mutex);
 		goto out_free;
@@ -117,9 +116,9 @@ read(struct file *file, char __user *userbuf, size_t bytes, loff_t *off)
 }
 
 static int
-flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
+flush_write(struct file *file, char *buffer, loff_t offset, size_t count)
 {
-	struct sysfs_dirent *attr_sd = dentry->d_fsdata;
+	struct sysfs_dirent *attr_sd = file->f_path.dentry->d_fsdata;
 	struct bin_attribute *attr = attr_sd->s_bin_attr.bin_attr;
 	struct kobject *kobj = attr_sd->s_parent->s_dir.kobj;
 	int rc;
@@ -130,7 +129,7 @@ flush_write(struct dentry *dentry, char *buffer, loff_t offset, size_t count)
 
 	rc = -EIO;
 	if (attr->write)
-		rc = attr->write(kobj, attr, buffer, offset, count);
+		rc = attr->write(file, kobj, attr, buffer, offset, count);
 
 	sysfs_put_active(attr_sd);
 
@@ -141,8 +140,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 		     size_t bytes, loff_t *off)
 {
 	struct bin_buffer *bb = file->private_data;
-	struct dentry *dentry = file->f_path.dentry;
-	int size = dentry->d_inode->i_size;
+	int size = file->f_path.dentry->d_inode->i_size;
 	loff_t offs = *off;
 	int count = min_t(size_t, bytes, PAGE_SIZE);
 	char *temp;
@@ -165,7 +163,7 @@ static ssize_t write(struct file *file, const char __user *userbuf,
 
 	memcpy(bb->buffer, temp, count);
 
-	count = flush_write(dentry, bb->buffer, offs, count);
+	count = flush_write(file, bb->buffer, offs, count);
 	mutex_unlock(&bb->mutex);
 
 	if (count > 0)
@@ -363,7 +361,7 @@ static int mmap(struct file *file, struct vm_area_struct *vma)
 	if (!attr->mmap)
 		goto out_put;
 
-	rc = attr->mmap(kobj, attr, vma);
+	rc = attr->mmap(file, kobj, attr, vma);
 	if (rc)
 		goto out_put;
 
diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
index 6903e9204032..f2694eb4dd3d 100644
--- a/include/linux/sysfs.h
+++ b/include/linux/sysfs.h
@@ -87,17 +87,18 @@ struct attribute_group {
 
 #define attr_name(_attr) (_attr).attr.name
 
+struct file;
 struct vm_area_struct;
 
 struct bin_attribute {
 	struct attribute	attr;
 	size_t			size;
 	void			*private;
-	ssize_t (*read)(struct kobject *, struct bin_attribute *,
+	ssize_t (*read)(struct file *, struct kobject *, struct bin_attribute *,
 			char *, loff_t, size_t);
-	ssize_t (*write)(struct kobject *, struct bin_attribute *,
+	ssize_t (*write)(struct file *,struct kobject *, struct bin_attribute *,
 			 char *, loff_t, size_t);
-	int (*mmap)(struct kobject *, struct bin_attribute *attr,
+	int (*mmap)(struct file *, struct kobject *, struct bin_attribute *attr,
 		    struct vm_area_struct *vma);
 };
 
diff --git a/kernel/ksysfs.c b/kernel/ksysfs.c
index 21fe3c426948..0b624e791805 100644
--- a/kernel/ksysfs.c
+++ b/kernel/ksysfs.c
@@ -138,7 +138,8 @@ extern const void __start_notes __attribute__((weak));
 extern const void __stop_notes __attribute__((weak));
 #define	notes_size (&__stop_notes - &__start_notes)
 
-static ssize_t notes_read(struct kobject *kobj, struct bin_attribute *bin_attr,
+static ssize_t notes_read(struct file *filp, struct kobject *kobj,
+			  struct bin_attribute *bin_attr,
 			  char *buf, loff_t off, size_t count)
 {
 	memcpy(buf, &__start_notes + off, count);
diff --git a/kernel/module.c b/kernel/module.c
index e2564580f3f1..5e14483768bb 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1182,7 +1182,7 @@ struct module_notes_attrs {
 	struct bin_attribute attrs[0];
 };
 
-static ssize_t module_notes_read(struct kobject *kobj,
+static ssize_t module_notes_read(struct file *filp, struct kobject *kobj,
 				 struct bin_attribute *bin_attr,
 				 char *buf, loff_t pos, size_t count)
 {
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index dd321e39e621..486b8f3861d2 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -659,7 +659,7 @@ static struct attribute_group bridge_group = {
  *
  * Returns the number of bytes read.
  */
-static ssize_t brforward_read(struct kobject *kobj,
+static ssize_t brforward_read(struct file *filp, struct kobject *kobj,
 			      struct bin_attribute *bin_attr,
 			      char *buf, loff_t off, size_t count)
 {
-- 
cgit v1.2.3


From 608b4b9548dedf4185ca47edcaae4bff2ceb62de Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:45 -0700
Subject: netns: Teach network device kobjects which namespace they are in.

The problem.  Network devices show up in sysfs and with the network
namespace active multiple devices with the same name can show up in
the same directory, ouch!

To avoid that problem and allow existing applications in network namespaces
to see the same interface that is currently presented in sysfs, this
patch enables the tagging directory support in sysfs.

By using the network namespace pointers as tags to separate out the
sysfs directory entries we ensure that we don't have conflicts
in the directories and applications only see a limited set of
the network devices.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/kobject.h |  1 +
 net/Kconfig             |  8 ++++++++
 net/core/net-sysfs.c    | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)

(limited to 'include/linux')

diff --git a/include/linux/kobject.h b/include/linux/kobject.h
index b60d2dfe4e69..cf343a852534 100644
--- a/include/linux/kobject.h
+++ b/include/linux/kobject.h
@@ -142,6 +142,7 @@ extern const struct sysfs_ops kobj_sysfs_ops;
  */
 enum kobj_ns_type {
 	KOBJ_NS_TYPE_NONE = 0,
+	KOBJ_NS_TYPE_NET,
 	KOBJ_NS_TYPES
 };
 
diff --git a/net/Kconfig b/net/Kconfig
index 0d68b40fc0e6..f49532053a98 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -45,6 +45,14 @@ config COMPAT_NETLINK_MESSAGES
 
 menu "Networking options"
 
+config NET_NS
+	bool "Network namespace support"
+	default n
+	depends on EXPERIMENTAL && NAMESPACES
+	help
+	  Allow user space to create what appear to be multiple instances
+	  of the network stack.
+
 source "net/packet/Kconfig"
 source "net/unix/Kconfig"
 source "net/xfrm/Kconfig"
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index c57c4b228bb5..b388cdab9316 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -14,7 +14,9 @@
 #include <linux/netdevice.h>
 #include <linux/if_arp.h>
 #include <linux/slab.h>
+#include <linux/nsproxy.h>
 #include <net/sock.h>
+#include <net/net_namespace.h>
 #include <linux/rtnetlink.h>
 #include <linux/wireless.h>
 #include <linux/vmalloc.h>
@@ -766,6 +768,38 @@ static void rx_queue_remove_kobjects(struct net_device *net)
 	kset_unregister(net->queues_kset);
 }
 #endif /* CONFIG_RPS */
+
+static const void *net_current_ns(void)
+{
+	return current->nsproxy->net_ns;
+}
+
+static const void *net_initial_ns(void)
+{
+	return &init_net;
+}
+
+static const void *net_netlink_ns(struct sock *sk)
+{
+	return sock_net(sk);
+}
+
+static struct kobj_ns_type_operations net_ns_type_operations = {
+	.type = KOBJ_NS_TYPE_NET,
+	.current_ns = net_current_ns,
+	.netlink_ns = net_netlink_ns,
+	.initial_ns = net_initial_ns,
+};
+
+static void net_kobj_ns_exit(struct net *net)
+{
+	kobj_ns_exit(KOBJ_NS_TYPE_NET, net);
+}
+
+static struct pernet_operations sysfs_net_ops = {
+	.exit = net_kobj_ns_exit,
+};
+
 #endif /* CONFIG_SYSFS */
 
 #ifdef CONFIG_HOTPLUG
@@ -806,6 +840,13 @@ static void netdev_release(struct device *d)
 	kfree((char *)dev - dev->padded);
 }
 
+static const void *net_namespace(struct device *d)
+{
+	struct net_device *dev;
+	dev = container_of(d, struct net_device, dev);
+	return dev_net(dev);
+}
+
 static struct class net_class = {
 	.name = "net",
 	.dev_release = netdev_release,
@@ -815,6 +856,8 @@ static struct class net_class = {
 #ifdef CONFIG_HOTPLUG
 	.dev_uevent = netdev_uevent,
 #endif
+	.ns_type = &net_ns_type_operations,
+	.namespace = net_namespace,
 };
 
 /* Delete sysfs entries but hold kobject reference until after all
@@ -904,5 +947,9 @@ void netdev_initialize_kobject(struct net_device *net)
 
 int netdev_kobject_init(void)
 {
+	kobj_ns_type_register(&net_ns_type_operations);
+#ifdef CONFIG_SYSFS
+	register_pernet_subsys(&sysfs_net_ops);
+#endif
 	return class_register(&net_class);
 }
-- 
cgit v1.2.3


From 910a7e905f36e51a17d6e8bb4ad6dcd5ac5f1d53 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Tue, 4 May 2010 17:36:46 -0700
Subject: netlink: Implment netlink_broadcast_filtered

When netlink sockets are used to convey data that is in a namespace
we need a way to select a subset of the listening sockets to deliver
the packet to.  For the network namespace we have been doing this
by only transmitting packets in the correct network namespace.

For data belonging to other namespaces netlink_broadcast_filtered
provides a mechanism that allows us to examine the destination
socket and to decide if we should transmit the specified packet
to it.

Signed-off-by: Eric W. Biederman <ebiederm@xmission.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
 include/linux/netlink.h  |  4 ++++
 net/netlink/af_netlink.c | 21 +++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index 6eaca5e1e8ca..59d066936ab9 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -188,6 +188,10 @@ extern int netlink_has_listeners(struct sock *sk, unsigned int group);
 extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
 extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
 			     __u32 group, gfp_t allocation);
+extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb,
+	__u32 pid, __u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data);
 extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
 extern int netlink_register_notifier(struct notifier_block *nb);
 extern int netlink_unregister_notifier(struct notifier_block *nb);
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6464a1972a69..a2eb965207d3 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -978,6 +978,8 @@ struct netlink_broadcast_data {
 	int delivered;
 	gfp_t allocation;
 	struct sk_buff *skb, *skb2;
+	int (*tx_filter)(struct sock *dsk, struct sk_buff *skb, void *data);
+	void *tx_data;
 };
 
 static inline int do_one_broadcast(struct sock *sk,
@@ -1020,6 +1022,9 @@ static inline int do_one_broadcast(struct sock *sk,
 		p->failure = 1;
 		if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
 			p->delivery_failure = 1;
+	} else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+		kfree_skb(p->skb2);
+		p->skb2 = NULL;
 	} else if (sk_filter(sk, p->skb2)) {
 		kfree_skb(p->skb2);
 		p->skb2 = NULL;
@@ -1038,8 +1043,10 @@ out:
 	return 0;
 }
 
-int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
-		      u32 group, gfp_t allocation)
+int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid,
+	u32 group, gfp_t allocation,
+	int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data),
+	void *filter_data)
 {
 	struct net *net = sock_net(ssk);
 	struct netlink_broadcast_data info;
@@ -1059,6 +1066,8 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	info.allocation = allocation;
 	info.skb = skb;
 	info.skb2 = NULL;
+	info.tx_filter = filter;
+	info.tx_data = filter_data;
 
 	/* While we sleep in clone, do not allow to change socket list */
 
@@ -1083,6 +1092,14 @@ int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
 	}
 	return -ESRCH;
 }
+EXPORT_SYMBOL(netlink_broadcast_filtered);
+
+int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+		      u32 group, gfp_t allocation)
+{
+	return netlink_broadcast_filtered(ssk, skb, pid, group, allocation,
+		NULL, NULL);
+}
 EXPORT_SYMBOL(netlink_broadcast);
 
 struct netlink_set_err_data {
-- 
cgit v1.2.3


From c15271f4e74cd6dbdf461335d6d1450949c4b956 Mon Sep 17 00:00:00 2001
From: Jan Blunck <jblunck@suse.de>
Date: Wed, 14 Apr 2010 14:38:38 +0200
Subject: ext2: Add ext2_sb_info s_lock spinlock

Add a spinlock that protects against concurrent modifications of
s_mount_state, s_blocks_last, s_overhead_last and the content of the
superblock's buffer pointed to by sbi->s_es. The spinlock is now used in
ext2_xattr_update_super_block() which was setting the
EXT2_FEATURE_COMPAT_EXT_ATTR flag on the superblock without protection
before. Likewise the spinlock is used in ext2_show_options() to have a
consistent view of the mount options.

This is a preparation patch for removing the BKL from ext2 in the next
patch.

Signed-off-by: Jan Blunck <jblunck@suse.de>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Jan Kara <jack@suse.cz>
Cc: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/inode.c            |  2 ++
 fs/ext2/super.c            | 27 ++++++++++++++++++++++++++-
 fs/ext2/xattr.c            |  2 ++
 include/linux/ext2_fs_sb.h |  9 +++++++++
 4 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fc13cc119aad..5d15442abbd0 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1407,9 +1407,11 @@ static int __ext2_write_inode(struct inode *inode, int do_sync)
 				* created, add a flag to the superblock.
 				*/
 				lock_kernel();
+				spin_lock(&EXT2_SB(sb)->s_lock);
 				ext2_update_dynamic_rev(sb);
 				EXT2_SET_RO_COMPAT_FEATURE(sb,
 					EXT2_FEATURE_RO_COMPAT_LARGE_FILE);
+				spin_unlock(&EXT2_SB(sb)->s_lock);
 				unlock_kernel();
 				ext2_write_super(sb);
 			}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index f28a7ad02af9..28f65609589d 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -52,8 +52,10 @@ void ext2_error (struct super_block * sb, const char * function,
 	struct ext2_super_block *es = sbi->s_es;
 
 	if (!(sb->s_flags & MS_RDONLY)) {
+		spin_lock(&sbi->s_lock);
 		sbi->s_mount_state |= EXT2_ERROR_FS;
 		es->s_state |= cpu_to_le16(EXT2_ERROR_FS);
+		spin_unlock(&sbi->s_lock);
 		ext2_sync_super(sb, es, 1);
 	}
 
@@ -84,6 +86,9 @@ void ext2_msg(struct super_block *sb, const char *prefix,
 	va_end(args);
 }
 
+/*
+ * This must be called with sbi->s_lock held.
+ */
 void ext2_update_dynamic_rev(struct super_block *sb)
 {
 	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
@@ -124,7 +129,9 @@ static void ext2_put_super (struct super_block * sb)
 	if (!(sb->s_flags & MS_RDONLY)) {
 		struct ext2_super_block *es = sbi->s_es;
 
+		spin_lock(&sbi->s_lock);
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
+		spin_unlock(&sbi->s_lock);
 		ext2_sync_super(sb, es, 1);
 	}
 	db_count = sbi->s_gdb_count;
@@ -209,6 +216,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	struct ext2_super_block *es = sbi->s_es;
 	unsigned long def_mount_opts;
 
+	spin_lock(&sbi->s_lock);
 	def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
 
 	if (sbi->s_sb_block != 1)
@@ -281,6 +289,7 @@ static int ext2_show_options(struct seq_file *seq, struct vfsmount *vfs)
 	if (!test_opt(sb, RESERVATION))
 		seq_puts(seq, ",noreservation");
 
+	spin_unlock(&sbi->s_lock);
 	return 0;
 }
 
@@ -766,6 +775,8 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = sbi;
 	sbi->s_sb_block = sb_block;
 
+	spin_lock_init(&sbi->s_lock);
+
 	/*
 	 * See what the current blocksize for the device is, and
 	 * use that as the blocksize.  Otherwise (or if the blocksize
@@ -1132,9 +1143,12 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
 			    int wait)
 {
 	ext2_clear_super_error(sb);
+	spin_lock(&EXT2_SB(sb)->s_lock);
 	es->s_free_blocks_count = cpu_to_le32(ext2_count_free_blocks(sb));
 	es->s_free_inodes_count = cpu_to_le32(ext2_count_free_inodes(sb));
 	es->s_wtime = cpu_to_le32(get_seconds());
+	/* unlock before we do IO */
+	spin_unlock(&EXT2_SB(sb)->s_lock);
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 	if (wait)
 		sync_dirty_buffer(EXT2_SB(sb)->s_sbh);
@@ -1151,16 +1165,18 @@ static void ext2_sync_super(struct super_block *sb, struct ext2_super_block *es,
  * may have been checked while mounted and e2fsck may have
  * set s_state to EXT2_VALID_FS after some corrections.
  */
-
 static int ext2_sync_fs(struct super_block *sb, int wait)
 {
+	struct ext2_sb_info *sbi = EXT2_SB(sb);
 	struct ext2_super_block *es = EXT2_SB(sb)->s_es;
 
 	lock_kernel();
+	spin_lock(&sbi->s_lock);
 	if (es->s_state & cpu_to_le16(EXT2_VALID_FS)) {
 		ext2_debug("setting valid to 0\n");
 		es->s_state &= cpu_to_le16(~EXT2_VALID_FS);
 	}
+	spin_unlock(&sbi->s_lock);
 	ext2_sync_super(sb, es, wait);
 	unlock_kernel();
 
@@ -1186,6 +1202,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 	int err;
 
 	lock_kernel();
+	spin_lock(&sbi->s_lock);
 
 	/* Store the old options */
 	old_sb_flags = sb->s_flags;
@@ -1224,12 +1241,14 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		sbi->s_mount_opt |= old_mount_opt & EXT2_MOUNT_XIP;
 	}
 	if ((*flags & MS_RDONLY) == (sb->s_flags & MS_RDONLY)) {
+		spin_unlock(&sbi->s_lock);
 		unlock_kernel();
 		return 0;
 	}
 	if (*flags & MS_RDONLY) {
 		if (le16_to_cpu(es->s_state) & EXT2_VALID_FS ||
 		    !(sbi->s_mount_state & EXT2_VALID_FS)) {
+			spin_unlock(&sbi->s_lock);
 			unlock_kernel();
 			return 0;
 		}
@@ -1239,6 +1258,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		 */
 		es->s_state = cpu_to_le16(sbi->s_mount_state);
 		es->s_mtime = cpu_to_le32(get_seconds());
+		spin_unlock(&sbi->s_lock);
 		ext2_sync_super(sb, es, 1);
 	} else {
 		__le32 ret = EXT2_HAS_RO_COMPAT_FEATURE(sb,
@@ -1259,6 +1279,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
 		sbi->s_mount_state = le16_to_cpu(es->s_state);
 		if (!ext2_setup_super (sb, es, 0))
 			sb->s_flags &= ~MS_RDONLY;
+		spin_unlock(&sbi->s_lock);
 		ext2_write_super(sb);
 	}
 	unlock_kernel();
@@ -1268,6 +1289,7 @@ restore_opts:
 	sbi->s_resuid = old_opts.s_resuid;
 	sbi->s_resgid = old_opts.s_resgid;
 	sb->s_flags = old_sb_flags;
+	spin_unlock(&sbi->s_lock);
 	unlock_kernel();
 	return err;
 }
@@ -1279,6 +1301,8 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	struct ext2_super_block *es = sbi->s_es;
 	u64 fsid;
 
+	spin_lock(&sbi->s_lock);
+
 	if (test_opt (sb, MINIX_DF))
 		sbi->s_overhead_last = 0;
 	else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
@@ -1333,6 +1357,7 @@ static int ext2_statfs (struct dentry * dentry, struct kstatfs * buf)
 	       le64_to_cpup((void *)es->s_uuid + sizeof(u64));
 	buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
 	buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
+	spin_unlock(&sbi->s_lock);
 	return 0;
 }
 
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index e44dc92609be..3b96045a00ce 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -345,7 +345,9 @@ static void ext2_xattr_update_super_block(struct super_block *sb)
 	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
 		return;
 
+	spin_lock(&EXT2_SB(sb)->s_lock);
 	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
+	spin_unlock(&EXT2_SB(sb)->s_lock);
 	sb->s_dirt = 1;
 	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
 }
diff --git a/include/linux/ext2_fs_sb.h b/include/linux/ext2_fs_sb.h
index 1cdb66367c98..db4d9f586bb6 100644
--- a/include/linux/ext2_fs_sb.h
+++ b/include/linux/ext2_fs_sb.h
@@ -106,6 +106,15 @@ struct ext2_sb_info {
 	spinlock_t s_rsv_window_lock;
 	struct rb_root s_rsv_window_root;
 	struct ext2_reserve_window_node s_rsv_window_head;
+	/*
+	 * s_lock protects against concurrent modifications of s_mount_state,
+	 * s_blocks_last, s_overhead_last and the content of superblock's
+	 * buffer pointed to by sbi->s_es.
+	 *
+	 * Note: It is used in ext2_show_options() to provide a consistent view
+	 * of the mount options.
+	 */
+	spinlock_t s_lock;
 };
 
 static inline spinlock_t *
-- 
cgit v1.2.3


From 03f4d804a1b4748885dc4613a4afe10089a731c8 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 15 Apr 2010 22:16:24 +0200
Subject: jbd: Provide function to check whether transaction will issue data
 barrier

Provide a function which returns whether a transaction with given tid
will send a barrier to the filesystem device. The function will be used
by ext3 to detect whether fsync needs to send a separate barrier or not.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/jbd/commit.c     |  8 +++++++-
 fs/jbd/journal.c    | 33 +++++++++++++++++++++++++++++++++
 include/linux/jbd.h |  3 ++-
 3 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include/linux')

diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index ecb44c94ba8d..28a9ddaa0c49 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -786,6 +786,12 @@ wait_for_iobuf:
 
 	jbd_debug(3, "JBD: commit phase 6\n");
 
+	/* All metadata is written, now write commit record and do cleanup */
+	spin_lock(&journal->j_state_lock);
+	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+	commit_transaction->t_state = T_COMMIT_RECORD;
+	spin_unlock(&journal->j_state_lock);
+
 	if (journal_write_commit_record(journal, commit_transaction))
 		err = -EIO;
 
@@ -923,7 +929,7 @@ restart_loop:
 
 	jbd_debug(3, "JBD: commit phase 8\n");
 
-	J_ASSERT(commit_transaction->t_state == T_COMMIT);
+	J_ASSERT(commit_transaction->t_state == T_COMMIT_RECORD);
 
 	commit_transaction->t_state = T_FINISHED;
 	J_ASSERT(commit_transaction == journal->j_committing_transaction);
diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c
index bd224eec9b07..99c71940155a 100644
--- a/fs/jbd/journal.c
+++ b/fs/jbd/journal.c
@@ -564,6 +564,38 @@ int log_wait_commit(journal_t *journal, tid_t tid)
 	return err;
 }
 
+/*
+ * Return 1 if the transaction with the given tid has not yet sent the
+ * barrier request connected with its commit. If 0 is returned, the
+ * transaction may or may not have sent the barrier. Used to avoid
+ * sending the barrier twice in common cases.
+ */
+int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
+{
+	int ret = 0;
+	transaction_t *commit_trans;
+
+	if (!(journal->j_flags & JFS_BARRIER))
+		return 0;
+	spin_lock(&journal->j_state_lock);
+	/* Transaction already committed? */
+	if (tid_geq(journal->j_commit_sequence, tid))
+		goto out;
+	/*
+	 * Transaction is being committed and we already proceeded to
+	 * writing commit record?
+	 */
+	commit_trans = journal->j_committing_transaction;
+	if (commit_trans && commit_trans->t_tid == tid &&
+	    commit_trans->t_state >= T_COMMIT_RECORD)
+		goto out;
+	ret = 1;
+out:
+	spin_unlock(&journal->j_state_lock);
+	return ret;
+}
+EXPORT_SYMBOL(journal_trans_will_send_data_barrier);
+
 /*
  * Log buffer allocation routines:
  */
@@ -1157,6 +1189,7 @@ int journal_destroy(journal_t *journal)
 {
 	int err = 0;
 
+	
 	/* Wait for the commit thread to wake up and die. */
 	journal_kill_thread(journal);
 
diff --git a/include/linux/jbd.h b/include/linux/jbd.h
index 516a2a27e87a..e06965081ba5 100644
--- a/include/linux/jbd.h
+++ b/include/linux/jbd.h
@@ -427,9 +427,9 @@ struct transaction_s
 	enum {
 		T_RUNNING,
 		T_LOCKED,
-		T_RUNDOWN,
 		T_FLUSH,
 		T_COMMIT,
+		T_COMMIT_RECORD,
 		T_FINISHED
 	}			t_state;
 
@@ -991,6 +991,7 @@ int journal_start_commit(journal_t *journal, tid_t *tid);
 int journal_force_commit_nested(journal_t *journal);
 int log_wait_commit(journal_t *journal, tid_t tid);
 int log_do_checkpoint(journal_t *journal);
+int journal_trans_will_send_data_barrier(journal_t *journal, tid_t tid);
 
 void __log_wait_for_space(journal_t *journal);
 extern void	__journal_drop_transaction(journal_t *, transaction_t *);
-- 
cgit v1.2.3


From dde9588853b1bde542eab247f8838c472806688f Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Mon, 26 Apr 2010 20:03:33 +0400
Subject: quota: Make quota stat accounting lockless.

Quota stats are a write-mostly data structure. Let's allocate a percpu
bucket for each value.

NOTE: dqstats_read() is racy against dqstats_{inc,dec} and may return
an inconsistent value. But this is OK since absolute accuracy is not
required.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c      | 102 +++++++++++++++++++++++++++++++++-----------------
 fs/quota/quota_tree.c |   4 +-
 fs/quota/quota_v1.c   |   4 +-
 include/linux/quota.h |  42 +++++++++++++++++----
 4 files changed, 106 insertions(+), 46 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index ae766056350d..01347e81d0ca 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -82,7 +82,7 @@
 
 /*
  * There are three quota SMP locks. dq_list_lock protects all lists with quotas
- * and quota formats, dqstats structure containing statistics about the lists
+ * and quota formats.
  * dq_data_lock protects data from dq_dqb and also mem_dqinfo structures and
  * also guards consistency of dquot->dq_dqb with inode->i_blocks, i_bytes.
  * i_blocks and i_bytes updates itself are guarded by i_lock acquired directly
@@ -228,6 +228,10 @@ static struct hlist_head *dquot_hash;
 
 struct dqstats dqstats;
 EXPORT_SYMBOL(dqstats);
+#ifdef CONFIG_SMP
+struct dqstats *dqstats_pcpu;
+EXPORT_SYMBOL(dqstats_pcpu);
+#endif
 
 static qsize_t inode_get_rsv_space(struct inode *inode);
 static void __dquot_initialize(struct inode *inode, int type);
@@ -275,7 +279,7 @@ static struct dquot *find_dquot(unsigned int hashent, struct super_block *sb,
 static inline void put_dquot_last(struct dquot *dquot)
 {
 	list_add_tail(&dquot->dq_free, &free_dquots);
-	dqstats.free_dquots++;
+	dqstats_inc(DQST_FREE_DQUOTS);
 }
 
 static inline void remove_free_dquot(struct dquot *dquot)
@@ -283,7 +287,7 @@ static inline void remove_free_dquot(struct dquot *dquot)
 	if (list_empty(&dquot->dq_free))
 		return;
 	list_del_init(&dquot->dq_free);
-	dqstats.free_dquots--;
+	dqstats_dec(DQST_FREE_DQUOTS);
 }
 
 static inline void put_inuse(struct dquot *dquot)
@@ -291,12 +295,12 @@ static inline void put_inuse(struct dquot *dquot)
 	/* We add to the back of inuse list so we don't have to restart
 	 * when traversing this list and we block */
 	list_add_tail(&dquot->dq_inuse, &inuse_list);
-	dqstats.allocated_dquots++;
+	dqstats_inc(DQST_ALLOC_DQUOTS);
 }
 
 static inline void remove_inuse(struct dquot *dquot)
 {
-	dqstats.allocated_dquots--;
+	dqstats_dec(DQST_ALLOC_DQUOTS);
 	list_del(&dquot->dq_inuse);
 }
 /*
@@ -561,8 +565,8 @@ int dquot_scan_active(struct super_block *sb,
 			continue;
 		/* Now we have active dquot so we can just increase use count */
 		atomic_inc(&dquot->dq_count);
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_LOOKUPS);
 		dqput(old_dquot);
 		old_dquot = dquot;
 		ret = fn(dquot, priv);
@@ -607,8 +611,8 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
  			 * holding reference so we can safely just increase
 			 * use count */
 			atomic_inc(&dquot->dq_count);
-			dqstats.lookups++;
 			spin_unlock(&dq_list_lock);
+			dqstats_inc(DQST_LOOKUPS);
 			sb->dq_op->write_dquot(dquot);
 			dqput(dquot);
 			spin_lock(&dq_list_lock);
@@ -620,9 +624,7 @@ int vfs_quota_sync(struct super_block *sb, int type, int wait)
 		if ((cnt == type || type == -1) && sb_has_quota_active(sb, cnt)
 		    && info_dirty(&dqopt->info[cnt]))
 			sb->dq_op->write_info(sb, cnt);
-	spin_lock(&dq_list_lock);
-	dqstats.syncs++;
-	spin_unlock(&dq_list_lock);
+	dqstats_inc(DQST_SYNCS);
 	mutex_unlock(&dqopt->dqonoff_mutex);
 
 	if (!wait || (sb_dqopt(sb)->flags & DQUOT_QUOTA_SYS_FILE))
@@ -674,6 +676,22 @@ static void prune_dqcache(int count)
 	}
 }
 
+static int dqstats_read(unsigned int type)
+{
+	int count = 0;
+#ifdef CONFIG_SMP
+	int cpu;
+	for_each_possible_cpu(cpu)
+		count += per_cpu_ptr(dqstats_pcpu, cpu)->stat[type];
+	/* Statistics reading is racy, but absolute accuracy isn't required */
+	if (count < 0)
+		count = 0;
+#else
+	count = dqstats.stat[type];
+#endif
+	return count;
+}
+
 /*
  * This is called from kswapd when we think we need some
  * more memory
@@ -686,7 +704,7 @@ static int shrink_dqcache_memory(int nr, gfp_t gfp_mask)
 		prune_dqcache(nr);
 		spin_unlock(&dq_list_lock);
 	}
-	return (dqstats.free_dquots / 100) * sysctl_vfs_cache_pressure;
+	return (dqstats_read(DQST_FREE_DQUOTS)/100) * sysctl_vfs_cache_pressure;
 }
 
 static struct shrinker dqcache_shrinker = {
@@ -714,10 +732,7 @@ void dqput(struct dquot *dquot)
 		BUG();
 	}
 #endif
-	
-	spin_lock(&dq_list_lock);
-	dqstats.drops++;
-	spin_unlock(&dq_list_lock);
+	dqstats_inc(DQST_DROPS);
 we_slept:
 	spin_lock(&dq_list_lock);
 	if (atomic_read(&dquot->dq_count) > 1) {
@@ -834,15 +849,15 @@ we_slept:
 		put_inuse(dquot);
 		/* hash it first so it can be found */
 		insert_dquot_hash(dquot);
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_LOOKUPS);
 	} else {
 		if (!atomic_read(&dquot->dq_count))
 			remove_free_dquot(dquot);
 		atomic_inc(&dquot->dq_count);
-		dqstats.cache_hits++;
-		dqstats.lookups++;
 		spin_unlock(&dq_list_lock);
+		dqstats_inc(DQST_CACHE_HITS);
+		dqstats_inc(DQST_LOOKUPS);
 	}
 	/* Wait for dq_lock - after this we know that either dquot_release() is
 	 * already finished or it will be canceled due to dq_count > 1 test */
@@ -2476,62 +2491,74 @@ const struct quotactl_ops vfs_quotactl_ops = {
 	.set_dqblk	= vfs_set_dqblk
 };
 
+
+static int do_proc_dqstats(struct ctl_table *table, int write,
+		     void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+#ifdef CONFIG_SMP
+	/* Update global table */
+	unsigned int type = (int *)table->data - dqstats.stat;
+	dqstats.stat[type] = dqstats_read(type);
+#endif
+	return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+
 static ctl_table fs_dqstats_table[] = {
 	{
 		.procname	= "lookups",
-		.data		= &dqstats.lookups,
+		.data		= &dqstats.stat[DQST_LOOKUPS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "drops",
-		.data		= &dqstats.drops,
+		.data		= &dqstats.stat[DQST_DROPS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "reads",
-		.data		= &dqstats.reads,
+		.data		= &dqstats.stat[DQST_READS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "writes",
-		.data		= &dqstats.writes,
+		.data		= &dqstats.stat[DQST_WRITES],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "cache_hits",
-		.data		= &dqstats.cache_hits,
+		.data		= &dqstats.stat[DQST_CACHE_HITS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "allocated_dquots",
-		.data		= &dqstats.allocated_dquots,
+		.data		= &dqstats.stat[DQST_ALLOC_DQUOTS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "free_dquots",
-		.data		= &dqstats.free_dquots,
+		.data		= &dqstats.stat[DQST_FREE_DQUOTS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 	{
 		.procname	= "syncs",
-		.data		= &dqstats.syncs,
+		.data		= &dqstats.stat[DQST_SYNCS],
 		.maxlen		= sizeof(int),
 		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
+		.proc_handler	= do_proc_dqstats,
 	},
 #ifdef CONFIG_PRINT_QUOTA_WARNING
 	{
@@ -2583,6 +2610,13 @@ static int __init dquot_init(void)
 	if (!dquot_hash)
 		panic("Cannot create dquot hash table");
 
+#ifdef CONFIG_SMP
+	dqstats_pcpu = alloc_percpu(struct dqstats);
+	if (!dqstats_pcpu)
+		panic("Cannot create dquot stats table");
+#endif
+	memset(&dqstats, 0, sizeof(struct dqstats));
+
 	/* Find power-of-two hlist_heads which can fit into allocation */
 	nr_hash = (1UL << order) * PAGE_SIZE / sizeof(struct hlist_head);
 	dq_hash_bits = 0;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index f81f4bcfb178..5b7f7416ec7a 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -384,7 +384,7 @@ int qtree_write_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	} else {
 		ret = 0;
 	}
-	dqstats.writes++;
+	dqstats_inc(DQST_WRITES);
 	kfree(ddquot);
 
 	return ret;
@@ -634,7 +634,7 @@ int qtree_read_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
 	spin_unlock(&dq_data_lock);
 	kfree(ddquot);
 out:
-	dqstats.reads++;
+	dqstats_inc(DQST_READS);
 	return ret;
 }
 EXPORT_SYMBOL(qtree_read_dquot);
diff --git a/fs/quota/quota_v1.c b/fs/quota/quota_v1.c
index 2ae757e9c008..4af344c5852a 100644
--- a/fs/quota/quota_v1.c
+++ b/fs/quota/quota_v1.c
@@ -71,7 +71,7 @@ static int v1_read_dqblk(struct dquot *dquot)
 	    dquot->dq_dqb.dqb_ihardlimit == 0 &&
 	    dquot->dq_dqb.dqb_isoftlimit == 0)
 		set_bit(DQ_FAKE_B, &dquot->dq_flags);
-	dqstats.reads++;
+	dqstats_inc(DQST_READS);
 
 	return 0;
 }
@@ -104,7 +104,7 @@ static int v1_commit_dqblk(struct dquot *dquot)
 	ret = 0;
 
 out:
-	dqstats.writes++;
+	dqstats_inc(DQST_WRITES);
 
 	return ret;
 }
diff --git a/include/linux/quota.h b/include/linux/quota.h
index b462916b2a0a..cdfde10481b7 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -174,6 +174,8 @@ enum {
 #include <linux/rwsem.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
 
 #include <linux/dqblk_xfs.h>
 #include <linux/dqblk_v1.h>
@@ -238,19 +240,43 @@ static inline int info_dirty(struct mem_dqinfo *info)
 	return test_bit(DQF_INFO_DIRTY_B, &info->dqi_flags);
 }
 
+enum {
+	DQST_LOOKUPS,
+	DQST_DROPS,
+	DQST_READS,
+	DQST_WRITES,
+	DQST_CACHE_HITS,
+	DQST_ALLOC_DQUOTS,
+	DQST_FREE_DQUOTS,
+	DQST_SYNCS,
+	_DQST_DQSTAT_LAST
+};
+
 struct dqstats {
-	int lookups;
-	int drops;
-	int reads;
-	int writes;
-	int cache_hits;
-	int allocated_dquots;
-	int free_dquots;
-	int syncs;
+	int stat[_DQST_DQSTAT_LAST];
 };
 
+extern struct dqstats *dqstats_pcpu;
 extern struct dqstats dqstats;
 
+static inline void dqstats_inc(unsigned int type)
+{
+#ifdef CONFIG_SMP
+	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]++;
+#else
+	dqstats.stat[type]++;
+#endif
+}
+
+static inline void dqstats_dec(unsigned int type)
+{
+#ifdef CONFIG_SMP
+	per_cpu_ptr(dqstats_pcpu, smp_processor_id())->stat[type]--;
+#else
+	dqstats.stat[type]--;
+#endif
+}
+
 #define DQ_MOD_B	0	/* dquot modified since read */
 #define DQ_BLKS_B	1	/* uid/gid has been warned about blk limit */
 #define DQ_INODES_B	2	/* uid/gid has been warned about inode limit */
-- 
cgit v1.2.3


From b9b2dd36c1bc64430f8e13990ab135cbecc10076 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 6 May 2010 17:04:58 -0400
Subject: quota: unify ->get_dqblk

Pass the larger struct fs_disk_quota to the ->get_dqblk operation so
that the Q_GETQUOTA and Q_XGETQUOTA operations can be implemented
with a single filesystem operation and we can retire the ->get_xquota
operation.  The additional information (RT-subvolume accounting and
warn counts) is left zero for the VFS quota implementation.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c                 |  6 +++---
 fs/quota/dquot.c                | 27 ++++++++++++++++-----------
 fs/quota/quota.c                | 23 ++++++++++++++++++-----
 fs/xfs/linux-2.6/xfs_quotaops.c |  4 ++--
 include/linux/quota.h           |  3 +--
 include/linux/quotaops.h        |  3 ++-
 6 files changed, 42 insertions(+), 24 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index d5f4661287f9..dec93577a783 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1476,8 +1476,8 @@ static int gfs2_quota_get_xstate(struct super_block *sb,
 	return 0;
 }
 
-static int gfs2_xquota_get(struct super_block *sb, int type, qid_t id,
-			   struct fs_disk_quota *fdq)
+static int gfs2_get_dqblk(struct super_block *sb, int type, qid_t id,
+			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_quota_lvb *qlvb;
@@ -1629,7 +1629,7 @@ out_put:
 const struct quotactl_ops gfs2_quotactl_ops = {
 	.quota_sync     = gfs2_quota_sync,
 	.get_xstate     = gfs2_quota_get_xstate,
-	.get_xquota	= gfs2_xquota_get,
+	.get_dqblk	= gfs2_get_dqblk,
 	.set_xquota	= gfs2_xquota_set,
 };
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 01347e81d0ca..6aed77fc99c7 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2301,25 +2301,30 @@ static inline qsize_t stoqb(qsize_t space)
 }
 
 /* Generic routine for getting common part of quota structure */
-static void do_get_dqblk(struct dquot *dquot, struct if_dqblk *di)
+static void do_get_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 
+	memset(di, 0, sizeof(*di));
+	di->d_version = FS_DQUOT_VERSION;
+	di->d_flags = dquot->dq_type == USRQUOTA ?
+			XFS_USER_QUOTA : XFS_GROUP_QUOTA;
+	di->d_id = dquot->dq_id;
+
 	spin_lock(&dq_data_lock);
-	di->dqb_bhardlimit = stoqb(dm->dqb_bhardlimit);
-	di->dqb_bsoftlimit = stoqb(dm->dqb_bsoftlimit);
-	di->dqb_curspace = dm->dqb_curspace + dm->dqb_rsvspace;
-	di->dqb_ihardlimit = dm->dqb_ihardlimit;
-	di->dqb_isoftlimit = dm->dqb_isoftlimit;
-	di->dqb_curinodes = dm->dqb_curinodes;
-	di->dqb_btime = dm->dqb_btime;
-	di->dqb_itime = dm->dqb_itime;
-	di->dqb_valid = QIF_ALL;
+	di->d_blk_hardlimit = stoqb(dm->dqb_bhardlimit);
+	di->d_blk_softlimit = stoqb(dm->dqb_bsoftlimit);
+	di->d_ino_hardlimit = dm->dqb_ihardlimit;
+	di->d_ino_softlimit = dm->dqb_isoftlimit;
+	di->d_bcount = dm->dqb_curspace + dm->dqb_rsvspace;
+	di->d_icount = dm->dqb_curinodes;
+	di->d_btimer = dm->dqb_btime;
+	di->d_itimer = dm->dqb_itime;
 	spin_unlock(&dq_data_lock);
 }
 
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
-		  struct if_dqblk *di)
+		  struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 95388f9b7356..8680e257c2bd 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -136,19 +136,32 @@ static int quota_setinfo(struct super_block *sb, int type, void __user *addr)
 	return sb->s_qcop->set_info(sb, type, &info);
 }
 
+static void copy_to_if_dqblk(struct if_dqblk *dst, struct fs_disk_quota *src)
+{
+	dst->dqb_bhardlimit = src->d_blk_hardlimit;
+	dst->dqb_bsoftlimit = src->d_blk_softlimit;
+	dst->dqb_curspace = src->d_bcount;
+	dst->dqb_ihardlimit = src->d_ino_hardlimit;
+	dst->dqb_isoftlimit = src->d_ino_softlimit;
+	dst->dqb_curinodes = src->d_icount;
+	dst->dqb_btime = src->d_btimer;
+	dst->dqb_itime = src->d_itimer;
+	dst->dqb_valid = QIF_ALL;
+}
+
 static int quota_getquota(struct super_block *sb, int type, qid_t id,
 			  void __user *addr)
 {
+	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
 	int ret;
 
-	if (!sb_has_quota_active(sb, type))
-		return -ESRCH;
 	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_dqblk(sb, type, id, &idq);
+	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
 	if (ret)
 		return ret;
+	copy_to_if_dqblk(&idq, &fdq);
 	if (copy_to_user(addr, &idq, sizeof(idq)))
 		return -EFAULT;
 	return 0;
@@ -210,9 +223,9 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
 	struct fs_disk_quota fdq;
 	int ret;
 
-	if (!sb->s_qcop->get_xquota)
+	if (!sb->s_qcop->get_dqblk)
 		return -ENOSYS;
-	ret = sb->s_qcop->get_xquota(sb, type, id, &fdq);
+	ret = sb->s_qcop->get_dqblk(sb, type, id, &fdq);
 	if (!ret && copy_to_user(addr, &fdq, sizeof(fdq)))
 		return -EFAULT;
 	return ret;
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 1947514ce1ad..3d473f43c9a9 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -97,7 +97,7 @@ xfs_fs_set_xstate(
 }
 
 STATIC int
-xfs_fs_get_xquota(
+xfs_fs_get_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -135,6 +135,6 @@ xfs_fs_set_xquota(
 const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
-	.get_xquota		= xfs_fs_get_xquota,
+	.get_dqblk		= xfs_fs_get_dqblk,
 	.set_xquota		= xfs_fs_set_xquota,
 };
diff --git a/include/linux/quota.h b/include/linux/quota.h
index cdfde10481b7..42364219dc9b 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -337,11 +337,10 @@ struct quotactl_ops {
 	int (*quota_sync)(struct super_block *, int, int);
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
-	int (*get_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
+	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
-	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 };
 
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index e6fa7acce290..d32a48631b0d 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -63,7 +63,8 @@ int vfs_quota_disable(struct super_block *sb, int type, unsigned int flags);
 int vfs_quota_sync(struct super_block *sb, int type, int wait);
 int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
-int vfs_get_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
+		struct fs_disk_quota *di);
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
-- 
cgit v1.2.3


From c472b43275976512e4c1c32da5ced03f339cb380 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@infradead.org>
Date: Thu, 6 May 2010 17:05:17 -0400
Subject: quota: unify ->set_dqblk

Pass the larger struct fs_disk_quota to the ->set_dqblk operation so
that the Q_SETQUOTA and Q_XSETQUOTA operations can be implemented
with a single filesystem operation and we can retire the ->set_xquota
operation.  The additional information (RT-subvolume accounting and
warn counts) is left zero for the VFS quota implementation.

Add new fieldmask values for setting the number of blocks and inodes,
which is required for the VFS quota but wasn't for XFS.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/gfs2/quota.c                 |  6 ++--
 fs/quota/dquot.c                | 67 ++++++++++++++++++++++++++---------------
 fs/quota/quota.c                | 36 +++++++++++++++++++---
 fs/xfs/linux-2.6/xfs_quotaops.c |  4 +--
 fs/xfs/quota/xfs_qm_syscalls.c  | 10 ++++--
 include/linux/dqblk_xfs.h       |  9 ++++++
 include/linux/quota.h           |  3 +-
 include/linux/quotaops.h        |  3 +-
 8 files changed, 98 insertions(+), 40 deletions(-)

(limited to 'include/linux')

diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index dec93577a783..49667d68769e 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1521,8 +1521,8 @@ out:
 /* GFS2 only supports a subset of the XFS fields */
 #define GFS2_FIELDMASK (FS_DQ_BSOFT|FS_DQ_BHARD)
 
-static int gfs2_xquota_set(struct super_block *sb, int type, qid_t id,
-			   struct fs_disk_quota *fdq)
+static int gfs2_set_dqblk(struct super_block *sb, int type, qid_t id,
+			  struct fs_disk_quota *fdq)
 {
 	struct gfs2_sbd *sdp = sb->s_fs_info;
 	struct gfs2_inode *ip = GFS2_I(sdp->sd_quota_inode);
@@ -1630,6 +1630,6 @@ const struct quotactl_ops gfs2_quotactl_ops = {
 	.quota_sync     = gfs2_quota_sync,
 	.get_xstate     = gfs2_quota_get_xstate,
 	.get_dqblk	= gfs2_get_dqblk,
-	.set_xquota	= gfs2_xquota_set,
+	.set_dqblk	= gfs2_set_dqblk,
 };
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 6aed77fc99c7..b1a5036560a9 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2338,51 +2338,70 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 }
 EXPORT_SYMBOL(vfs_get_dqblk);
 
+#define VFS_FS_DQ_MASK \
+	(FS_DQ_BCOUNT | FS_DQ_BSOFT | FS_DQ_BHARD | \
+	 FS_DQ_ICOUNT | FS_DQ_ISOFT | FS_DQ_IHARD | \
+	 FS_DQ_BTIMER | FS_DQ_ITIMER)
+
 /* Generic routine for setting common part of quota structure */
-static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
+static int do_set_dqblk(struct dquot *dquot, struct fs_disk_quota *di)
 {
 	struct mem_dqblk *dm = &dquot->dq_dqb;
 	int check_blim = 0, check_ilim = 0;
 	struct mem_dqinfo *dqi = &sb_dqopt(dquot->dq_sb)->info[dquot->dq_type];
 
-	if ((di->dqb_valid & QIF_BLIMITS &&
-	     (di->dqb_bhardlimit > dqi->dqi_maxblimit ||
-	      di->dqb_bsoftlimit > dqi->dqi_maxblimit)) ||
-	    (di->dqb_valid & QIF_ILIMITS &&
-	     (di->dqb_ihardlimit > dqi->dqi_maxilimit ||
-	      di->dqb_isoftlimit > dqi->dqi_maxilimit)))
+	if (di->d_fieldmask & ~VFS_FS_DQ_MASK)
+		return -EINVAL;
+
+	if (((di->d_fieldmask & FS_DQ_BSOFT) &&
+	     (di->d_blk_softlimit > dqi->dqi_maxblimit)) ||
+	    ((di->d_fieldmask & FS_DQ_BHARD) &&
+	     (di->d_blk_hardlimit > dqi->dqi_maxblimit)) ||
+	    ((di->d_fieldmask & FS_DQ_ISOFT) &&
+	     (di->d_ino_softlimit > dqi->dqi_maxilimit)) ||
+	    ((di->d_fieldmask & FS_DQ_IHARD) &&
+	     (di->d_ino_hardlimit > dqi->dqi_maxilimit)))
 		return -ERANGE;
 
 	spin_lock(&dq_data_lock);
-	if (di->dqb_valid & QIF_SPACE) {
-		dm->dqb_curspace = di->dqb_curspace - dm->dqb_rsvspace;
+	if (di->d_fieldmask & FS_DQ_BCOUNT) {
+		dm->dqb_curspace = di->d_bcount - dm->dqb_rsvspace;
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_SPACE_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BLIMITS) {
-		dm->dqb_bsoftlimit = qbtos(di->dqb_bsoftlimit);
-		dm->dqb_bhardlimit = qbtos(di->dqb_bhardlimit);
+
+	if (di->d_fieldmask & FS_DQ_BSOFT)
+		dm->dqb_bsoftlimit = qbtos(di->d_blk_softlimit);
+	if (di->d_fieldmask & FS_DQ_BHARD)
+		dm->dqb_bhardlimit = qbtos(di->d_blk_hardlimit);
+	if (di->d_fieldmask & (FS_DQ_BSOFT | FS_DQ_BHARD)) {
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_BLIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_INODES) {
-		dm->dqb_curinodes = di->dqb_curinodes;
+
+	if (di->d_fieldmask & FS_DQ_ICOUNT) {
+		dm->dqb_curinodes = di->d_icount;
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_INODES_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_ILIMITS) {
-		dm->dqb_isoftlimit = di->dqb_isoftlimit;
-		dm->dqb_ihardlimit = di->dqb_ihardlimit;
+
+	if (di->d_fieldmask & FS_DQ_ISOFT)
+		dm->dqb_isoftlimit = di->d_ino_softlimit;
+	if (di->d_fieldmask & FS_DQ_IHARD)
+		dm->dqb_ihardlimit = di->d_ino_hardlimit;
+	if (di->d_fieldmask & (FS_DQ_ISOFT | FS_DQ_IHARD)) {
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_ILIMITS_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_BTIME) {
-		dm->dqb_btime = di->dqb_btime;
+
+	if (di->d_fieldmask & FS_DQ_BTIMER) {
+		dm->dqb_btime = di->d_btimer;
 		check_blim = 1;
 		set_bit(DQ_LASTSET_B + QIF_BTIME_B, &dquot->dq_flags);
 	}
-	if (di->dqb_valid & QIF_ITIME) {
-		dm->dqb_itime = di->dqb_itime;
+
+	if (di->d_fieldmask & FS_DQ_ITIMER) {
+		dm->dqb_itime = di->d_itimer;
 		check_ilim = 1;
 		set_bit(DQ_LASTSET_B + QIF_ITIME_B, &dquot->dq_flags);
 	}
@@ -2392,7 +2411,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		    dm->dqb_curspace < dm->dqb_bsoftlimit) {
 			dm->dqb_btime = 0;
 			clear_bit(DQ_BLKS_B, &dquot->dq_flags);
-		} else if (!(di->dqb_valid & QIF_BTIME))
+		} else if (!(di->d_fieldmask & FS_DQ_BTIMER))
 			/* Set grace only if user hasn't provided his own... */
 			dm->dqb_btime = get_seconds() + dqi->dqi_bgrace;
 	}
@@ -2401,7 +2420,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 		    dm->dqb_curinodes < dm->dqb_isoftlimit) {
 			dm->dqb_itime = 0;
 			clear_bit(DQ_INODES_B, &dquot->dq_flags);
-		} else if (!(di->dqb_valid & QIF_ITIME))
+		} else if (!(di->d_fieldmask & FS_DQ_ITIMER))
 			/* Set grace only if user hasn't provided his own... */
 			dm->dqb_itime = get_seconds() + dqi->dqi_igrace;
 	}
@@ -2417,7 +2436,7 @@ static int do_set_dqblk(struct dquot *dquot, struct if_dqblk *di)
 }
 
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
-		  struct if_dqblk *di)
+		  struct fs_disk_quota *di)
 {
 	struct dquot *dquot;
 	int rc;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 8680e257c2bd..d6ee49dda4fd 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -167,18 +167,44 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
 	return 0;
 }
 
+static void copy_from_if_dqblk(struct fs_disk_quota *dst, struct if_dqblk *src)
+{
+	dst->d_blk_hardlimit = src->dqb_bhardlimit;
+	dst->d_blk_softlimit  = src->dqb_bsoftlimit;
+	dst->d_bcount = src->dqb_curspace;
+	dst->d_ino_hardlimit = src->dqb_ihardlimit;
+	dst->d_ino_softlimit = src->dqb_isoftlimit;
+	dst->d_icount = src->dqb_curinodes;
+	dst->d_btimer = src->dqb_btime;
+	dst->d_itimer = src->dqb_itime;
+
+	dst->d_fieldmask = 0;
+	if (src->dqb_valid & QIF_BLIMITS)
+		dst->d_fieldmask |= FS_DQ_BSOFT | FS_DQ_BHARD;
+	if (src->dqb_valid & QIF_SPACE)
+		dst->d_fieldmask |= FS_DQ_BCOUNT;
+	if (src->dqb_valid & QIF_ILIMITS)
+		dst->d_fieldmask |= FS_DQ_ISOFT | FS_DQ_IHARD;
+	if (src->dqb_valid & QIF_INODES)
+		dst->d_fieldmask |= FS_DQ_ICOUNT;
+	if (src->dqb_valid & QIF_BTIME)
+		dst->d_fieldmask |= FS_DQ_BTIMER;
+	if (src->dqb_valid & QIF_ITIME)
+		dst->d_fieldmask |= FS_DQ_ITIMER;
+}
+
 static int quota_setquota(struct super_block *sb, int type, qid_t id,
 			  void __user *addr)
 {
+	struct fs_disk_quota fdq;
 	struct if_dqblk idq;
 
 	if (copy_from_user(&idq, addr, sizeof(idq)))
 		return -EFAULT;
-	if (!sb_has_quota_active(sb, type))
-		return -ESRCH;
 	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
-	return sb->s_qcop->set_dqblk(sb, type, id, &idq);
+	copy_from_if_dqblk(&fdq, &idq);
+	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
 }
 
 static int quota_setxstate(struct super_block *sb, int cmd, void __user *addr)
@@ -212,9 +238,9 @@ static int quota_setxquota(struct super_block *sb, int type, qid_t id,
 
 	if (copy_from_user(&fdq, addr, sizeof(fdq)))
 		return -EFAULT;
-	if (!sb->s_qcop->set_xquota)
+	if (!sb->s_qcop->set_dqblk)
 		return -ENOSYS;
-	return sb->s_qcop->set_xquota(sb, type, id, &fdq);
+	return sb->s_qcop->set_dqblk(sb, type, id, &fdq);
 }
 
 static int quota_getxquota(struct super_block *sb, int type, qid_t id,
diff --git a/fs/xfs/linux-2.6/xfs_quotaops.c b/fs/xfs/linux-2.6/xfs_quotaops.c
index 3d473f43c9a9..e31bf21fe5d3 100644
--- a/fs/xfs/linux-2.6/xfs_quotaops.c
+++ b/fs/xfs/linux-2.6/xfs_quotaops.c
@@ -114,7 +114,7 @@ xfs_fs_get_dqblk(
 }
 
 STATIC int
-xfs_fs_set_xquota(
+xfs_fs_set_dqblk(
 	struct super_block	*sb,
 	int			type,
 	qid_t			id,
@@ -136,5 +136,5 @@ const struct quotactl_ops xfs_quotactl_operations = {
 	.get_xstate		= xfs_fs_get_xstate,
 	.set_xstate		= xfs_fs_set_xstate,
 	.get_dqblk		= xfs_fs_get_dqblk,
-	.set_xquota		= xfs_fs_set_xquota,
+	.set_dqblk		= xfs_fs_set_dqblk,
 };
diff --git a/fs/xfs/quota/xfs_qm_syscalls.c b/fs/xfs/quota/xfs_qm_syscalls.c
index 26fa43140f2e..92b002f1805f 100644
--- a/fs/xfs/quota/xfs_qm_syscalls.c
+++ b/fs/xfs/quota/xfs_qm_syscalls.c
@@ -448,6 +448,9 @@ xfs_qm_scall_getqstat(
 	return 0;
 }
 
+#define XFS_DQ_MASK \
+	(FS_DQ_LIMIT_MASK | FS_DQ_TIMER_MASK | FS_DQ_WARNS_MASK)
+
 /*
  * Adjust quota limits, and start/stop timers accordingly.
  */
@@ -465,9 +468,10 @@ xfs_qm_scall_setqlim(
 	int			error;
 	xfs_qcnt_t		hard, soft;
 
-	if ((newlim->d_fieldmask &
-	    (FS_DQ_LIMIT_MASK|FS_DQ_TIMER_MASK|FS_DQ_WARNS_MASK)) == 0)
-		return (0);
+	if (newlim->d_fieldmask & ~XFS_DQ_MASK)
+		return EINVAL;
+	if ((newlim->d_fieldmask & XFS_DQ_MASK) == 0)
+		return 0;
 
 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
 	if ((error = xfs_trans_reserve(tp, 0, sizeof(xfs_disk_dquot_t) + 128,
diff --git a/include/linux/dqblk_xfs.h b/include/linux/dqblk_xfs.h
index 527504c11c5e..4389ae72024e 100644
--- a/include/linux/dqblk_xfs.h
+++ b/include/linux/dqblk_xfs.h
@@ -109,6 +109,15 @@ typedef struct fs_disk_quota {
 #define FS_DQ_RTBWARNS	(1<<11)
 #define FS_DQ_WARNS_MASK	(FS_DQ_BWARNS | FS_DQ_IWARNS | FS_DQ_RTBWARNS)
 
+/*
+ * Accounting values.  These can only be set for filesystems with
+ * non-transactional quotas that require quotacheck(8) in userspace.
+ */
+#define FS_DQ_BCOUNT		(1<<12)
+#define FS_DQ_ICOUNT		(1<<13)
+#define FS_DQ_RTBCOUNT		(1<<14)
+#define FS_DQ_ACCT_MASK		(FS_DQ_BCOUNT | FS_DQ_ICOUNT | FS_DQ_RTBCOUNT)
+
 /*
  * Various flags related to quotactl(2).  Only relevant to XFS filesystems.
  */
diff --git a/include/linux/quota.h b/include/linux/quota.h
index 42364219dc9b..7126a15467f1 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -338,10 +338,9 @@ struct quotactl_ops {
 	int (*get_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*set_info)(struct super_block *, int, struct if_dqinfo *);
 	int (*get_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
-	int (*set_dqblk)(struct super_block *, int, qid_t, struct if_dqblk *);
+	int (*set_dqblk)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 	int (*get_xstate)(struct super_block *, struct fs_quota_stat *);
 	int (*set_xstate)(struct super_block *, unsigned int, int);
-	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
 };
 
 struct quota_format_type {
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index d32a48631b0d..82c70c42d035 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -65,7 +65,8 @@ int vfs_get_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_set_dqinfo(struct super_block *sb, int type, struct if_dqinfo *ii);
 int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 		struct fs_disk_quota *di);
-int vfs_set_dqblk(struct super_block *sb, int type, qid_t id, struct if_dqblk *di);
+int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
+		struct fs_disk_quota *di);
 
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
-- 
cgit v1.2.3


From 12755627bdcddcdb30a1bfb9a09395a52b1d6838 Mon Sep 17 00:00:00 2001
From: Dmitry Monakhov <dmonakhov@openvz.org>
Date: Thu, 8 Apr 2010 22:04:20 +0400
Subject: quota: unify quota init condition in setattr

Quota must be initialized if a size or uid/gid change is requested.
But initialization is performed in two different places:
in case of i_size the file system is responsible for dquot init,
but in case of uid/gid the init is called internally in
dquot_transfer().
This ambiguity makes the code harder to understand.
Let's move this logic to one common helper function.

Signed-off-by: Dmitry Monakhov <dmonakhov@openvz.org>
Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/ext2/inode.c          | 2 +-
 fs/ext3/inode.c          | 2 +-
 fs/ext4/inode.c          | 2 +-
 fs/jfs/file.c            | 2 +-
 fs/ocfs2/file.c          | 4 ++--
 fs/quota/dquot.c         | 5 ++---
 fs/reiserfs/inode.c      | 3 ++-
 fs/udf/file.c            | 2 +-
 fs/ufs/truncate.c        | 8 ++++----
 include/linux/quotaops.h | 8 ++++++++
 10 files changed, 23 insertions(+), 15 deletions(-)

(limited to 'include/linux')

diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index b90c3bf6e9ba..527c46d9bc1f 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1466,7 +1466,7 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (error)
 		return error;
 
-	if (iattr->ia_valid & ATTR_SIZE)
+	if (is_quota_modification(inode, iattr))
 		dquot_initialize(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index ea33bdf0a300..735f0190ec2a 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -3151,7 +3151,7 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
-	if (ia_valid & ATTR_SIZE)
+	if (is_quota_modification(inode, attr))
 		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 81d605412844..3e0f6af9d08d 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -5425,7 +5425,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
-	if (ia_valid & ATTR_SIZE)
+	if (is_quota_modification(inode, attr))
 		dquot_initialize(inode);
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 		(ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index 14ba982b3f24..85d9ec659225 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -98,7 +98,7 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (rc)
 		return rc;
 
-	if (iattr->ia_valid & ATTR_SIZE)
+	if (is_quota_modification(inode, iattr))
 		dquot_initialize(inode);
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
 	    (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)) {
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index f74f1400eccd..e127c53ec2e7 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -966,10 +966,10 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
 	if (status)
 		return status;
 
+	if (is_quota_modification(inode, attr))
+		dquot_initialize(inode);
 	size_change = S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE;
 	if (size_change) {
-		dquot_initialize(inode);
-
 		status = ocfs2_rw_lock(inode, 1);
 		if (status < 0) {
 			mlog_errno(status);
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b1a5036560a9..1056a21f0300 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1822,10 +1822,9 @@ int dquot_transfer(struct inode *inode, struct iattr *iattr)
 		mask |= 1 << GRPQUOTA;
 		chid[GRPQUOTA] = iattr->ia_gid;
 	}
-	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode)) {
-		dquot_initialize(inode);
+	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
 		return __dquot_transfer(inode, chid, mask);
-	}
+
 	return 0;
 }
 EXPORT_SYMBOL(dquot_transfer);
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index dc2c65e04853..0f22fdaf54ac 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -3076,9 +3076,10 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
 	ia_valid = attr->ia_valid &= ~(ATTR_KILL_SUID|ATTR_KILL_SGID);
 
 	depth = reiserfs_write_lock_once(inode->i_sb);
-	if (attr->ia_valid & ATTR_SIZE) {
+	if (is_quota_modification(inode, attr))
 		dquot_initialize(inode);
 
+	if (attr->ia_valid & ATTR_SIZE) {
 		/* version 2 items will be caught by the s_maxbytes check
 		 ** done for us in vmtruncate
 		 */
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 4b6a46ccbf46..6ebc043f3a2a 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -227,7 +227,7 @@ int udf_setattr(struct dentry *dentry, struct iattr *iattr)
 	if (error)
 		return error;
 
-	if (iattr->ia_valid & ATTR_SIZE)
+	if (is_quota_modification(inode, iattr))
 		dquot_initialize(inode);
 
 	if ((iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) ||
diff --git a/fs/ufs/truncate.c b/fs/ufs/truncate.c
index ee8db3e77bfe..f294c44577dc 100644
--- a/fs/ufs/truncate.c
+++ b/fs/ufs/truncate.c
@@ -518,18 +518,18 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
 	if (error)
 		return error;
 
+	if (is_quota_modification(inode, attr))
+		dquot_initialize(inode);
+
 	if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) ||
 	    (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) {
 		error = dquot_transfer(inode, attr);
 		if (error)
 			return error;
 	}
-	if (ia_valid & ATTR_SIZE &&
-	    attr->ia_size != i_size_read(inode)) {
+	if (ia_valid & ATTR_SIZE && attr->ia_size != inode->i_size) {
 		loff_t old_i_size = inode->i_size;
 
-		dquot_initialize(inode);
-
 		error = vmtruncate(inode, attr->ia_size);
 		if (error)
 			return error;
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 82c70c42d035..8a7818764a67 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -14,6 +14,14 @@ static inline struct quota_info *sb_dqopt(struct super_block *sb)
 	return &sb->s_dquot;
 }
 
+/* i_mutex must be held */
+static inline bool is_quota_modification(struct inode *inode, struct iattr *ia)
+{
+	return (ia->ia_valid & ATTR_SIZE && ia->ia_size != inode->i_size) ||
+		(ia->ia_valid & ATTR_UID && ia->ia_uid != inode->i_uid) ||
+		(ia->ia_valid & ATTR_GID && ia->ia_gid != inode->i_gid);
+}
+
 #if defined(CONFIG_QUOTA)
 
 /*
-- 
cgit v1.2.3


From bc8e5f07392f05c47c8bdeff4f7098db440d065c Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Thu, 13 May 2010 19:58:50 +0200
Subject: quota: Refactor dquot_transfer code so that OCFS2 can pass in its
 references

Currently, __dquot_transfer() acquires its own references of dquot structures
that will be put into inode. But for OCFS2, this creates a lock inversion
between dq_lock (waited on in dqget) and transaction start (started in
ocfs2_setattr). Currently, deadlock is impossible because dq_lock is acquired
only during dquot_acquire and dquot_release and we already hold a reference to
dquot structures in ocfs2_setattr so neither of these functions can be called
while we call dquot_transfer. But this is rather subtle and it is hard to teach
lockdep about it. So provide __dquot_transfer function that can be passed dquot
references directly. OCFS2 can then pass acquired dquot references directly to
__dquot_transfer with proper locking.

Signed-off-by: Jan Kara <jack@suse.cz>
---
 fs/quota/dquot.c         | 61 +++++++++++++++++++++---------------------------
 include/linux/quotaops.h |  1 +
 2 files changed, 27 insertions(+), 35 deletions(-)

(limited to 'include/linux')

diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 1056a21f0300..655a4c52b8c3 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -1703,16 +1703,19 @@ EXPORT_SYMBOL(dquot_free_inode);
 
 /*
  * Transfer the number of inode and blocks from one diskquota to an other.
+ * On success, dquot references in transfer_to are consumed and references
+ * to original dquots that need to be released are placed there. On failure,
+ * references are kept untouched.
  *
  * This operation can block, but only after everything is updated
  * A transaction must be started when entering this function.
+ *
  */
-static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask)
+int __dquot_transfer(struct inode *inode, struct dquot **transfer_to)
 {
 	qsize_t space, cur_space;
 	qsize_t rsv_space = 0;
-	struct dquot *transfer_from[MAXQUOTAS];
-	struct dquot *transfer_to[MAXQUOTAS];
+	struct dquot *transfer_from[MAXQUOTAS] = {};
 	int cnt, ret = 0;
 	char warntype_to[MAXQUOTAS];
 	char warntype_from_inodes[MAXQUOTAS], warntype_from_space[MAXQUOTAS];
@@ -1722,19 +1725,12 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
 	if (IS_NOQUOTA(inode))
 		return 0;
 	/* Initialize the arrays */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		transfer_from[cnt] = NULL;
-		transfer_to[cnt] = NULL;
+	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
 		warntype_to[cnt] = QUOTA_NL_NOWARN;
-	}
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++) {
-		if (mask & (1 << cnt))
-			transfer_to[cnt] = dqget(inode->i_sb, chid[cnt], cnt);
-	}
 	down_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
 	if (IS_NOQUOTA(inode)) {	/* File without quota accounting? */
 		up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-		goto put_all;
+		return 0;
 	}
 	spin_lock(&dq_data_lock);
 	cur_space = inode_get_bytes(inode);
@@ -1786,46 +1782,41 @@ static int __dquot_transfer(struct inode *inode, qid_t *chid, unsigned long mask
 
 	mark_all_dquot_dirty(transfer_from);
 	mark_all_dquot_dirty(transfer_to);
-	/* The reference we got is transferred to the inode */
+	/* Pass back references to put */
 	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		transfer_to[cnt] = NULL;
-warn_put_all:
+		transfer_to[cnt] = transfer_from[cnt];
+warn:
 	flush_warnings(transfer_to, warntype_to);
 	flush_warnings(transfer_from, warntype_from_inodes);
 	flush_warnings(transfer_from, warntype_from_space);
-put_all:
-	dqput_all(transfer_from);
-	dqput_all(transfer_to);
 	return ret;
 over_quota:
 	spin_unlock(&dq_data_lock);
 	up_write(&sb_dqopt(inode->i_sb)->dqptr_sem);
-	/* Clear dquot pointers we don't want to dqput() */
-	for (cnt = 0; cnt < MAXQUOTAS; cnt++)
-		transfer_from[cnt] = NULL;
-	goto warn_put_all;
+	goto warn;
 }
+EXPORT_SYMBOL(__dquot_transfer);
 
 /* Wrapper for transferring ownership of an inode for uid/gid only
  * Called from FSXXX_setattr()
  */
 int dquot_transfer(struct inode *inode, struct iattr *iattr)
 {
-	qid_t chid[MAXQUOTAS];
-	unsigned long mask = 0;
+	struct dquot *transfer_to[MAXQUOTAS] = {};
+	struct super_block *sb = inode->i_sb;
+	int ret;
 
-	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid) {
-		mask |= 1 << USRQUOTA;
-		chid[USRQUOTA] = iattr->ia_uid;
-	}
-	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid) {
-		mask |= 1 << GRPQUOTA;
-		chid[GRPQUOTA] = iattr->ia_gid;
-	}
-	if (sb_any_quota_active(inode->i_sb) && !IS_NOQUOTA(inode))
-		return __dquot_transfer(inode, chid, mask);
+	if (!sb_any_quota_active(sb) || IS_NOQUOTA(inode))
+		return 0;
 
-	return 0;
+	if (iattr->ia_valid & ATTR_UID && iattr->ia_uid != inode->i_uid)
+		transfer_to[USRQUOTA] = dqget(sb, iattr->ia_uid, USRQUOTA);
+	if (iattr->ia_valid & ATTR_GID && iattr->ia_gid != inode->i_gid)
+		transfer_to[GRPQUOTA] = dqget(sb, iattr->ia_gid, GRPQUOTA);
+
+	ret = __dquot_transfer(inode, transfer_to);
+	dqput_all(transfer_to);
+	return ret;
 }
 EXPORT_SYMBOL(dquot_transfer);
 
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
index 8a7818764a67..370abb1e99cb 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -76,6 +76,7 @@ int vfs_get_dqblk(struct super_block *sb, int type, qid_t id,
 int vfs_set_dqblk(struct super_block *sb, int type, qid_t id,
 		struct fs_disk_quota *di);
 
+int __dquot_transfer(struct inode *inode, struct dquot **transfer_to);
 int dquot_transfer(struct inode *inode, struct iattr *iattr);
 int vfs_dq_quota_on_remount(struct super_block *sb);
 
-- 
cgit v1.2.3


From b4e6b09738fde057ce885703705f71cc953d0512 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Fri, 21 May 2010 09:03:01 -0700
Subject: interrupt.h: fix fatal kernel-doc error

Fix kernel-doc fatal error:
/** beginning a non-kernel-doc comment block:
(That alone does not kill kernel-doc, but the 'enum' was
totally confusing to it.)

Error(/lnx/src/TMP/linux-2.6.34-git6//include/linux/interrupt.h:88): cannot understand prototype: 'enum '
make[2]: *** [Documentation/DocBook/genericirq.xml] Error 1

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/interrupt.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/linux')

diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 5137db3317f9..c2331138ca1b 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -78,7 +78,7 @@ enum {
 	IRQTF_AFFINITY,
 };
 
-/**
+/*
  * These values can be returned by request_any_context_irq() and
  * describe the context the interrupt will be run in.
  *
-- 
cgit v1.2.3