From 4951704b4e23d71b99ac933d8e6993bc6225ac13 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 12 May 2008 03:29:11 -0700
Subject: syncppp: Fix crashes.

The syncppp layer wants a mid-level netdev private pointer.

It was using netdev->priv but that only worked by accident,
and thus this scheme was broken when the device private
allocation strategy changed.

Add a proper mid-layer private pointer for uses like this,
update syncppp and all users, and remove the HDLC_PPP broken
tag from drivers/net/wan/Kconfig

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/syncppp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/syncppp.h b/include/net/syncppp.h
index 877efa434700..e43f4070d892 100644
--- a/include/net/syncppp.h
+++ b/include/net/syncppp.h
@@ -59,7 +59,7 @@ struct ppp_device
 
 static inline struct sppp *sppp_of(struct net_device *dev) 
 {
-	struct ppp_device **ppp = dev->priv;
+	struct ppp_device **ppp = dev->ml_priv;
 	BUG_ON((*ppp)->dev != dev);
 	return &(*ppp)->sppp;
 }
-- 
cgit v1.2.3


From 332223831e86b2e17b48b4afafad07d8e3b73861 Mon Sep 17 00:00:00 2001
From: Graf Yang <graf.yang@analog.com>
Date: Tue, 13 May 2008 23:25:57 -0700
Subject: irda: Fix a misalign access issue. (v2)

Replace u16ho with put/get_unaligned functions

Signed-off-by: Graf Yang <graf.yang@analog.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/irda/discovery.h | 3 ---
 net/irda/discovery.c         | 8 +++++---
 net/irda/irlmp.c             | 5 +++--
 net/irda/irnet/irnet_irda.c  | 4 ++--
 4 files changed, 10 insertions(+), 10 deletions(-)

(limited to 'include/net')

diff --git a/include/net/irda/discovery.h b/include/net/irda/discovery.h
index e4efad1f9eff..0ce93398720d 100644
--- a/include/net/irda/discovery.h
+++ b/include/net/irda/discovery.h
@@ -57,9 +57,6 @@ typedef union {
 	__u8  byte[2];
 } __u16_host_order;
 
-/* Same purpose, different application */
-#define u16ho(array) (* ((__u16 *) array))
-
 /* Types of discovery */
 typedef enum {
 	DISCOVERY_LOG,		/* What's in our discovery log */
diff --git a/net/irda/discovery.c b/net/irda/discovery.c
index bfacef8b76f4..a6f99b5a1499 100644
--- a/net/irda/discovery.c
+++ b/net/irda/discovery.c
@@ -40,6 +40,8 @@
 
 #include <net/irda/discovery.h>
 
+#include <asm/unaligned.h>
+
 /*
  * Function irlmp_add_discovery (cachelog, discovery)
  *
@@ -87,7 +89,7 @@ void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new)
 			 */
 			hashbin_remove_this(cachelog, (irda_queue_t *) node);
 			/* Check if hints bits are unchanged */
-			if(u16ho(node->data.hints) == u16ho(new->data.hints))
+			if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints))
 				/* Set time of first discovery for this node */
 				new->firststamp = node->firststamp;
 			kfree(node);
@@ -281,9 +283,9 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn,
 		/* Mask out the ones we don't want :
 		 * We want to match the discovery mask, and to get only
 		 * the most recent one (unless we want old ones) */
-		if ((u16ho(discovery->data.hints) & mask) &&
+		if ((get_unaligned((__u16 *)discovery->data.hints) & mask) &&
 		    ((old_entries) ||
-		     ((jiffies - discovery->firststamp) < j_timeout)) ) {
+		     ((jiffies - discovery->firststamp) < j_timeout))) {
 			/* Create buffer as needed.
 			 * As this function get called a lot and most time
 			 * we don't have anything to put in the log (we are
diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c
index 1f81f8e7c61d..7bf5b913828b 100644
--- a/net/irda/irlmp.c
+++ b/net/irda/irlmp.c
@@ -1062,7 +1062,8 @@ void irlmp_discovery_expiry(discinfo_t *expiries, int number)
 		for(i = 0; i < number; i++) {
 			/* Check if we should notify client */
 			if ((client->expir_callback) &&
-			    (client->hint_mask.word & u16ho(expiries[i].hints)
+			    (client->hint_mask.word &
+			     get_unaligned((__u16 *)expiries[i].hints)
 			     & 0x7f7f) )
 				client->expir_callback(&(expiries[i]),
 						       EXPIRY_TIMEOUT,
@@ -1086,7 +1087,7 @@ discovery_t *irlmp_get_discovery_response(void)
 
 	IRDA_ASSERT(irlmp != NULL, return NULL;);
 
-	u16ho(irlmp->discovery_rsp.data.hints) = irlmp->hints.word;
+	put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints);
 
 	/*
 	 *  Set character set for device name (we use ASCII), and
diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c
index 75497e55927d..a3ec0026cdb2 100644
--- a/net/irda/irnet/irnet_irda.c
+++ b/net/irda/irnet/irnet_irda.c
@@ -1673,7 +1673,7 @@ irnet_discovery_indication(discinfo_t *		discovery,
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_DISCOVER,
 		   discovery->saddr, discovery->daddr, discovery->info,
-		   u16ho(discovery->hints));
+		   get_unaligned((__u16 *)discovery->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
@@ -1704,7 +1704,7 @@ irnet_expiry_indication(discinfo_t *	expiry,
   /* Notify the control channel */
   irnet_post_event(NULL, IRNET_EXPIRE,
 		   expiry->saddr, expiry->daddr, expiry->info,
-		   u16ho(expiry->hints));
+		   get_unaligned((__u16 *)expiry->hints));
 
   DEXIT(IRDA_OCB_TRACE, "\n");
 }
-- 
cgit v1.2.3


From ee443996a35c1e04f210cafd43d5a98d41e46085 Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@ericvh-desktop.(none)>
Date: Wed, 5 Mar 2008 07:08:09 -0600
Subject: 9p: Documentation updates

The kernel-doc comments of much of the 9p system have been in disarray since
reorganization.  This patch fixes those problems, adds additional documentation
and a template book which collects the 9p information.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/fid.h                |  15 +++
 fs/9p/v9fs.c               |   8 +-
 fs/9p/v9fs.h               |  85 +++++++++++-----
 fs/9p/vfs_addr.c           |   2 +-
 fs/9p/vfs_dir.c            |   2 +-
 fs/9p/vfs_file.c           |  11 ++-
 fs/9p/vfs_inode.c          |  50 +++++++---
 fs/9p/vfs_super.c          |   1 +
 include/net/9p/9p.h        | 239 +++++++++++++++++++++++++++++++++++++++------
 include/net/9p/client.h    |  35 +++++++
 include/net/9p/transport.h |  43 ++++++++
 net/9p/conv.c              | 128 +++++++++++++++++++++++-
 net/9p/error.c             |  11 ++-
 net/9p/fcprint.c           |   8 ++
 net/9p/mod.c               |   7 +-
 net/9p/trans_fd.c          | 146 ++++++++++++++++++++++-----
 net/9p/trans_virtio.c      | 155 +++++++++++++++++++++++++++--
 net/9p/util.c              |  32 ++++--
 18 files changed, 854 insertions(+), 124 deletions(-)

(limited to 'include/net')

diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 26e07df783b9..c3bbd6af996d 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -22,6 +22,21 @@
 
 #include <linux/list.h>
 
+/**
+ * struct v9fs_dentry - 9p private data stored in dentry d_fsdata
+ * @lock: protects the fidlist
+ * @fidlist: list of FIDs currently associated with this dentry
+ *
+ * This structure defines the 9p private data associated with
+ * a particular dentry.  In particular, this private data is used
+ * to lookup which 9P FID handle should be used for a particular VFS
+ * operation.  FID handles are associated with dentries instead of
+ * inodes in order to more closely map functionality to the Plan 9
+ * expected behavior for FID reclaimation and tracking.
+ *
+ * See Also: Mapping FIDs to Linux VFS model in
+ * Design and Implementation of the Linux 9P File System documentation
+ */
 struct v9fs_dentry {
 	spinlock_t lock; /* protect fidlist */
 	struct list_head fidlist;
diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index e307fbd34fa0..79d310c00188 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -71,7 +71,6 @@ static match_table_t tokens = {
 
 /**
  * v9fs_parse_options - parse mount options into session structure
- * @options: options string passed from mount
  * @v9ses: existing v9fs session information
  *
  */
@@ -256,9 +255,12 @@ void v9fs_session_close(struct v9fs_session_info *v9ses)
 }
 
 /**
- * v9fs_session_cancel - mark transport as disconnected
- * 	and cancel all pending requests.
+ * v9fs_session_cancel - terminate a session
+ * @v9ses: session to terminate
+ *
+ * mark transport as disconnected and cancel all pending requests.
  */
+
 void v9fs_session_cancel(struct v9fs_session_info *v9ses) {
 	P9_DPRINTK(P9_DEBUG_ERROR, "cancel session %p\n", v9ses);
 	p9_client_disconnect(v9ses->clnt);
diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h
index 7d3a1018db52..a7d567192998 100644
--- a/fs/9p/v9fs.h
+++ b/fs/9p/v9fs.h
@@ -21,18 +21,69 @@
  *
  */
 
-/*
-  * Session structure provides information for an opened session
-  *
-  */
+/**
+ * enum p9_session_flags - option flags for each 9P session
+ * @V9FS_EXTENDED: whether or not to use 9P2000.u extensions
+ * @V9FS_ACCESS_SINGLE: only the mounting user can access the hierarchy
+ * @V9FS_ACCESS_USER: a new attach will be issued for every user (default)
+ * @V9FS_ACCESS_ANY: use a single attach for all users
+ * @V9FS_ACCESS_MASK: bit mask of different ACCESS options
+ *
+ * Session flags reflect options selected by users at mount time
+ */
+enum p9_session_flags {
+	V9FS_EXTENDED		= 0x01,
+	V9FS_ACCESS_SINGLE	= 0x02,
+	V9FS_ACCESS_USER	= 0x04,
+	V9FS_ACCESS_ANY		= 0x06,
+	V9FS_ACCESS_MASK	= 0x06,
+};
+
+/* possible values of ->cache */
+/**
+ * enum p9_cache_modes - user specified cache preferences
+ * @CACHE_NONE: do not cache data, dentries, or directory contents (default)
+ * @CACHE_LOOSE: cache data, dentries, and directory contents w/no consistency
+ *
+ * eventually support loose, tight, time, session, default always none
+ */
+
+enum p9_cache_modes {
+	CACHE_NONE,
+	CACHE_LOOSE,
+};
+
+/**
+ * struct v9fs_session_info - per-instance session information
+ * @flags: session options of type &p9_session_flags
+ * @nodev: set to 1 to disable device mapping
+ * @debug: debug level
+ * @afid: authentication handle
+ * @cache: cache mode of type &p9_cache_modes
+ * @options: copy of options string given by user
+ * @uname: string user name to mount hierarchy as
+ * @aname: mount specifier for remote hierarchy
+ * @maxdata: maximum data to be sent/recvd per protocol message
+ * @dfltuid: default numeric userid to mount hierarchy as
+ * @dfltgid: default numeric groupid to mount hierarchy as
+ * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy
+ * @clnt: reference to 9P network client instantiated for this session
+ * @debugfs_dir: reference to debugfs_dir which can be used for add'l debug
+ *
+ * This structure holds state for each session instance established during
+ * a sys_mount() .
+ *
+ * Bugs: there seems to be a lot of state which could be condensed and/or
+ * removed.
+ */
 
 struct v9fs_session_info {
 	/* options */
-	unsigned char flags;	/* session flags */
-	unsigned char nodev;	/* set to 1 if no disable device mapping */
-	unsigned short debug;	/* debug level */
-	unsigned int afid;	/* authentication fid */
-	unsigned int cache;	/* cache mode */
+	unsigned char flags;
+	unsigned char nodev;
+	unsigned short debug;
+	unsigned int afid;
+	unsigned int cache;
 
 	char *options;		/* copy of mount options */
 	char *uname;		/* user name to mount as */
@@ -45,22 +96,6 @@ struct v9fs_session_info {
 	struct dentry *debugfs_dir;
 };
 
-/* session flags */
-enum {
-	V9FS_EXTENDED		= 0x01,	/* 9P2000.u */
-	V9FS_ACCESS_MASK	= 0x06,	/* access mask */
-	V9FS_ACCESS_SINGLE	= 0x02,	/* only one user can access the files */
-	V9FS_ACCESS_USER	= 0x04,	/* attache per user */
-	V9FS_ACCESS_ANY		= 0x06,	/* use the same attach for all users */
-};
-
-/* possible values of ->cache */
-/* eventually support loose, tight, time, session, default always none */
-enum {
-	CACHE_NONE,		/* default */
-	CACHE_LOOSE,		/* no consistency */
-};
-
 extern struct dentry *v9fs_debugfs_root;
 
 struct p9_fid *v9fs_session_init(struct v9fs_session_info *, const char *,
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 6248f0e727a3..97d3aed57983 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -43,7 +43,7 @@
 /**
  * v9fs_vfs_readpage - read an entire page in from 9P
  *
- * @file: file being read
+ * @filp: file being read
  * @page: structure to page
  *
  */
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 0924d4477da3..88e3787c6ea9 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -60,7 +60,7 @@ static inline int dt_type(struct p9_stat *mistat)
 
 /**
  * v9fs_dir_readdir - read a directory
- * @filep: opened file structure
+ * @filp: opened file structure
  * @dirent: directory structure ???
  * @filldir: function to populate directory structure ???
  *
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index a616fff8906d..0d55affe37d4 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -90,10 +90,11 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 
 /**
  * v9fs_file_lock - lock a file (or directory)
- * @inode: inode to be opened
- * @file: file being opened
+ * @filp: file to be locked
+ * @cmd: lock command
+ * @fl: file lock structure
  *
- * XXX - this looks like a local only lock, we should extend into 9P
+ * Bugs: this looks like a local only lock, we should extend into 9P
  *       by using open exclusive
  */
 
@@ -118,7 +119,7 @@ static int v9fs_file_lock(struct file *filp, int cmd, struct file_lock *fl)
 
 /**
  * v9fs_file_read - read from a file
- * @filep: file pointer to read
+ * @filp: file pointer to read
  * @data: data buffer to read data into
  * @count: size of buffer
  * @offset: offset at which to read data
@@ -142,7 +143,7 @@ v9fs_file_read(struct file *filp, char __user * data, size_t count,
 
 /**
  * v9fs_file_write - write to a file
- * @filep: file pointer to write
+ * @filp: file pointer to write
  * @data: data buffer to write data from
  * @count: size of buffer
  * @offset: offset at which to write data
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 6a28842052ea..40fa807bd929 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -129,6 +129,12 @@ static int p9mode2unixmode(struct v9fs_session_info *v9ses, int mode)
 	return res;
 }
 
+/**
+ * v9fs_uflags2omode- convert posix open flags to plan 9 mode bits
+ * @uflags: flags to convert
+ *
+ */
+
 int v9fs_uflags2omode(int uflags)
 {
 	int ret;
@@ -312,6 +318,14 @@ error:
 }
 */
 
+/**
+ * v9fs_inode_from_fid - populate an inode by issuing a attribute request
+ * @v9ses: session information
+ * @fid: fid to issue attribute request for
+ * @sb: superblock on which to create inode
+ *
+ */
+
 static struct inode *
 v9fs_inode_from_fid(struct v9fs_session_info *v9ses, struct p9_fid *fid,
 	struct super_block *sb)
@@ -384,9 +398,12 @@ v9fs_open_created(struct inode *inode, struct file *file)
 
 /**
  * v9fs_create - Create a file
+ * @v9ses: session information
+ * @dir: directory that dentry is being created in
  * @dentry:  dentry that is being created
  * @perm: create permissions
  * @mode: open mode
+ * @extension: 9p2000.u extension string to support devices, etc.
  *
  */
 static struct p9_fid *
@@ -461,7 +478,7 @@ error:
 
 /**
  * v9fs_vfs_create - VFS hook to create files
- * @inode: directory inode that is being created
+ * @dir: directory inode that is being created
  * @dentry:  dentry that is being deleted
  * @mode: create permissions
  * @nd: path information
@@ -519,7 +536,7 @@ error:
 
 /**
  * v9fs_vfs_mkdir - VFS mkdir hook to create a directory
- * @inode:  inode that is being unlinked
+ * @dir:  inode that is being unlinked
  * @dentry: dentry that is being unlinked
  * @mode: mode for new directory
  *
@@ -703,9 +720,9 @@ done:
 
 /**
  * v9fs_vfs_getattr - retrieve file metadata
- * @mnt - mount information
- * @dentry - file to get attributes on
- * @stat - metadata structure to populate
+ * @mnt: mount information
+ * @dentry: file to get attributes on
+ * @stat: metadata structure to populate
  *
  */
 
@@ -928,7 +945,7 @@ done:
 /**
  * v9fs_vfs_readlink - read a symlink's location
  * @dentry: dentry for symlink
- * @buf: buffer to load symlink location into
+ * @buffer: buffer to load symlink location into
  * @buflen: length of buffer
  *
  */
@@ -996,10 +1013,12 @@ static void *v9fs_vfs_follow_link(struct dentry *dentry, struct nameidata *nd)
  * v9fs_vfs_put_link - release a symlink path
  * @dentry: dentry for symlink
  * @nd: nameidata
+ * @p: unused
  *
  */
 
-static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
+static void
+v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void *p)
 {
 	char *s = nd_get_link(nd);
 
@@ -1008,6 +1027,15 @@ static void v9fs_vfs_put_link(struct dentry *dentry, struct nameidata *nd, void
 		__putname(s);
 }
 
+/**
+ * v9fs_vfs_mkspecial - create a special file
+ * @dir: inode to create special file in
+ * @dentry: dentry to create
+ * @mode: mode to create special file
+ * @extension: 9p2000.u format extension string representing special file
+ *
+ */
+
 static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
 	int mode, const char *extension)
 {
@@ -1037,7 +1065,7 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
  * @dentry: dentry for symlink
  * @symname: symlink data
  *
- * See 9P2000.u RFC for more information
+ * See Also: 9P2000.u RFC for more information
  *
  */
 
@@ -1058,10 +1086,6 @@ v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
  *
  */
 
-/* XXX - lots of code dup'd from symlink and creates,
- * figure out a better reuse strategy
- */
-
 static int
 v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
 	      struct dentry *dentry)
@@ -1098,7 +1122,7 @@ clunk_fid:
  * @dir: inode destination for new link
  * @dentry: dentry for file
  * @mode: mode for creation
- * @dev_t: device associated with special file
+ * @rdev: device associated with special file
  *
  */
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index a452ac67fc94..ba10f172626d 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -75,6 +75,7 @@ static int v9fs_set_super(struct super_block *s, void *data)
  * v9fs_fill_super - populate superblock with info
  * @sb: superblock
  * @v9ses: session information
+ * @flags: flags propagated from v9fs_get_sb()
  *
  */
 
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 585eb4496990..7bfb2f2e423c 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -29,14 +29,31 @@
 
 #ifdef CONFIG_NET_9P_DEBUG
 
-#define P9_DEBUG_ERROR		(1<<0)
-#define P9_DEBUG_9P	        (1<<2)
-#define P9_DEBUG_VFS	        (1<<3)
-#define P9_DEBUG_CONV		(1<<4)
-#define P9_DEBUG_MUX		(1<<5)
-#define P9_DEBUG_TRANS		(1<<6)
-#define P9_DEBUG_SLABS	      	(1<<7)
-#define P9_DEBUG_FCALL		(1<<8)
+/**
+ * enum p9_debug_flags - bits for mount time debug parameter
+ * @P9_DEBUG_ERROR: more verbose error messages including original error string
+ * @P9_DEBUG_9P: 9P protocol tracing
+ * @P9_DEBUG_VFS: VFS API tracing
+ * @P9_DEBUG_CONV: protocol conversion tracing
+ * @P9_DEBUG_MUX: trace management of concurrent transactions
+ * @P9_DEBUG_TRANS: transport tracing
+ * @P9_DEBUG_SLABS: memory management tracing
+ * @P9_DEBUG_FCALL: verbose dump of protocol messages
+ *
+ * These flags are passed at mount time to turn on various levels of
+ * verbosity and tracing which will be output to the system logs.
+ */
+
+enum p9_debug_flags {
+	P9_DEBUG_ERROR = 	(1<<0),
+	P9_DEBUG_9P = 		(1<<2),
+	P9_DEBUG_VFS =		(1<<3),
+	P9_DEBUG_CONV =		(1<<4),
+	P9_DEBUG_MUX =		(1<<5),
+	P9_DEBUG_TRANS =	(1<<6),
+	P9_DEBUG_SLABS =      	(1<<7),
+	P9_DEBUG_FCALL =	(1<<8),
+};
 
 extern unsigned int p9_debug_level;
 
@@ -62,9 +79,47 @@ do { \
 		format , __FUNCTION__, task_pid_nr(current), ## arg); \
 } while (0)
 
+/**
+ * enum p9_msg_t - 9P message types
+ * @P9_TVERSION: version handshake request
+ * @P9_RVERSION: version handshake response
+ * @P9_TAUTH: request to establish authentication channel
+ * @P9_RAUTH: response with authentication information
+ * @P9_TATTACH: establish user access to file service
+ * @P9_RATTACH: response with top level handle to file hierarchy
+ * @P9_TERROR: not used
+ * @P9_RERROR: response for any failed request
+ * @P9_TFLUSH: request to abort a previous request
+ * @P9_RFLUSH: response when previous request has been cancelled
+ * @P9_TWALK: descend a directory hierarchy
+ * @P9_RWALK: response with new handle for position within hierarchy
+ * @P9_TOPEN: prepare a handle for I/O on an existing file
+ * @P9_ROPEN: response with file access information
+ * @P9_TCREATE: prepare a handle for I/O on a new file
+ * @P9_RCREATE: response with file access information
+ * @P9_TREAD: request to transfer data from a file or directory
+ * @P9_RREAD: response with data requested
+ * @P9_TWRITE: reuqest to transfer data to a file
+ * @P9_RWRITE: response with out much data was transfered to file
+ * @P9_TCLUNK: forget about a handle to an entity within the file system
+ * @P9_RCLUNK: response when server has forgotten about the handle
+ * @P9_TREMOVE: request to remove an entity from the hierarchy
+ * @P9_RREMOVE: response when server has removed the entity
+ * @P9_TSTAT: request file entity attributes
+ * @P9_RSTAT: response with file entity attributes
+ * @P9_TWSTAT: request to update file entity attributes
+ * @P9_RWSTAT: response when file entity attributes are updated
+ *
+ * There are 14 basic operations in 9P2000, paired as
+ * requests and responses.  The one special case is ERROR
+ * as there is no @P9_TERROR request for clients to transmit to
+ * the server, but the server may respond to any other request
+ * with an @P9_RERROR.
+ *
+ * See Also: http://plan9.bell-labs.com/sys/man/5/INDEX.html
+ */
 
-/* Message Types */
-enum {
+enum p9_msg_t {
 	P9_TVERSION = 100,
 	P9_RVERSION,
 	P9_TAUTH = 102,
@@ -95,30 +150,71 @@ enum {
 	P9_RWSTAT,
 };
 
-/* open modes */
-enum {
+/**
+ * enum p9_open_mode_t - 9P open modes
+ * @P9_OREAD: open file for reading only
+ * @P9_OWRITE: open file for writing only
+ * @P9_ORDWR: open file for reading or writing
+ * @P9_OEXEC: open file for execution
+ * @P9_OTRUNC: truncate file to zero-length before opening it
+ * @P9_OREXEC: close the file when an exec(2) system call is made
+ * @P9_ORCLOSE: remove the file when the file is closed
+ * @P9_OAPPEND: open the file and seek to the end
+ * @P9_OEXCL: only create a file, do not open it
+ *
+ * 9P open modes differ slightly from Posix standard modes.
+ * In particular, there are extra modes which specify different
+ * semantic behaviors than may be available on standard Posix
+ * systems.  For example, @P9_OREXEC and @P9_ORCLOSE are modes that
+ * most likely will not be issued from the Linux VFS client, but may
+ * be supported by servers.
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/open
+ */
+
+enum p9_open_mode_t {
 	P9_OREAD = 0x00,
 	P9_OWRITE = 0x01,
 	P9_ORDWR = 0x02,
 	P9_OEXEC = 0x03,
-	P9_OEXCL = 0x04,
 	P9_OTRUNC = 0x10,
 	P9_OREXEC = 0x20,
 	P9_ORCLOSE = 0x40,
 	P9_OAPPEND = 0x80,
-};
-
-/* permissions */
-enum {
+	P9_OEXCL = 0x1000,
+};
+
+/**
+ * enum p9_perm_t - 9P permissions
+ * @P9_DMDIR: mode bite for directories
+ * @P9_DMAPPEND: mode bit for is append-only
+ * @P9_DMEXCL: mode bit for excluse use (only one open handle allowed)
+ * @P9_DMMOUNT: mode bite for mount points
+ * @P9_DMAUTH: mode bit for authentication file
+ * @P9_DMTMP: mode bit for non-backed-up files
+ * @P9_DMSYMLINK: mode bit for symbolic links (9P2000.u)
+ * @P9_DMLINK: mode bit for hard-link (9P2000.u)
+ * @P9_DMDEVICE: mode bit for device files (9P2000.u)
+ * @P9_DMNAMEDPIPE: mode bit for named pipe (9P2000.u)
+ * @P9_DMSOCKET: mode bit for socket (9P2000.u)
+ * @P9_DMSETUID: mode bit for setuid (9P2000.u)
+ * @P9_DMSETGID: mode bit for setgid (9P2000.u)
+ * @P9_DMSETVTX: mode bit for sticky bit (9P2000.u)
+ *
+ * 9P permissions differ slightly from Posix standard modes.
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
+ */
+enum p9_perm_t {
 	P9_DMDIR = 0x80000000,
 	P9_DMAPPEND = 0x40000000,
 	P9_DMEXCL = 0x20000000,
 	P9_DMMOUNT = 0x10000000,
 	P9_DMAUTH = 0x08000000,
 	P9_DMTMP = 0x04000000,
+/* 9P2000.u extensions */
 	P9_DMSYMLINK = 0x02000000,
 	P9_DMLINK = 0x01000000,
-	/* 9P2000.u extensions */
 	P9_DMDEVICE = 0x00800000,
 	P9_DMNAMEDPIPE = 0x00200000,
 	P9_DMSOCKET = 0x00100000,
@@ -127,8 +223,26 @@ enum {
 	P9_DMSETVTX = 0x00010000,
 };
 
-/* qid.types */
-enum {
+/**
+ * enum p9_qid_t - QID types
+ * @P9_QTDIR: directory
+ * @P9_QTAPPEND: append-only
+ * @P9_QTEXCL: excluse use (only one open handle allowed)
+ * @P9_QTMOUNT: mount points
+ * @P9_QTAUTH: authentication file
+ * @P9_QTTMP: non-backed-up files
+ * @P9_QTSYMLINK: symbolic links (9P2000.u)
+ * @P9_QTLINK: hard-link (9P2000.u)
+ * @P9_QTFILE: normal files
+ *
+ * QID types are a subset of permissions - they are primarily
+ * used to differentiate semantics for a file system entity via
+ * a jump-table.  Their value is also the most signifigant 16 bits
+ * of the permission_t
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
+ */
+enum p9_qid_t {
 	P9_QTDIR = 0x80,
 	P9_QTAPPEND = 0x40,
 	P9_QTEXCL = 0x20,
@@ -140,6 +254,7 @@ enum {
 	P9_QTFILE = 0x00,
 };
 
+/* 9P Magic Numbers */
 #define P9_NOTAG	(u16)(~0)
 #define P9_NOFID	(u32)(~0)
 #define P9_MAXWELEM	16
@@ -147,19 +262,69 @@ enum {
 /* ample room for Twrite/Rread header */
 #define P9_IOHDRSZ	24
 
+/**
+ * struct p9_str - length prefixed string type
+ * @len: length of the string
+ * @str: the string
+ *
+ * The protocol uses length prefixed strings for all
+ * string data, so we replicate that for our internal
+ * string members.
+ */
+
 struct p9_str {
 	u16 len;
 	char *str;
 };
 
-/* qids are the unique ID for a file (like an inode */
+/**
+ * struct p9_qid - file system entity information
+ * @type: 8-bit type &p9_qid_t
+ * @version: 16-bit monotonically incrementing version number
+ * @path: 64-bit per-server-unique ID for a file system element
+ *
+ * qids are identifiers used by 9P servers to track file system
+ * entities.  The type is used to differentiate semantics for operations
+ * on the entity (ie. read means something different on a directory than
+ * on a file).  The path provides a server unique index for an entity
+ * (roughly analogous to an inode number), while the version is updated
+ * every time a file is modified and can be used to maintain cache
+ * coherency between clients and serves.
+ * Servers will often differentiate purely synthetic entities by setting
+ * their version to 0, signaling that they should never be cached and
+ * should be accessed synchronously.
+ *
+ * See Also://plan9.bell-labs.com/magic/man2html/2/stat
+ */
+
 struct p9_qid {
 	u8 type;
 	u32 version;
 	u64 path;
 };
 
-/* Plan 9 file metadata (stat) structure */
+/**
+ * struct p9_stat - file system metadata information
+ * @size: length prefix for this stat structure instance
+ * @type: the type of the server (equivilent to a major number)
+ * @dev: the sub-type of the server (equivilent to a minor number)
+ * @qid: unique id from the server of type &p9_qid
+ * @mode: Plan 9 format permissions of type &p9_perm_t
+ * @atime: Last access/read time
+ * @mtime: Last modify/write time
+ * @length: file length
+ * @name: last element of path (aka filename) in type &p9_str
+ * @uid: owner name in type &p9_str
+ * @gid: group owner in type &p9_str
+ * @muid: last modifier in type &p9_str
+ * @extension: area used to encode extended UNIX support in type &p9_str
+ * @n_uid: numeric user id of owner (part of 9p2000.u extension)
+ * @n_gid: numeric group id (part of 9p2000.u extension)
+ * @n_muid: numeric user id of laster modifier (part of 9p2000.u extension)
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
+ */
+
 struct p9_stat {
 	u16 size;
 	u16 type;
@@ -179,10 +344,14 @@ struct p9_stat {
 	u32 n_muid;			/* 9p2000.u extensions */
 };
 
-/* file metadata (stat) structure used to create Twstat message
-   The is similar to p9_stat, but the strings don't point to
-   the same memory block and should be freed separately
-*/
+/*
+ * file metadata (stat) structure used to create Twstat message
+ * The is identical to &p9_stat, but the strings don't point to
+ * the same memory block and should be freed separately
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/stat
+ */
+
 struct p9_wstat {
 	u16 size;
 	u16 type;
@@ -335,10 +504,20 @@ struct p9_twstat {
 struct p9_rwstat {
 };
 
-/*
-  * fcall is the primary packet structure
-  *
-  */
+/**
+ * struct p9_fcall - primary packet structure
+ * @size: prefixed length of the structure
+ * @id: protocol operating identifier of type &p9_msg_t
+ * @tag: transaction id of the request
+ * @sdata: payload
+ * @params: per-operation parameters
+ *
+ * &p9_fcall represents the structure for all 9P RPC
+ * transactions.  Requests are packaged into fcalls, and reponses
+ * must be extracted from them.
+ *
+ * See Also: http://plan9.bell-labs.com/magic/man2html/2/fcall
+ */
 
 struct p9_fcall {
 	u32 size;
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index e52f93d9ac5f..c936dd14de41 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -26,6 +26,23 @@
 #ifndef NET_9P_CLIENT_H
 #define NET_9P_CLIENT_H
 
+/**
+ * struct p9_client - per client instance state
+ * @lock: protect @fidlist
+ * @msize: maximum data size negotiated by protocol
+ * @dotu: extension flags negotiated by protocol
+ * @trans_mod: module API instantiated with this client
+ * @trans: tranport instance state and API
+ * @conn: connection state information used by trans_fd
+ * @fidpool: fid handle accounting for session
+ * @fidlist: List of active fid handles
+ *
+ * The client structure is used to keep track of various per-client
+ * state that has been instantiated.
+ *
+ * Bugs: duplicated data and potentially unnecessary elements.
+ */
+
 struct p9_client {
 	spinlock_t lock; /* protect client structure */
 	int msize;
@@ -38,6 +55,24 @@ struct p9_client {
 	struct list_head fidlist;
 };
 
+/**
+ * struct p9_fid - file system entity handle
+ * @clnt: back pointer to instantiating &p9_client
+ * @fid: numeric identifier for this handle
+ * @mode: current mode of this fid (enum?)
+ * @qid: the &p9_qid server identifier this handle points to
+ * @iounit: the server reported maximum transaction size for this file
+ * @uid: the numeric uid of the local user who owns this handle
+ * @aux: transport specific information (unused?)
+ * @rdir_fpos: tracks offset of file position when reading directory contents
+ * @rdir_pos: (unused?)
+ * @rdir_fcall: holds response of last directory read request
+ * @flist: per-client-instance fid tracking
+ * @dlist: per-dentry fid tracking
+ *
+ * TODO: This needs lots of explanation.
+ */
+
 struct p9_fid {
 	struct p9_client *clnt;
 	u32 fid;
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index d2209ae9d18b..240e0de888c6 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -26,12 +26,40 @@
 #ifndef NET_9P_TRANSPORT_H
 #define NET_9P_TRANSPORT_H
 
+/**
+ * enum p9_trans_status - different states of underlying transports
+ * @Connected: transport is connected and healthy
+ * @Disconnected: transport has been disconnected
+ * @Hung: transport is connected by wedged
+ *
+ * This enumeration details the various states a transport
+ * instatiation can be in.
+ */
+
 enum p9_trans_status {
 	Connected,
 	Disconnected,
 	Hung,
 };
 
+/**
+ * struct p9_trans - per-transport state and API
+ * @status: transport &p9_trans_status
+ * @msize: negotiated maximum packet size (duplicate from client)
+ * @extended: negotiated protocol extensions (duplicate from client)
+ * @priv: transport private data
+ * @close: member function to disconnect and close the transport
+ * @rpc: member function to issue a request to the transport
+ *
+ * This is the basic API for a transport instance.  It is used as
+ * a handle by the client to issue requests.  This interface is currently
+ * in flux during reorganization.
+ *
+ * Bugs: there is lots of duplicated data here and its not clear that
+ * the member functions need to be per-instance versus per transport
+ * module.
+ */
+
 struct p9_trans {
 	enum p9_trans_status status;
 	int msize;
@@ -42,6 +70,21 @@ struct p9_trans {
 							struct p9_fcall **rc);
 };
 
+/**
+ * struct p9_trans_module - transport module interface
+ * @list: used to maintain a list of currently available transports
+ * @name: the human-readable name of the transport
+ * @maxsize: transport provided maximum packet size
+ * @def: set if this transport should be considered the default
+ * @create: member function to create a new connection on this transport
+ *
+ * This is the basic API for a transport module which is registered by the
+ * transport module with the 9P core network module and used by the client
+ * to instantiate a new connection on a transport.
+ *
+ * Bugs: the transport module list isn't protected.
+ */
+
 struct p9_trans_module {
 	struct list_head list;
 	char *name;		/* name of transport */
diff --git a/net/9p/conv.c b/net/9p/conv.c
index 3fe35d532c87..44547201f5bc 100644
--- a/net/9p/conv.c
+++ b/net/9p/conv.c
@@ -197,7 +197,7 @@ static void buf_get_qid(struct cbuf *bufp, struct p9_qid *qid)
 
 /**
  * p9_size_wstat - calculate the size of a variable length stat struct
- * @stat: metadata (stat) structure
+ * @wstat: metadata (stat) structure
  * @dotu: non-zero if 9P2000.u
  *
  */
@@ -511,6 +511,12 @@ p9_create_common(struct cbuf *bufp, u32 size, u8 id)
 	return fc;
 }
 
+/**
+ * p9_set_tag - set the tag field of an &p9_fcall structure
+ * @fc: fcall structure to set tag within
+ * @tag: tag id to set
+ */
+
 void p9_set_tag(struct p9_fcall *fc, u16 tag)
 {
 	fc->tag = tag;
@@ -518,6 +524,12 @@ void p9_set_tag(struct p9_fcall *fc, u16 tag)
 }
 EXPORT_SYMBOL(p9_set_tag);
 
+/**
+ * p9_create_tversion - allocates and creates a T_VERSION request
+ * @msize: requested maximum data size
+ * @version: version string to negotiate
+ *
+ */
 struct p9_fcall *p9_create_tversion(u32 msize, char *version)
 {
 	int size;
@@ -542,6 +554,16 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tversion);
 
+/**
+ * p9_create_tauth - allocates and creates a T_AUTH request
+ * @afid: handle to use for authentication protocol
+ * @uname: user name attempting to authenticate
+ * @aname: mount specifier for remote server
+ * @n_uname: numeric id for user attempting to authneticate
+ * @dotu: 9P2000.u extension flag
+ *
+ */
+
 struct p9_fcall *p9_create_tauth(u32 afid, char *uname, char *aname,
 	u32 n_uname, int dotu)
 {
@@ -580,6 +602,18 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tauth);
 
+/**
+ * p9_create_tattach - allocates and creates a T_ATTACH request
+ * @fid: handle to use for the new mount point
+ * @afid: handle to use for authentication protocol
+ * @uname: user name attempting to attach
+ * @aname: mount specifier for remote server
+ * @n_uname: numeric id for user attempting to attach
+ * @n_uname: numeric id for user attempting to attach
+ * @dotu: 9P2000.u extension flag
+ *
+ */
+
 struct p9_fcall *
 p9_create_tattach(u32 fid, u32 afid, char *uname, char *aname,
 	u32 n_uname, int dotu)
@@ -616,6 +650,12 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tattach);
 
+/**
+ * p9_create_tflush - allocates and creates a T_FLUSH request
+ * @oldtag: tag id for the transaction we are attempting to cancel
+ *
+ */
+
 struct p9_fcall *p9_create_tflush(u16 oldtag)
 {
 	int size;
@@ -639,6 +679,15 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tflush);
 
+/**
+ * p9_create_twalk - allocates and creates a T_FLUSH request
+ * @fid: handle we are traversing from
+ * @newfid: a new handle for this transaction
+ * @nwname: number of path elements to traverse
+ * @wnames: array of path elements
+ *
+ */
+
 struct p9_fcall *p9_create_twalk(u32 fid, u32 newfid, u16 nwname,
 				     char **wnames)
 {
@@ -677,6 +726,13 @@ error:
 }
 EXPORT_SYMBOL(p9_create_twalk);
 
+/**
+ * p9_create_topen - allocates and creates a T_OPEN request
+ * @fid: handle we are trying to open
+ * @mode: what mode we are trying to open the file in
+ *
+ */
+
 struct p9_fcall *p9_create_topen(u32 fid, u8 mode)
 {
 	int size;
@@ -701,6 +757,19 @@ error:
 }
 EXPORT_SYMBOL(p9_create_topen);
 
+/**
+ * p9_create_tcreate - allocates and creates a T_CREATE request
+ * @fid: handle of directory we are trying to create in
+ * @name: name of the file we are trying to create
+ * @perm: permissions for the file we are trying to create
+ * @mode: what mode we are trying to open the file in
+ * @extension: 9p2000.u extension string (for special files)
+ * @dotu: 9p2000.u enabled flag
+ *
+ * Note: Plan 9 create semantics include opening the resulting file
+ * which is why mode is included.
+ */
+
 struct p9_fcall *p9_create_tcreate(u32 fid, char *name, u32 perm, u8 mode,
 	char *extension, int dotu)
 {
@@ -736,6 +805,13 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tcreate);
 
+/**
+ * p9_create_tread - allocates and creates a T_READ request
+ * @fid: handle of the file we are trying to read
+ * @offset: offset to start reading from
+ * @count: how many bytes to read
+ */
+
 struct p9_fcall *p9_create_tread(u32 fid, u64 offset, u32 count)
 {
 	int size;
@@ -761,6 +837,17 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tread);
 
+/**
+ * p9_create_twrite - allocates and creates a T_WRITE request from the kernel
+ * @fid: handle of the file we are trying to write
+ * @offset: offset to start writing at
+ * @count: how many bytes to write
+ * @data: data to write
+ *
+ * This function will create a requst with data buffers from the kernel
+ * such as the page cache.
+ */
+
 struct p9_fcall *p9_create_twrite(u32 fid, u64 offset, u32 count,
 				      const char *data)
 {
@@ -794,6 +881,16 @@ error:
 }
 EXPORT_SYMBOL(p9_create_twrite);
 
+/**
+ * p9_create_twrite_u - allocates and creates a T_WRITE request from userspace
+ * @fid: handle of the file we are trying to write
+ * @offset: offset to start writing at
+ * @count: how many bytes to write
+ * @data: data to write
+ *
+ * This function will create a request with data buffers from userspace
+ */
+
 struct p9_fcall *p9_create_twrite_u(u32 fid, u64 offset, u32 count,
 				      const char __user *data)
 {
@@ -827,6 +924,14 @@ error:
 }
 EXPORT_SYMBOL(p9_create_twrite_u);
 
+/**
+ * p9_create_tclunk - allocate a request to forget about a file handle
+ * @fid: handle of the file we closing or forgetting about
+ *
+ * clunk is used both to close open files and to discard transient handles
+ * which may be created during meta-data operations and hierarchy traversal.
+ */
+
 struct p9_fcall *p9_create_tclunk(u32 fid)
 {
 	int size;
@@ -850,6 +955,12 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tclunk);
 
+/**
+ * p9_create_tremove - allocate and create a request to remove a file
+ * @fid: handle of the file or directory we are removing
+ *
+ */
+
 struct p9_fcall *p9_create_tremove(u32 fid)
 {
 	int size;
@@ -873,6 +984,12 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tremove);
 
+/**
+ * p9_create_tstat - allocate and populate a request for attributes
+ * @fid: handle of the file or directory we are trying to get the attributes of
+ *
+ */
+
 struct p9_fcall *p9_create_tstat(u32 fid)
 {
 	int size;
@@ -896,6 +1013,14 @@ error:
 }
 EXPORT_SYMBOL(p9_create_tstat);
 
+/**
+ * p9_create_tstat - allocate and populate a request to change attributes
+ * @fid: handle of the file or directory we are trying to change
+ * @wstat: &p9_stat structure with attributes we wish to set
+ * @dotu: 9p2000.u enabled flag
+ *
+ */
+
 struct p9_fcall *p9_create_twstat(u32 fid, struct p9_wstat *wstat,
 				      int dotu)
 {
@@ -922,3 +1047,4 @@ error:
 	return fc;
 }
 EXPORT_SYMBOL(p9_create_twstat);
+
diff --git a/net/9p/error.c b/net/9p/error.c
index 64104b9cb422..388770c3631e 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -33,6 +33,13 @@
 #include <linux/errno.h>
 #include <net/9p/9p.h>
 
+/**
+ * struct errormap - map string errors from Plan 9 to Linux numeric ids
+ * @name: string sent over 9P
+ * @val: numeric id most closely representing @name
+ * @namelen: length of string
+ * @list: hash-table list for string lookup
+ */
 struct errormap {
 	char *name;
 	int val;
@@ -177,8 +184,7 @@ static struct errormap errmap[] = {
 };
 
 /**
- * p9_error_init - preload
- * @errstr: error string
+ * p9_error_init - preload mappings into hash list
  *
  */
 
@@ -206,6 +212,7 @@ EXPORT_SYMBOL(p9_error_init);
 /**
  * errstr2errno - convert error string to error number
  * @errstr: error string
+ * @len: length of error string
  *
  */
 
diff --git a/net/9p/fcprint.c b/net/9p/fcprint.c
index 40244fbd9b0d..53dd8e28dd8a 100644
--- a/net/9p/fcprint.c
+++ b/net/9p/fcprint.c
@@ -142,6 +142,14 @@ p9_printdata(char *buf, int buflen, u8 *data, int datalen)
 	return p9_dumpdata(buf, buflen, data, datalen < 16?datalen:16);
 }
 
+/**
+ * p9_printfcall - decode and print a protocol structure into a buffer
+ * @buf: buffer to deposit decoded structure into
+ * @buflen: available space in buffer
+ * @fc: protocol rpc structure of type &p9_fcall
+ * @extended: whether or not session is operating with extended protocol
+ */
+
 int
 p9_printfcall(char *buf, int buflen, struct p9_fcall *fc, int extended)
 {
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c285aab2af04..c6d9695949e0 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -39,9 +39,6 @@ module_param_named(debug, p9_debug_level, uint, 0);
 MODULE_PARM_DESC(debug, "9P debugging level");
 #endif
 
-extern int p9_mux_global_init(void);
-extern void p9_mux_global_exit(void);
-
 /*
  * Dynamic Transport Registration Routines
  *
@@ -52,7 +49,7 @@ static struct p9_trans_module *v9fs_default_transport;
 
 /**
  * v9fs_register_trans - register a new transport with 9p
- * @m - structure describing the transport module and entry points
+ * @m: structure describing the transport module and entry points
  *
  */
 void v9fs_register_trans(struct p9_trans_module *m)
@@ -65,7 +62,7 @@ EXPORT_SYMBOL(v9fs_register_trans);
 
 /**
  * v9fs_match_trans - match transport versus registered transports
- * @arg: string identifying transport
+ * @name: string identifying transport
  *
  */
 struct p9_trans_module *v9fs_match_trans(const substring_t *name)
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index f624dff76852..c6eda999fa7d 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -47,12 +47,29 @@
 #define SCHED_TIMEOUT	10
 #define MAXPOLLWADDR	2
 
+/**
+ * struct p9_fd_opts - per-transport options
+ * @rfd: file descriptor for reading (trans=fd)
+ * @wfd: file descriptor for writing (trans=fd)
+ * @port: port to connect to (trans=tcp)
+ *
+ */
+
 struct p9_fd_opts {
 	int rfd;
 	int wfd;
 	u16 port;
 };
 
+
+/**
+ * struct p9_trans_fd - transport state
+ * @rd: reference to file to read from
+ * @wr: reference of file to write to
+ * @conn: connection state reference
+ *
+ */
+
 struct p9_trans_fd {
 	struct file *rd;
 	struct file *wr;
@@ -90,10 +107,24 @@ enum {
 };
 
 struct p9_req;
-
 typedef void (*p9_conn_req_callback)(struct p9_req *req, void *a);
+
+/**
+ * struct p9_req - fd mux encoding of an rpc transaction
+ * @lock: protects req_list
+ * @tag: numeric tag for rpc transaction
+ * @tcall: request &p9_fcall structure
+ * @rcall: response &p9_fcall structure
+ * @err: error state
+ * @cb: callback for when response is received
+ * @cba: argument to pass to callback
+ * @flush: flag to indicate RPC has been flushed
+ * @req_list: list link for higher level objects to chain requests
+ *
+ */
+
 struct p9_req {
-	spinlock_t lock; /* protect request structure */
+	spinlock_t lock;
 	int tag;
 	struct p9_fcall *tcall;
 	struct p9_fcall *rcall;
@@ -104,7 +135,39 @@ struct p9_req {
 	struct list_head req_list;
 };
 
-struct p9_mux_poll_task;
+struct p9_mux_poll_task {
+	struct task_struct *task;
+	struct list_head mux_list;
+	int muxnum;
+};
+
+/**
+ * struct p9_conn - fd mux connection state information
+ * @lock: protects mux_list (?)
+ * @mux_list: list link for mux to manage multiple connections (?)
+ * @poll_task: task polling on this connection
+ * @msize: maximum size for connection (dup)
+ * @extended: 9p2000.u flag (dup)
+ * @trans: reference to transport instance for this connection
+ * @tagpool: id accounting for transactions
+ * @err: error state
+ * @equeue: event wait_q (?)
+ * @req_list: accounting for requests which have been sent
+ * @unsent_req_list: accounting for requests that haven't been sent
+ * @rcall: current response &p9_fcall structure
+ * @rpos: read position in current frame
+ * @rbuf: current read buffer
+ * @wpos: write position for current frame
+ * @wsize: amount of data to write for current frame
+ * @wbuf: current write buffer
+ * @poll_wait: array of wait_q's for various worker threads
+ * @poll_waddr: ????
+ * @pt: poll state
+ * @rq: current read work
+ * @wq: current write work
+ * @wsched: ????
+ *
+ */
 
 struct p9_conn {
 	spinlock_t lock; /* protect lock structure */
@@ -132,11 +195,16 @@ struct p9_conn {
 	unsigned long wsched;
 };
 
-struct p9_mux_poll_task {
-	struct task_struct *task;
-	struct list_head mux_list;
-	int muxnum;
-};
+/**
+ * struct p9_mux_rpc - fd mux rpc accounting structure
+ * @m: connection this request was issued on
+ * @err: error state
+ * @tcall: request &p9_fcall
+ * @rcall: response &p9_fcall
+ * @wqueue: wait queue that client is blocked on for this rpc
+ *
+ * Bug: isn't this information duplicated elsewhere like &p9_req
+ */
 
 struct p9_mux_rpc {
 	struct p9_conn *m;
@@ -207,10 +275,12 @@ static void p9_mux_put_tag(struct p9_conn *m, u16 tag)
 
 /**
  * p9_mux_calc_poll_procs - calculates the number of polling procs
- * based on the number of mounted v9fs filesystems.
+ * @muxnum: number of mounts
  *
+ * Calculation is based on the number of mounted v9fs filesystems.
  * The current implementation returns sqrt of the number of mounts.
  */
+
 static int p9_mux_calc_poll_procs(int muxnum)
 {
 	int n;
@@ -331,12 +401,11 @@ static void p9_mux_poll_stop(struct p9_conn *m)
 
 /**
  * p9_conn_create - allocate and initialize the per-session mux data
- * Creates the polling task if this is the first session.
+ * @trans: transport structure
  *
- * @trans - transport structure
- * @msize - maximum message size
- * @extended - extended flag
+ * Note: Creates the polling task if this is the first session.
  */
+
 static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 {
 	int i, n;
@@ -406,7 +475,10 @@ static struct p9_conn *p9_conn_create(struct p9_trans *trans)
 
 /**
  * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * @m: mux to destroy
+ *
  */
+
 static void p9_conn_destroy(struct p9_conn *m)
 {
 	P9_DPRINTK(P9_DEBUG_MUX, "mux %p prev %p next %p\n", m,
@@ -429,9 +501,14 @@ static void p9_conn_destroy(struct p9_conn *m)
 }
 
 /**
- * p9_pollwait - called by files poll operation to add v9fs-poll task
- * 	to files wait queue
+ * p9_pollwait - add poll task to the wait queue
+ * @filp: file pointer being polled
+ * @wait_address: wait_q to block on
+ * @p: poll state
+ *
+ * called by files poll operation to add v9fs-poll task to files wait queue
  */
+
 static void
 p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 {
@@ -462,7 +539,10 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
 
 /**
  * p9_poll_mux - polls a mux and schedules read or write works if necessary
+ * @m: connection to poll
+ *
  */
+
 static void p9_poll_mux(struct p9_conn *m)
 {
 	int n;
@@ -499,9 +579,14 @@ static void p9_poll_mux(struct p9_conn *m)
 }
 
 /**
- * p9_poll_proc - polls all v9fs transports for new events and queues
- * 	the appropriate work to the work queue
+ * p9_poll_proc - poll worker thread
+ * @a: thread state and arguments
+ *
+ * polls all v9fs transports for new events and queues the appropriate
+ * work to the work queue
+ *
  */
+
 static int p9_poll_proc(void *a)
 {
 	struct p9_conn *m, *mtmp;
@@ -527,7 +612,10 @@ static int p9_poll_proc(void *a)
 
 /**
  * p9_write_work - called when a transport can send some data
+ * @work: container for work to be done
+ *
  */
+
 static void p9_write_work(struct work_struct *work)
 {
 	int n, err;
@@ -638,7 +726,10 @@ static void process_request(struct p9_conn *m, struct p9_req *req)
 
 /**
  * p9_read_work - called when there is some data to be read from a transport
+ * @work: container of work to be done
+ *
  */
+
 static void p9_read_work(struct work_struct *work)
 {
 	int n, err;
@@ -793,7 +884,9 @@ error:
  * @tc: request to be sent
  * @cb: callback function to call when response is received
  * @cba: parameter to pass to the callback function
+ *
  */
+
 static struct p9_req *p9_send_request(struct p9_conn *m,
 					  struct p9_fcall *tc,
 					  p9_conn_req_callback cb, void *cba)
@@ -961,10 +1054,12 @@ p9_conn_rpc_cb(struct p9_req *req, void *a)
 /**
  * p9_fd_rpc- sends 9P request and waits until a response is available.
  *	The function can be interrupted.
- * @m: mux data
+ * @t: transport data
  * @tc: request to be sent
  * @rc: pointer where a pointer to the response is stored
+ *
  */
+
 int
 p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 {
@@ -1041,8 +1136,10 @@ p9_fd_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
  * @m: mux data
  * @tc: request to be sent
  * @cb: callback function to be called when response arrives
- * @cba: value to pass to the callback function
+ * @a: value to pass to the callback function
+ *
  */
+
 int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
 		   p9_conn_req_callback cb, void *a)
 {
@@ -1065,7 +1162,9 @@ int p9_conn_rpcnb(struct p9_conn *m, struct p9_fcall *tc,
  * p9_conn_cancel - cancel all pending requests with error
  * @m: mux data
  * @err: error code
+ *
  */
+
 void p9_conn_cancel(struct p9_conn *m, int err)
 {
 	struct p9_req *req, *rtmp;
@@ -1099,7 +1198,7 @@ void p9_conn_cancel(struct p9_conn *m, int err)
 /**
  * v9fs_parse_options - parse mount options into session structure
  * @options: options string passed from mount
- * @v9ses: existing v9fs session information
+ * @opts: transport-specific structure to parse options into
  *
  */
 
@@ -1193,11 +1292,12 @@ static int p9_socket_open(struct p9_trans *trans, struct socket *csocket)
 
 /**
  * p9_fd_read- read from a fd
- * @v9ses: session information
+ * @trans: transport instance state
  * @v: buffer to receive data into
  * @len: size of receive buffer
  *
  */
+
 static int p9_fd_read(struct p9_trans *trans, void *v, int len)
 {
 	int ret;
@@ -1220,11 +1320,12 @@ static int p9_fd_read(struct p9_trans *trans, void *v, int len)
 
 /**
  * p9_fd_write - write to a socket
- * @v9ses: session information
+ * @trans: transport instance state
  * @v: buffer to send data from
  * @len: size of send buffer
  *
  */
+
 static int p9_fd_write(struct p9_trans *trans, void *v, int len)
 {
 	int ret;
@@ -1296,6 +1397,7 @@ end:
  * @trans: private socket structure
  *
  */
+
 static void p9_fd_close(struct p9_trans *trans)
 {
 	struct p9_trans_fd *ts;
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index de7a9f532edc..0bab1f23590e 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -55,23 +55,69 @@ static int chan_index;
 
 #define P9_INIT_MAXTAG	16
 
-#define REQ_STATUS_IDLE	0
-#define REQ_STATUS_SENT 1
-#define REQ_STATUS_RCVD 2
-#define REQ_STATUS_FLSH 3
+
+/**
+ * enum p9_req_status_t - virtio request status
+ * @REQ_STATUS_IDLE: request slot unused
+ * @REQ_STATUS_SENT: request sent to server
+ * @REQ_STATUS_RCVD: response received from server
+ * @REQ_STATUS_FLSH: request has been flushed
+ *
+ * The @REQ_STATUS_IDLE state is used to mark a request slot as unused
+ * but use is actually tracked by the idpool structure which handles tag
+ * id allocation.
+ *
+ */
+
+enum p9_req_status_t {
+	REQ_STATUS_IDLE,
+	REQ_STATUS_SENT,
+	REQ_STATUS_RCVD,
+	REQ_STATUS_FLSH,
+};
+
+/**
+ * struct p9_req_t - virtio request slots
+ * @status: status of this request slot
+ * @wq: wait_queue for the client to block on for this request
+ *
+ * The virtio transport uses an array to track outstanding requests
+ * instead of a list.  While this may incurr overhead during initial
+ * allocation or expansion, it makes request lookup much easier as the
+ * tag id is a index into an array.  (We use tag+1 so that we can accomodate
+ * the -1 tag for the T_VERSION request).
+ * This also has the nice effect of only having to allocate wait_queues
+ * once, instead of constantly allocating and freeing them.  Its possible
+ * other resources could benefit from this scheme as well.
+ *
+ */
 
 struct p9_req_t {
 	int status;
 	wait_queue_head_t *wq;
 };
 
-/* We keep all per-channel information in a structure.
+/**
+ * struct virtio_chan - per-instance transport information
+ * @initialized: whether the channel is initialized
+ * @inuse: whether the channel is in use
+ * @lock: protects multiple elements within this structure
+ * @vdev: virtio dev associated with this channel
+ * @vq: virtio queue associated with this channel
+ * @tagpool: accounting for tag ids (and request slots)
+ * @reqs: array of request slots
+ * @max_tag: current number of request_slots allocated
+ * @sg: scatter gather list which is used to pack a request (protected?)
+ *
+ * We keep all per-channel information in a structure.
  * This structure is allocated within the devices dev->mem space.
  * A pointer to the structure will get put in the transport private.
+ *
  */
+
 static struct virtio_chan {
-	bool initialized;		/* channel is initialized */
-	bool inuse;			/* channel is in use */
+	bool initialized;
+	bool inuse;
 
 	spinlock_t lock;
 
@@ -86,7 +132,19 @@ static struct virtio_chan {
 	struct scatterlist sg[VIRTQUEUE_NUM];
 } channels[MAX_9P_CHAN];
 
-/* Lookup requests by tag */
+/**
+ * p9_lookup_tag - Lookup requests by tag
+ * @c: virtio channel to lookup tag within
+ * @tag: numeric id for transaction
+ *
+ * this is a simple array lookup, but will grow the
+ * request_slots as necessary to accomodate transaction
+ * ids which did not previously have a slot.
+ *
+ * Bugs: there is currently no upper limit on request slots set
+ * here, but that should be constrained by the id accounting.
+ */
+
 static struct p9_req_t *p9_lookup_tag(struct virtio_chan *c, u16 tag)
 {
 	/* This looks up the original request by tag so we know which
@@ -130,6 +188,15 @@ static unsigned int rest_of_page(void *data)
 	return PAGE_SIZE - ((unsigned long)data % PAGE_SIZE);
 }
 
+/**
+ * p9_virtio_close - reclaim resources of a channel
+ * @trans: transport state
+ *
+ * This reclaims a channel by freeing its resources and
+ * reseting its inuse flag.
+ *
+ */
+
 static void p9_virtio_close(struct p9_trans *trans)
 {
 	struct virtio_chan *chan = trans->priv;
@@ -151,6 +218,19 @@ static void p9_virtio_close(struct p9_trans *trans)
 	kfree(trans);
 }
 
+/**
+ * req_done - callback which signals activity from the server
+ * @vq: virtio queue activity was received on
+ *
+ * This notifies us that the server has triggered some activity
+ * on the virtio channel - most likely a response to request we
+ * sent.  Figure out which requests now have responses and wake up
+ * those threads.
+ *
+ * Bugs: could do with some additional sanity checking, but appears to work.
+ *
+ */
+
 static void req_done(struct virtqueue *vq)
 {
 	struct virtio_chan *chan = vq->vdev->priv;
@@ -169,6 +249,20 @@ static void req_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
+/**
+ * pack_sg_list - pack a scatter gather list from a linear buffer
+ * @sg: scatter/gather list to pack into
+ * @start: which segment of the sg_list to start at
+ * @limit: maximum segment to pack data to
+ * @data: data to pack into scatter/gather list
+ * @count: amount of data to pack into the scatter/gather list
+ *
+ * sg_lists have multiple segments of various sizes.  This will pack
+ * arbitrary data into an existing scatter gather list, segmenting the
+ * data as necessary within constraints.
+ *
+ */
+
 static int
 pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 								int count)
@@ -189,6 +283,14 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,
 	return index-start;
 }
 
+/**
+ * p9_virtio_rpc - issue a request and wait for a response
+ * @t: transport state
+ * @tc: &p9_fcall request to transmit
+ * @rc: &p9_fcall to put reponse into
+ *
+ */
+
 static int
 p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 {
@@ -263,6 +365,16 @@ p9_virtio_rpc(struct p9_trans *t, struct p9_fcall *tc, struct p9_fcall **rc)
 	return 0;
 }
 
+/**
+ * p9_virtio_probe - probe for existence of 9P virtio channels
+ * @vdev: virtio device to probe
+ *
+ * This probes for existing virtio channels.  At present only
+ * a single channel is in use, so in the future more work may need
+ * to be done here.
+ *
+ */
+
 static int p9_virtio_probe(struct virtio_device *vdev)
 {
 	int err;
@@ -307,11 +419,28 @@ fail:
 	return err;
 }
 
-/* This sets up a transport channel for 9p communication.  Right now
+
+/**
+ * p9_virtio_create - allocate a new virtio channel
+ * @devname: string identifying the channel to connect to (unused)
+ * @args: args passed from sys_mount() for per-transport options (unused)
+ * @msize: requested maximum packet size
+ * @extended: 9p2000.u enabled flag
+ *
+ * This sets up a transport channel for 9p communication.  Right now
  * we only match the first available channel, but eventually we couldlook up
  * alternate channels by matching devname versus a virtio_config entry.
  * We use a simple reference count mechanism to ensure that only a single
- * mount has a channel open at a time. */
+ * mount has a channel open at a time.
+ *
+ * Bugs: doesn't allow identification of a specific channel
+ * to allocate, channels are allocated sequentially. This was
+ * a pragmatic decision to get things rolling, but ideally some
+ * way of identifying the channel to attach to would be nice
+ * if we are going to support multiple channels.
+ *
+ */
+
 static struct p9_trans *
 p9_virtio_create(const char *devname, char *args, int msize,
 							unsigned char extended)
@@ -360,6 +489,12 @@ p9_virtio_create(const char *devname, char *args, int msize,
 	return trans;
 }
 
+/**
+ * p9_virtio_remove - clean up resources associated with a virtio device
+ * @vdev: virtio device to remove
+ *
+ */
+
 static void p9_virtio_remove(struct virtio_device *vdev)
 {
 	struct virtio_chan *chan = vdev->priv;
diff --git a/net/9p/util.c b/net/9p/util.c
index ef7215565d88..4d5646045330 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -32,11 +32,23 @@
 #include <linux/idr.h>
 #include <net/9p/9p.h>
 
+/**
+ * struct p9_idpool - per-connection accounting for tag idpool
+ * @lock: protects the pool
+ * @pool: idr to allocate tag id from
+ *
+ */
+
 struct p9_idpool {
 	spinlock_t lock;
 	struct idr pool;
 };
 
+/**
+ * p9_idpool_create - create a new per-connection id pool
+ *
+ */
+
 struct p9_idpool *p9_idpool_create(void)
 {
 	struct p9_idpool *p;
@@ -52,6 +64,11 @@ struct p9_idpool *p9_idpool_create(void)
 }
 EXPORT_SYMBOL(p9_idpool_create);
 
+/**
+ * p9_idpool_destroy - create a new per-connection id pool
+ * @p: idpool to destory
+ */
+
 void p9_idpool_destroy(struct p9_idpool *p)
 {
 	idr_destroy(&p->pool);
@@ -61,9 +78,9 @@ EXPORT_SYMBOL(p9_idpool_destroy);
 
 /**
  * p9_idpool_get - allocate numeric id from pool
- * @p - pool to allocate from
+ * @p: pool to allocate from
  *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * Bugs: This seems to be an awful generic function, should it be in idr.c with
  *            the lock included in struct idr?
  */
 
@@ -94,9 +111,10 @@ EXPORT_SYMBOL(p9_idpool_get);
 
 /**
  * p9_idpool_put - release numeric id from pool
- * @p - pool to allocate from
+ * @id: numeric id which is being released
+ * @p: pool to release id into
  *
- * XXX - This seems to be an awful generic function, should it be in idr.c with
+ * Bugs: This seems to be an awful generic function, should it be in idr.c with
  *            the lock included in struct idr?
  */
 
@@ -111,11 +129,13 @@ EXPORT_SYMBOL(p9_idpool_put);
 
 /**
  * p9_idpool_check - check if the specified id is available
- * @id - id to check
- * @p - pool
+ * @id: id to check
+ * @p: pool to check
  */
+
 int p9_idpool_check(int id, struct p9_idpool *p)
 {
 	return idr_find(&p->pool, id) != NULL;
 }
 EXPORT_SYMBOL(p9_idpool_check);
+
-- 
cgit v1.2.3


From 887b3ece65be7b643dfdae0d433c91a26a3f437d Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@opteron.9grid.us>
Date: Thu, 8 May 2008 20:26:37 -0500
Subject: 9p: fix error path during early mount

There was some cleanup issues during early mount which would trigger
a kernel bug for certain types of failure.  This patch reorganizes the
cleanup to get rid of the bad behavior.

This also merges the 9pnet and 9pnet_fd modules for the purpose of
configuration and initialization.  Keeping the fd transport separate
from the core 9pnet code seemed like a good idea at the time, but in
practice has caused more harm and confusion than good.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 fs/9p/v9fs.c               | 13 +++++++------
 fs/9p/vfs_super.c          | 34 ++++++++++++++++------------------
 include/net/9p/9p.h        |  1 +
 include/net/9p/transport.h |  1 -
 net/9p/Kconfig             | 10 ----------
 net/9p/Makefile            |  3 ---
 net/9p/mod.c               |  1 +
 net/9p/trans_fd.c          | 29 ++++++++++++++++++++++-------
 8 files changed, 47 insertions(+), 45 deletions(-)

(limited to 'include/net')

diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c
index 5c1ccaf0416c..047c791427aa 100644
--- a/fs/9p/v9fs.c
+++ b/fs/9p/v9fs.c
@@ -206,12 +206,14 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	v9ses->uid = ~0;
 	v9ses->dfltuid = V9FS_DEFUID;
 	v9ses->dfltgid = V9FS_DEFGID;
-	v9ses->options = kstrdup(data, GFP_KERNEL);
-	if (!v9ses->options) {
-		P9_DPRINTK(P9_DEBUG_ERROR,
+	if (data) {
+		v9ses->options = kstrdup(data, GFP_KERNEL);
+		if (!v9ses->options) {
+			P9_DPRINTK(P9_DEBUG_ERROR,
 			   "failed to allocate copy of option string\n");
-		retval = -ENOMEM;
-		goto error;
+			retval = -ENOMEM;
+			goto error;
+		}
 	}
 
 	rc = v9fs_parse_options(v9ses);
@@ -260,7 +262,6 @@ struct p9_fid *v9fs_session_init(struct v9fs_session_info *v9ses,
 	return fid;
 
 error:
-	v9fs_session_close(v9ses);
 	return ERR_PTR(retval);
 }
 
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index ba10f172626d..bf59c3960494 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -128,29 +128,26 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	fid = v9fs_session_init(v9ses, dev_name, data);
 	if (IS_ERR(fid)) {
 		retval = PTR_ERR(fid);
-		fid = NULL;
-		kfree(v9ses);
-		v9ses = NULL;
-		goto error;
+		goto close_session;
 	}
 
 	st = p9_client_stat(fid);
 	if (IS_ERR(st)) {
 		retval = PTR_ERR(st);
-		goto error;
+		goto clunk_fid;
 	}
 
 	sb = sget(fs_type, NULL, v9fs_set_super, v9ses);
 	if (IS_ERR(sb)) {
 		retval = PTR_ERR(sb);
-		goto error;
+		goto free_stat;
 	}
 	v9fs_fill_super(sb, v9ses, flags);
 
 	inode = v9fs_get_inode(sb, S_IFDIR | mode);
 	if (IS_ERR(inode)) {
 		retval = PTR_ERR(inode);
-		goto error;
+		goto release_sb;
 	}
 
 	inode->i_uid = uid;
@@ -159,7 +156,7 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 	root = d_alloc_root(inode);
 	if (!root) {
 		retval = -ENOMEM;
-		goto error;
+		goto release_sb;
 	}
 
 	sb->s_root = root;
@@ -170,21 +167,22 @@ static int v9fs_get_sb(struct file_system_type *fs_type, int flags,
 
 	return simple_set_mnt(mnt, sb);
 
-error:
-	kfree(st);
-	if (fid)
-		p9_client_clunk(fid);
-
-	if (v9ses) {
-		v9fs_session_close(v9ses);
-		kfree(v9ses);
-	}
-
+release_sb:
 	if (sb) {
 		up_write(&sb->s_umount);
 		deactivate_super(sb);
 	}
 
+free_stat:
+	kfree(st);
+
+clunk_fid:
+	p9_client_clunk(fid);
+
+close_session:
+	v9fs_session_close(v9ses);
+	kfree(v9ses);
+
 	return retval;
 }
 
diff --git a/include/net/9p/9p.h b/include/net/9p/9p.h
index 7bfb2f2e423c..b3d3e27c6299 100644
--- a/include/net/9p/9p.h
+++ b/include/net/9p/9p.h
@@ -595,4 +595,5 @@ int p9_idpool_check(int id, struct p9_idpool *p);
 
 int p9_error_init(void);
 int p9_errstr2errno(char *, int);
+int p9_trans_fd_init(void);
 #endif /* NET_9P_H */
diff --git a/include/net/9p/transport.h b/include/net/9p/transport.h
index 240e0de888c6..0db3a4038dc0 100644
--- a/include/net/9p/transport.h
+++ b/include/net/9p/transport.h
@@ -96,5 +96,4 @@ struct p9_trans_module {
 void v9fs_register_trans(struct p9_trans_module *m);
 struct p9_trans_module *v9fs_match_trans(const substring_t *name);
 struct p9_trans_module *v9fs_default_trans(void);
-
 #endif /* NET_9P_TRANSPORT_H */
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index bafc50c9e6ff..ff34c5acc130 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -13,16 +13,6 @@ menuconfig NET_9P
 
 	  If unsure, say N.
 
-config NET_9P_FD
-	depends on NET_9P
-	default y if NET_9P
-	tristate "9P File Descriptor Transports (Experimental)"
-	help
-	  This builds support for file descriptor transports for 9p
-	  which includes support for TCP/IP, named pipes, or passed
-	  file descriptors.  TCP/IP is the default transport for 9p,
-	  so if you are going to use 9p, you'll likely want this.
-
 config NET_9P_VIRTIO
 	depends on NET_9P && EXPERIMENTAL && VIRTIO
 	tristate "9P Virtio Transport (Experimental)"
diff --git a/net/9p/Makefile b/net/9p/Makefile
index 8a1051101898..519219480db1 100644
--- a/net/9p/Makefile
+++ b/net/9p/Makefile
@@ -1,5 +1,4 @@
 obj-$(CONFIG_NET_9P) := 9pnet.o
-obj-$(CONFIG_NET_9P_FD) += 9pnet_fd.o
 obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 
 9pnet-objs := \
@@ -9,8 +8,6 @@ obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o
 	error.o \
 	fcprint.o \
 	util.o \
-
-9pnet_fd-objs := \
 	trans_fd.o \
 
 9pnet_virtio-objs := \
diff --git a/net/9p/mod.c b/net/9p/mod.c
index c6d9695949e0..bdee1fb7cc62 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -107,6 +107,7 @@ static int __init init_p9(void)
 
 	p9_error_init();
 	printk(KERN_INFO "Installing 9P2000 support\n");
+	p9_trans_fd_init();
 
 	return ret;
 }
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 97b103b70499..4507f744f44e 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -1433,6 +1433,23 @@ static void p9_fd_close(struct p9_trans *trans)
 	kfree(ts);
 }
 
+/*
+ * stolen from NFS - maybe should be made a generic function?
+ */
+static inline int valid_ipaddr4(const char *buf)
+{
+	int rc, count, in[4];
+
+	rc = sscanf(buf, "%d.%d.%d.%d", &in[0], &in[1], &in[2], &in[3]);
+	if (rc != 4)
+		return -EINVAL;
+	for (count = 0; count < 4; count++) {
+		if (in[count] > 255)
+			return -EINVAL;
+	}
+	return 0;
+}
+
 static struct p9_trans *
 p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
 {
@@ -1447,6 +1464,9 @@ p9_trans_create_tcp(const char *addr, char *args, int msize, unsigned char dotu)
 	if (err < 0)
 		return ERR_PTR(err);
 
+	if (valid_ipaddr4(addr) < 0)
+		return ERR_PTR(-EINVAL);
+
 	csocket = NULL;
 	trans = kmalloc(sizeof(struct p9_trans), GFP_KERNEL);
 	if (!trans)
@@ -1625,7 +1645,7 @@ static struct p9_trans_module p9_fd_trans = {
 	.create = p9_trans_create_fd,
 };
 
-static int __init p9_trans_fd_init(void)
+int p9_trans_fd_init(void)
 {
 	int ret = p9_mux_global_init();
 	if (ret) {
@@ -1639,9 +1659,4 @@ static int __init p9_trans_fd_init(void)
 
 	return 0;
 }
-
-module_init(p9_trans_fd_init);
-
-MODULE_AUTHOR("Latchesar Ionkov <lucho@ionkov.net>");
-MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>");
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(p9_trans_fd_init);
-- 
cgit v1.2.3


From 2f561feb386d6adefbad63c59a1fcd298ac6a79c Mon Sep 17 00:00:00 2001
From: Ivo van Doorn <ivdoorn@gmail.com>
Date: Sat, 10 May 2008 13:40:49 +0200
Subject: mac80211: Add RTNL version of ieee80211_iterate_active_interfaces

Since commit e38bad4766a110b61fa6038f10be16ced8c6cc38
	mac80211: make ieee80211_iterate_active_interfaces not need rtnl
rt2500usb and rt73usb broke down due to attempting register access
in atomic context (which is not possible for USB hardware).

This patch restores ieee80211_iterate_active_interfaces() to use RTNL lock,
and provides the non-RTNL version under a new name:
	ieee80211_iterate_active_interfaces_atomic()

So far only rt2x00 uses ieee80211_iterate_active_interfaces(), and those
drivers require the RTNL version of ieee80211_iterate_active_interfaces().
Since they already call that function directly, this patch will automatically
fix the USB rt2x00 drivers.

v2: Rename ieee80211_iterate_active_interfaces_rtnl

Signed-off-by: Ivo van Doorn <IvDoorn@gmail.com>
Acked-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: John W. Linville <linville@tuxdriver.com>
---
 include/net/mac80211.h | 25 +++++++++++++++++++++++--
 net/mac80211/util.c    | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 59 insertions(+), 3 deletions(-)

(limited to 'include/net')

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 4a80d74975e8..dae3f9ec1154 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1594,13 +1594,16 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw);
 void ieee80211_scan_completed(struct ieee80211_hw *hw);
 
 /**
- * ieee80211_iterate_active_interfaces - iterate active interfaces
+ * ieee80211_iterate_active_interfaces- iterate active interfaces
  *
  * This function iterates over the interfaces associated with a given
  * hardware that are currently active and calls the callback for them.
+ * This function allows the iterator function to sleep, when the iterator
+ * function is atomic @ieee80211_iterate_active_interfaces_atomic can
+ * be used.
  *
  * @hw: the hardware struct of which the interfaces should be iterated over
- * @iterator: the iterator function to call, cannot sleep
+ * @iterator: the iterator function to call
  * @data: first argument of the iterator function
  */
 void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
@@ -1608,6 +1611,24 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw,
 						struct ieee80211_vif *vif),
 					 void *data);
 
+/**
+ * ieee80211_iterate_active_interfaces_atomic - iterate active interfaces
+ *
+ * This function iterates over the interfaces associated with a given
+ * hardware that are currently active and calls the callback for them.
+ * This function requires the iterator callback function to be atomic,
+ * if that is not desired, use @ieee80211_iterate_active_interfaces instead.
+ *
+ * @hw: the hardware struct of which the interfaces should be iterated over
+ * @iterator: the iterator function to call, cannot sleep
+ * @data: first argument of the iterator function
+ */
+void ieee80211_iterate_active_interfaces_atomic(struct ieee80211_hw *hw,
+						void (*iterator)(void *data,
+						    u8 *mac,
+						    struct ieee80211_vif *vif),
+						void *data);
+
 /**
  * ieee80211_start_tx_ba_session - Start a tx Block Ack session.
  * @hw: pointer as obtained from ieee80211_alloc_hw().
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 24a465c4df09..131e9e6c8a32 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -389,6 +389,41 @@ void ieee80211_iterate_active_interfaces(
 	struct ieee80211_local *local = hw_to_local(hw);
 	struct ieee80211_sub_if_data *sdata;
 
+	rtnl_lock();
+
+	list_for_each_entry(sdata, &local->interfaces, list) {
+		switch (sdata->vif.type) {
+		case IEEE80211_IF_TYPE_INVALID:
+		case IEEE80211_IF_TYPE_MNTR:
+		case IEEE80211_IF_TYPE_VLAN:
+			continue;
+		case IEEE80211_IF_TYPE_AP:
+		case IEEE80211_IF_TYPE_STA:
+		case IEEE80211_IF_TYPE_IBSS:
+		case IEEE80211_IF_TYPE_WDS:
+		case IEEE80211_IF_TYPE_MESH_POINT:
+			break;
+		}
+		if (sdata->dev == local->mdev)
+			continue;
+		if (netif_running(sdata->dev))
+			iterator(data, sdata->dev->dev_addr,
+				 &sdata->vif);
+	}
+
+	rtnl_unlock();
+}
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+
+void ieee80211_iterate_active_interfaces_atomic(
+	struct ieee80211_hw *hw,
+	void (*iterator)(void *data, u8 *mac,
+			 struct ieee80211_vif *vif),
+	void *data)
+{
+	struct ieee80211_local *local = hw_to_local(hw);
+	struct ieee80211_sub_if_data *sdata;
+
 	rcu_read_lock();
 
 	list_for_each_entry_rcu(sdata, &local->interfaces, list) {
@@ -413,4 +448,4 @@ void ieee80211_iterate_active_interfaces(
 
 	rcu_read_unlock();
 }
-EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces);
+EXPORT_SYMBOL_GPL(ieee80211_iterate_active_interfaces_atomic);
-- 
cgit v1.2.3


From 0686caa35ed17cf5b9043f453957e702a7eb588d Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Mon, 19 May 2008 16:25:42 -0700
Subject: ndisc: Add missing strategies for per-device retrans timer/reachable
 time settings.

Noticed from Al Viro <viro@ftp.linux.org.uk> via David Miller
<davem@davemloft.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ndisc.h | 4 ++++
 net/ipv6/addrconf.c | 2 +-
 net/ipv6/ndisc.c    | 8 ++++----
 3 files changed, 9 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/ndisc.h b/include/net/ndisc.h
index 9c451ff2f4f4..a01b7c4dc763 100644
--- a/include/net/ndisc.h
+++ b/include/net/ndisc.h
@@ -129,6 +129,10 @@ extern int 			ndisc_ifinfo_sysctl_change(struct ctl_table *ctl,
 							   void __user *buffer,
 							   size_t *lenp,
 							   loff_t *ppos);
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+				 int nlen, void __user *oldval,
+				 size_t __user *oldlenp,
+				 void __user *newval, size_t newlen);
 #endif
 
 extern void 			inet6_ifinfo_notify(int event,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index e591e09e5e4e..8dd9155502b5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -4242,7 +4242,7 @@ static void addrconf_sysctl_register(struct inet6_dev *idev)
 	neigh_sysctl_register(idev->dev, idev->nd_parms, NET_IPV6,
 			      NET_IPV6_NEIGH, "ipv6",
 			      &ndisc_ifinfo_sysctl_change,
-			      NULL);
+			      ndisc_ifinfo_sysctl_strategy);
 	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,
 			idev->dev->ifindex, idev, &idev->cnf);
 }
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index a55fc05b8125..282fdb31f8ed 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1727,10 +1727,10 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * f
 	return ret;
 }
 
-static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
-					int nlen, void __user *oldval,
-					size_t __user *oldlenp,
-					void __user *newval, size_t newlen)
+int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
+				 int nlen, void __user *oldval,
+				 size_t __user *oldlenp,
+				 void __user *newval, size_t newlen)
 {
 	struct net_device *dev = ctl->extra1;
 	struct inet6_dev *idev;
-- 
cgit v1.2.3


From b9a2f2e450b0f770bb4347ae8d48eb2dea701e24 Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Thu, 22 May 2008 10:48:59 -0700
Subject: netlink: Fix nla_parse_nested_compat() to call nla_parse() directly

The purpose of nla_parse_nested_compat() is to parse attributes which
contain a struct followed by a stream of nested attributes.  So far,
it called nla_parse_nested() to parse the stream of nested attributes
which was wrong, as nla_parse_nested() expects a container attribute
as data which holds the attribute stream.  It needs to call
nla_parse() directly while pointing at the next possible alignment
point after the struct in the beginning of the attribute.

With this patch, I can no longer reproduce the reported leftover
warnings.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Acked-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index a5506c42f03c..112dcdf7e34e 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -772,12 +772,13 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
 					    const struct nla_policy *policy,
 					    int len)
 {
-	if (nla_len(nla) < len)
+	int nested_len = nla_len(nla) - NLA_ALIGN(len);
+
+	if (nested_len < 0)
 		return -1;
-	if (nla_len(nla) >= NLA_ALIGN(len) + sizeof(struct nlattr))
-		return nla_parse_nested(tb, maxtype,
-					nla_data(nla) + NLA_ALIGN(len),
-					policy);
+	if (nested_len >= nla_attr_size(0))
+		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
+				 nested_len, policy);
 	memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 	return 0;
 }
-- 
cgit v1.2.3


From bc3ed28caaef55e7e3a9316464256353c5f9b1df Mon Sep 17 00:00:00 2001
From: Thomas Graf <tgraf@suug.ch>
Date: Tue, 3 Jun 2008 16:36:54 -0700
Subject: netlink: Improve returned error codes

Make nlmsg_trim(), nlmsg_cancel(), genlmsg_cancel(), and
nla_nest_cancel() void functions.

Return -EMSGSIZE instead of -1 if the provided message buffer is not
big enough.

Signed-off-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/genetlink.h |  4 ++--
 include/net/netlink.h   | 20 +++++++++-----------
 net/core/neighbour.c    |  3 ++-
 net/core/rtnetlink.c    |  3 ++-
 net/netlink/attr.c      | 12 ++++++------
 net/netlink/genetlink.c |  6 ++++--
 net/sched/sch_dsmark.c  |  6 ++++--
 net/sched/sch_gred.c    |  3 ++-
 net/sched/sch_hfsc.c    |  2 +-
 net/sched/sch_red.c     |  3 ++-
 net/wireless/nl80211.c  | 12 ++++++++----
 11 files changed, 42 insertions(+), 32 deletions(-)

(limited to 'include/net')

diff --git a/include/net/genetlink.h b/include/net/genetlink.h
index decdda546829..747c255d1df0 100644
--- a/include/net/genetlink.h
+++ b/include/net/genetlink.h
@@ -162,9 +162,9 @@ static inline int genlmsg_end(struct sk_buff *skb, void *hdr)
  * @skb: socket buffer the message is stored in
  * @hdr: generic netlink message header
  */
-static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr)
+static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr)
 {
-	return nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
+	nlmsg_cancel(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN);
 }
 
 /**
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 112dcdf7e34e..dfc3701dfcc3 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -556,14 +556,12 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb)
  * @skb: socket buffer the message is stored in
  * @mark: mark to trim to
  *
- * Trims the message to the provided mark. Returns -1.
+ * Trims the message to the provided mark.
  */
-static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
+static inline void nlmsg_trim(struct sk_buff *skb, const void *mark)
 {
 	if (mark)
 		skb_trim(skb, (unsigned char *) mark - skb->data);
-
-	return -1;
 }
 
 /**
@@ -572,11 +570,11 @@ static inline int nlmsg_trim(struct sk_buff *skb, const void *mark)
  * @nlh: netlink message header
  *
  * Removes the complete netlink message including all
- * attributes from the socket buffer again. Returns -1.
+ * attributes from the socket buffer again.
  */
-static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
+static inline void nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh)
 {
-	return nlmsg_trim(skb, nlh);
+	nlmsg_trim(skb, nlh);
 }
 
 /**
@@ -775,7 +773,7 @@ static inline int __nla_parse_nested_compat(struct nlattr *tb[], int maxtype,
 	int nested_len = nla_len(nla) - NLA_ALIGN(len);
 
 	if (nested_len < 0)
-		return -1;
+		return -EINVAL;
 	if (nested_len >= nla_attr_size(0))
 		return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len),
 				 nested_len, policy);
@@ -1080,11 +1078,11 @@ static inline int nla_nest_compat_end(struct sk_buff *skb, struct nlattr *start)
  * @start: container attribute
  *
  * Removes the container attribute and including all nested
- * attributes. Returns -1.
+ * attributes. Returns -EMSGSIZE
  */
-static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
+static inline void nla_nest_cancel(struct sk_buff *skb, struct nlattr *start)
 {
-	return nlmsg_trim(skb, start);
+	nlmsg_trim(skb, start);
 }
 
 /**
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 3896de79dfbf..65f01f71b3f3 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1714,7 +1714,8 @@ static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
 	return nla_nest_end(skb, nest);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, nest);
+	nla_nest_cancel(skb, nest);
+	return -EMSGSIZE;
 }
 
 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index cf857c4dc7b1..a9a77216310e 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -498,7 +498,8 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
 	return nla_nest_end(skb, mx);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, mx);
+	nla_nest_cancel(skb, mx);
+	return -EMSGSIZE;
 }
 
 int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id,
diff --git a/net/netlink/attr.c b/net/netlink/attr.c
index feb326f4a752..47bbf45ae5d7 100644
--- a/net/netlink/attr.c
+++ b/net/netlink/attr.c
@@ -400,13 +400,13 @@ void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute header and payload.
  */
 int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < nla_total_size(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	__nla_put(skb, attrtype, attrlen, data);
 	return 0;
@@ -418,13 +418,13 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute payload.
  */
 int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	__nla_put_nohdr(skb, attrlen, data);
 	return 0;
@@ -436,13 +436,13 @@ int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data)
  * @attrlen: length of attribute payload
  * @data: head of attribute payload
  *
- * Returns -1 if the tailroom of the skb is insufficient to store
+ * Returns -EMSGSIZE if the tailroom of the skb is insufficient to store
  * the attribute payload.
  */
 int nla_append(struct sk_buff *skb, int attrlen, const void *data)
 {
 	if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen)))
-		return -1;
+		return -EMSGSIZE;
 
 	memcpy(skb_put(skb, attrlen), data, attrlen);
 	return 0;
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index d16929c9b4bc..f5aa23c3e886 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -554,7 +554,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq,
 	return genlmsg_end(skb, hdr);
 
 nla_put_failure:
-	return genlmsg_cancel(skb, hdr);
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
 }
 
 static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
@@ -590,7 +591,8 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid,
 	return genlmsg_end(skb, hdr);
 
 nla_put_failure:
-	return genlmsg_cancel(skb, hdr);
+	genlmsg_cancel(skb, hdr);
+	return -EMSGSIZE;
 }
 
 static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb)
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
index 0df911fd67b1..64465bacbe79 100644
--- a/net/sched/sch_dsmark.c
+++ b/net/sched/sch_dsmark.c
@@ -444,7 +444,8 @@ static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
@@ -466,7 +467,8 @@ static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static const struct Qdisc_class_ops dsmark_class_ops = {
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index 3a9d226ff1e4..c89fba56db56 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -582,7 +582,8 @@ append_opt:
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static void gred_destroy(struct Qdisc *sch)
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 87293d0db1d7..fdfaa3fcc16d 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1360,7 +1360,7 @@ hfsc_dump_class(struct Qdisc *sch, unsigned long arg, struct sk_buff *skb,
 
  nla_put_failure:
 	nla_nest_cancel(skb, nest);
-	return -1;
+	return -EMSGSIZE;
 }
 
 static int
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 3dcd493f4f4a..5c569853b9c0 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -281,7 +281,8 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	return nla_nest_end(skb, opts);
 
 nla_put_failure:
-	return nla_nest_cancel(skb, opts);
+	nla_nest_cancel(skb, opts);
+	return -EMSGSIZE;
 }
 
 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2bdd4dddc0e1..fb75f265b39c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -187,7 +187,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb)
@@ -273,7 +274,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *cb)
@@ -928,7 +930,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_station(struct sk_buff *skb,
@@ -1267,7 +1270,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq,
 	return genlmsg_end(msg, hdr);
 
  nla_put_failure:
-	return genlmsg_cancel(msg, hdr);
+	genlmsg_cancel(msg, hdr);
+	return -EMSGSIZE;
 }
 
 static int nl80211_dump_mpath(struct sk_buff *skb,
-- 
cgit v1.2.3


From e51171019bb0e1f9fb57c25bd2e38ce652eaea27 Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Thu, 29 May 2008 19:55:05 +0900
Subject: [SCTP]: Fix NULL dereference of asoc.

Commit 7cbca67c073263c179f605bdbbdc565ab29d801d ("[IPV6]: Support
Source Address Selection API (RFC5014)") introduced NULL dereference
of asoc to sctp_v6_get_saddr in net/sctp/ipv6.c.
Pointed out by Johann Felix Soden <johfel@users.sourceforge.net>.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/sctp/structs.h | 3 ++-
 net/sctp/ipv6.c            | 5 +++--
 net/sctp/protocol.c        | 3 ++-
 net/sctp/transport.c       | 2 +-
 4 files changed, 8 insertions(+), 5 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 0ce0443c5b79..917d425f0542 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -548,7 +548,8 @@ struct sctp_af {
 	struct dst_entry *(*get_dst)	(struct sctp_association *asoc,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
-	void		(*get_saddr)	(struct sctp_association *asoc,
+	void		(*get_saddr)	(struct sctp_sock *sk,
+					 struct sctp_association *asoc,
 					 struct dst_entry *dst,
 					 union sctp_addr *daddr,
 					 union sctp_addr *saddr);
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e45e44c60635..e4aac3266fcd 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -299,7 +299,8 @@ static inline int sctp_v6_addr_match_len(union sctp_addr *s1,
 /* Fills in the source address(saddr) based on the destination address(daddr)
  * and asoc's bind address list.
  */
-static void sctp_v6_get_saddr(struct sctp_association *asoc,
+static void sctp_v6_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
@@ -318,7 +319,7 @@ static void sctp_v6_get_saddr(struct sctp_association *asoc,
 	if (!asoc) {
 		ipv6_dev_get_saddr(dst ? ip6_dst_idev(dst)->dev : NULL,
 				   &daddr->v6.sin6_addr,
-				   inet6_sk(asoc->base.sk)->srcprefs,
+				   inet6_sk(&sk->inet.sk)->srcprefs,
 				   &saddr->v6.sin6_addr);
 		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: " NIP6_FMT "\n",
 				  NIP6(saddr->v6.sin6_addr));
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 0ec234b762c2..13ee7fa92e07 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -519,7 +519,8 @@ out:
 /* For v4, the source address is cached in the route entry(dst). So no need
  * to cache it separately and hence this is an empty routine.
  */
-static void sctp_v4_get_saddr(struct sctp_association *asoc,
+static void sctp_v4_get_saddr(struct sctp_sock *sk,
+			      struct sctp_association *asoc,
 			      struct dst_entry *dst,
 			      union sctp_addr *daddr,
 			      union sctp_addr *saddr)
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index f4938f6c5abe..62082e7b7972 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -291,7 +291,7 @@ void sctp_transport_route(struct sctp_transport *transport,
 	if (saddr)
 		memcpy(&transport->saddr, saddr, sizeof(union sctp_addr));
 	else
-		af->get_saddr(asoc, dst, daddr, &transport->saddr);
+		af->get_saddr(opt, asoc, dst, daddr, &transport->saddr);
 
 	transport->dst = dst;
 	if ((transport->param_flags & SPP_PMTUD_DISABLE) && transport->pathmtu) {
-- 
cgit v1.2.3


From 4bed72e4f5502ea3322f0a00794815fa58951abe Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Tue, 27 May 2008 17:37:49 +0900
Subject: [IPV6] ADDRCONF: Allow longer lifetime on 64bit archs.

- Allow longer lifetimes (>= 0x7fffffff/HZ) on 64bit archs
  by using unsigned long.
- Shadow this arithmetic overflow workaround by introducing
  helper functions: addrconf_timeout_fixup() and
  addrconf_finite_timeout().

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/addrconf.h | 22 ++++++++++++
 net/ipv6/addrconf.c    | 97 ++++++++++++++++++++++++++------------------------
 net/ipv6/route.c       | 12 ++-----
 3 files changed, 75 insertions(+), 56 deletions(-)

(limited to 'include/net')

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0a2f0372df31..bbd3d583c6e6 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -94,6 +94,28 @@ extern void			addrconf_join_solict(struct net_device *dev,
 extern void			addrconf_leave_solict(struct inet6_dev *idev,
 					struct in6_addr *addr);
 
+static inline unsigned long addrconf_timeout_fixup(u32 timeout,
+						    unsigned unit)
+{
+	if (timeout == 0xffffffff)
+		return ~0UL;
+
+	/*
+	 * Avoid arithmetic overflow.
+	 * Assuming unit is constant and non-zero, this "if" statement
+	 * will go away on 64bit archs.
+	 */
+	if (0xfffffffe > LONG_MAX / unit && timeout > LONG_MAX / unit)
+		return LONG_MAX / unit;
+
+	return timeout;
+}
+
+static inline int addrconf_finite_timeout(unsigned long timeout)
+{
+	return ~timeout;
+}
+
 /*
  *	IPv6 Address Label subsystem (addrlabel.c)
  */
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c3b20c5afa3e..147588f4c7c0 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -731,8 +731,13 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
 						onlink = -1;
 
 					spin_lock(&ifa->lock);
-					lifetime = min_t(unsigned long,
-							 ifa->valid_lft, 0x7fffffffUL/HZ);
+
+					lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ);
+					/*
+					 * Note: Because this address is
+					 * not permanent, lifetime <
+					 * LONG_MAX / HZ here.
+					 */
 					if (time_before(expires,
 							ifa->tstamp + lifetime * HZ))
 						expires = ifa->tstamp + lifetime * HZ;
@@ -1722,7 +1727,6 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	__u32 valid_lft;
 	__u32 prefered_lft;
 	int addr_type;
-	unsigned long rt_expires;
 	struct inet6_dev *in6_dev;
 
 	pinfo = (struct prefix_info *) opt;
@@ -1764,28 +1768,23 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 	 *	2) Configure prefixes with the auto flag set
 	 */
 
-	if (valid_lft == INFINITY_LIFE_TIME)
-		rt_expires = ~0UL;
-	else if (valid_lft >= 0x7FFFFFFF/HZ) {
+	if (pinfo->onlink) {
+		struct rt6_info *rt;
+		unsigned long rt_expires;
+
 		/* Avoid arithmetic overflow. Really, we could
 		 * save rt_expires in seconds, likely valid_lft,
 		 * but it would require division in fib gc, that it
 		 * not good.
 		 */
-		rt_expires = 0x7FFFFFFF - (0x7FFFFFFF % HZ);
-	} else
-		rt_expires = valid_lft * HZ;
+		if (HZ > USER_HZ)
+			rt_expires = addrconf_timeout_fixup(valid_lft, HZ);
+		else
+			rt_expires = addrconf_timeout_fixup(valid_lft, USER_HZ);
 
-	/*
-	 * We convert this (in jiffies) to clock_t later.
-	 * Avoid arithmetic overflow there as well.
-	 * Overflow can happen only if HZ < USER_HZ.
-	 */
-	if (HZ < USER_HZ && ~rt_expires && rt_expires > 0x7FFFFFFF / USER_HZ)
-		rt_expires = 0x7FFFFFFF / USER_HZ;
+		if (addrconf_finite_timeout(rt_expires))
+			rt_expires *= HZ;
 
-	if (pinfo->onlink) {
-		struct rt6_info *rt;
 		rt = rt6_lookup(dev_net(dev), &pinfo->prefix, NULL,
 				dev->ifindex, 1);
 
@@ -1794,7 +1793,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 			if (valid_lft == 0) {
 				ip6_del_rt(rt);
 				rt = NULL;
-			} else if (~rt_expires) {
+			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				rt->rt6i_expires = jiffies + rt_expires;
 				rt->rt6i_flags |= RTF_EXPIRES;
@@ -1803,9 +1802,9 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
 				rt->rt6i_expires = 0;
 			}
 		} else if (valid_lft) {
-			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
 			clock_t expires = 0;
-			if (~rt_expires) {
+			int flags = RTF_ADDRCONF | RTF_PREFIX_RT;
+			if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
 				flags |= RTF_EXPIRES;
 				expires = jiffies_to_clock_t(rt_expires);
@@ -2036,6 +2035,7 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 	int scope;
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	ASSERT_RTNL();
 
@@ -2055,22 +2055,23 @@ static int inet6_addr_add(struct net *net, int ifindex, struct in6_addr *pfx,
 
 	scope = ipv6_addr_scope(pfx);
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	ifp = ipv6_add_addr(idev, pfx, plen, scope, ifa_flags);
 
@@ -3175,26 +3176,28 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
 {
 	u32 flags;
 	clock_t expires;
+	unsigned long timeout;
 
 	if (!valid_lft || (prefered_lft > valid_lft))
 		return -EINVAL;
 
-	if (valid_lft == INFINITY_LIFE_TIME) {
-		ifa_flags |= IFA_F_PERMANENT;
-		flags = 0;
-		expires = 0;
-	} else {
-		if (valid_lft >= 0x7FFFFFFF/HZ)
-			valid_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(valid_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		expires = jiffies_to_clock_t(timeout * HZ);
+		valid_lft = timeout;
 		flags = RTF_EXPIRES;
-		expires = jiffies_to_clock_t(valid_lft * HZ);
+	} else {
+		expires = 0;
+		flags = 0;
+		ifa_flags |= IFA_F_PERMANENT;
 	}
 
-	if (prefered_lft == 0)
-		ifa_flags |= IFA_F_DEPRECATED;
-	else if ((prefered_lft >= 0x7FFFFFFF/HZ) &&
-		 (prefered_lft != INFINITY_LIFE_TIME))
-		prefered_lft = 0x7FFFFFFF/HZ;
+	timeout = addrconf_timeout_fixup(prefered_lft, HZ);
+	if (addrconf_finite_timeout(timeout)) {
+		if (timeout == 0)
+			ifa_flags |= IFA_F_DEPRECATED;
+		prefered_lft = timeout;
+	}
 
 	spin_lock_bh(&ifp->lock);
 	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 48534c6c0735..220cffe9e63b 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -446,7 +446,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	struct route_info *rinfo = (struct route_info *) opt;
 	struct in6_addr prefix_buf, *prefix;
 	unsigned int pref;
-	u32 lifetime;
+	unsigned long lifetime;
 	struct rt6_info *rt;
 
 	if (len < sizeof(struct route_info)) {
@@ -472,13 +472,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 	if (pref == ICMPV6_ROUTER_PREF_INVALID)
 		pref = ICMPV6_ROUTER_PREF_MEDIUM;
 
-	lifetime = ntohl(rinfo->lifetime);
-	if (lifetime == 0xffffffff) {
-		/* infinity */
-	} else if (lifetime > 0x7fffffff/HZ - 1) {
-		/* Avoid arithmetic overflow */
-		lifetime = 0x7fffffff/HZ - 1;
-	}
+	lifetime = addrconf_timeout_fixup(ntohl(rinfo->lifetime), HZ);
 
 	if (rinfo->length == 3)
 		prefix = (struct in6_addr *)rinfo->prefix;
@@ -506,7 +500,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 				 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 	if (rt) {
-		if (lifetime == 0xffffffff) {
+		if (!addrconf_finite_timeout(lifetime)) {
 			rt->rt6i_flags &= ~RTF_EXPIRES;
 		} else {
 			rt->rt6i_expires = jiffies + HZ * lifetime;
-- 
cgit v1.2.3


From 91e1908f569dd96a25a3947de8771e6cc93999dd Mon Sep 17 00:00:00 2001
From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
Date: Wed, 4 Jun 2008 13:02:49 +0900
Subject: [IPV6] NETNS: Handle ancillary data in appropriate namespace.

Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/transp_v6.h  | 3 ++-
 net/ipv6/datagram.c      | 7 ++++---
 net/ipv6/ip6_flowlabel.c | 2 +-
 net/ipv6/ipv6_sockglue.c | 2 +-
 net/ipv6/raw.c           | 2 +-
 net/ipv6/udp.c           | 2 +-
 6 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include/net')

diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h
index 27394e0447d8..112934a3288d 100644
--- a/include/net/transp_v6.h
+++ b/include/net/transp_v6.h
@@ -40,7 +40,8 @@ extern int			datagram_recv_ctl(struct sock *sk,
 						  struct msghdr *msg,
 						  struct sk_buff *skb);
 
-extern int			datagram_send_ctl(struct msghdr *msg,
+extern int			datagram_send_ctl(struct net *net,
+						  struct msghdr *msg,
 						  struct flowi *fl,
 						  struct ipv6_txoptions *opt,
 						  int *hlimit, int *tclass);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index 53e3883f7666..b9c2de84a8a2 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -496,7 +496,8 @@ int datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb)
 	return 0;
 }
 
-int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
+int datagram_send_ctl(struct net *net,
+		      struct msghdr *msg, struct flowi *fl,
 		      struct ipv6_txoptions *opt,
 		      int *hlimit, int *tclass)
 {
@@ -540,7 +541,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 			addr_type = __ipv6_addr_type(&src_info->ipi6_addr);
 
 			if (fl->oif) {
-				dev = dev_get_by_index(&init_net, fl->oif);
+				dev = dev_get_by_index(net, fl->oif);
 				if (!dev)
 					return -ENODEV;
 			} else if (addr_type & IPV6_ADDR_LINKLOCAL)
@@ -548,7 +549,7 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl,
 
 			if (addr_type != IPV6_ADDR_ANY) {
 				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;
-				if (!ipv6_chk_addr(&init_net, &src_info->ipi6_addr,
+				if (!ipv6_chk_addr(net, &src_info->ipi6_addr,
 						   strict ? dev : NULL, 0))
 					err = -EINVAL;
 				else
diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c
index eb7a940310f4..37a4e777e347 100644
--- a/net/ipv6/ip6_flowlabel.c
+++ b/net/ipv6/ip6_flowlabel.c
@@ -354,7 +354,7 @@ fl_create(struct net *net, struct in6_flowlabel_req *freq, char __user *optval,
 		msg.msg_control = (void*)(fl->opt+1);
 		flowi.oif = 0;
 
-		err = datagram_send_ctl(&msg, &flowi, fl->opt, &junk, &junk);
+		err = datagram_send_ctl(net, &msg, &flowi, fl->opt, &junk, &junk);
 		if (err)
 			goto done;
 		err = -EINVAL;
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9293b9f0ac23..3eef8e5b3636 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -416,7 +416,7 @@ sticky_done:
 		msg.msg_controllen = optlen;
 		msg.msg_control = (void*)(opt+1);
 
-		retv = datagram_send_ctl(&msg, &fl, opt, &junk, &junk);
+		retv = datagram_send_ctl(net, &msg, &fl, opt, &junk, &junk);
 		if (retv)
 			goto done;
 update:
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 232e0dc45bf5..603df76e0522 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -813,7 +813,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(struct ipv6_txoptions);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 47123bf5eb0f..1b35c4722004 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -731,7 +731,7 @@ do_udp_sendmsg:
 		memset(opt, 0, sizeof(struct ipv6_txoptions));
 		opt->tot_len = sizeof(*opt);
 
-		err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
+		err = datagram_send_ctl(sock_net(sk), msg, &fl, opt, &hlimit, &tclass);
 		if (err < 0) {
 			fl6_sock_release(flowlabel);
 			return err;
-- 
cgit v1.2.3


From 36d926b94a9908937593e5669162305a071b9cc3 Mon Sep 17 00:00:00 2001
From: "Denis V. Lunev" <den@openvz.org>
Date: Wed, 4 Jun 2008 15:49:07 +0400
Subject: [IPV6]: inet_sk(sk)->cork.opt leak

IPv6 UDP sockets wth IPv4 mapped address use udp_sendmsg to send the data
actually. In this case ip_flush_pending_frames should be called instead
of ip6_flush_pending_frames.

Signed-off-by: Denis V. Lunev <den@openvz.org>
Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>
---
 include/net/udp.h | 1 +
 net/ipv4/udp.c    | 3 ++-
 net/ipv6/udp.c    | 4 +++-
 3 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'include/net')

diff --git a/include/net/udp.h b/include/net/udp.h
index 3e55a99b0ba3..ccce83707046 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -135,6 +135,7 @@ extern void	udp_err(struct sk_buff *, u32);
 
 extern int	udp_sendmsg(struct kiocb *iocb, struct sock *sk,
 			    struct msghdr *msg, size_t len);
+extern void	udp_flush_pending_frames(struct sock *sk);
 
 extern int	udp_rcv(struct sk_buff *skb);
 extern int	udp_ioctl(struct sock *sk, int cmd, unsigned long arg);
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index db1cb7c96d63..56fcda3694ba 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -420,7 +420,7 @@ void udp_err(struct sk_buff *skb, u32 info)
 /*
  * Throw away all pending data and cancel the corking. Socket is locked.
  */
-static void udp_flush_pending_frames(struct sock *sk)
+void udp_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
@@ -430,6 +430,7 @@ static void udp_flush_pending_frames(struct sock *sk)
 		ip_flush_pending_frames(sk);
 	}
 }
+EXPORT_SYMBOL(udp_flush_pending_frames);
 
 /**
  * 	udp4_hwcsum_outgoing  -  handle outgoing HW checksumming
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 1b35c4722004..dd309626ae9a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -534,7 +534,9 @@ static void udp_v6_flush_pending_frames(struct sock *sk)
 {
 	struct udp_sock *up = udp_sk(sk);
 
-	if (up->pending) {
+	if (up->pending == AF_INET)
+		udp_flush_pending_frames(sk);
+	else if (up->pending) {
 		up->len = 0;
 		up->pending = 0;
 		ip6_flush_pending_frames(sk);
-- 
cgit v1.2.3


From a6465234814efda9ed1dccdba852953f7508e827 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:38:43 -0700
Subject: sctp: Correctly implement Fast Recovery cwnd manipulations.

Correctly keep track of Fast Recovery state and do not reduce
congestion window multiple times during sucht state.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  8 +++++++-
 net/sctp/transport.c       | 44 ++++++++++++++++++++++++++++++++------------
 2 files changed, 39 insertions(+), 13 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 917d425f0542..67592072a32e 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -902,7 +902,10 @@ struct sctp_transport {
 	 *		calculation completes (i.e. the DATA chunk
 	 *		is SACK'd) clear this flag.
 	 */
-	int rto_pending;
+	__u8 rto_pending;
+
+	/* Flag to track the current fast recovery state */
+	__u8 fast_recovery;
 
 	/*
 	 * These are the congestion stats.
@@ -921,6 +924,9 @@ struct sctp_transport {
 	/* Data that has been sent, but not acknowledged. */
 	__u32 flight_size;
 
+	/* TSN marking the fast recovery exit point */
+	__u32 fast_recovery_exit;
+
 	/* Destination */
 	struct dst_entry *dst;
 	/* Source address. */
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 62082e7b7972..9647fb277221 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -79,6 +79,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer,
 	peer->rttvar = 0;
 	peer->srtt = 0;
 	peer->rto_pending = 0;
+	peer->fast_recovery = 0;
 
 	peer->last_time_heard = jiffies;
 	peer->last_time_used = jiffies;
@@ -403,11 +404,16 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	cwnd = transport->cwnd;
 	flight_size = transport->flight_size;
 
+	/* See if we need to exit Fast Recovery first */
+	if (transport->fast_recovery &&
+	    TSN_lte(transport->fast_recovery_exit, sack_ctsn))
+		transport->fast_recovery = 0;
+
 	/* The appropriate cwnd increase algorithm is performed if, and only
-	 * if the cumulative TSN has advanced and the congestion window is
+	 * if the cumulative TSN whould advanced and the congestion window is
 	 * being fully utilized.
 	 */
-	if ((transport->asoc->ctsn_ack_point >= sack_ctsn) ||
+	if (TSN_lte(sack_ctsn, transport->asoc->ctsn_ack_point) ||
 	    (flight_size < cwnd))
 		return;
 
@@ -416,17 +422,23 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport,
 	pmtu = transport->asoc->pathmtu;
 
 	if (cwnd <= ssthresh) {
-		/* RFC 2960 7.2.1, sctpimpguide-05 2.14.2 When cwnd is less
-		 * than or equal to ssthresh an SCTP endpoint MUST use the
-		 * slow start algorithm to increase cwnd only if the current
-		 * congestion window is being fully utilized and an incoming
-		 * SACK advances the Cumulative TSN Ack Point. Only when these
-		 * two conditions are met can the cwnd be increased otherwise
-		 * the cwnd MUST not be increased. If these conditions are met
-		 * then cwnd MUST be increased by at most the lesser of
-		 * 1) the total size of the previously outstanding DATA
-		 * chunk(s) acknowledged, and 2) the destination's path MTU.
+		/* RFC 4960 7.2.1
+		 * o  When cwnd is less than or equal to ssthresh, an SCTP
+		 *    endpoint MUST use the slow-start algorithm to increase
+		 *    cwnd only if the current congestion window is being fully
+		 *    utilized, an incoming SACK advances the Cumulative TSN
+		 *    Ack Point, and the data sender is not in Fast Recovery.
+		 *    Only when these three conditions are met can the cwnd be
+		 *    increased; otherwise, the cwnd MUST not be increased.
+		 *    If these conditions are met, then cwnd MUST be increased
+		 *    by, at most, the lesser of 1) the total size of the
+		 *    previously outstanding DATA chunk(s) acknowledged, and
+		 *    2) the destination's path MTU.  This upper bound protects
+		 *    against the ACK-Splitting attack outlined in [SAVAGE99].
 		 */
+		if (transport->fast_recovery)
+			return;
+
 		if (bytes_acked > pmtu)
 			cwnd += pmtu;
 		else
@@ -502,6 +514,13 @@ void sctp_transport_lower_cwnd(struct sctp_transport *transport,
 		 *      cwnd = ssthresh
 		 *      partial_bytes_acked = 0
 		 */
+		if (transport->fast_recovery)
+			return;
+
+		/* Mark Fast recovery */
+		transport->fast_recovery = 1;
+		transport->fast_recovery_exit = transport->asoc->next_tsn - 1;
+
 		transport->ssthresh = max(transport->cwnd/2,
 					  4*transport->asoc->pathmtu);
 		transport->cwnd = transport->ssthresh;
@@ -586,6 +605,7 @@ void sctp_transport_reset(struct sctp_transport *t)
 	t->flight_size = 0;
 	t->error_count = 0;
 	t->rto_pending = 0;
+	t->fast_recovery = 0;
 
 	/* Initialize the state information for SFR-CACC */
 	t->cacc.changeover_active = 0;
-- 
cgit v1.2.3


From 62aeaff5ccd96462b7077046357a6d7886175a57 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:39:11 -0700
Subject: sctp: Start T3-RTX timer when fast retransmitting lowest TSN

When we are trying to fast retransmit the lowest outstanding TSN, we
need to restart the T3-RTX timer, so that subsequent timeouts will
correctly tag all the packets necessary for retransmissions.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Tested-by: Wei Yongjun <yjwei@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h |  5 ++++-
 net/sctp/outqueue.c        | 42 +++++++++++++++++++++++++++++++-----------
 net/sctp/transport.c       |  4 ++--
 3 files changed, 37 insertions(+), 14 deletions(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 67592072a32e..714dc43c0345 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1051,7 +1051,7 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *,
 			  struct sctp_sock *);
 void sctp_transport_pmtu(struct sctp_transport *);
 void sctp_transport_free(struct sctp_transport *);
-void sctp_transport_reset_timers(struct sctp_transport *);
+void sctp_transport_reset_timers(struct sctp_transport *, int);
 void sctp_transport_hold(struct sctp_transport *);
 void sctp_transport_put(struct sctp_transport *);
 void sctp_transport_update_rto(struct sctp_transport *, __u32);
@@ -1141,6 +1141,9 @@ struct sctp_outq {
 	/* How many unackd bytes do we have in-flight?	*/
 	__u32 outstanding_bytes;
 
+	/* Are we doing fast-rtx on this queue */
+	char fast_rtx;
+
 	/* Corked? */
 	char cork;
 
diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c
index 59edfd25a19c..5d3c441e84d3 100644
--- a/net/sctp/outqueue.c
+++ b/net/sctp/outqueue.c
@@ -208,6 +208,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q)
 	INIT_LIST_HEAD(&q->sacked);
 	INIT_LIST_HEAD(&q->abandoned);
 
+	q->fast_rtx = 0;
 	q->outstanding_bytes = 0;
 	q->empty = 1;
 	q->cork  = 0;
@@ -500,6 +501,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport,
 	case SCTP_RTXR_FAST_RTX:
 		SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS);
 		sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX);
+		q->fast_rtx = 1;
 		break;
 	case SCTP_RTXR_PMTUD:
 		SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS);
@@ -543,10 +545,13 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 	sctp_xmit_t status;
 	struct sctp_chunk *chunk, *chunk1;
 	struct sctp_association *asoc;
+	int fast_rtx;
 	int error = 0;
+	int timer = 0;
 
 	asoc = q->asoc;
 	lqueue = &q->retransmit;
+	fast_rtx = q->fast_rtx;
 
 	/* RFC 2960 6.3.3 Handle T3-rtx Expiration
 	 *
@@ -587,13 +592,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		switch (status) {
 		case SCTP_XMIT_PMTU_FULL:
 			/* Send this packet.  */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* If we are retransmitting, we should only
 			 * send a single packet.
 			 */
-			if (rtx_timeout) {
+			if (rtx_timeout || fast_rtx) {
 				list_add(lchunk, lqueue);
 				lchunk = NULL;
 			}
@@ -603,8 +607,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_RWND_FULL:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA as there is no more room
 			 * at the receiver.
@@ -615,8 +618,7 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 
 		case SCTP_XMIT_NAGLE_DELAY:
 			/* Send this packet. */
-			if ((error = sctp_packet_transmit(pkt)) == 0)
-				*start_timer = 1;
+			error = sctp_packet_transmit(pkt);
 
 			/* Stop sending DATA because of nagle delay. */
 			list_add(lchunk, lqueue);
@@ -635,7 +637,14 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			if (chunk->fast_retransmit > 0)
 				chunk->fast_retransmit = -1;
 
-			*start_timer = 1;
+			/* Force start T3-rtx timer when fast retransmitting
+			 * the earliest outstanding TSN
+			 */
+			if (!timer && fast_rtx &&
+			    ntohl(chunk->subh.data_hdr->tsn) ==
+					     asoc->ctsn_ack_point + 1)
+				timer = 2;
+
 			q->empty = 0;
 
 			/* Retrieve a new chunk to bundle. */
@@ -643,12 +652,16 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 			break;
 		}
 
+		/* Set the timer if there were no errors */
+		if (!error && !timer)
+			timer = 1;
+
 		/* If we are here due to a retransmit timeout or a fast
 		 * retransmit and if there are any chunks left in the retransmit
 		 * queue that could not fit in the PMTU sized packet, they need
 		 * to be marked as ineligible for a subsequent fast retransmit.
 		 */
-		if (rtx_timeout && !lchunk) {
+		if (rtx_timeout && fast_rtx) {
 			list_for_each_entry(chunk1, lqueue, transmitted_list) {
 				if (chunk1->fast_retransmit > 0)
 					chunk1->fast_retransmit = -1;
@@ -656,6 +669,12 @@ static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt,
 		}
 	}
 
+	*start_timer = timer;
+
+	/* Clear fast retransmit hint */
+	if (fast_rtx)
+		q->fast_rtx = 0;
+
 	return error;
 }
 
@@ -862,7 +881,8 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 						    rtx_timeout, &start_timer);
 
 			if (start_timer)
-				sctp_transport_reset_timers(transport);
+				sctp_transport_reset_timers(transport,
+							    start_timer-1);
 
 			/* This can happen on COOKIE-ECHO resend.  Only
 			 * one chunk can get bundled with a COOKIE-ECHO.
@@ -977,7 +997,7 @@ int sctp_outq_flush(struct sctp_outq *q, int rtx_timeout)
 			list_add_tail(&chunk->transmitted_list,
 				      &transport->transmitted);
 
-			sctp_transport_reset_timers(transport);
+			sctp_transport_reset_timers(transport, start_timer-1);
 
 			q->empty = 0;
 
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index 9647fb277221..3f34f61221ec 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -191,7 +191,7 @@ static void sctp_transport_destroy(struct sctp_transport *transport)
 /* Start T3_rtx timer if it is not already running and update the heartbeat
  * timer.  This routine is called every time a DATA chunk is sent.
  */
-void sctp_transport_reset_timers(struct sctp_transport *transport)
+void sctp_transport_reset_timers(struct sctp_transport *transport, int force)
 {
 	/* RFC 2960 6.3.2 Retransmission Timer Rules
 	 *
@@ -201,7 +201,7 @@ void sctp_transport_reset_timers(struct sctp_transport *transport)
 	 * address.
 	 */
 
-	if (!timer_pending(&transport->T3_rtx_timer))
+	if (force || !timer_pending(&transport->T3_rtx_timer))
 		if (!mod_timer(&transport->T3_rtx_timer,
 			       jiffies + transport->rto))
 			sctp_transport_hold(transport);
-- 
cgit v1.2.3


From b9031d9d87b24e24cd32ea15b5f4220a1e8da909 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vladislav.yasevich@hp.com>
Date: Wed, 4 Jun 2008 12:40:15 -0700
Subject: sctp: Fix ECN markings for IPv6

Commit e9df2e8fd8fbc95c57dbd1d33dada66c4627b44c ("[IPV6]: Use
appropriate sock tclass setting for routing lookup.") also changed the
way that ECN capable transports mark this capability in IPv6.  As a
result, SCTP was not marking ECN capablity because the traffic class
was never set.  This patch brings back the markings for IPv6 traffic.

Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/structs.h | 1 +
 net/sctp/ipv6.c            | 6 ++++++
 net/sctp/output.c          | 2 +-
 net/sctp/protocol.c        | 6 ++++++
 4 files changed, 14 insertions(+), 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h
index 714dc43c0345..7f25195f9855 100644
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -588,6 +588,7 @@ struct sctp_af {
 	int		(*is_ce)	(const struct sk_buff *sk);
 	void		(*seq_dump_addr)(struct seq_file *seq,
 					 union sctp_addr *addr);
+	void		(*ecn_capable)(struct sock *sk);
 	__u16		net_header_len;
 	int		sockaddr_len;
 	sa_family_t	sa_family;
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index e4aac3266fcd..a2f4d4d51593 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -727,6 +727,11 @@ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, NIP6_FMT " ", NIP6(addr->v6.sin6_addr));
 }
 
+static void sctp_v6_ecn_capable(struct sock *sk)
+{
+	inet6_sk(sk)->tclass |= INET_ECN_ECT_0;
+}
+
 /* Initialize a PF_INET6 socket msg_name. */
 static void sctp_inet6_msgname(char *msgname, int *addr_len)
 {
@@ -997,6 +1002,7 @@ static struct sctp_af sctp_af_inet6 = {
 	.skb_iif	   = sctp_v6_skb_iif,
 	.is_ce		   = sctp_v6_is_ce,
 	.seq_dump_addr	   = sctp_v6_seq_dump_addr,
+	.ecn_capable	   = sctp_v6_ecn_capable,
 	.net_header_len	   = sizeof(struct ipv6hdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in6),
 #ifdef CONFIG_COMPAT
diff --git a/net/sctp/output.c b/net/sctp/output.c
index cf4f9fb6819d..6d45bae93b46 100644
--- a/net/sctp/output.c
+++ b/net/sctp/output.c
@@ -548,7 +548,7 @@ int sctp_packet_transmit(struct sctp_packet *packet)
 	 * Note: The works for IPv6 layer checks this bit too later
 	 * in transmission.  See IP6_ECN_flow_xmit().
 	 */
-	INET_ECN_xmit(nskb->sk);
+	(*tp->af_specific->ecn_capable)(nskb->sk);
 
 	/* Set up the IP options.  */
 	/* BUG: not implemented
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 56bdaf7fc425..b435a193c5df 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -617,6 +617,11 @@ static void sctp_v4_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
 	seq_printf(seq, "%d.%d.%d.%d ", NIPQUAD(addr->v4.sin_addr));
 }
 
+static void sctp_v4_ecn_capable(struct sock *sk)
+{
+	INET_ECN_xmit(sk);
+}
+
 /* Event handler for inet address addition/deletion events.
  * The sctp_local_addr_list needs to be protocted by a spin lock since
  * multiple notifiers (say IPv4 and IPv6) may be running at the same
@@ -935,6 +940,7 @@ static struct sctp_af sctp_af_inet = {
 	.skb_iif	   = sctp_v4_skb_iif,
 	.is_ce		   = sctp_v4_is_ce,
 	.seq_dump_addr	   = sctp_v4_seq_dump_addr,
+	.ecn_capable	   = sctp_v4_ecn_capable,
 	.net_header_len	   = sizeof(struct iphdr),
 	.sockaddr_len	   = sizeof(struct sockaddr_in),
 #ifdef CONFIG_COMPAT
-- 
cgit v1.2.3


From 45d465bc237ab1e1ebb4c65b9b318830dafb7509 Mon Sep 17 00:00:00 2001
From: Rami Rosen <ramirose@gmail.com>
Date: Tue, 10 Jun 2008 12:37:42 -0700
Subject: ipv4: Remove unused declaration from include/net/tcp.h.

- The tcp_unhash() method in /include/net/tcp.h is no more needed, as the
unhash method in tcp_prot structure is now inet_unhash (instead of
tcp_unhash in the
past); see tcp_prot structure in net/ipv4/tcp_ipv4.c.

- So, this patch removes tcp_unhash() declaration from include/net/tcp.h

Signed-off-by: Rami Rosen <ramirose@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 -
 1 file changed, 1 deletion(-)

(limited to 'include/net')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 633147cb6bbc..d448310c82c1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -433,7 +433,6 @@ extern struct sk_buff *		tcp_make_synack(struct sock *sk,
 
 extern int			tcp_disconnect(struct sock *sk, int flags);
 
-extern void			tcp_unhash(struct sock *sk);
 
 /* From syncookies.c */
 extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS];
-- 
cgit v1.2.3


From ce4a7d0d48bbaed78ccbb0bafb9229651a40303a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 10 Jun 2008 12:39:35 -0700
Subject: inet{6}_request_sock: Init ->opt and ->pktopts in the constructor

Wei Yongjun noticed that we may call reqsk_free on request sock objects where
the opt fields may not be initialized, fix it by introducing inet_reqsk_alloc
where we initialize ->opt to NULL and set ->pktopts to NULL in
inet6_reqsk_alloc.

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ipv6.h    |  4 +++-
 include/net/inet_sock.h | 10 ++++++++++
 net/dccp/ipv4.c         |  3 +--
 net/dccp/ipv6.c         |  1 -
 net/ipv4/syncookies.c   |  3 +--
 net/ipv4/tcp_ipv4.c     |  2 +-
 net/ipv6/syncookies.c   |  1 -
 net/ipv6/tcp_ipv6.c     |  1 -
 8 files changed, 16 insertions(+), 9 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 10b666b61add..cde056e08181 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -396,8 +396,10 @@ static inline struct request_sock *inet6_reqsk_alloc(struct request_sock_ops *op
 {
 	struct request_sock *req = reqsk_alloc(ops);
 
-	if (req != NULL)
+	if (req != NULL) {
 		inet_rsk(req)->inet6_rsk_offset = inet6_rsk_offset(req);
+		inet6_rsk(req)->pktopts = NULL;
+	}
 
 	return req;
 }
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index a42cd63d241a..9fabe5b38912 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -197,4 +197,14 @@ static inline int inet_iif(const struct sk_buff *skb)
 	return skb->rtable->rt_iif;
 }
 
+static inline struct request_sock *inet_reqsk_alloc(struct request_sock_ops *ops)
+{
+	struct request_sock *req = reqsk_alloc(ops);
+
+	if (req != NULL)
+		inet_rsk(req)->opt = NULL;
+
+	return req;
+}
+
 #endif	/* _INET_SOCK_H */
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index c22a3780c14e..37d27bcb361f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -589,7 +589,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&dccp_request_sock_ops);
+	req = inet_reqsk_alloc(&dccp_request_sock_ops);
 	if (req == NULL)
 		goto drop;
 
@@ -605,7 +605,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq->loc_addr = ip_hdr(skb)->daddr;
 	ireq->rmt_addr = ip_hdr(skb)->saddr;
-	ireq->opt	= NULL;
 
 	/*
 	 * Step 3: Process LISTEN state
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 9b1129bb7ece..f7fe2a572d7b 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -421,7 +421,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	ireq6 = inet6_rsk(req);
 	ipv6_addr_copy(&ireq6->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&ireq6->loc_addr, &ipv6_hdr(skb)->daddr);
-	ireq6->pktopts	= NULL;
 
 	if (ipv6_opt_accepted(sk, skb) ||
 	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
index 73ba98921d64..d182a2a26291 100644
--- a/net/ipv4/syncookies.c
+++ b/net/ipv4/syncookies.c
@@ -285,7 +285,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 		cookie_check_timestamp(&tcp_opt);
 
 	ret = NULL;
-	req = reqsk_alloc(&tcp_request_sock_ops); /* for safety */
+	req = inet_reqsk_alloc(&tcp_request_sock_ops); /* for safety */
 	if (!req)
 		goto out;
 
@@ -301,7 +301,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb,
 	ireq->rmt_port		= th->source;
 	ireq->loc_addr		= ip_hdr(skb)->daddr;
 	ireq->rmt_addr		= ip_hdr(skb)->saddr;
-	ireq->opt		= NULL;
 	ireq->snd_wscale	= tcp_opt.snd_wscale;
 	ireq->rcv_wscale	= tcp_opt.rcv_wscale;
 	ireq->sack_ok		= tcp_opt.sack_ok;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index cd601a866c2f..4f8485c67d1a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1285,7 +1285,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
 		goto drop;
 
-	req = reqsk_alloc(&tcp_request_sock_ops);
+	req = inet_reqsk_alloc(&tcp_request_sock_ops);
 	if (!req)
 		goto drop;
 
diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c
index 938ce4ecde55..3ecc1157994e 100644
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -198,7 +198,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)
 	ireq = inet_rsk(req);
 	ireq6 = inet6_rsk(req);
 	treq = tcp_rsk(req);
-	ireq6->pktopts = NULL;
 
 	if (security_inet_conn_request(sk, skb, req)) {
 		reqsk_free(req);
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 715965f0fac0..cb46749d4c32 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1299,7 +1299,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	treq = inet6_rsk(req);
 	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
 	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
-	treq->pktopts = NULL;
 	if (!want_cookie)
 		TCP_ECN_create_request(req, tcp_hdr(skb));
 
-- 
cgit v1.2.3


From ec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Thu, 12 Jun 2008 16:31:35 -0700
Subject: tcp: Revert 'process defer accept as established' changes.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87
("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and
the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38
("tcp: Fix slab corruption with ipv6 and tcp6fuzz").

This change causes several problems, first reported by Ingo Molnar
as a distcc-over-loopback regression where connections were getting
stuck.

Ilpo Järvinen first spotted the locking problems.  The new function
added by this code, tcp_defer_accept_check(), only has the
child socket locked, yet it is modifying state of the parent
listening socket.

Fixing that is non-trivial at best, because we can't simply just grab
the parent listening socket lock at this point, because it would
create an ABBA deadlock.  The normal ordering is parent listening
socket --> child socket, but this code path would require the
reverse lock ordering.

Next is a problem noticed by Vitaliy Gusev, he noted:

----------------------------------------
>--- a/net/ipv4/tcp_timer.c
>+++ b/net/ipv4/tcp_timer.c
>@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data)
> 		goto death;
> 	}
>
>+	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
>+		tcp_send_active_reset(sk, GFP_ATOMIC);
>+		goto death;

Here socket sk is not attached to listening socket's request queue. tcp_done()
will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should
release this sk) as socket is not DEAD. Therefore socket sk will be lost for
freeing.
----------------------------------------

Finally, Alexey Kuznetsov argues that there might not even be any
real value or advantage to these new semantics even if we fix all
of the bugs:

----------------------------------------
Hiding from accept() sockets with only out-of-order data only
is the only thing which is impossible with old approach. Is this really
so valuable? My opinion: no, this is nothing but a new loophole
to consume memory without control.
----------------------------------------

So revert this thing for now.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h             |  7 -------
 include/net/request_sock.h      |  4 ++--
 include/net/tcp.h               |  1 -
 net/ipv4/inet_connection_sock.c | 11 +++++++---
 net/ipv4/tcp.c                  | 18 ++++++++++-------
 net/ipv4/tcp_input.c            | 45 -----------------------------------------
 net/ipv4/tcp_ipv4.c             |  8 --------
 net/ipv4/tcp_minisocks.c        | 32 +++++++++++------------------
 net/ipv4/tcp_timer.c            |  5 -----
 9 files changed, 33 insertions(+), 98 deletions(-)

(limited to 'include/net')

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 18e62e3d406f..b31b6b74aa28 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -239,11 +239,6 @@ static inline struct tcp_request_sock *tcp_rsk(const struct request_sock *req)
 	return (struct tcp_request_sock *)req;
 }
 
-struct tcp_deferred_accept_info {
-	struct sock *listen_sk;
-	struct request_sock *request;
-};
-
 struct tcp_sock {
 	/* inet_connection_sock has to be the first member of tcp_sock */
 	struct inet_connection_sock	inet_conn;
@@ -379,8 +374,6 @@ struct tcp_sock {
 	unsigned int		keepalive_intvl;  /* time interval between keep alive probes */
 	int			linger2;
 
-	struct tcp_deferred_accept_info defer_tcp_accept;
-
 	unsigned long last_synq_overflow; 
 
 	u32	tso_deferred;
diff --git a/include/net/request_sock.h b/include/net/request_sock.h
index b220b5f624de..0c96e7bed5db 100644
--- a/include/net/request_sock.h
+++ b/include/net/request_sock.h
@@ -115,8 +115,8 @@ struct request_sock_queue {
 	struct request_sock	*rskq_accept_head;
 	struct request_sock	*rskq_accept_tail;
 	rwlock_t		syn_wait_lock;
-	u16			rskq_defer_accept;
-	/* 2 bytes hole, try to pack */
+	u8			rskq_defer_accept;
+	/* 3 bytes hole, try to pack */
 	struct listen_sock	*listen_opt;
 };
 
diff --git a/include/net/tcp.h b/include/net/tcp.h
index d448310c82c1..cf54034019d9 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -139,7 +139,6 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define MAX_TCP_KEEPINTVL	32767
 #define MAX_TCP_KEEPCNT		127
 #define MAX_TCP_SYNCNT		127
-#define MAX_TCP_ACCEPT_DEFERRED 65535
 
 #define TCP_SYNQ_INTERVAL	(HZ/5)	/* Period of SYNACK timer */
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 828ea211ff21..045e799d3e1d 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -419,7 +419,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 	struct inet_connection_sock *icsk = inet_csk(parent);
 	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
 	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
 	unsigned long now = jiffies;
 	struct request_sock **reqp, *req;
 	int i, budget;
@@ -455,6 +456,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		}
 	}
 
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
 	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
 	i = lopt->clock_hand;
 
@@ -462,8 +466,9 @@ void inet_csk_reqsk_queue_prune(struct sock *parent,
 		reqp=&lopt->syn_table[i];
 		while ((req = *reqp) != NULL) {
 			if (time_after_eq(now, req->expires)) {
-				if (req->retrans < thresh &&
-				    !req->rsk_ops->rtx_syn_ack(parent, req)) {
+				if ((req->retrans < (inet_rsk(req)->acked ? max_retries : thresh)) &&
+				    (inet_rsk(req)->acked ||
+				     !req->rsk_ops->rtx_syn_ack(parent, req))) {
 					unsigned long timeo;
 
 					if (req->retrans++ == 0)
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index ab66683b8043..fc54a48fde1e 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2112,12 +2112,15 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_DEFER_ACCEPT:
-		if (val < 0) {
-			err = -EINVAL;
-		} else {
-			if (val > MAX_TCP_ACCEPT_DEFERRED)
-				val = MAX_TCP_ACCEPT_DEFERRED;
-			icsk->icsk_accept_queue.rskq_defer_accept = val;
+		icsk->icsk_accept_queue.rskq_defer_accept = 0;
+		if (val > 0) {
+			/* Translate value in seconds to number of
+			 * retransmits */
+			while (icsk->icsk_accept_queue.rskq_defer_accept < 32 &&
+			       val > ((TCP_TIMEOUT_INIT / HZ) <<
+				       icsk->icsk_accept_queue.rskq_defer_accept))
+				icsk->icsk_accept_queue.rskq_defer_accept++;
+			icsk->icsk_accept_queue.rskq_defer_accept++;
 		}
 		break;
 
@@ -2299,7 +2302,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 			val = (val ? : sysctl_tcp_fin_timeout) / HZ;
 		break;
 	case TCP_DEFER_ACCEPT:
-		val = icsk->icsk_accept_queue.rskq_defer_accept;
+		val = !icsk->icsk_accept_queue.rskq_defer_accept ? 0 :
+			((TCP_TIMEOUT_INIT / HZ) << (icsk->icsk_accept_queue.rskq_defer_accept - 1));
 		break;
 	case TCP_WINDOW_CLAMP:
 		val = tp->window_clamp;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index eba873e9b560..cad73b7dfef0 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4541,49 +4541,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, struct tcphdr *th)
 	}
 }
 
-static int tcp_defer_accept_check(struct sock *sk)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-
-	if (tp->defer_tcp_accept.request) {
-		int queued_data =  tp->rcv_nxt - tp->copied_seq;
-		int hasfin =  !skb_queue_empty(&sk->sk_receive_queue) ?
-			tcp_hdr((struct sk_buff *)
-				sk->sk_receive_queue.prev)->fin : 0;
-
-		if (queued_data && hasfin)
-			queued_data--;
-
-		if (queued_data &&
-		    tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
-			if (sock_flag(sk, SOCK_KEEPOPEN)) {
-				inet_csk_reset_keepalive_timer(sk,
-							       keepalive_time_when(tp));
-			} else {
-				inet_csk_delete_keepalive_timer(sk);
-			}
-
-			inet_csk_reqsk_queue_add(
-				tp->defer_tcp_accept.listen_sk,
-				tp->defer_tcp_accept.request,
-				sk);
-
-			tp->defer_tcp_accept.listen_sk->sk_data_ready(
-				tp->defer_tcp_accept.listen_sk, 0);
-
-			sock_put(tp->defer_tcp_accept.listen_sk);
-			sock_put(sk);
-			tp->defer_tcp_accept.listen_sk = NULL;
-			tp->defer_tcp_accept.request = NULL;
-		} else if (hasfin ||
-			   tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
-			tcp_reset(sk);
-			return -1;
-		}
-	}
-	return 0;
-}
-
 static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4944,8 +4901,6 @@ step5:
 
 	tcp_data_snd_check(sk);
 	tcp_ack_snd_check(sk);
-
-	tcp_defer_accept_check(sk);
 	return 0;
 
 csum_error:
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4f8485c67d1a..97a230026e13 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1918,14 +1918,6 @@ int tcp_v4_destroy_sock(struct sock *sk)
 		sk->sk_sndmsg_page = NULL;
 	}
 
-	if (tp->defer_tcp_accept.request) {
-		reqsk_free(tp->defer_tcp_accept.request);
-		sock_put(tp->defer_tcp_accept.listen_sk);
-		sock_put(sk);
-		tp->defer_tcp_accept.listen_sk = NULL;
-		tp->defer_tcp_accept.request = NULL;
-	}
-
 	atomic_dec(&tcp_sockets_allocated);
 
 	return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 	   does sequence test, SYN is truncated, and thus we consider
 	   it a bare ACK.
 
-	   Both ends (listening sockets) accept the new incoming
-	   connection and try to talk to each other. 8-)
+	   If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
+	   bare ACK.  Otherwise, we create an established connection.  Both
+	   ends (listening sockets) accept the new incoming connection and try
+	   to talk to each other. 8-)
 
 	   Note: This case is both harmless, and rare.  Possibility is about the
 	   same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		if (!(flg & TCP_FLAG_ACK))
 			return NULL;
 
+		/* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
+		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
+		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
+			inet_rsk(req)->acked = 1;
+			return NULL;
+		}
+
 		/* OK, ACK is valid, create big socket and
 		 * feed this segment to it. It will repeat all
 		 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
 		inet_csk_reqsk_queue_unlink(sk, req, prev);
 		inet_csk_reqsk_queue_removed(sk, req);
 
-		if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
-		    TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
-
-			/* the accept queue handling is done is est recv slow
-			 * path so lets make sure to start there
-			 */
-			tcp_sk(child)->pred_flags = 0;
-			sock_hold(sk);
-			sock_hold(child);
-			tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
-			tcp_sk(child)->defer_tcp_accept.request = req;
-
-			inet_csk_reset_keepalive_timer(child,
-						       inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
-		} else {
-			inet_csk_reqsk_queue_add(sk, req, child);
-		}
-
+		inet_csk_reqsk_queue_add(sk, req, child);
 		return child;
 
 	listen_overflow:
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 4de68cf5f2aa..63ed9d6830e7 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -489,11 +489,6 @@ static void tcp_keepalive_timer (unsigned long data)
 		goto death;
 	}
 
-	if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) {
-		tcp_send_active_reset(sk, GFP_ATOMIC);
-		goto death;
-	}
-
 	if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
 		goto out;
 
-- 
cgit v1.2.3